blob: 235e22ec705d98893ab0796dea4f66f41eb61c2e [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000020#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000023#include "llvm/IR/Argument.h"
24#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025
26using namespace llvm;
27
28R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
29 AMDGPUTargetLowering(TM),
30 TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
Tom Stellard75aadc22012-12-11 21:25:42 +000031 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
32 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
33 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
34 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
35 computeRegisterProperties();
36
37 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
38 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
39 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
40 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
41
42 setOperationAction(ISD::ADD, MVT::v4i32, Expand);
43 setOperationAction(ISD::AND, MVT::v4i32, Expand);
Tom Stellarda8b03512012-12-21 16:33:24 +000044 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
45 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
Tom Stellard3deddc52013-05-10 02:09:34 +000046 setOperationAction(ISD::MUL, MVT::v2i32, Expand);
47 setOperationAction(ISD::MUL, MVT::v4i32, Expand);
Tom Stellard4489b852013-05-03 17:21:31 +000048 setOperationAction(ISD::OR, MVT::v4i32, Expand);
49 setOperationAction(ISD::OR, MVT::v2i32, Expand);
Tom Stellarda8b03512012-12-21 16:33:24 +000050 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard4489b852013-05-03 17:21:31 +000051 setOperationAction(ISD::SHL, MVT::v4i32, Expand);
52 setOperationAction(ISD::SHL, MVT::v2i32, Expand);
53 setOperationAction(ISD::SRL, MVT::v4i32, Expand);
54 setOperationAction(ISD::SRL, MVT::v2i32, Expand);
Tom Stellard7fb39632013-05-10 02:09:29 +000055 setOperationAction(ISD::SRA, MVT::v4i32, Expand);
56 setOperationAction(ISD::SRA, MVT::v2i32, Expand);
Tom Stellard3a7c34c2013-05-10 02:09:39 +000057 setOperationAction(ISD::SUB, MVT::v4i32, Expand);
58 setOperationAction(ISD::SUB, MVT::v2i32, Expand);
Tom Stellarda8b03512012-12-21 16:33:24 +000059 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000060 setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
61 setOperationAction(ISD::UREM, MVT::v4i32, Expand);
62 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard4489b852013-05-03 17:21:31 +000063 setOperationAction(ISD::XOR, MVT::v4i32, Expand);
64 setOperationAction(ISD::XOR, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
75 setOperationAction(ISD::ROTL, MVT::i32, Custom);
76
77 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
78 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
79
Tom Stellarde8f9f282013-03-08 15:37:05 +000080 setOperationAction(ISD::SETCC, MVT::i32, Expand);
81 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000082 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
83
84 setOperationAction(ISD::SELECT, MVT::i32, Custom);
85 setOperationAction(ISD::SELECT, MVT::f32, Custom);
86
Tom Stellarda99c6ae2013-05-10 02:09:24 +000087 setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
88 setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
89
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000090 // Legalize loads and stores to the private address space.
91 setOperationAction(ISD::LOAD, MVT::i32, Custom);
92 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
93 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
94 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
95 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
96 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
98 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000099 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000100 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000101 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
102
Tom Stellard365366f2013-01-23 02:09:06 +0000103 setOperationAction(ISD::LOAD, MVT::i32, Custom);
104 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000105 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
106
Tom Stellard75aadc22012-12-11 21:25:42 +0000107 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000108 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000109 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000110 setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard75aadc22012-12-11 21:25:42 +0000111
Tom Stellardb852af52013-03-08 15:37:03 +0000112 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000113 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setSchedulingPreference(Sched::VLIW);
115}
116
117MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
118 MachineInstr * MI, MachineBasicBlock * BB) const {
119 MachineFunction * MF = BB->getParent();
120 MachineRegisterInfo &MRI = MF->getRegInfo();
121 MachineBasicBlock::iterator I = *MI;
122
123 switch (MI->getOpcode()) {
124 default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
Tom Stellard75aadc22012-12-11 21:25:42 +0000125 case AMDGPU::CLAMP_R600: {
126 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
127 AMDGPU::MOV,
128 MI->getOperand(0).getReg(),
129 MI->getOperand(1).getReg());
130 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
131 break;
132 }
133
134 case AMDGPU::FABS_R600: {
135 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
136 AMDGPU::MOV,
137 MI->getOperand(0).getReg(),
138 MI->getOperand(1).getReg());
139 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
140 break;
141 }
142
143 case AMDGPU::FNEG_R600: {
144 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
145 AMDGPU::MOV,
146 MI->getOperand(0).getReg(),
147 MI->getOperand(1).getReg());
148 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
149 break;
150 }
151
Tom Stellard75aadc22012-12-11 21:25:42 +0000152 case AMDGPU::MASK_WRITE: {
153 unsigned maskedRegister = MI->getOperand(0).getReg();
154 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
155 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
156 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
157 break;
158 }
159
160 case AMDGPU::MOV_IMM_F32:
161 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
162 MI->getOperand(1).getFPImm()->getValueAPF()
163 .bitcastToAPInt().getZExtValue());
164 break;
165 case AMDGPU::MOV_IMM_I32:
166 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
167 MI->getOperand(1).getImm());
168 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000169 case AMDGPU::CONST_COPY: {
170 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
171 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
172 TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
173 MI->getOperand(1).getImm());
174 break;
175 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000176
177 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
178 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
179 unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
180
181 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
182 .addOperand(MI->getOperand(0))
183 .addOperand(MI->getOperand(1))
184 .addImm(EOP); // Set End of program bit
185 break;
186 }
187
Tom Stellard75aadc22012-12-11 21:25:42 +0000188 case AMDGPU::TXD: {
189 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
190 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000191 MachineOperand &RID = MI->getOperand(4);
192 MachineOperand &SID = MI->getOperand(5);
193 unsigned TextureId = MI->getOperand(6).getImm();
194 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
195 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000196
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000197 switch (TextureId) {
198 case 5: // Rect
199 CTX = CTY = 0;
200 break;
201 case 6: // Shadow1D
202 SrcW = SrcZ;
203 break;
204 case 7: // Shadow2D
205 SrcW = SrcZ;
206 break;
207 case 8: // ShadowRect
208 CTX = CTY = 0;
209 SrcW = SrcZ;
210 break;
211 case 9: // 1DArray
212 SrcZ = SrcY;
213 CTZ = 0;
214 break;
215 case 10: // 2DArray
216 CTZ = 0;
217 break;
218 case 11: // Shadow1DArray
219 SrcZ = SrcY;
220 CTZ = 0;
221 break;
222 case 12: // Shadow2DArray
223 CTZ = 0;
224 break;
225 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000226 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
227 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000228 .addImm(SrcX)
229 .addImm(SrcY)
230 .addImm(SrcZ)
231 .addImm(SrcW)
232 .addImm(0)
233 .addImm(0)
234 .addImm(0)
235 .addImm(0)
236 .addImm(1)
237 .addImm(2)
238 .addImm(3)
239 .addOperand(RID)
240 .addOperand(SID)
241 .addImm(CTX)
242 .addImm(CTY)
243 .addImm(CTZ)
244 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000245 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
246 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000247 .addImm(SrcX)
248 .addImm(SrcY)
249 .addImm(SrcZ)
250 .addImm(SrcW)
251 .addImm(0)
252 .addImm(0)
253 .addImm(0)
254 .addImm(0)
255 .addImm(1)
256 .addImm(2)
257 .addImm(3)
258 .addOperand(RID)
259 .addOperand(SID)
260 .addImm(CTX)
261 .addImm(CTY)
262 .addImm(CTZ)
263 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000264 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
265 .addOperand(MI->getOperand(0))
266 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000267 .addImm(SrcX)
268 .addImm(SrcY)
269 .addImm(SrcZ)
270 .addImm(SrcW)
271 .addImm(0)
272 .addImm(0)
273 .addImm(0)
274 .addImm(0)
275 .addImm(1)
276 .addImm(2)
277 .addImm(3)
278 .addOperand(RID)
279 .addOperand(SID)
280 .addImm(CTX)
281 .addImm(CTY)
282 .addImm(CTZ)
283 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000284 .addReg(T0, RegState::Implicit)
285 .addReg(T1, RegState::Implicit);
286 break;
287 }
288
289 case AMDGPU::TXD_SHADOW: {
290 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
291 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000292 MachineOperand &RID = MI->getOperand(4);
293 MachineOperand &SID = MI->getOperand(5);
294 unsigned TextureId = MI->getOperand(6).getImm();
295 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
296 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
297
298 switch (TextureId) {
299 case 5: // Rect
300 CTX = CTY = 0;
301 break;
302 case 6: // Shadow1D
303 SrcW = SrcZ;
304 break;
305 case 7: // Shadow2D
306 SrcW = SrcZ;
307 break;
308 case 8: // ShadowRect
309 CTX = CTY = 0;
310 SrcW = SrcZ;
311 break;
312 case 9: // 1DArray
313 SrcZ = SrcY;
314 CTZ = 0;
315 break;
316 case 10: // 2DArray
317 CTZ = 0;
318 break;
319 case 11: // Shadow1DArray
320 SrcZ = SrcY;
321 CTZ = 0;
322 break;
323 case 12: // Shadow2DArray
324 CTZ = 0;
325 break;
326 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000327
328 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
329 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000330 .addImm(SrcX)
331 .addImm(SrcY)
332 .addImm(SrcZ)
333 .addImm(SrcW)
334 .addImm(0)
335 .addImm(0)
336 .addImm(0)
337 .addImm(0)
338 .addImm(1)
339 .addImm(2)
340 .addImm(3)
341 .addOperand(RID)
342 .addOperand(SID)
343 .addImm(CTX)
344 .addImm(CTY)
345 .addImm(CTZ)
346 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000347 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
348 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000349 .addImm(SrcX)
350 .addImm(SrcY)
351 .addImm(SrcZ)
352 .addImm(SrcW)
353 .addImm(0)
354 .addImm(0)
355 .addImm(0)
356 .addImm(0)
357 .addImm(1)
358 .addImm(2)
359 .addImm(3)
360 .addOperand(RID)
361 .addOperand(SID)
362 .addImm(CTX)
363 .addImm(CTY)
364 .addImm(CTZ)
365 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000366 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
367 .addOperand(MI->getOperand(0))
368 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000369 .addImm(SrcX)
370 .addImm(SrcY)
371 .addImm(SrcZ)
372 .addImm(SrcW)
373 .addImm(0)
374 .addImm(0)
375 .addImm(0)
376 .addImm(0)
377 .addImm(1)
378 .addImm(2)
379 .addImm(3)
380 .addOperand(RID)
381 .addOperand(SID)
382 .addImm(CTX)
383 .addImm(CTY)
384 .addImm(CTZ)
385 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000386 .addReg(T0, RegState::Implicit)
387 .addReg(T1, RegState::Implicit);
388 break;
389 }
390
391 case AMDGPU::BRANCH:
392 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000393 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000394 break;
395
396 case AMDGPU::BRANCH_COND_f32: {
397 MachineInstr *NewMI =
398 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
399 AMDGPU::PREDICATE_BIT)
400 .addOperand(MI->getOperand(1))
401 .addImm(OPCODE_IS_NOT_ZERO)
402 .addImm(0); // Flags
403 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000404 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000405 .addOperand(MI->getOperand(0))
406 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
407 break;
408 }
409
410 case AMDGPU::BRANCH_COND_i32: {
411 MachineInstr *NewMI =
412 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
413 AMDGPU::PREDICATE_BIT)
414 .addOperand(MI->getOperand(1))
415 .addImm(OPCODE_IS_NOT_ZERO_INT)
416 .addImm(0); // Flags
417 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000418 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000419 .addOperand(MI->getOperand(0))
420 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
421 break;
422 }
423
Tom Stellard75aadc22012-12-11 21:25:42 +0000424 case AMDGPU::EG_ExportSwz:
425 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000426 // Instruction is left unmodified if its not the last one of its type
427 bool isLastInstructionOfItsType = true;
428 unsigned InstExportType = MI->getOperand(1).getImm();
429 for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
430 EndBlock = BB->end(); NextExportInst != EndBlock;
431 NextExportInst = llvm::next(NextExportInst)) {
432 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
433 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
434 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
435 .getImm();
436 if (CurrentInstExportType == InstExportType) {
437 isLastInstructionOfItsType = false;
438 break;
439 }
440 }
441 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000442 bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000443 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000444 return BB;
445 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
446 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
447 .addOperand(MI->getOperand(0))
448 .addOperand(MI->getOperand(1))
449 .addOperand(MI->getOperand(2))
450 .addOperand(MI->getOperand(3))
451 .addOperand(MI->getOperand(4))
452 .addOperand(MI->getOperand(5))
453 .addOperand(MI->getOperand(6))
454 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000455 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000456 break;
457 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000458 case AMDGPU::RETURN: {
459 // RETURN instructions must have the live-out registers as implicit uses,
460 // otherwise they appear dead.
461 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
462 MachineInstrBuilder MIB(*MF, MI);
463 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
464 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
465 return BB;
466 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000467 }
468
469 MI->eraseFromParent();
470 return BB;
471}
472
473//===----------------------------------------------------------------------===//
474// Custom DAG Lowering Operations
475//===----------------------------------------------------------------------===//
476
477using namespace llvm::Intrinsic;
478using namespace llvm::AMDGPUIntrinsic;
479
/// Custom-lower the SelectionDAG nodes this target registered as Custom in
/// the constructor. Unhandled opcodes fall through to the AMDGPU base class;
/// intrinsic cases that are not matched break out and return an empty
/// SDValue so the generic legalizer keeps the original node.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Writing a shader output: copy the value into the fixed output
      // register and record it as live-out so RETURN keeps it alive.
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs arrive pre-loaded in fixed T registers; model the
      // read as a live-in copy.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative barycentric index: constant (flat) interpolation; load
        // the whole vector and extract the channel for this slot.
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // Interpolated inputs are computed two channels at a time (XY or ZW)
      // from the i/j barycentric coordinate registers.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));

      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      // All texture intrinsics share one TEXTURE_FETCH node; the first
      // argument selects the operation.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // Op, coord, source swizzle (XYZW), coord operands, offsets,
      // destination swizzle, resource/sampler ids, coord types.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Scalarize the two v4f32 operands into the interleaved
      // (a.x, b.x, a.y, b.y, ...) layout DOT4 expects.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Grid/group geometry is passed as implicit kernel parameters laid out
    // as consecutive dwords; see LowerImplicitParameter.
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group ids live in T1 and thread ids in T0, pre-loaded by hardware.
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
700
701void R600TargetLowering::ReplaceNodeResults(SDNode *N,
702 SmallVectorImpl<SDValue> &Results,
703 SelectionDAG &DAG) const {
704 switch (N->getOpcode()) {
705 default: return;
706 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000707 return;
708 case ISD::LOAD: {
709 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
710 Results.push_back(SDValue(Node, 0));
711 Results.push_back(SDValue(Node, 1));
712 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
713 // function
714 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
715 return;
716 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000717 case ISD::STORE:
718 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
719 Results.push_back(SDValue(Node, 0));
720 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000721 }
722}
723
724SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
725 return DAG.getNode(
726 ISD::SETCC,
727 Op.getDebugLoc(),
728 MVT::i1,
729 Op, DAG.getConstantFP(0.0f, MVT::f32),
730 DAG.getCondCode(ISD::SETNE)
731 );
732}
733
Tom Stellard75aadc22012-12-11 21:25:42 +0000734SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
735 DebugLoc DL,
736 unsigned DwordOffset) const {
737 unsigned ByteOffset = DwordOffset * 4;
738 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
739 AMDGPUAS::PARAM_I_ADDRESS);
740
741 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
742 assert(isInt<16>(ByteOffset));
743
744 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
745 DAG.getConstant(ByteOffset, MVT::i32), // PTR
746 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
747 false, false, false, 0);
748}
749
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000750SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
751
752 MachineFunction &MF = DAG.getMachineFunction();
753 const AMDGPUFrameLowering *TFL =
754 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
755
756 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
757 assert(FIN);
758
759 unsigned FrameIndex = FIN->getIndex();
760 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
761 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
762}
763
Tom Stellard75aadc22012-12-11 21:25:42 +0000764SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
765 DebugLoc DL = Op.getDebugLoc();
766 EVT VT = Op.getValueType();
767
768 return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
769 Op.getOperand(0),
770 Op.getOperand(0),
771 DAG.getNode(ISD::SUB, DL, VT,
772 DAG.getConstant(32, MVT::i32),
773 Op.getOperand(1)));
774}
775
776bool R600TargetLowering::isZero(SDValue Op) const {
777 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
778 return Cst->isNullValue();
779 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
780 return CstFP->isZero();
781 } else {
782 return false;
783 }
784}
785
/// \brief Lower ISD::SELECT_CC into forms the R600 SET* / CND* instructions
/// can match natively, falling back to a pair of SELECT_CC nodes otherwise.
///
/// Op operands are (LHS, RHS, True, False, CondCode).
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1, 0, cc_any
  //

  // Move hardware True/False values to the correct operand: if True/False are
  // swapped relative to what SET* produces, invert the condition instead.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0, f32, f32, cc_any
  // select_cc i32, 0, i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    // CND* compares its first operand against zero, so if the zero was on the
    // left we must swap the operands and the condition accordingly.
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    // Conditions testing "not-equal" or "less-than" zero are rewritten into
    // their inverses (with True/False swapped) so they match CND* semantics.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
905
906SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
907 return DAG.getNode(ISD::SELECT_CC,
908 Op.getDebugLoc(),
909 Op.getValueType(),
910 Op.getOperand(0),
911 DAG.getConstant(0, MVT::i32),
912 Op.getOperand(1),
913 Op.getOperand(2),
914 DAG.getCondCode(ISD::SETNE));
915}
916
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000917/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
918/// convert these pointers to a register index. Each register holds
919/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
920/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
921/// for indirect addressing.
922SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
923 unsigned StackWidth,
924 SelectionDAG &DAG) const {
925 unsigned SRLPad;
926 switch(StackWidth) {
927 case 1:
928 SRLPad = 2;
929 break;
930 case 2:
931 SRLPad = 3;
932 break;
933 case 4:
934 SRLPad = 4;
935 break;
936 default: llvm_unreachable("Invalid stack width");
937 }
938
939 return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
940 DAG.getConstant(SRLPad, MVT::i32));
941}
942
943void R600TargetLowering::getStackAddress(unsigned StackWidth,
944 unsigned ElemIdx,
945 unsigned &Channel,
946 unsigned &PtrIncr) const {
947 switch (StackWidth) {
948 default:
949 case 1:
950 Channel = 0;
951 if (ElemIdx > 0) {
952 PtrIncr = 1;
953 } else {
954 PtrIncr = 0;
955 }
956 break;
957 case 2:
958 Channel = ElemIdx % 2;
959 if (ElemIdx == 2) {
960 PtrIncr = 1;
961 } else {
962 PtrIncr = 0;
963 }
964 break;
965 case 4:
966 Channel = ElemIdx;
967 PtrIncr = 0;
968 break;
969 }
970}
971
Tom Stellard75aadc22012-12-11 21:25:42 +0000972SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
973 DebugLoc DL = Op.getDebugLoc();
974 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
975 SDValue Chain = Op.getOperand(0);
976 SDValue Value = Op.getOperand(1);
977 SDValue Ptr = Op.getOperand(2);
978
979 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
980 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
981 // Convert pointer from byte address to dword address.
982 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
983 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
984 Ptr, DAG.getConstant(2, MVT::i32)));
985
986 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
987 assert(!"Truncated and indexed stores not supported yet");
988 } else {
989 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
990 }
991 return Chain;
992 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000993
994 EVT ValueVT = Value.getValueType();
995
996 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
997 return SDValue();
998 }
999
1000 // Lowering for indirect addressing
1001
1002 const MachineFunction &MF = DAG.getMachineFunction();
1003 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1004 getTargetMachine().getFrameLowering());
1005 unsigned StackWidth = TFL->getStackWidth(MF);
1006
1007 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1008
1009 if (ValueVT.isVector()) {
1010 unsigned NumElemVT = ValueVT.getVectorNumElements();
1011 EVT ElemVT = ValueVT.getVectorElementType();
1012 SDValue Stores[4];
1013
1014 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1015 "vector width in load");
1016
1017 for (unsigned i = 0; i < NumElemVT; ++i) {
1018 unsigned Channel, PtrIncr;
1019 getStackAddress(StackWidth, i, Channel, PtrIncr);
1020 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1021 DAG.getConstant(PtrIncr, MVT::i32));
1022 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1023 Value, DAG.getConstant(i, MVT::i32));
1024
1025 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1026 Chain, Elem, Ptr,
1027 DAG.getTargetConstant(Channel, MVT::i32));
1028 }
1029 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1030 } else {
1031 if (ValueVT == MVT::i8) {
1032 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1033 }
1034 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
1035 DAG.getTargetConstant(0, MVT::i32)); // Channel
1036 }
1037
1038 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001039}
1040
Tom Stellard365366f2013-01-23 02:09:06 +00001041// return (512 + (kc_bank << 12)
1042static int
1043ConstantAddressBlock(unsigned AddressSpace) {
1044 switch (AddressSpace) {
1045 case AMDGPUAS::CONSTANT_BUFFER_0:
1046 return 512;
1047 case AMDGPUAS::CONSTANT_BUFFER_1:
1048 return 512 + 4096;
1049 case AMDGPUAS::CONSTANT_BUFFER_2:
1050 return 512 + 4096 * 2;
1051 case AMDGPUAS::CONSTANT_BUFFER_3:
1052 return 512 + 4096 * 3;
1053 case AMDGPUAS::CONSTANT_BUFFER_4:
1054 return 512 + 4096 * 4;
1055 case AMDGPUAS::CONSTANT_BUFFER_5:
1056 return 512 + 4096 * 5;
1057 case AMDGPUAS::CONSTANT_BUFFER_6:
1058 return 512 + 4096 * 6;
1059 case AMDGPUAS::CONSTANT_BUFFER_7:
1060 return 512 + 4096 * 7;
1061 case AMDGPUAS::CONSTANT_BUFFER_8:
1062 return 512 + 4096 * 8;
1063 case AMDGPUAS::CONSTANT_BUFFER_9:
1064 return 512 + 4096 * 9;
1065 case AMDGPUAS::CONSTANT_BUFFER_10:
1066 return 512 + 4096 * 10;
1067 case AMDGPUAS::CONSTANT_BUFFER_11:
1068 return 512 + 4096 * 11;
1069 case AMDGPUAS::CONSTANT_BUFFER_12:
1070 return 512 + 4096 * 12;
1071 case AMDGPUAS::CONSTANT_BUFFER_13:
1072 return 512 + 4096 * 13;
1073 case AMDGPUAS::CONSTANT_BUFFER_14:
1074 return 512 + 4096 * 14;
1075 case AMDGPUAS::CONSTANT_BUFFER_15:
1076 return 512 + 4096 * 15;
1077 default:
1078 return -1;
1079 }
1080}
1081
1082SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1083{
1084 EVT VT = Op.getValueType();
1085 DebugLoc DL = Op.getDebugLoc();
1086 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1087 SDValue Chain = Op.getOperand(0);
1088 SDValue Ptr = Op.getOperand(1);
1089 SDValue LoweredLoad;
1090
1091 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1092 if (ConstantBlock > -1) {
1093 SDValue Result;
1094 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001095 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1096 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001097 SDValue Slots[4];
1098 for (unsigned i = 0; i < 4; i++) {
1099 // We want Const position encoded with the following formula :
1100 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1101 // const_index is Ptr computed by llvm using an alignment of 16.
1102 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1103 // then div by 4 at the ISel step
1104 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1105 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1106 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1107 }
1108 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
1109 } else {
1110 // non constant ptr cant be folded, keeps it as a v4f32 load
1111 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001112 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001113 DAG.getConstant(LoadNode->getAddressSpace() -
1114 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001115 );
1116 }
1117
1118 if (!VT.isVector()) {
1119 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1120 DAG.getConstant(0, MVT::i32));
1121 }
1122
1123 SDValue MergedValues[2] = {
1124 Result,
1125 Chain
1126 };
1127 return DAG.getMergeValues(MergedValues, 2, DL);
1128 }
1129
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001130 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1131 return SDValue();
1132 }
1133
1134 // Lowering for indirect addressing
1135 const MachineFunction &MF = DAG.getMachineFunction();
1136 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1137 getTargetMachine().getFrameLowering());
1138 unsigned StackWidth = TFL->getStackWidth(MF);
1139
1140 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1141
1142 if (VT.isVector()) {
1143 unsigned NumElemVT = VT.getVectorNumElements();
1144 EVT ElemVT = VT.getVectorElementType();
1145 SDValue Loads[4];
1146
1147 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1148 "vector width in load");
1149
1150 for (unsigned i = 0; i < NumElemVT; ++i) {
1151 unsigned Channel, PtrIncr;
1152 getStackAddress(StackWidth, i, Channel, PtrIncr);
1153 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1154 DAG.getConstant(PtrIncr, MVT::i32));
1155 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1156 Chain, Ptr,
1157 DAG.getTargetConstant(Channel, MVT::i32),
1158 Op.getOperand(2));
1159 }
1160 for (unsigned i = NumElemVT; i < 4; ++i) {
1161 Loads[i] = DAG.getUNDEF(ElemVT);
1162 }
1163 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1164 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1165 } else {
1166 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1167 Chain, Ptr,
1168 DAG.getTargetConstant(0, MVT::i32), // Channel
1169 Op.getOperand(2));
1170 }
1171
1172 SDValue Ops[2];
1173 Ops[0] = LoweredLoad;
1174 Ops[1] = Chain;
1175
1176 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001177}
Tom Stellard75aadc22012-12-11 21:25:42 +00001178
Tom Stellard75aadc22012-12-11 21:25:42 +00001179/// XXX Only kernel functions are supported, so we can assume for now that
1180/// every function is a kernel function, but in the future we should use
1181/// separate calling conventions for kernel and non-kernel functions.
1182SDValue R600TargetLowering::LowerFormalArguments(
1183 SDValue Chain,
1184 CallingConv::ID CallConv,
1185 bool isVarArg,
1186 const SmallVectorImpl<ISD::InputArg> &Ins,
1187 DebugLoc DL, SelectionDAG &DAG,
1188 SmallVectorImpl<SDValue> &InVals) const {
1189 unsigned ParamOffsetBytes = 36;
1190 Function::const_arg_iterator FuncArg =
1191 DAG.getMachineFunction().getFunction()->arg_begin();
1192 for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
1193 EVT VT = Ins[i].VT;
1194 Type *ArgType = FuncArg->getType();
1195 unsigned ArgSizeInBits = ArgType->isPointerTy() ?
1196 32 : ArgType->getPrimitiveSizeInBits();
1197 unsigned ArgBytes = ArgSizeInBits >> 3;
1198 EVT ArgVT;
1199 if (ArgSizeInBits < VT.getSizeInBits()) {
1200 assert(!ArgType->isFloatTy() &&
1201 "Extending floating point arguments not supported yet");
1202 ArgVT = MVT::getIntegerVT(ArgSizeInBits);
1203 } else {
1204 ArgVT = VT;
1205 }
1206 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
1207 AMDGPUAS::PARAM_I_ADDRESS);
1208 SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
1209 DAG.getConstant(ParamOffsetBytes, MVT::i32),
Tom Stellard8d469ed2013-02-19 15:22:44 +00001210 MachinePointerInfo(UndefValue::get(PtrTy)),
Tom Stellard75aadc22012-12-11 21:25:42 +00001211 ArgVT, false, false, ArgBytes);
1212 InVals.push_back(Arg);
1213 ParamOffsetBytes += ArgBytes;
1214 }
1215 return Chain;
1216}
1217
1218EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
1219 if (!VT.isVector()) return MVT::i32;
1220 return VT.changeVectorElementTypeToInteger();
1221}
1222
1223//===----------------------------------------------------------------------===//
1224// Custom DAG Optimizations
1225//===----------------------------------------------------------------------===//
1226
1227SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1228 DAGCombinerInfo &DCI) const {
1229 SelectionDAG &DAG = DCI.DAG;
1230
1231 switch (N->getOpcode()) {
1232 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1233 case ISD::FP_ROUND: {
1234 SDValue Arg = N->getOperand(0);
1235 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1236 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
1237 Arg.getOperand(0));
1238 }
1239 break;
1240 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001241
1242 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1243 // (i32 select_cc f32, f32, -1, 0 cc)
1244 //
1245 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1246 // this to one of the SET*_DX10 instructions.
1247 case ISD::FP_TO_SINT: {
1248 SDValue FNeg = N->getOperand(0);
1249 if (FNeg.getOpcode() != ISD::FNEG) {
1250 return SDValue();
1251 }
1252 SDValue SelectCC = FNeg.getOperand(0);
1253 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1254 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1255 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1256 !isHWTrueValue(SelectCC.getOperand(2)) ||
1257 !isHWFalseValue(SelectCC.getOperand(3))) {
1258 return SDValue();
1259 }
1260
1261 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
1262 SelectCC.getOperand(0), // LHS
1263 SelectCC.getOperand(1), // RHS
1264 DAG.getConstant(-1, MVT::i32), // True
1265 DAG.getConstant(0, MVT::i32), // Flase
1266 SelectCC.getOperand(4)); // CC
1267
1268 break;
1269 }
Tom Stellard365366f2013-01-23 02:09:06 +00001270 // Extract_vec (Build_vector) generated by custom lowering
1271 // also needs to be customly combined
1272 case ISD::EXTRACT_VECTOR_ELT: {
1273 SDValue Arg = N->getOperand(0);
1274 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1275 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1276 unsigned Element = Const->getZExtValue();
1277 return Arg->getOperand(Element);
1278 }
1279 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001280 if (Arg.getOpcode() == ISD::BITCAST &&
1281 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1282 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1283 unsigned Element = Const->getZExtValue();
1284 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
1285 Arg->getOperand(0).getOperand(Element));
1286 }
1287 }
Tom Stellard365366f2013-01-23 02:09:06 +00001288 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001289
1290 case ISD::SELECT_CC: {
1291 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1292 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001293 //
1294 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1295 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001296 SDValue LHS = N->getOperand(0);
1297 if (LHS.getOpcode() != ISD::SELECT_CC) {
1298 return SDValue();
1299 }
1300
1301 SDValue RHS = N->getOperand(1);
1302 SDValue True = N->getOperand(2);
1303 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001304 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001305
1306 if (LHS.getOperand(2).getNode() != True.getNode() ||
1307 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001308 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001309 return SDValue();
1310 }
1311
Tom Stellard5e524892013-03-08 15:37:11 +00001312 switch (NCC) {
1313 default: return SDValue();
1314 case ISD::SETNE: return LHS;
1315 case ISD::SETEQ: {
1316 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1317 LHSCC = ISD::getSetCCInverse(LHSCC,
1318 LHS.getOperand(0).getValueType().isInteger());
1319 return DAG.getSelectCC(N->getDebugLoc(),
1320 LHS.getOperand(0),
1321 LHS.getOperand(1),
1322 LHS.getOperand(2),
1323 LHS.getOperand(3),
1324 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001325 }
Tom Stellard5e524892013-03-08 15:37:11 +00001326 }
1327 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001328 case AMDGPUISD::EXPORT: {
1329 SDValue Arg = N->getOperand(1);
1330 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1331 break;
1332 SDValue NewBldVec[4] = {
1333 DAG.getUNDEF(MVT::f32),
1334 DAG.getUNDEF(MVT::f32),
1335 DAG.getUNDEF(MVT::f32),
1336 DAG.getUNDEF(MVT::f32)
1337 };
1338 SDValue NewArgs[8] = {
1339 N->getOperand(0), // Chain
1340 SDValue(),
1341 N->getOperand(2), // ArrayBase
1342 N->getOperand(3), // Type
1343 N->getOperand(4), // SWZ_X
1344 N->getOperand(5), // SWZ_Y
1345 N->getOperand(6), // SWZ_Z
1346 N->getOperand(7) // SWZ_W
1347 };
1348 for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
1349 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
1350 if (C->isZero()) {
1351 NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
1352 } else if (C->isExactlyValue(1.0)) {
1353 NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_0
1354 } else {
1355 NewBldVec[i] = Arg.getOperand(i);
1356 }
1357 } else {
1358 NewBldVec[i] = Arg.getOperand(i);
1359 }
1360 }
1361 DebugLoc DL = N->getDebugLoc();
1362 NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
1363 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001364 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001365 }
1366 return SDValue();
1367}