//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);

  setOperationAction(ISD::ADD, MVT::v4i32, Expand);
  setOperationAction(ISD::AND, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  setOperationAction(ISD::MUL, MVT::v2i32, Expand);
  setOperationAction(ISD::MUL, MVT::v4i32, Expand);
  setOperationAction(ISD::OR, MVT::v4i32, Expand);
  setOperationAction(ISD::OR, MVT::v2i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::SHL, MVT::v4i32, Expand);
  setOperationAction(ISD::SHL, MVT::v2i32, Expand);
  setOperationAction(ISD::SRL, MVT::v4i32, Expand);
  setOperationAction(ISD::SRL, MVT::v2i32, Expand);
  setOperationAction(ISD::SRA, MVT::v4i32, Expand);
  setOperationAction(ISD::SRA, MVT::v2i32, Expand);
  setOperationAction(ISD::SUB, MVT::v4i32, Expand);
  setOperationAction(ISD::SUB, MVT::v2i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::XOR, MVT::v4i32, Expand);
  setOperationAction(ISD::XOR, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO_INT)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace the Chain value inside
    // CustomWidenLowerNode.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());

  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
  assert(FIN);

  unsigned FrameIndex = FIN->getIndex();
  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1, 0, cc_any
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0, f32, f32, cc_any
  // select_cc i32, 0, i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
      SDLoc(Op),
      Op.getValueType(),
      Op.getOperand(0),
      DAG.getConstant(0, MVT::i32),
      Op.getOperand(1),
      Op.getOperand(2),
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
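///
/// As a worked example of the shift amounts chosen below: with \p StackWidth
/// == 1 only one 4-byte channel of each register is used, so a byte address
/// maps to a register index by a right shift of 2 (divide by 4); with a width
/// of 2 the shift is 3 (divide by 8); and with a width of 4 all 16 bytes of a
/// register are used, so the shift is 4 (divide by 16).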
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}

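// Computes which channel of a stack register holds element \p ElemIdx, and
// whether the register index must be advanced first (\p PtrIncr), given how
// many of the four channels (\p StackWidth) are in use.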
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in store");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// Returns 512 + (kc_bank << 12).
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the const position encoded with the following formula:
        //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr, computed by LLVM using an alignment of 16.
        // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and
        // then divide by 4 at the ISel step.
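        // For example, a load from CONSTANT_BUFFER_1 (ConstantBlock ==
        // 512 + 4096) encodes channel i at Ptr + 4 * i + (512 + 4096) * 16
        // before ISel divides the whole expression by 4.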
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
    } else {
      // A non-constant ptr can't be folded; keep it as a v4f32 load.
1101 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001102 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001103 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001104 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001105 );
1106 }
1107
1108 if (!VT.isVector()) {
1109 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1110 DAG.getConstant(0, MVT::i32));
1111 }
1112
1113 SDValue MergedValues[2] = {
1114 Result,
1115 Chain
1116 };
1117 return DAG.getMergeValues(MergedValues, 2, DL);
1118 }
1119
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001120 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1121 return SDValue();
1122 }
1123
1124 // Lowering for indirect addressing
1125 const MachineFunction &MF = DAG.getMachineFunction();
1126 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1127 getTargetMachine().getFrameLowering());
1128 unsigned StackWidth = TFL->getStackWidth(MF);
1129
1130 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1131
1132 if (VT.isVector()) {
1133 unsigned NumElemVT = VT.getVectorNumElements();
1134 EVT ElemVT = VT.getVectorElementType();
1135 SDValue Loads[4];
1136
1137 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1138 "vector width in load");
1139
1140 for (unsigned i = 0; i < NumElemVT; ++i) {
1141 unsigned Channel, PtrIncr;
1142 getStackAddress(StackWidth, i, Channel, PtrIncr);
1143 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1144 DAG.getConstant(PtrIncr, MVT::i32));
1145 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1146 Chain, Ptr,
1147 DAG.getTargetConstant(Channel, MVT::i32),
1148 Op.getOperand(2));
1149 }
1150 for (unsigned i = NumElemVT; i < 4; ++i) {
1151 Loads[i] = DAG.getUNDEF(ElemVT);
1152 }
1153 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1154 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1155 } else {
1156 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1157 Chain, Ptr,
1158 DAG.getTargetConstant(0, MVT::i32), // Channel
1159 Op.getOperand(2));
1160 }
1161
1162 SDValue Ops[2];
1163 Ops[0] = LoweredLoad;
1164 Ops[1] = Chain;
1165
1166 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001167}
Tom Stellard75aadc22012-12-11 21:25:42 +00001168
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
      DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

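// Folds BUILD_VECTOR operands that are the constants 0.0f or 1.0f into the
// hardware SEL_0 / SEL_1 swizzle selects, and folds operands that duplicate
// an earlier lane into a swizzle of that lane. Folded lanes become UNDEF, and
// RemapSwizzle records the old -> new swizzle values.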
SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
      VectorEntry.getOperand(0),
      VectorEntry.getOperand(1),
      VectorEntry.getOperand(2),
      VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}

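// Moves BUILD_VECTOR operands that come from an EXTRACT_VECTOR_ELT back to
// the lane they were extracted from, when that lane has not already been
// claimed, and records the resulting lane permutation in RemapSwizzle.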
SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                         DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
      VectorEntry.getOperand(0),
      VectorEntry.getOperand(1),
      VectorEntry.getOperand(2),
      VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (!isUnmovable[Idx]) {
        // Swap i and Idx
        std::swap(NewBldVec[Idx], NewBldVec[i]);
        RemapSwizzle[Idx] = i;
        RemapSwizzle[i] = Idx;
      }
      isUnmovable[Idx] = true;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}

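// Rewrites the swizzle selects Swz[0..3] so that they address the vector
// produced by CompactSwizzlableVector() and ReorganizeVector(): each pass
// returns an old -> new remap that is applied to the swizzle constants in
// turn.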
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
                                            SDValue Swz[4],
                                            SelectionDAG &DAG) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  return BuildVector;
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC

    break;
  }
  // An EXTRACT_VECTOR_ELT of a BUILD_VECTOR generated by custom lowering
  // also needs to be custom combined.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
  }

  case ISD::SELECT_CC: {
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //   selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //   selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      return DAG.getSelectCC(SDLoc(N),
                             LHS.getOperand(0),
                             LHS.getOperand(1),
                             LHS.getOperand(2),
                             LHS.getOperand(3),
                             LHSCC);
    }
    }
  }
  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
                       NewArgs, 19);
  }
  }
  return SDValue();
}