blob: 812df8359d38d0098907d13cc77f7add99ca9428 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000020#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000023#include "llvm/IR/Argument.h"
24#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025
26using namespace llvm;
27
28R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Bill Wendling37e9adb2013-06-07 20:28:55 +000029 AMDGPUTargetLowering(TM) {
Tom Stellard75aadc22012-12-11 21:25:42 +000030 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
31 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
32 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
34 computeRegisterProperties();
35
36 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
37 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
38 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
39 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
40
41 setOperationAction(ISD::ADD, MVT::v4i32, Expand);
42 setOperationAction(ISD::AND, MVT::v4i32, Expand);
Tom Stellarda8b03512012-12-21 16:33:24 +000043 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
44 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
Tom Stellard3deddc52013-05-10 02:09:34 +000045 setOperationAction(ISD::MUL, MVT::v2i32, Expand);
46 setOperationAction(ISD::MUL, MVT::v4i32, Expand);
Tom Stellard4489b852013-05-03 17:21:31 +000047 setOperationAction(ISD::OR, MVT::v4i32, Expand);
48 setOperationAction(ISD::OR, MVT::v2i32, Expand);
Tom Stellarda8b03512012-12-21 16:33:24 +000049 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard4489b852013-05-03 17:21:31 +000050 setOperationAction(ISD::SHL, MVT::v4i32, Expand);
51 setOperationAction(ISD::SHL, MVT::v2i32, Expand);
52 setOperationAction(ISD::SRL, MVT::v4i32, Expand);
53 setOperationAction(ISD::SRL, MVT::v2i32, Expand);
Tom Stellard7fb39632013-05-10 02:09:29 +000054 setOperationAction(ISD::SRA, MVT::v4i32, Expand);
55 setOperationAction(ISD::SRA, MVT::v2i32, Expand);
Tom Stellard3a7c34c2013-05-10 02:09:39 +000056 setOperationAction(ISD::SUB, MVT::v4i32, Expand);
57 setOperationAction(ISD::SUB, MVT::v2i32, Expand);
Tom Stellarda8b03512012-12-21 16:33:24 +000058 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000059 setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
60 setOperationAction(ISD::UREM, MVT::v4i32, Expand);
61 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard4489b852013-05-03 17:21:31 +000062 setOperationAction(ISD::XOR, MVT::v4i32, Expand);
63 setOperationAction(ISD::XOR, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000064
Tom Stellard492ebea2013-03-08 15:37:07 +000065 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
66 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000067
68 setOperationAction(ISD::FSUB, MVT::f32, Expand);
69
70 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
71 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000073
Tom Stellard75aadc22012-12-11 21:25:42 +000074 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
75 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
76
Tom Stellarde8f9f282013-03-08 15:37:05 +000077 setOperationAction(ISD::SETCC, MVT::i32, Expand);
78 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000079 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
80
81 setOperationAction(ISD::SELECT, MVT::i32, Custom);
82 setOperationAction(ISD::SELECT, MVT::f32, Custom);
83
Tom Stellarda99c6ae2013-05-10 02:09:24 +000084 setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
85 setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
86
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000087 // Legalize loads and stores to the private address space.
88 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000089 setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000090 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
91 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
92 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
93 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
94 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
95 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000096 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000097 setOperationAction(ISD::STORE, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000098 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
99
Tom Stellard365366f2013-01-23 02:09:06 +0000100 setOperationAction(ISD::LOAD, MVT::i32, Custom);
101 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000102 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
103
Tom Stellard75aadc22012-12-11 21:25:42 +0000104 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000105 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000106 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000107 setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard75aadc22012-12-11 21:25:42 +0000108
Tom Stellardb852af52013-03-08 15:37:03 +0000109 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000110 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +0000111 setSchedulingPreference(Sched::VLIW);
112}
113
// Expand pseudo instructions that were marked usesCustomInserter into real
// machine instructions after instruction selection.  The pseudo MI is replaced
// in-place in BB; unless a case returns early, MI is erased at the end and the
// (possibly unchanged) block is returned.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // Insertion point: the pseudo itself, so replacements appear where it was.
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // CLAMP/FABS/FNEG pseudos all become a plain MOV with the corresponding
  // R600 output/source modifier flag set on it.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  // MASK_WRITE does not emit anything itself; it marks the instruction that
  // defines its operand with the MASK flag (suppressing the register write).
  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  // Materialize immediates: the f32 variant stores the float's raw bit
  // pattern as an integer immediate.
  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  // CONST_COPY becomes a MOV from the ALU constant file; the constant-buffer
  // selector goes into the SRC0_SEL operand.
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // Set the "end of program" bit when this store is immediately followed by
    // the function's RETURN.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  // TXD (texture sample with explicit derivatives): load the horizontal and
  // vertical gradients into two temporary 128-bit registers, then issue the
  // gradient-using sample.  Operands: 0 = dst, 1 = coord, 2/3 = ddy/ddx,
  // 4 = resource id, 5 = sampler id, 6 = texture target kind.
  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Src* pick which coordinate channel feeds each TEX slot; CT* appear to
    // be per-channel coordinate-type flags (cleared for unnormalized rect
    // coordinates) -- NOTE(review): confirm against the TEX encoding docs.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust channel routing / coordinate types per texture target: shadow
    // targets move the comparison value into W, array targets disable
    // normalization of the layer index, rect targets use unnormalized X/Y.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    // The three TEX instructions below share the same fixed operand layout;
    // the literal 0,0,0,0,1,2,3 immediates fill the remaining offset/select
    // slots identically in each.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    // The implicit uses of T0/T1 keep the gradient loads alive and ordered
    // before the sample.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  // Same expansion as TXD but ends in the depth-compare sample opcode
  // (TEX_SAMPLE_C_G) for shadow lookups.
  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Channel routing per texture target (see the TXD case above).
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  // Unconditional branch: plain JUMP to the target block.
  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0));
      break;

  // Conditional branches: evaluate the condition into PREDICATE_BIT with a
  // PRED_X (pushing the predicate stack via MO_FLAG_PUSH), then emit a
  // predicated JUMP_COND that kills the predicate bit.
  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
471
472//===----------------------------------------------------------------------===//
473// Custom DAG Lowering Operations
474//===----------------------------------------------------------------------===//
475
// Entry point for all operations marked Custom in the constructor.  Dispatches
// to the per-opcode helpers and hand-lowers the target intrinsics; returns an
// empty SDValue when an intrinsic case only needs the default expansion.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    // Shader output: copy the value into its fixed T-register and record the
    // register so RETURN can mark it as a live-out implicit use.
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    // Export with the identity swizzle (x,y,z,w).
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    // Shader input: read the value from its fixed T-register, marking the
    // register live-in to the function.
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      // slot selects the parameter and channel; ijb selects the barycentric
      // (i,j) register pair, or is negative for a non-interpolated (constant)
      // load.
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Constant parameter: load the whole vec4 and extract the channel's
        // subregister.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // Interpolated parameter: the barycentric i/j values arrive in a fixed
      // pair of live-in registers (2*ijb and 2*ijb+1).
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // INTERP_PAIR_* produce two channels at once; pick XY or ZW from the
      // channel index, then select which of the pair's two results to return.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    // All texture intrinsics funnel into one TEXTURE_FETCH node whose first
    // operand encodes the operation kind.
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // Operand layout: op kind, coord, two identity swizzle groups
      // (0,1,2,3), then the intrinsic's resource/sampler/coordinate-type
      // operands passed through unchanged.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    // dp4: extract the four channel pairs from both vec4 operands and feed
    // them to the DOT4 node as eight scalar operands.
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Grid/group dimensions are implicit kernel parameters loaded from fixed
    // dword slots in the PARAM_I address space.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup ids live in T1.{x,y,z}, workitem ids in T0.{x,y,z}.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
706
707void R600TargetLowering::ReplaceNodeResults(SDNode *N,
708 SmallVectorImpl<SDValue> &Results,
709 SelectionDAG &DAG) const {
710 switch (N->getOpcode()) {
711 default: return;
712 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000713 return;
714 case ISD::LOAD: {
715 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
716 Results.push_back(SDValue(Node, 0));
717 Results.push_back(SDValue(Node, 1));
718 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
719 // function
720 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
721 return;
722 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000723 case ISD::STORE:
724 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
725 Results.push_back(SDValue(Node, 0));
726 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000727 }
728}
729
730SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
731 return DAG.getNode(
732 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000733 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000734 MVT::i1,
735 Op, DAG.getConstantFP(0.0f, MVT::f32),
736 DAG.getCondCode(ISD::SETNE)
737 );
738}
739
Tom Stellard75aadc22012-12-11 21:25:42 +0000740SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000741 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000742 unsigned DwordOffset) const {
743 unsigned ByteOffset = DwordOffset * 4;
744 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
745 AMDGPUAS::PARAM_I_ADDRESS);
746
747 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
748 assert(isInt<16>(ByteOffset));
749
750 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
751 DAG.getConstant(ByteOffset, MVT::i32), // PTR
752 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
753 false, false, false, 0);
754}
755
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000756SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
757
758 MachineFunction &MF = DAG.getMachineFunction();
759 const AMDGPUFrameLowering *TFL =
760 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
761
762 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
763 assert(FIN);
764
765 unsigned FrameIndex = FIN->getIndex();
766 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
767 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
768}
769
Tom Stellard75aadc22012-12-11 21:25:42 +0000770bool R600TargetLowering::isZero(SDValue Op) const {
771 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
772 return Cst->isNullValue();
773 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
774 return CstFP->isZero();
775 } else {
776 return false;
777 }
778}
779
/// Custom lowering for SELECT_CC.  Tries, in order: the native SET*
/// instructions (compare producing -1/0 or 1.0f/0.0f), the native CND*
/// instructions (conditional move against zero), a min/max pattern, and
/// finally expands into two SELECT_CC nodes that the earlier patterns can
/// match.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1, 0, cc_any
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // True/False are reversed; invert the condition code instead of the
    // operands so the pattern above still matches.
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0, f32, f32, cc_any
  // select_cc i32, 0, i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      // Put the zero on the RHS by swapping the sense of the comparison.
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    // "Less than"-style conditions are not directly matchable; invert the
    // condition and swap the True/False operands instead.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
899
900SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
901 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000902 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000903 Op.getValueType(),
904 Op.getOperand(0),
905 DAG.getConstant(0, MVT::i32),
906 Op.getOperand(1),
907 Op.getOperand(2),
908 DAG.getCondCode(ISD::SETNE));
909}
910
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000911/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
912/// convert these pointers to a register index. Each register holds
913/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
914/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
915/// for indirect addressing.
916SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
917 unsigned StackWidth,
918 SelectionDAG &DAG) const {
919 unsigned SRLPad;
920 switch(StackWidth) {
921 case 1:
922 SRLPad = 2;
923 break;
924 case 2:
925 SRLPad = 3;
926 break;
927 case 4:
928 SRLPad = 4;
929 break;
930 default: llvm_unreachable("Invalid stack width");
931 }
932
Andrew Trickef9de2a2013-05-25 02:42:55 +0000933 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000934 DAG.getConstant(SRLPad, MVT::i32));
935}
936
937void R600TargetLowering::getStackAddress(unsigned StackWidth,
938 unsigned ElemIdx,
939 unsigned &Channel,
940 unsigned &PtrIncr) const {
941 switch (StackWidth) {
942 default:
943 case 1:
944 Channel = 0;
945 if (ElemIdx > 0) {
946 PtrIncr = 1;
947 } else {
948 PtrIncr = 0;
949 }
950 break;
951 case 2:
952 Channel = ElemIdx % 2;
953 if (ElemIdx == 2) {
954 PtrIncr = 1;
955 } else {
956 PtrIncr = 0;
957 }
958 break;
959 case 4:
960 Channel = ElemIdx;
961 PtrIncr = 0;
962 break;
963 }
964}
965
/// Custom lowering for STORE.  Global stores have their pointer rewritten
/// from a byte address to a dword address (wrapped in DWORDADDR); private
/// (stack) stores are expanded into per-channel AMDGPUISD::REGISTER_STORE
/// nodes for indirect addressing.  Stores to other address spaces return
/// SDValue() and fall back to the default lowering.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a stack register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_STORE per vector element; getStackAddress yields the
    // channel and the (relative) pointer increment for each element.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    // Scalar case: i8 values are widened to i32 before the register store.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
    DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1034
Tom Stellard365366f2013-01-23 02:09:06 +00001035// return (512 + (kc_bank << 12)
1036static int
1037ConstantAddressBlock(unsigned AddressSpace) {
1038 switch (AddressSpace) {
1039 case AMDGPUAS::CONSTANT_BUFFER_0:
1040 return 512;
1041 case AMDGPUAS::CONSTANT_BUFFER_1:
1042 return 512 + 4096;
1043 case AMDGPUAS::CONSTANT_BUFFER_2:
1044 return 512 + 4096 * 2;
1045 case AMDGPUAS::CONSTANT_BUFFER_3:
1046 return 512 + 4096 * 3;
1047 case AMDGPUAS::CONSTANT_BUFFER_4:
1048 return 512 + 4096 * 4;
1049 case AMDGPUAS::CONSTANT_BUFFER_5:
1050 return 512 + 4096 * 5;
1051 case AMDGPUAS::CONSTANT_BUFFER_6:
1052 return 512 + 4096 * 6;
1053 case AMDGPUAS::CONSTANT_BUFFER_7:
1054 return 512 + 4096 * 7;
1055 case AMDGPUAS::CONSTANT_BUFFER_8:
1056 return 512 + 4096 * 8;
1057 case AMDGPUAS::CONSTANT_BUFFER_9:
1058 return 512 + 4096 * 9;
1059 case AMDGPUAS::CONSTANT_BUFFER_10:
1060 return 512 + 4096 * 10;
1061 case AMDGPUAS::CONSTANT_BUFFER_11:
1062 return 512 + 4096 * 11;
1063 case AMDGPUAS::CONSTANT_BUFFER_12:
1064 return 512 + 4096 * 12;
1065 case AMDGPUAS::CONSTANT_BUFFER_13:
1066 return 512 + 4096 * 13;
1067 case AMDGPUAS::CONSTANT_BUFFER_14:
1068 return 512 + 4096 * 14;
1069 case AMDGPUAS::CONSTANT_BUFFER_15:
1070 return 512 + 4096 * 15;
1071 default:
1072 return -1;
1073 }
1074}
1075
/// Custom lowering for LOAD.  Loads from the constant-buffer address spaces
/// become CONST_ADDRESS nodes (folded per-channel when the pointer is a
/// constant); loads from the private (stack) address space are expanded into
/// per-channel AMDGPUISD::REGISTER_LOAD nodes.  Other address spaces return
/// SDValue() and use the default lowering.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    // A compile-time-known pointer lets us emit one CONST_ADDRESS per
    // channel with the fully folded address.
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
    } else {
      // non constant ptr cant be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only need channel 0 of the v4i32 result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
        Result,
        Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a stack register index.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress yields the channel and
    // the (relative) pointer increment for each element.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused lanes with UNDEF up to a full 4-wide vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001172
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Each formal argument is materialized as a zero-extending load from the
/// PARAM_I constant address space; arguments are packed back to back
/// starting at byte offset 36 (presumably past driver-reserved data -- see
/// LowerImplicitParameter; confirm against the runtime ABI).
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
      DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    // Pointers occupy 32 bits in this ABI; everything else uses its IR size.
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      // The in-memory argument is narrower than the register type: load it
      // at its real width and let the ZEXTLOAD widen it.
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    // NOTE(review): the load is chained to DAG.getRoot() rather than the
    // incoming Chain -- presumably fine because parameter memory is
    // read-only, but worth confirming.
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    // Arguments are packed with no alignment padding between them.
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}
1211
Matt Arsenault758659232013-05-18 00:21:46 +00001212EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001213 if (!VT.isVector()) return MVT::i32;
1214 return VT.changeVectorElementTypeToInteger();
1215}
1216
/// Rewrite a BUILD_VECTOR's operands so that lanes the swizzle hardware can
/// synthesize become UNDEF, recording the replacement in \p RemapSwizzle
/// (old lane -> swizzle select):
///   - constant 0.0 maps to select 4 (SEL_0)
///   - constant 1.0 maps to select 5 (SEL_1)
///   - a repeated value maps to the lane of its first occurrence
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: point this lane's swizzle at the first lane that already
    // holds the same value.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1254
Benjamin Kramer193960c2013-06-11 13:32:25 +00001255static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1256 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001257 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1258 assert(RemapSwizzle.empty());
1259 SDValue NewBldVec[4] = {
1260 VectorEntry.getOperand(0),
1261 VectorEntry.getOperand(1),
1262 VectorEntry.getOperand(2),
1263 VectorEntry.getOperand(3)
1264 };
1265 bool isUnmovable[4] = { false, false, false, false };
1266
1267 for (unsigned i = 0; i < 4; i++) {
1268 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1269 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1270 ->getZExtValue();
1271 if (!isUnmovable[Idx]) {
1272 // Swap i and Idx
1273 std::swap(NewBldVec[Idx], NewBldVec[i]);
1274 RemapSwizzle[Idx] = i;
1275 RemapSwizzle[i] = Idx;
1276 }
1277 isUnmovable[Idx] = true;
1278 }
1279 }
1280
1281 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1282 VectorEntry.getValueType(), NewBldVec, 4);
1283}
1284
1285
1286SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1287SDValue Swz[4], SelectionDAG &DAG) const {
1288 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1289 // Old -> New swizzle values
1290 DenseMap<unsigned, unsigned> SwizzleRemap;
1291
1292 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1293 for (unsigned i = 0; i < 4; i++) {
1294 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1295 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1296 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1297 }
1298
1299 SwizzleRemap.clear();
1300 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1301 for (unsigned i = 0; i < 4; i++) {
1302 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1303 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1304 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1305 }
1306
1307 return BuildVector;
1308}
1309
1310
Tom Stellard75aadc22012-12-11 21:25:42 +00001311//===----------------------------------------------------------------------===//
1312// Custom DAG Optimizations
1313//===----------------------------------------------------------------------===//
1314
1315SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1316 DAGCombinerInfo &DCI) const {
1317 SelectionDAG &DAG = DCI.DAG;
1318
1319 switch (N->getOpcode()) {
1320 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1321 case ISD::FP_ROUND: {
1322 SDValue Arg = N->getOperand(0);
1323 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001324 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001325 Arg.getOperand(0));
1326 }
1327 break;
1328 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001329
1330 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1331 // (i32 select_cc f32, f32, -1, 0 cc)
1332 //
1333 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1334 // this to one of the SET*_DX10 instructions.
1335 case ISD::FP_TO_SINT: {
1336 SDValue FNeg = N->getOperand(0);
1337 if (FNeg.getOpcode() != ISD::FNEG) {
1338 return SDValue();
1339 }
1340 SDValue SelectCC = FNeg.getOperand(0);
1341 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1342 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1343 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1344 !isHWTrueValue(SelectCC.getOperand(2)) ||
1345 !isHWFalseValue(SelectCC.getOperand(3))) {
1346 return SDValue();
1347 }
1348
Andrew Trickef9de2a2013-05-25 02:42:55 +00001349 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001350 SelectCC.getOperand(0), // LHS
1351 SelectCC.getOperand(1), // RHS
1352 DAG.getConstant(-1, MVT::i32), // True
1353 DAG.getConstant(0, MVT::i32), // Flase
1354 SelectCC.getOperand(4)); // CC
1355
1356 break;
1357 }
Tom Stellard365366f2013-01-23 02:09:06 +00001358 // Extract_vec (Build_vector) generated by custom lowering
1359 // also needs to be customly combined
1360 case ISD::EXTRACT_VECTOR_ELT: {
1361 SDValue Arg = N->getOperand(0);
1362 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1363 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1364 unsigned Element = Const->getZExtValue();
1365 return Arg->getOperand(Element);
1366 }
1367 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001368 if (Arg.getOpcode() == ISD::BITCAST &&
1369 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1370 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1371 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001372 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001373 Arg->getOperand(0).getOperand(Element));
1374 }
1375 }
Tom Stellard365366f2013-01-23 02:09:06 +00001376 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001377
1378 case ISD::SELECT_CC: {
1379 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1380 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001381 //
1382 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1383 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001384 SDValue LHS = N->getOperand(0);
1385 if (LHS.getOpcode() != ISD::SELECT_CC) {
1386 return SDValue();
1387 }
1388
1389 SDValue RHS = N->getOperand(1);
1390 SDValue True = N->getOperand(2);
1391 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001392 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001393
1394 if (LHS.getOperand(2).getNode() != True.getNode() ||
1395 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001396 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001397 return SDValue();
1398 }
1399
Tom Stellard5e524892013-03-08 15:37:11 +00001400 switch (NCC) {
1401 default: return SDValue();
1402 case ISD::SETNE: return LHS;
1403 case ISD::SETEQ: {
1404 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1405 LHSCC = ISD::getSetCCInverse(LHSCC,
1406 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001407 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001408 LHS.getOperand(0),
1409 LHS.getOperand(1),
1410 LHS.getOperand(2),
1411 LHS.getOperand(3),
1412 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001413 }
Tom Stellard5e524892013-03-08 15:37:11 +00001414 }
1415 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001416 case AMDGPUISD::EXPORT: {
1417 SDValue Arg = N->getOperand(1);
1418 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1419 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001420
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001421 SDValue NewArgs[8] = {
1422 N->getOperand(0), // Chain
1423 SDValue(),
1424 N->getOperand(2), // ArrayBase
1425 N->getOperand(3), // Type
1426 N->getOperand(4), // SWZ_X
1427 N->getOperand(5), // SWZ_Y
1428 N->getOperand(6), // SWZ_Z
1429 N->getOperand(7) // SWZ_W
1430 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001431 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001432 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001433 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001434 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001435 case AMDGPUISD::TEXTURE_FETCH: {
1436 SDValue Arg = N->getOperand(1);
1437 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1438 break;
1439
1440 SDValue NewArgs[19] = {
1441 N->getOperand(0),
1442 N->getOperand(1),
1443 N->getOperand(2),
1444 N->getOperand(3),
1445 N->getOperand(4),
1446 N->getOperand(5),
1447 N->getOperand(6),
1448 N->getOperand(7),
1449 N->getOperand(8),
1450 N->getOperand(9),
1451 N->getOperand(10),
1452 N->getOperand(11),
1453 N->getOperand(12),
1454 N->getOperand(13),
1455 N->getOperand(14),
1456 N->getOperand(15),
1457 N->getOperand(16),
1458 N->getOperand(17),
1459 N->getOperand(18),
1460 };
1461 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1462 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1463 NewArgs, 19);
1464 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001465 }
1466 return SDValue();
1467}