blob: b898af13b7afce23a00d1431f2a303541db12ed3 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000020#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000023#include "llvm/IR/Argument.h"
24#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025
26using namespace llvm;
27
28R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Bill Wendling37e9adb2013-06-07 20:28:55 +000029 AMDGPUTargetLowering(TM) {
Tom Stellard75aadc22012-12-11 21:25:42 +000030 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
31 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
32 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
34 computeRegisterProperties();
35
36 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
37 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
38 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
39 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
40
Tom Stellarda8b03512012-12-21 16:33:24 +000041 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
42 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
43 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
44 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000045 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
46
Tom Stellard492ebea2013-03-08 15:37:07 +000047 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
48 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000049
50 setOperationAction(ISD::FSUB, MVT::f32, Expand);
51
52 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
53 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
54 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000055
Tom Stellard75aadc22012-12-11 21:25:42 +000056 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
57 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
58
Tom Stellarde8f9f282013-03-08 15:37:05 +000059 setOperationAction(ISD::SETCC, MVT::i32, Expand);
60 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000061 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
62
63 setOperationAction(ISD::SELECT, MVT::i32, Custom);
64 setOperationAction(ISD::SELECT, MVT::f32, Custom);
65
Tom Stellarda99c6ae2013-05-10 02:09:24 +000066 setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
67 setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
68
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000069 // Legalize loads and stores to the private address space.
70 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000071 setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000072 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
73 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
74 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
75 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
76 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
77 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000078 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard6ec9e802013-06-20 21:55:23 +000079 setOperationAction(ISD::STORE, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
81
Tom Stellard365366f2013-01-23 02:09:06 +000082 setOperationAction(ISD::LOAD, MVT::i32, Custom);
83 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000084 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
85
Tom Stellard75aadc22012-12-11 21:25:42 +000086 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000087 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000088 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000089 setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard75aadc22012-12-11 21:25:42 +000090
Tom Stellardb852af52013-03-08 15:37:03 +000091 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +000092 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard75aadc22012-12-11 21:25:42 +000093 setSchedulingPreference(Sched::VLIW);
94}
95
/// \brief Expand pseudo instructions flagged usesCustomInserter.
///
/// Each case rewrites the pseudo \p MI into one or more real R600 machine
/// instructions inserted before \p MI in \p BB, then the pseudo is erased
/// at the bottom of the function.  Cases that 'return BB' early keep (a
/// possibly modified) \p MI in place instead.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    // CLAMP is a plain MOV with the clamp modifier set on the result.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // FABS is a MOV with the source absolute-value modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // FNEG is a MOV with the source negate modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the defining instruction of the masked register so its write is
    // suppressed; the MASK_WRITE pseudo itself produces no code.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize an f32 immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Read from the constant cache: a MOV from ALU_CONST whose src0_sel
    // immediate selects the constant-buffer slot.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is a RETURN, fold the end-of-program bit into
    // this export so no separate terminator is emitted.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-supplied derivatives: load the H and V
    // gradients into temporaries, then issue the gradient sample which
    // implicitly consumes them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits for the texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // Implicit uses keep the gradient loads live until the sample.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but issues the shadow-compare gradient sample
    // (TEX_SAMPLE_C_G) instead of TEX_SAMPLE_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits for the texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // Implicit uses keep the gradient loads live until the sample.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers straight to JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Evaluate the f32 condition into the predicate bit (with the stack
    // PUSH flag), then issue a predicated jump that kills it.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 case, using the integer is-not-zero comparison.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it is not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF_INST encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been fully replaced by the instructions built above.
  MI->eraseFromParent();
  return BB;
}
453
454//===----------------------------------------------------------------------===//
455// Custom DAG Lowering Operations
456//===----------------------------------------------------------------------===//
457
/// \brief Custom-lower the operations registered as Custom in the constructor.
///
/// Simple cases dispatch to dedicated Lower* helpers; intrinsics are
/// expanded inline.  Returning an empty SDValue tells the legalizer the
/// node was (or will be) handled without a replacement value.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value into the output register and record it as a live-out
      // so the RETURN lowering can add an implicit use (see
      // EmitInstrWithCustomInserter, case AMDGPU::RETURN).
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs are pre-loaded into T registers; mark the register
      // live-in and read it from the function entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative I/J base: constant (flat) interpolation — load the
        // parameter directly and extract the requested channel.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // Perspective interpolation: the I and J barycentrics arrive in a
      // pair of live-in T registers selected by ijb.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // Each INTERP_PAIR_* node produces two channels; pick XY or ZW by
      // slot, then select the channel within the pair via the result index.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      // Map each texture intrinsic to its TEXTURE_FETCH opcode selector.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // TEXTURE_FETCH operand layout: opcode selector, coordinate, source
      // swizzle (identity), resource/sampler ids, offsets, coord types.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Expand dp4 into the DOT4 node over the interleaved scalar elements
      // of the two v4f32 operands.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Work-group / global-size queries are loads from the implicit
    // parameter buffer at fixed dword offsets.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group and thread ids arrive pre-loaded in fixed T registers.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
688
689void R600TargetLowering::ReplaceNodeResults(SDNode *N,
690 SmallVectorImpl<SDValue> &Results,
691 SelectionDAG &DAG) const {
692 switch (N->getOpcode()) {
693 default: return;
694 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000695 return;
696 case ISD::LOAD: {
697 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
698 Results.push_back(SDValue(Node, 0));
699 Results.push_back(SDValue(Node, 1));
700 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
701 // function
702 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
703 return;
704 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000705 case ISD::STORE:
706 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
707 Results.push_back(SDValue(Node, 0));
708 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000709 }
710}
711
712SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
713 return DAG.getNode(
714 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000715 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000716 MVT::i1,
717 Op, DAG.getConstantFP(0.0f, MVT::f32),
718 DAG.getCondCode(ISD::SETNE)
719 );
720}
721
Tom Stellard75aadc22012-12-11 21:25:42 +0000722SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000723 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000724 unsigned DwordOffset) const {
725 unsigned ByteOffset = DwordOffset * 4;
726 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
727 AMDGPUAS::PARAM_I_ADDRESS);
728
729 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
730 assert(isInt<16>(ByteOffset));
731
732 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
733 DAG.getConstant(ByteOffset, MVT::i32), // PTR
734 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
735 false, false, false, 0);
736}
737
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000738SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
739
740 MachineFunction &MF = DAG.getMachineFunction();
741 const AMDGPUFrameLowering *TFL =
742 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
743
744 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
745 assert(FIN);
746
747 unsigned FrameIndex = FIN->getIndex();
748 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
749 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
750}
751
Tom Stellard75aadc22012-12-11 21:25:42 +0000752bool R600TargetLowering::isZero(SDValue Op) const {
753 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
754 return Cst->isNullValue();
755 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
756 return CstFP->isZero();
757 } else {
758 return false;
759 }
760}
761
762SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000763 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000764 EVT VT = Op.getValueType();
765
766 SDValue LHS = Op.getOperand(0);
767 SDValue RHS = Op.getOperand(1);
768 SDValue True = Op.getOperand(2);
769 SDValue False = Op.getOperand(3);
770 SDValue CC = Op.getOperand(4);
771 SDValue Temp;
772
773 // LHS and RHS are guaranteed to be the same value type
774 EVT CompareVT = LHS.getValueType();
775
776 // Check if we can lower this to a native operation.
777
Tom Stellard2add82d2013-03-08 15:37:09 +0000778 // Try to lower to a SET* instruction:
779 //
780 // SET* can match the following patterns:
781 //
782 // select_cc f32, f32, -1, 0, cc_any
783 // select_cc f32, f32, 1.0f, 0.0f, cc_any
784 // select_cc i32, i32, -1, 0, cc_any
785 //
786
787 // Move hardware True/False values to the correct operand.
788 if (isHWTrueValue(False) && isHWFalseValue(True)) {
789 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
790 std::swap(False, True);
791 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
792 }
793
794 if (isHWTrueValue(True) && isHWFalseValue(False) &&
795 (CompareVT == VT || VT == MVT::i32)) {
796 // This can be matched by a SET* instruction.
797 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
798 }
799
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000801 //
802 // CND* can match the following patterns:
803 //
804 // select_cc f32, 0.0, f32, f32, cc_any
805 // select_cc f32, 0.0, i32, i32, cc_any
806 // select_cc i32, 0, f32, f32, cc_any
807 // select_cc i32, 0, i32, i32, cc_any
808 //
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 if (isZero(LHS) || isZero(RHS)) {
810 SDValue Cond = (isZero(LHS) ? RHS : LHS);
811 SDValue Zero = (isZero(LHS) ? LHS : RHS);
812 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
813 if (CompareVT != VT) {
814 // Bitcast True / False to the correct types. This will end up being
815 // a nop, but it allows us to define only a single pattern in the
816 // .TD files for each CND* instruction rather than having to have
817 // one pattern for integer True/False and one for fp True/False
818 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
819 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
820 }
821 if (isZero(LHS)) {
822 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
823 }
824
825 switch (CCOpcode) {
826 case ISD::SETONE:
827 case ISD::SETUNE:
828 case ISD::SETNE:
829 case ISD::SETULE:
830 case ISD::SETULT:
831 case ISD::SETOLE:
832 case ISD::SETOLT:
833 case ISD::SETLE:
834 case ISD::SETLT:
835 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
836 Temp = True;
837 True = False;
838 False = Temp;
839 break;
840 default:
841 break;
842 }
843 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
844 Cond, Zero,
845 True, False,
846 DAG.getCondCode(CCOpcode));
847 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
848 }
849
Tom Stellard75aadc22012-12-11 21:25:42 +0000850
851 // Possible Min/Max pattern
852 SDValue MinMax = LowerMinMax(Op, DAG);
853 if (MinMax.getNode()) {
854 return MinMax;
855 }
856
857 // If we make it this for it means we have no native instructions to handle
858 // this SELECT_CC, so we must lower it.
859 SDValue HWTrue, HWFalse;
860
861 if (CompareVT == MVT::f32) {
862 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
863 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
864 } else if (CompareVT == MVT::i32) {
865 HWTrue = DAG.getConstant(-1, CompareVT);
866 HWFalse = DAG.getConstant(0, CompareVT);
867 }
868 else {
869 assert(!"Unhandled value type in LowerSELECT_CC");
870 }
871
872 // Lower this unsupported SELECT_CC into a combination of two supported
873 // SELECT_CC operations.
874 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
875
876 return DAG.getNode(ISD::SELECT_CC, DL, VT,
877 Cond, HWFalse,
878 True, False,
879 DAG.getCondCode(ISD::SETNE));
880}
881
882SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
883 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000884 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000885 Op.getValueType(),
886 Op.getOperand(0),
887 DAG.getConstant(0, MVT::i32),
888 Op.getOperand(1),
889 Op.getOperand(2),
890 DAG.getCondCode(ISD::SETNE));
891}
892
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000893/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
894/// convert these pointers to a register index. Each register holds
895/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
896/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
897/// for indirect addressing.
898SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
899 unsigned StackWidth,
900 SelectionDAG &DAG) const {
901 unsigned SRLPad;
902 switch(StackWidth) {
903 case 1:
904 SRLPad = 2;
905 break;
906 case 2:
907 SRLPad = 3;
908 break;
909 case 4:
910 SRLPad = 4;
911 break;
912 default: llvm_unreachable("Invalid stack width");
913 }
914
Andrew Trickef9de2a2013-05-25 02:42:55 +0000915 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000916 DAG.getConstant(SRLPad, MVT::i32));
917}
918
919void R600TargetLowering::getStackAddress(unsigned StackWidth,
920 unsigned ElemIdx,
921 unsigned &Channel,
922 unsigned &PtrIncr) const {
923 switch (StackWidth) {
924 default:
925 case 1:
926 Channel = 0;
927 if (ElemIdx > 0) {
928 PtrIncr = 1;
929 } else {
930 PtrIncr = 0;
931 }
932 break;
933 case 2:
934 Channel = ElemIdx % 2;
935 if (ElemIdx == 2) {
936 PtrIncr = 1;
937 } else {
938 PtrIncr = 0;
939 }
940 break;
941 case 4:
942 Channel = ElemIdx;
943 PtrIncr = 0;
944 break;
945 }
946}
947
Tom Stellard75aadc22012-12-11 21:25:42 +0000948SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000949 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000950 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
951 SDValue Chain = Op.getOperand(0);
952 SDValue Value = Op.getOperand(1);
953 SDValue Ptr = Op.getOperand(2);
954
955 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
956 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
957 // Convert pointer from byte address to dword address.
958 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
959 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
960 Ptr, DAG.getConstant(2, MVT::i32)));
961
962 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
963 assert(!"Truncated and indexed stores not supported yet");
964 } else {
965 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
966 }
967 return Chain;
968 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000969
970 EVT ValueVT = Value.getValueType();
971
972 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
973 return SDValue();
974 }
975
976 // Lowering for indirect addressing
977
978 const MachineFunction &MF = DAG.getMachineFunction();
979 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
980 getTargetMachine().getFrameLowering());
981 unsigned StackWidth = TFL->getStackWidth(MF);
982
983 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
984
985 if (ValueVT.isVector()) {
986 unsigned NumElemVT = ValueVT.getVectorNumElements();
987 EVT ElemVT = ValueVT.getVectorElementType();
988 SDValue Stores[4];
989
990 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
991 "vector width in load");
992
993 for (unsigned i = 0; i < NumElemVT; ++i) {
994 unsigned Channel, PtrIncr;
995 getStackAddress(StackWidth, i, Channel, PtrIncr);
996 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
997 DAG.getConstant(PtrIncr, MVT::i32));
998 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
999 Value, DAG.getConstant(i, MVT::i32));
1000
1001 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1002 Chain, Elem, Ptr,
1003 DAG.getTargetConstant(Channel, MVT::i32));
1004 }
1005 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1006 } else {
1007 if (ValueVT == MVT::i8) {
1008 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1009 }
1010 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001011 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001012 }
1013
1014 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001015}
1016
// Map a constant-buffer address space to the base of its constant block:
// 512 + (kc_bank << 12), i.e. 512 + kc_bank * 4096.
// Returns -1 if \p AddressSpace is not one of the constant buffers.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1057
1058SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
1059{
1060 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001061 SDLoc DL(Op);
Tom Stellard365366f2013-01-23 02:09:06 +00001062 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1063 SDValue Chain = Op.getOperand(0);
1064 SDValue Ptr = Op.getOperand(1);
1065 SDValue LoweredLoad;
1066
1067 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1068 if (ConstantBlock > -1) {
1069 SDValue Result;
1070 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
Vincent Lejeune743dca02013-03-05 15:04:29 +00001071 dyn_cast<Constant>(LoadNode->getSrcValue()) ||
1072 dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard365366f2013-01-23 02:09:06 +00001073 SDValue Slots[4];
1074 for (unsigned i = 0; i < 4; i++) {
1075 // We want Const position encoded with the following formula :
1076 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1077 // const_index is Ptr computed by llvm using an alignment of 16.
1078 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1079 // then div by 4 at the ISel step
1080 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1081 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
1082 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1083 }
1084 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
1085 } else {
1086 // non constant ptr cant be folded, keeps it as a v4f32 load
1087 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune743dca02013-03-05 15:04:29 +00001088 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig189357c2013-03-07 09:03:59 +00001089 DAG.getConstant(LoadNode->getAddressSpace() -
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001090 AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard365366f2013-01-23 02:09:06 +00001091 );
1092 }
1093
1094 if (!VT.isVector()) {
1095 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1096 DAG.getConstant(0, MVT::i32));
1097 }
1098
1099 SDValue MergedValues[2] = {
1100 Result,
1101 Chain
1102 };
1103 return DAG.getMergeValues(MergedValues, 2, DL);
1104 }
1105
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001106 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1107 return SDValue();
1108 }
1109
1110 // Lowering for indirect addressing
1111 const MachineFunction &MF = DAG.getMachineFunction();
1112 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1113 getTargetMachine().getFrameLowering());
1114 unsigned StackWidth = TFL->getStackWidth(MF);
1115
1116 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1117
1118 if (VT.isVector()) {
1119 unsigned NumElemVT = VT.getVectorNumElements();
1120 EVT ElemVT = VT.getVectorElementType();
1121 SDValue Loads[4];
1122
1123 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1124 "vector width in load");
1125
1126 for (unsigned i = 0; i < NumElemVT; ++i) {
1127 unsigned Channel, PtrIncr;
1128 getStackAddress(StackWidth, i, Channel, PtrIncr);
1129 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1130 DAG.getConstant(PtrIncr, MVT::i32));
1131 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1132 Chain, Ptr,
1133 DAG.getTargetConstant(Channel, MVT::i32),
1134 Op.getOperand(2));
1135 }
1136 for (unsigned i = NumElemVT; i < 4; ++i) {
1137 Loads[i] = DAG.getUNDEF(ElemVT);
1138 }
1139 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1140 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1141 } else {
1142 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1143 Chain, Ptr,
1144 DAG.getTargetConstant(0, MVT::i32), // Channel
1145 Op.getOperand(2));
1146 }
1147
1148 SDValue Ops[2];
1149 Ops[0] = LoweredLoad;
1150 Ops[1] = Chain;
1151
1152 return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard365366f2013-01-23 02:09:06 +00001153}
Tom Stellard75aadc22012-12-11 21:25:42 +00001154
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // Arguments are loaded from the PARAM_I address space starting at byte
  // offset 36.  NOTE(review): presumably the first 36 bytes are reserved
  // for driver/dispatch data -- confirm against the runtime ABI.
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
      DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    // Pointer arguments are treated as 32 bits wide here.
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      // The in-memory argument is narrower than the register type: load it
      // as a narrow integer and let the ZEXTLOAD below widen it to VT.
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    // Arguments are packed; advance by the in-memory size of this argument.
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}
1193
Matt Arsenault758659232013-05-18 00:21:46 +00001194EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001195 if (!VT.isVector()) return MVT::i32;
1196 return VT.changeVectorElementTypeToInteger();
1197}
1198
/// Fold constant-0.0, constant-1.0, and duplicate lanes of a BUILD_VECTOR
/// into swizzle selectors.  Folded lanes become UNDEF in the returned vector,
/// and \p RemapSwizzle records, per original lane index, the selector to use
/// instead: 4 (SEL_0) for 0.0, 5 (SEL_1) for 1.0, or the index of an earlier
/// identical lane.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    // Replace FP constant 0.0 / 1.0 lanes with the hardware SEL_0 / SEL_1
    // selectors and drop the lane.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: if this lane repeats an earlier one, point its swizzle at
    // the earlier lane and drop this one.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1236
Benjamin Kramer193960c2013-06-11 13:32:25 +00001237static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1238 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001239 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1240 assert(RemapSwizzle.empty());
1241 SDValue NewBldVec[4] = {
1242 VectorEntry.getOperand(0),
1243 VectorEntry.getOperand(1),
1244 VectorEntry.getOperand(2),
1245 VectorEntry.getOperand(3)
1246 };
1247 bool isUnmovable[4] = { false, false, false, false };
1248
1249 for (unsigned i = 0; i < 4; i++) {
1250 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1251 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1252 ->getZExtValue();
1253 if (!isUnmovable[Idx]) {
1254 // Swap i and Idx
1255 std::swap(NewBldVec[Idx], NewBldVec[i]);
1256 RemapSwizzle[Idx] = i;
1257 RemapSwizzle[i] = Idx;
1258 }
1259 isUnmovable[Idx] = true;
1260 }
1261 }
1262
1263 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1264 VectorEntry.getValueType(), NewBldVec, 4);
1265}
1266
1267
1268SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1269SDValue Swz[4], SelectionDAG &DAG) const {
1270 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1271 // Old -> New swizzle values
1272 DenseMap<unsigned, unsigned> SwizzleRemap;
1273
1274 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1275 for (unsigned i = 0; i < 4; i++) {
1276 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1277 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1278 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1279 }
1280
1281 SwizzleRemap.clear();
1282 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1283 for (unsigned i = 0; i < 4; i++) {
1284 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1285 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1286 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1287 }
1288
1289 return BuildVector;
1290}
1291
1292
Tom Stellard75aadc22012-12-11 21:25:42 +00001293//===----------------------------------------------------------------------===//
1294// Custom DAG Optimizations
1295//===----------------------------------------------------------------------===//
1296
1297SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1298 DAGCombinerInfo &DCI) const {
1299 SelectionDAG &DAG = DCI.DAG;
1300
1301 switch (N->getOpcode()) {
1302 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1303 case ISD::FP_ROUND: {
1304 SDValue Arg = N->getOperand(0);
1305 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001306 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001307 Arg.getOperand(0));
1308 }
1309 break;
1310 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001311
1312 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1313 // (i32 select_cc f32, f32, -1, 0 cc)
1314 //
1315 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1316 // this to one of the SET*_DX10 instructions.
1317 case ISD::FP_TO_SINT: {
1318 SDValue FNeg = N->getOperand(0);
1319 if (FNeg.getOpcode() != ISD::FNEG) {
1320 return SDValue();
1321 }
1322 SDValue SelectCC = FNeg.getOperand(0);
1323 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1324 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1325 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1326 !isHWTrueValue(SelectCC.getOperand(2)) ||
1327 !isHWFalseValue(SelectCC.getOperand(3))) {
1328 return SDValue();
1329 }
1330
Andrew Trickef9de2a2013-05-25 02:42:55 +00001331 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001332 SelectCC.getOperand(0), // LHS
1333 SelectCC.getOperand(1), // RHS
1334 DAG.getConstant(-1, MVT::i32), // True
1335 DAG.getConstant(0, MVT::i32), // Flase
1336 SelectCC.getOperand(4)); // CC
1337
1338 break;
1339 }
Tom Stellard365366f2013-01-23 02:09:06 +00001340 // Extract_vec (Build_vector) generated by custom lowering
1341 // also needs to be customly combined
1342 case ISD::EXTRACT_VECTOR_ELT: {
1343 SDValue Arg = N->getOperand(0);
1344 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1345 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1346 unsigned Element = Const->getZExtValue();
1347 return Arg->getOperand(Element);
1348 }
1349 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001350 if (Arg.getOpcode() == ISD::BITCAST &&
1351 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1352 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1353 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001354 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001355 Arg->getOperand(0).getOperand(Element));
1356 }
1357 }
Tom Stellard365366f2013-01-23 02:09:06 +00001358 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001359
1360 case ISD::SELECT_CC: {
1361 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1362 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001363 //
1364 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1365 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001366 SDValue LHS = N->getOperand(0);
1367 if (LHS.getOpcode() != ISD::SELECT_CC) {
1368 return SDValue();
1369 }
1370
1371 SDValue RHS = N->getOperand(1);
1372 SDValue True = N->getOperand(2);
1373 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001374 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001375
1376 if (LHS.getOperand(2).getNode() != True.getNode() ||
1377 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001378 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001379 return SDValue();
1380 }
1381
Tom Stellard5e524892013-03-08 15:37:11 +00001382 switch (NCC) {
1383 default: return SDValue();
1384 case ISD::SETNE: return LHS;
1385 case ISD::SETEQ: {
1386 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1387 LHSCC = ISD::getSetCCInverse(LHSCC,
1388 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001389 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001390 LHS.getOperand(0),
1391 LHS.getOperand(1),
1392 LHS.getOperand(2),
1393 LHS.getOperand(3),
1394 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001395 }
Tom Stellard5e524892013-03-08 15:37:11 +00001396 }
1397 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001398 case AMDGPUISD::EXPORT: {
1399 SDValue Arg = N->getOperand(1);
1400 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1401 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001402
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001403 SDValue NewArgs[8] = {
1404 N->getOperand(0), // Chain
1405 SDValue(),
1406 N->getOperand(2), // ArrayBase
1407 N->getOperand(3), // Type
1408 N->getOperand(4), // SWZ_X
1409 N->getOperand(5), // SWZ_Y
1410 N->getOperand(6), // SWZ_Z
1411 N->getOperand(7) // SWZ_W
1412 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001413 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001414 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001415 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001416 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001417 case AMDGPUISD::TEXTURE_FETCH: {
1418 SDValue Arg = N->getOperand(1);
1419 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1420 break;
1421
1422 SDValue NewArgs[19] = {
1423 N->getOperand(0),
1424 N->getOperand(1),
1425 N->getOperand(2),
1426 N->getOperand(3),
1427 N->getOperand(4),
1428 N->getOperand(5),
1429 N->getOperand(6),
1430 N->getOperand(7),
1431 N->getOperand(8),
1432 N->getOperand(9),
1433 N->getOperand(10),
1434 N->getOperand(11),
1435 N->getOperand(12),
1436 N->getOperand(13),
1437 N->getOperand(14),
1438 N->getOperand(15),
1439 N->getOperand(16),
1440 N->getOperand(17),
1441 N->getOperand(18),
1442 };
1443 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1444 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1445 NewArgs, 19);
1446 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001447 }
1448 return SDValue();
1449}