blob: e846ff4aee3f8c725d82f3e2bb7fbdf909208de0 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000041 setOperationAction(ISD::FCOS, MVT::f32, Custom);
42 setOperationAction(ISD::FSIN, MVT::f32, Custom);
43
Tom Stellard75aadc22012-12-11 21:25:42 +000044 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000045 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000046
Tom Stellard492ebea2013-03-08 15:37:07 +000047 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
48 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000049
50 setOperationAction(ISD::FSUB, MVT::f32, Expand);
51
52 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
53 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
54 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000055
Tom Stellard75aadc22012-12-11 21:25:42 +000056 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
57 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
58
Tom Stellarde8f9f282013-03-08 15:37:05 +000059 setOperationAction(ISD::SETCC, MVT::i32, Expand);
60 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000061 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
62
63 setOperationAction(ISD::SELECT, MVT::i32, Custom);
64 setOperationAction(ISD::SELECT, MVT::f32, Custom);
65
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000066 // Legalize loads and stores to the private address space.
67 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000068 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000069 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000070 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
71 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
72 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
73 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000074 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000076 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000077 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +000078 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
79 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000080
Tom Stellard365366f2013-01-23 02:09:06 +000081 setOperationAction(ISD::LOAD, MVT::i32, Custom);
82 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000083 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
84
Tom Stellard75aadc22012-12-11 21:25:42 +000085 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000086 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000087 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000088 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +000089 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +000090
Michel Danzer49812b52013-07-10 16:37:07 +000091 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
92
Tom Stellardb852af52013-03-08 15:37:03 +000093 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +000094 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +000095 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +000096}
97
// Expands R600 pseudo instructions marked with usesCustomInserter into real
// machine instructions.  Most cases rebuild the instruction via R600InstrInfo
// helpers and fall through to erase the original pseudo at the bottom; the
// EG_ExportSwz/R600_ExportSwz and RETURN cases may return early and leave MI
// in place.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // CLAMP/FABS/FNEG pseudos become a plain MOV tagged with the matching
  // R600 operand-modifier flag (clamp / abs / neg).
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  // MASK_WRITE does not emit anything itself; it marks the instruction that
  // defines the masked register with the mask flag.
  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  // LDS reads return their result in the physical OQAP register; re-emit the
  // read targeting OQAP, then copy OQAP into the pseudo's virtual def.
  case AMDGPU::LDS_READ_RET: {
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP immediate through its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  // CONST_COPY becomes a MOV from ALU_CONST with the constant-buffer slot
  // encoded in the src0_sel immediate operand.
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If this write is immediately followed by RETURN, fold the
    // end-of-program bit into the write instead of a separate instruction.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  // Texture sample with user-supplied derivatives: emit the H and V gradient
  // setup instructions, then the gradient sample itself, which implicitly
  // reads both gradient registers (T0/T1 below).
  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default source swizzle (x,y,z,w) and per-channel coordinate-type
    // flags; adjusted per texture target below.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  // Same expansion as TXD but using the shadow-compare gradient sample
  // (TEX_SAMPLE_C_G).
  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  // Conditional branches: materialize the predicate with PRED_X (pushing the
  // flow-control stack via MO_FLAG_PUSH), then emit the predicated jump.
  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encodings differ between Evergreen (84) and R600 (40);
    // presumably these are the EXPORT_DONE opcodes — confirm against the ISA
    // docs before changing.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been replaced (or flagged); remove it.
  MI->eraseFromParent();
  return BB;
}
469
470//===----------------------------------------------------------------------===//
471// Custom DAG Lowering Operations
472//===----------------------------------------------------------------------===//
473
// Main custom-lowering dispatch.  Simple opcodes are forwarded to their
// dedicated Lower* helpers; the two intrinsic opcodes carry their own nested
// switches over the intrinsic ID.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    // Shader output: copy the value into its fixed T-register and record it
    // so RETURN can list it as an implicit use (see
    // EmitInstrWithCustomInserter).
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    // Build an EXPORT node with the identity swizzle (x,y,z,w).
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    // Shader input: read the fixed T-register and mark it live-in.
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      // A negative i/j-barycentric index means no interpolation: load the
      // whole input vector and extract the channel for this slot.
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // The I and J interpolation coefficients arrive in a fixed pair of
      // T-registers (2*ijb, 2*ijb+1); mark them live-in and feed them to the
      // interp pair instruction for this slot's channel half (XY or ZW).
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    // All texture intrinsics funnel into a single TEXTURE_FETCH node whose
    // first operand selects the hardware texture operation.
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    // dp4: extract the four lanes of both vector operands and feed the
    // interleaved scalars to the DOT4 node.
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Grid-configuration queries read fixed dword slots from the implicit
    // parameter buffer (see LowerImplicitParameter).
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup (tgid) and workitem (tidig) IDs live in fixed registers
    // T1.{X,Y,Z} and T0.{X,Y,Z} respectively.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
707
708void R600TargetLowering::ReplaceNodeResults(SDNode *N,
709 SmallVectorImpl<SDValue> &Results,
710 SelectionDAG &DAG) const {
711 switch (N->getOpcode()) {
712 default: return;
713 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000714 return;
715 case ISD::LOAD: {
716 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
717 Results.push_back(SDValue(Node, 0));
718 Results.push_back(SDValue(Node, 1));
719 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
720 // function
721 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
722 return;
723 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000724 case ISD::STORE:
725 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
726 Results.push_back(SDValue(Node, 0));
727 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000728 }
729}
730
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000731SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
732 // On hw >= R700, COS/SIN input must be between -1. and 1.
733 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
734 EVT VT = Op.getValueType();
735 SDValue Arg = Op.getOperand(0);
736 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
737 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
738 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
739 DAG.getConstantFP(0.15915494309, MVT::f32)),
740 DAG.getConstantFP(0.5, MVT::f32)));
741 unsigned TrigNode;
742 switch (Op.getOpcode()) {
743 case ISD::FCOS:
744 TrigNode = AMDGPUISD::COS_HW;
745 break;
746 case ISD::FSIN:
747 TrigNode = AMDGPUISD::SIN_HW;
748 break;
749 default:
750 llvm_unreachable("Wrong trig opcode");
751 }
752 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
753 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
754 DAG.getConstantFP(-0.5, MVT::f32)));
755 if (Gen >= AMDGPUSubtarget::R700)
756 return TrigVal;
757 // On R600 hw, COS/SIN input must be between -Pi and Pi.
758 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
759 DAG.getConstantFP(3.14159265359, MVT::f32));
760}
761
Tom Stellard75aadc22012-12-11 21:25:42 +0000762SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
763 return DAG.getNode(
764 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000765 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000766 MVT::i1,
767 Op, DAG.getConstantFP(0.0f, MVT::f32),
768 DAG.getCondCode(ISD::SETNE)
769 );
770}
771
Tom Stellard75aadc22012-12-11 21:25:42 +0000772SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000773 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 unsigned DwordOffset) const {
775 unsigned ByteOffset = DwordOffset * 4;
776 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000777 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000778
779 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
780 assert(isInt<16>(ByteOffset));
781
782 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
783 DAG.getConstant(ByteOffset, MVT::i32), // PTR
784 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
785 false, false, false, 0);
786}
787
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000788SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
789
790 MachineFunction &MF = DAG.getMachineFunction();
791 const AMDGPUFrameLowering *TFL =
792 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
793
794 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
795 assert(FIN);
796
797 unsigned FrameIndex = FIN->getIndex();
798 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
799 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
800}
801
Tom Stellard75aadc22012-12-11 21:25:42 +0000802bool R600TargetLowering::isZero(SDValue Op) const {
803 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
804 return Cst->isNullValue();
805 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
806 return CstFP->isZero();
807 } else {
808 return false;
809 }
810}
811
812SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000813 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000814 EVT VT = Op.getValueType();
815
816 SDValue LHS = Op.getOperand(0);
817 SDValue RHS = Op.getOperand(1);
818 SDValue True = Op.getOperand(2);
819 SDValue False = Op.getOperand(3);
820 SDValue CC = Op.getOperand(4);
821 SDValue Temp;
822
823 // LHS and RHS are guaranteed to be the same value type
824 EVT CompareVT = LHS.getValueType();
825
826 // Check if we can lower this to a native operation.
827
Tom Stellard2add82d2013-03-08 15:37:09 +0000828 // Try to lower to a SET* instruction:
829 //
830 // SET* can match the following patterns:
831 //
832 // select_cc f32, f32, -1, 0, cc_any
833 // select_cc f32, f32, 1.0f, 0.0f, cc_any
834 // select_cc i32, i32, -1, 0, cc_any
835 //
836
837 // Move hardware True/False values to the correct operand.
838 if (isHWTrueValue(False) && isHWFalseValue(True)) {
839 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
840 std::swap(False, True);
841 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
842 }
843
844 if (isHWTrueValue(True) && isHWFalseValue(False) &&
845 (CompareVT == VT || VT == MVT::i32)) {
846 // This can be matched by a SET* instruction.
847 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
848 }
849
Tom Stellard75aadc22012-12-11 21:25:42 +0000850 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000851 //
852 // CND* can match the following patterns:
853 //
854 // select_cc f32, 0.0, f32, f32, cc_any
855 // select_cc f32, 0.0, i32, i32, cc_any
856 // select_cc i32, 0, f32, f32, cc_any
857 // select_cc i32, 0, i32, i32, cc_any
858 //
Tom Stellard75aadc22012-12-11 21:25:42 +0000859 if (isZero(LHS) || isZero(RHS)) {
860 SDValue Cond = (isZero(LHS) ? RHS : LHS);
861 SDValue Zero = (isZero(LHS) ? LHS : RHS);
862 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
863 if (CompareVT != VT) {
864 // Bitcast True / False to the correct types. This will end up being
865 // a nop, but it allows us to define only a single pattern in the
866 // .TD files for each CND* instruction rather than having to have
867 // one pattern for integer True/False and one for fp True/False
868 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
869 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
870 }
871 if (isZero(LHS)) {
872 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
873 }
874
875 switch (CCOpcode) {
876 case ISD::SETONE:
877 case ISD::SETUNE:
878 case ISD::SETNE:
879 case ISD::SETULE:
880 case ISD::SETULT:
881 case ISD::SETOLE:
882 case ISD::SETOLT:
883 case ISD::SETLE:
884 case ISD::SETLT:
885 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
886 Temp = True;
887 True = False;
888 False = Temp;
889 break;
890 default:
891 break;
892 }
893 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
894 Cond, Zero,
895 True, False,
896 DAG.getCondCode(CCOpcode));
897 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
898 }
899
Tom Stellard75aadc22012-12-11 21:25:42 +0000900
901 // Possible Min/Max pattern
902 SDValue MinMax = LowerMinMax(Op, DAG);
903 if (MinMax.getNode()) {
904 return MinMax;
905 }
906
907 // If we make it this for it means we have no native instructions to handle
908 // this SELECT_CC, so we must lower it.
909 SDValue HWTrue, HWFalse;
910
911 if (CompareVT == MVT::f32) {
912 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
913 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
914 } else if (CompareVT == MVT::i32) {
915 HWTrue = DAG.getConstant(-1, CompareVT);
916 HWFalse = DAG.getConstant(0, CompareVT);
917 }
918 else {
919 assert(!"Unhandled value type in LowerSELECT_CC");
920 }
921
922 // Lower this unsupported SELECT_CC into a combination of two supported
923 // SELECT_CC operations.
924 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
925
926 return DAG.getNode(ISD::SELECT_CC, DL, VT,
927 Cond, HWFalse,
928 True, False,
929 DAG.getCondCode(ISD::SETNE));
930}
931
932SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
933 return DAG.getNode(ISD::SELECT_CC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000934 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000935 Op.getValueType(),
936 Op.getOperand(0),
937 DAG.getConstant(0, MVT::i32),
938 Op.getOperand(1),
939 Op.getOperand(2),
940 DAG.getCondCode(ISD::SETNE));
941}
942
/// LLVM generates byte-addressed pointers.  For indirect addressing, we need to
/// convert these pointers to a register index.  Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // With a stack width of 1/2/4 sub-registers, each register row covers
  // 4/8/16 bytes, so the byte address is converted to a row index by a
  // right shift of 2/3/4 respectively.
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}
968
/// Compute where element \p ElemIdx of a value spread across \p StackWidth
/// sub-registers lives: \p Channel receives the sub-register (x/y/z/w) and
/// \p PtrIncr the pointer increment RELATIVE TO THE PREVIOUS ELEMENT —
/// callers iterate elements in order and add PtrIncr cumulatively, which is
/// why e.g. StackWidth==2 only reports an increment at ElemIdx == 2.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per row: every element after the first starts a new row.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per row: elements 0,1 share row 0; 2,3 share row 1.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per row: all elements fit in a single row.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
997
Tom Stellard75aadc22012-12-11 21:25:42 +0000998SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000999 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001000 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1001 SDValue Chain = Op.getOperand(0);
1002 SDValue Value = Op.getOperand(1);
1003 SDValue Ptr = Op.getOperand(2);
1004
Tom Stellard2ffc3302013-08-26 15:05:44 +00001005 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001006 if (Result.getNode()) {
1007 return Result;
1008 }
1009
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001010 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1011 if (StoreNode->isTruncatingStore()) {
1012 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001013 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001014 EVT MemVT = StoreNode->getMemoryVT();
1015 SDValue MaskConstant;
1016 if (MemVT == MVT::i8) {
1017 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1018 } else {
1019 assert(MemVT == MVT::i16);
1020 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1021 }
1022 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1023 DAG.getConstant(2, MVT::i32));
1024 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1025 DAG.getConstant(0x00000003, VT));
1026 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1027 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1028 DAG.getConstant(3, VT));
1029 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1030 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1031 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1032 // vector instead.
1033 SDValue Src[4] = {
1034 ShiftedValue,
1035 DAG.getConstant(0, MVT::i32),
1036 DAG.getConstant(0, MVT::i32),
1037 Mask
1038 };
1039 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1040 SDValue Args[3] = { Chain, Input, DWordAddr };
1041 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1042 Op->getVTList(), Args, 3, MemVT,
1043 StoreNode->getMemOperand());
1044 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1045 Value.getValueType().bitsGE(MVT::i32)) {
1046 // Convert pointer from byte address to dword address.
1047 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1048 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1049 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001050
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001051 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1052 assert(!"Truncated and indexed stores not supported yet");
1053 } else {
1054 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1055 }
1056 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001057 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001058 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001059
1060 EVT ValueVT = Value.getValueType();
1061
1062 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1063 return SDValue();
1064 }
1065
1066 // Lowering for indirect addressing
1067
1068 const MachineFunction &MF = DAG.getMachineFunction();
1069 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1070 getTargetMachine().getFrameLowering());
1071 unsigned StackWidth = TFL->getStackWidth(MF);
1072
1073 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1074
1075 if (ValueVT.isVector()) {
1076 unsigned NumElemVT = ValueVT.getVectorNumElements();
1077 EVT ElemVT = ValueVT.getVectorElementType();
1078 SDValue Stores[4];
1079
1080 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1081 "vector width in load");
1082
1083 for (unsigned i = 0; i < NumElemVT; ++i) {
1084 unsigned Channel, PtrIncr;
1085 getStackAddress(StackWidth, i, Channel, PtrIncr);
1086 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1087 DAG.getConstant(PtrIncr, MVT::i32));
1088 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1089 Value, DAG.getConstant(i, MVT::i32));
1090
1091 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1092 Chain, Elem, Ptr,
1093 DAG.getTargetConstant(Channel, MVT::i32));
1094 }
1095 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1096 } else {
1097 if (ValueVT == MVT::i8) {
1098 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1099 }
1100 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001101 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001102 }
1103
1104 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001105}
1106
/// Map a constant-buffer address space to the base constant-register address:
/// 512 + (kc_bank << 12), i.e. 512 + 4096 * kc_bank.  Returns -1 when
/// \p AddressSpace is not one of the CONSTANT_BUFFER_* spaces.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1147
/// Custom lowering for ISD::LOAD.  Handles, in order:
///  1. loads from a constant buffer (folded to CONST_ADDRESS nodes),
///  2. manual expansion of SEXT loads outside CONSTANT_BUFFER_0,
///  3. private-address loads via REGISTER_LOAD for indirect addressing.
/// Anything else returns SDValue() and is left to the generic legalizer.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    // The pointer is foldable when it is a constant expression/value or a
    // constant SDNode; otherwise the address is only known at runtime.
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4f32 load
      // indexed by the dword address and the constant-buffer id.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer. However SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as a zero/any-extending load followed by shl + sra to
    // sign-extend the value in the register.
    SDValue ShiftAmount =
          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress yields the channel and a
    // cumulative pointer increment for each successive element.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001274
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
/// Each formal argument is materialized as a load from constant buffer 0 at
/// the location-memory offset assigned by the calling-convention analysis.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes, so user arguments start at offset 36.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)), false,
                           false, false, 4); // 4 is the preferred alignment for
                                             // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1309
Matt Arsenault758659232013-05-18 00:21:46 +00001310EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001311 if (!VT.isVector()) return MVT::i32;
1312 return VT.changeVectorElementTypeToInteger();
1313}
1314
/// Rewrite a 4-element BUILD_VECTOR so trivially swizzlable lanes become
/// undef: constant 0.0 / 1.0 lanes are recorded in \p RemapSwizzle as the
/// hardware SEL_0 / SEL_1 selectors, and a lane equal to an earlier lane is
/// redirected to that earlier lane.  \p RemapSwizzle maps old lane index ->
/// new selector and must be empty on entry; lanes left untouched get no map
/// entry.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Undef lanes (including the ones just created above) need no dedup.
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Reuse an identical earlier lane via the swizzle instead of keeping a
    // duplicate.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1352
Benjamin Kramer193960c2013-06-11 13:32:25 +00001353static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1354 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001355 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1356 assert(RemapSwizzle.empty());
1357 SDValue NewBldVec[4] = {
1358 VectorEntry.getOperand(0),
1359 VectorEntry.getOperand(1),
1360 VectorEntry.getOperand(2),
1361 VectorEntry.getOperand(3)
1362 };
1363 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001364 for (unsigned i = 0; i < 4; i++)
1365 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001366
1367 for (unsigned i = 0; i < 4; i++) {
1368 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1369 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1370 ->getZExtValue();
1371 if (!isUnmovable[Idx]) {
1372 // Swap i and Idx
1373 std::swap(NewBldVec[Idx], NewBldVec[i]);
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001374 std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001375 }
1376 isUnmovable[Idx] = true;
1377 }
1378 }
1379
1380 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1381 VectorEntry.getValueType(), NewBldVec, 4);
1382}
1383
1384
1385SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1386SDValue Swz[4], SelectionDAG &DAG) const {
1387 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1388 // Old -> New swizzle values
1389 DenseMap<unsigned, unsigned> SwizzleRemap;
1390
1391 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1392 for (unsigned i = 0; i < 4; i++) {
1393 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1394 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1395 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1396 }
1397
1398 SwizzleRemap.clear();
1399 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1400 for (unsigned i = 0; i < 4; i++) {
1401 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1402 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1403 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1404 }
1405
1406 return BuildVector;
1407}
1408
1409
Tom Stellard75aadc22012-12-11 21:25:42 +00001410//===----------------------------------------------------------------------===//
1411// Custom DAG Optimizations
1412//===----------------------------------------------------------------------===//
1413
1414SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1415 DAGCombinerInfo &DCI) const {
1416 SelectionDAG &DAG = DCI.DAG;
1417
1418 switch (N->getOpcode()) {
1419 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1420 case ISD::FP_ROUND: {
1421 SDValue Arg = N->getOperand(0);
1422 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001423 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001424 Arg.getOperand(0));
1425 }
1426 break;
1427 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001428
1429 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1430 // (i32 select_cc f32, f32, -1, 0 cc)
1431 //
1432 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1433 // this to one of the SET*_DX10 instructions.
1434 case ISD::FP_TO_SINT: {
1435 SDValue FNeg = N->getOperand(0);
1436 if (FNeg.getOpcode() != ISD::FNEG) {
1437 return SDValue();
1438 }
1439 SDValue SelectCC = FNeg.getOperand(0);
1440 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1441 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1442 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1443 !isHWTrueValue(SelectCC.getOperand(2)) ||
1444 !isHWFalseValue(SelectCC.getOperand(3))) {
1445 return SDValue();
1446 }
1447
Andrew Trickef9de2a2013-05-25 02:42:55 +00001448 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001449 SelectCC.getOperand(0), // LHS
1450 SelectCC.getOperand(1), // RHS
1451 DAG.getConstant(-1, MVT::i32), // True
1452 DAG.getConstant(0, MVT::i32), // Flase
1453 SelectCC.getOperand(4)); // CC
1454
1455 break;
1456 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001457
1458 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1459 // => build_vector elt0, …, NewEltIdx, …, eltN
1460 case ISD::INSERT_VECTOR_ELT: {
1461 SDValue InVec = N->getOperand(0);
1462 SDValue InVal = N->getOperand(1);
1463 SDValue EltNo = N->getOperand(2);
1464 SDLoc dl(N);
1465
1466 // If the inserted element is an UNDEF, just use the input vector.
1467 if (InVal.getOpcode() == ISD::UNDEF)
1468 return InVec;
1469
1470 EVT VT = InVec.getValueType();
1471
1472 // If we can't generate a legal BUILD_VECTOR, exit
1473 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1474 return SDValue();
1475
1476 // Check that we know which element is being inserted
1477 if (!isa<ConstantSDNode>(EltNo))
1478 return SDValue();
1479 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1480
1481 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1482 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1483 // vector elements.
1484 SmallVector<SDValue, 8> Ops;
1485 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1486 Ops.append(InVec.getNode()->op_begin(),
1487 InVec.getNode()->op_end());
1488 } else if (InVec.getOpcode() == ISD::UNDEF) {
1489 unsigned NElts = VT.getVectorNumElements();
1490 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1491 } else {
1492 return SDValue();
1493 }
1494
1495 // Insert the element
1496 if (Elt < Ops.size()) {
1497 // All the operands of BUILD_VECTOR must have the same type;
1498 // we enforce that here.
1499 EVT OpVT = Ops[0].getValueType();
1500 if (InVal.getValueType() != OpVT)
1501 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1502 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1503 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1504 Ops[Elt] = InVal;
1505 }
1506
1507 // Return the new vector
1508 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1509 VT, &Ops[0], Ops.size());
1510 }
1511
Tom Stellard365366f2013-01-23 02:09:06 +00001512 // Extract_vec (Build_vector) generated by custom lowering
1513 // also needs to be customly combined
1514 case ISD::EXTRACT_VECTOR_ELT: {
1515 SDValue Arg = N->getOperand(0);
1516 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1517 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1518 unsigned Element = Const->getZExtValue();
1519 return Arg->getOperand(Element);
1520 }
1521 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001522 if (Arg.getOpcode() == ISD::BITCAST &&
1523 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1524 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1525 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001526 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001527 Arg->getOperand(0).getOperand(Element));
1528 }
1529 }
Tom Stellard365366f2013-01-23 02:09:06 +00001530 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001531
1532 case ISD::SELECT_CC: {
1533 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1534 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001535 //
1536 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1537 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001538 SDValue LHS = N->getOperand(0);
1539 if (LHS.getOpcode() != ISD::SELECT_CC) {
1540 return SDValue();
1541 }
1542
1543 SDValue RHS = N->getOperand(1);
1544 SDValue True = N->getOperand(2);
1545 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001546 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001547
1548 if (LHS.getOperand(2).getNode() != True.getNode() ||
1549 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001550 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001551 return SDValue();
1552 }
1553
Tom Stellard5e524892013-03-08 15:37:11 +00001554 switch (NCC) {
1555 default: return SDValue();
1556 case ISD::SETNE: return LHS;
1557 case ISD::SETEQ: {
1558 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1559 LHSCC = ISD::getSetCCInverse(LHSCC,
1560 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001561 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001562 LHS.getOperand(0),
1563 LHS.getOperand(1),
1564 LHS.getOperand(2),
1565 LHS.getOperand(3),
1566 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001567 }
Tom Stellard5e524892013-03-08 15:37:11 +00001568 }
1569 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001570
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001571 case AMDGPUISD::EXPORT: {
1572 SDValue Arg = N->getOperand(1);
1573 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1574 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001575
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001576 SDValue NewArgs[8] = {
1577 N->getOperand(0), // Chain
1578 SDValue(),
1579 N->getOperand(2), // ArrayBase
1580 N->getOperand(3), // Type
1581 N->getOperand(4), // SWZ_X
1582 N->getOperand(5), // SWZ_Y
1583 N->getOperand(6), // SWZ_Z
1584 N->getOperand(7) // SWZ_W
1585 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001586 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001587 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001588 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001589 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001590 case AMDGPUISD::TEXTURE_FETCH: {
1591 SDValue Arg = N->getOperand(1);
1592 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1593 break;
1594
1595 SDValue NewArgs[19] = {
1596 N->getOperand(0),
1597 N->getOperand(1),
1598 N->getOperand(2),
1599 N->getOperand(3),
1600 N->getOperand(4),
1601 N->getOperand(5),
1602 N->getOperand(6),
1603 N->getOperand(7),
1604 N->getOperand(8),
1605 N->getOperand(9),
1606 N->getOperand(10),
1607 N->getOperand(11),
1608 N->getOperand(12),
1609 N->getOperand(13),
1610 N->getOperand(14),
1611 N->getOperand(15),
1612 N->getOperand(16),
1613 N->getOperand(17),
1614 N->getOperand(18),
1615 };
1616 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1617 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1618 NewArgs, 19);
1619 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001620 }
1621 return SDValue();
1622}