blob: 5db793737e0885dfa9d51e40e18d94bfae3b218c [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000041 setOperationAction(ISD::FCOS, MVT::f32, Custom);
42 setOperationAction(ISD::FSIN, MVT::f32, Custom);
43
Tom Stellard75aadc22012-12-11 21:25:42 +000044 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000045 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000046
Tom Stellard492ebea2013-03-08 15:37:07 +000047 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
48 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000049
50 setOperationAction(ISD::FSUB, MVT::f32, Expand);
51
52 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
53 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
54 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000055
Tom Stellard75aadc22012-12-11 21:25:42 +000056 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
57 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
58
Tom Stellarde8f9f282013-03-08 15:37:05 +000059 setOperationAction(ISD::SETCC, MVT::i32, Expand);
60 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000061 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
62
Tom Stellard53f2f902013-09-05 18:38:03 +000063 setOperationAction(ISD::SELECT, MVT::i32, Expand);
64 setOperationAction(ISD::SELECT, MVT::f32, Expand);
65 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
66 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
67 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
68 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000069
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000070 // Legalize loads and stores to the private address space.
71 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000072 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000073 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000074 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
75 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
76 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
77 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000078 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000079 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000080 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000081 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +000082 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
83 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000084
Tom Stellard365366f2013-01-23 02:09:06 +000085 setOperationAction(ISD::LOAD, MVT::i32, Custom);
86 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000087 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
88
Tom Stellard75aadc22012-12-11 21:25:42 +000089 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +000090 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +000091 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +000092 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +000093 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +000094
Michel Danzer49812b52013-07-10 16:37:07 +000095 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
96
Tom Stellardb852af52013-03-08 15:37:03 +000097 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +000098 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +000099 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000100}
101
/// Expand pseudo-instructions that were selected with the usesCustomInserter
/// flag into real machine instructions, inserting them before \p MI in \p BB.
/// Unless a case returns early, the original pseudo is erased at the end and
/// the (possibly unchanged) block is returned.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // LDS instructions return their result through the OQAP register.  If the
    // pseudo's dst vreg is actually read, emit the LDS op writing OQAP and a
    // MOV copying OQAP into the dst; otherwise switch to the no-return LDS
    // variant and drop the dead dst entirely.
    if (TII->isLDSInstr(MI->getOpcode()) &&
        TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
                        AMDGPU::OQAP);
        TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                     MI->getOperand(0).getReg(),
                                     AMDGPU::OQAP);
      } else {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                        TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      }
      // Copy every operand after the dst onto the new instruction.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an LDS pseudo; let the superclass handle it.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG lower to a plain MOV carrying the matching R600
  // source/dest modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Instead of emitting anything, set the MASK flag on the instruction that
    // defines the masked register.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the f32 immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // MOV from the constant file; the constant slot goes in src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is a RETURN, fold the end-of-program bit into
    // this RAT write.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with derivatives: load the H and V gradients into two
    // temporary 128-bit regs, then issue the gradient sample which reads them
    // as implicit uses.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle / coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD but samples with the shadow-compare opcode.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0));
      break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: set PREDICATE_BIT with a PRED_X compare-vs-zero,
    // then emit a predicated jump that kills the bit.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer flavor of the conditional branch above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
482
483//===----------------------------------------------------------------------===//
484// Custom DAG Lowering Operations
485//===----------------------------------------------------------------------===//
486
/// Custom-lower the DAG nodes registered as Custom in the constructor.
/// Returns SDValue() for any opcode/intrinsic this function does not handle,
/// which tells legalization to keep the node as-is.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Record the output register as a live-out (consumed by the RETURN
      // expansion) and copy the value into it.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader input: mark the T-register live-in and read it at entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative i/j bank: constant interpolation, load the parameter
        // vector and extract the channel for this slot.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      // Perspective interpolation: the I/J barycentrics live in a live-in
      // register pair selected by ijb.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // XY vs ZW pair is chosen by the channel; result index by slot parity.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      // Map each texture intrinsic to its TEXTURE_FETCH opcode selector.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // Operand layout: opcode, coord, src swizzle (0-3), resource/sampler
      // operands, dst swizzle (0-3), then the remaining intrinsic operands.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Interleave the lanes of both vectors: (a0,b0,a1,b1,a2,b2,a3,b3).
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Grid/group-size queries read implicit kernel parameters by dword slot.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Work-group ids live in T1, work-item ids in T0.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
719
/// Replace the results of illegal-type nodes with legally-typed equivalents.
/// Only FP_TO_UINT, LOAD and STORE are handled; anything else is left for
/// default legalization (the empty default: return).
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    // Push both the value and the chain result.
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}
742
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000743SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
744 // On hw >= R700, COS/SIN input must be between -1. and 1.
745 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
746 EVT VT = Op.getValueType();
747 SDValue Arg = Op.getOperand(0);
748 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
749 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
750 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
751 DAG.getConstantFP(0.15915494309, MVT::f32)),
752 DAG.getConstantFP(0.5, MVT::f32)));
753 unsigned TrigNode;
754 switch (Op.getOpcode()) {
755 case ISD::FCOS:
756 TrigNode = AMDGPUISD::COS_HW;
757 break;
758 case ISD::FSIN:
759 TrigNode = AMDGPUISD::SIN_HW;
760 break;
761 default:
762 llvm_unreachable("Wrong trig opcode");
763 }
764 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
765 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
766 DAG.getConstantFP(-0.5, MVT::f32)));
767 if (Gen >= AMDGPUSubtarget::R700)
768 return TrigVal;
769 // On R600 hw, COS/SIN input must be between -Pi and Pi.
770 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
771 DAG.getConstantFP(3.14159265359, MVT::f32));
772}
773
Tom Stellard75aadc22012-12-11 21:25:42 +0000774SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
775 return DAG.getNode(
776 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000777 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000778 MVT::i1,
779 Op, DAG.getConstantFP(0.0f, MVT::f32),
780 DAG.getCondCode(ISD::SETNE)
781 );
782}
783
Tom Stellard75aadc22012-12-11 21:25:42 +0000784SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000785 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000786 unsigned DwordOffset) const {
787 unsigned ByteOffset = DwordOffset * 4;
788 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000789 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000790
791 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
792 assert(isInt<16>(ByteOffset));
793
794 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
795 DAG.getConstant(ByteOffset, MVT::i32), // PTR
796 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
797 false, false, false, 0);
798}
799
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000800SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
801
802 MachineFunction &MF = DAG.getMachineFunction();
803 const AMDGPUFrameLowering *TFL =
804 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
805
806 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
807 assert(FIN);
808
809 unsigned FrameIndex = FIN->getIndex();
810 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
811 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
812}
813
Tom Stellard75aadc22012-12-11 21:25:42 +0000814bool R600TargetLowering::isZero(SDValue Op) const {
815 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
816 return Cst->isNullValue();
817 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
818 return CstFP->isZero();
819 } else {
820 return false;
821 }
822}
823
824SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000825 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000826 EVT VT = Op.getValueType();
827
828 SDValue LHS = Op.getOperand(0);
829 SDValue RHS = Op.getOperand(1);
830 SDValue True = Op.getOperand(2);
831 SDValue False = Op.getOperand(3);
832 SDValue CC = Op.getOperand(4);
833 SDValue Temp;
834
835 // LHS and RHS are guaranteed to be the same value type
836 EVT CompareVT = LHS.getValueType();
837
838 // Check if we can lower this to a native operation.
839
Tom Stellard2add82d2013-03-08 15:37:09 +0000840 // Try to lower to a SET* instruction:
841 //
842 // SET* can match the following patterns:
843 //
844 // select_cc f32, f32, -1, 0, cc_any
845 // select_cc f32, f32, 1.0f, 0.0f, cc_any
846 // select_cc i32, i32, -1, 0, cc_any
847 //
848
849 // Move hardware True/False values to the correct operand.
850 if (isHWTrueValue(False) && isHWFalseValue(True)) {
851 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
852 std::swap(False, True);
853 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
854 }
855
856 if (isHWTrueValue(True) && isHWFalseValue(False) &&
857 (CompareVT == VT || VT == MVT::i32)) {
858 // This can be matched by a SET* instruction.
859 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
860 }
861
Tom Stellard75aadc22012-12-11 21:25:42 +0000862 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000863 //
864 // CND* can match the following patterns:
865 //
866 // select_cc f32, 0.0, f32, f32, cc_any
867 // select_cc f32, 0.0, i32, i32, cc_any
868 // select_cc i32, 0, f32, f32, cc_any
869 // select_cc i32, 0, i32, i32, cc_any
870 //
Tom Stellard75aadc22012-12-11 21:25:42 +0000871 if (isZero(LHS) || isZero(RHS)) {
872 SDValue Cond = (isZero(LHS) ? RHS : LHS);
873 SDValue Zero = (isZero(LHS) ? LHS : RHS);
874 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
875 if (CompareVT != VT) {
876 // Bitcast True / False to the correct types. This will end up being
877 // a nop, but it allows us to define only a single pattern in the
878 // .TD files for each CND* instruction rather than having to have
879 // one pattern for integer True/False and one for fp True/False
880 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
881 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
882 }
883 if (isZero(LHS)) {
884 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
885 }
886
887 switch (CCOpcode) {
888 case ISD::SETONE:
889 case ISD::SETUNE:
890 case ISD::SETNE:
891 case ISD::SETULE:
892 case ISD::SETULT:
893 case ISD::SETOLE:
894 case ISD::SETOLT:
895 case ISD::SETLE:
896 case ISD::SETLT:
897 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
898 Temp = True;
899 True = False;
900 False = Temp;
901 break;
902 default:
903 break;
904 }
905 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
906 Cond, Zero,
907 True, False,
908 DAG.getCondCode(CCOpcode));
909 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
910 }
911
Tom Stellard75aadc22012-12-11 21:25:42 +0000912
913 // Possible Min/Max pattern
914 SDValue MinMax = LowerMinMax(Op, DAG);
915 if (MinMax.getNode()) {
916 return MinMax;
917 }
918
919 // If we make it this for it means we have no native instructions to handle
920 // this SELECT_CC, so we must lower it.
921 SDValue HWTrue, HWFalse;
922
923 if (CompareVT == MVT::f32) {
924 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
925 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
926 } else if (CompareVT == MVT::i32) {
927 HWTrue = DAG.getConstant(-1, CompareVT);
928 HWFalse = DAG.getConstant(0, CompareVT);
929 }
930 else {
931 assert(!"Unhandled value type in LowerSELECT_CC");
932 }
933
934 // Lower this unsupported SELECT_CC into a combination of two supported
935 // SELECT_CC operations.
936 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
937
938 return DAG.getNode(ISD::SELECT_CC, DL, VT,
939 Cond, HWFalse,
940 True, False,
941 DAG.getCondCode(ISD::SETNE));
942}
943
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // Shift amount: width 1 uses one dword per register slot (byte offset / 4),
  // width 2 uses two (/ 8), width 4 uses all four (/ 16).
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}
969
/// Compute the register channel and pointer increment for element
/// \p ElemIdx of a value spread across registers with \p StackWidth used
/// channels per register.
///
/// NOTE: \p PtrIncr is the increment relative to the PREVIOUS element's
/// pointer — callers iterate elements in order and accumulate it into the
/// pointer each step — not an absolute register offset from element 0.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per register: every element after the first advances to
    // the next register.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per register: advance once, when starting the second
    // pair (ElemIdx == 2).
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // All four channels fit in a single register: never advance.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
998
Tom Stellard75aadc22012-12-11 21:25:42 +0000999SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001000 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001001 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1002 SDValue Chain = Op.getOperand(0);
1003 SDValue Value = Op.getOperand(1);
1004 SDValue Ptr = Op.getOperand(2);
1005
Tom Stellard2ffc3302013-08-26 15:05:44 +00001006 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001007 if (Result.getNode()) {
1008 return Result;
1009 }
1010
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001011 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1012 if (StoreNode->isTruncatingStore()) {
1013 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001014 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001015 EVT MemVT = StoreNode->getMemoryVT();
1016 SDValue MaskConstant;
1017 if (MemVT == MVT::i8) {
1018 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1019 } else {
1020 assert(MemVT == MVT::i16);
1021 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1022 }
1023 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1024 DAG.getConstant(2, MVT::i32));
1025 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1026 DAG.getConstant(0x00000003, VT));
1027 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1028 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1029 DAG.getConstant(3, VT));
1030 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1031 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1032 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1033 // vector instead.
1034 SDValue Src[4] = {
1035 ShiftedValue,
1036 DAG.getConstant(0, MVT::i32),
1037 DAG.getConstant(0, MVT::i32),
1038 Mask
1039 };
1040 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1041 SDValue Args[3] = { Chain, Input, DWordAddr };
1042 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1043 Op->getVTList(), Args, 3, MemVT,
1044 StoreNode->getMemOperand());
1045 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1046 Value.getValueType().bitsGE(MVT::i32)) {
1047 // Convert pointer from byte address to dword address.
1048 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1049 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1050 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001051
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001052 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1053 assert(!"Truncated and indexed stores not supported yet");
1054 } else {
1055 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1056 }
1057 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001058 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001059 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001060
1061 EVT ValueVT = Value.getValueType();
1062
1063 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1064 return SDValue();
1065 }
1066
1067 // Lowering for indirect addressing
1068
1069 const MachineFunction &MF = DAG.getMachineFunction();
1070 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1071 getTargetMachine().getFrameLowering());
1072 unsigned StackWidth = TFL->getStackWidth(MF);
1073
1074 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1075
1076 if (ValueVT.isVector()) {
1077 unsigned NumElemVT = ValueVT.getVectorNumElements();
1078 EVT ElemVT = ValueVT.getVectorElementType();
1079 SDValue Stores[4];
1080
1081 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1082 "vector width in load");
1083
1084 for (unsigned i = 0; i < NumElemVT; ++i) {
1085 unsigned Channel, PtrIncr;
1086 getStackAddress(StackWidth, i, Channel, PtrIncr);
1087 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1088 DAG.getConstant(PtrIncr, MVT::i32));
1089 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1090 Value, DAG.getConstant(i, MVT::i32));
1091
1092 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1093 Chain, Elem, Ptr,
1094 DAG.getTargetConstant(Channel, MVT::i32));
1095 }
1096 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1097 } else {
1098 if (ValueVT == MVT::i8) {
1099 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1100 }
1101 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001102 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001103 }
1104
1105 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001106}
1107
Tom Stellard365366f2013-01-23 02:09:06 +00001108// return (512 + (kc_bank << 12)
1109static int
1110ConstantAddressBlock(unsigned AddressSpace) {
1111 switch (AddressSpace) {
1112 case AMDGPUAS::CONSTANT_BUFFER_0:
1113 return 512;
1114 case AMDGPUAS::CONSTANT_BUFFER_1:
1115 return 512 + 4096;
1116 case AMDGPUAS::CONSTANT_BUFFER_2:
1117 return 512 + 4096 * 2;
1118 case AMDGPUAS::CONSTANT_BUFFER_3:
1119 return 512 + 4096 * 3;
1120 case AMDGPUAS::CONSTANT_BUFFER_4:
1121 return 512 + 4096 * 4;
1122 case AMDGPUAS::CONSTANT_BUFFER_5:
1123 return 512 + 4096 * 5;
1124 case AMDGPUAS::CONSTANT_BUFFER_6:
1125 return 512 + 4096 * 6;
1126 case AMDGPUAS::CONSTANT_BUFFER_7:
1127 return 512 + 4096 * 7;
1128 case AMDGPUAS::CONSTANT_BUFFER_8:
1129 return 512 + 4096 * 8;
1130 case AMDGPUAS::CONSTANT_BUFFER_9:
1131 return 512 + 4096 * 9;
1132 case AMDGPUAS::CONSTANT_BUFFER_10:
1133 return 512 + 4096 * 10;
1134 case AMDGPUAS::CONSTANT_BUFFER_11:
1135 return 512 + 4096 * 11;
1136 case AMDGPUAS::CONSTANT_BUFFER_12:
1137 return 512 + 4096 * 12;
1138 case AMDGPUAS::CONSTANT_BUFFER_13:
1139 return 512 + 4096 * 13;
1140 case AMDGPUAS::CONSTANT_BUFFER_14:
1141 return 512 + 4096 * 14;
1142 case AMDGPUAS::CONSTANT_BUFFER_15:
1143 return 512 + 4096 * 15;
1144 default:
1145 return -1;
1146 }
1147}
1148
/// \brief Custom lowering for ISD::LOAD: splits local vector loads, folds
/// constant-buffer loads into CONST_ADDRESS nodes, manually expands SEXT
/// loads, and lowers private loads to REGISTER_LOAD nodes.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Vector loads from local memory are split into scalar loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4i32
      // CONST_ADDRESS load selected against the bank index.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer. However SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    // Sign-extend in registers: shift left, then arithmetic shift right.
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress yields the channel and
    // the relative pointer increment accumulated into Ptr.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused lanes with undef.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001283
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  // Materialize each argument as a load from constant buffer 0 at the
  // offset assigned by the calling convention.
  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                              MachinePointerInfo(UndefValue::get(PtrTy)), false,
                              false, false, 4); // 4 is the preferred alignment
                                                // for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1318
Matt Arsenault758659232013-05-18 00:21:46 +00001319EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001320 if (!VT.isVector()) return MVT::i32;
1321 return VT.changeVectorElementTypeToInteger();
1322}
1323
/// Fold constant 0.0 / 1.0 elements of \p VectorEntry into the hardware
/// SEL_0 / SEL_1 swizzle selectors, and fold duplicated elements into a
/// single channel. \p RemapSwizzle receives the resulting
/// old-channel -> new-selector mapping (4 = SEL_0, 5 = SEL_1).
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // If this element duplicates an earlier channel, point the swizzle at
    // the earlier channel and mark this slot undef.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1361
Benjamin Kramer193960c2013-06-11 13:32:25 +00001362static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1363 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001364 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1365 assert(RemapSwizzle.empty());
1366 SDValue NewBldVec[4] = {
1367 VectorEntry.getOperand(0),
1368 VectorEntry.getOperand(1),
1369 VectorEntry.getOperand(2),
1370 VectorEntry.getOperand(3)
1371 };
1372 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001373 for (unsigned i = 0; i < 4; i++)
1374 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001375
1376 for (unsigned i = 0; i < 4; i++) {
1377 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1378 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1379 ->getZExtValue();
1380 if (!isUnmovable[Idx]) {
1381 // Swap i and Idx
1382 std::swap(NewBldVec[Idx], NewBldVec[i]);
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001383 std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001384 }
1385 isUnmovable[Idx] = true;
1386 }
1387 }
1388
1389 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1390 VectorEntry.getValueType(), NewBldVec, 4);
1391}
1392
1393
1394SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1395SDValue Swz[4], SelectionDAG &DAG) const {
1396 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1397 // Old -> New swizzle values
1398 DenseMap<unsigned, unsigned> SwizzleRemap;
1399
1400 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1401 for (unsigned i = 0; i < 4; i++) {
1402 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1403 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1404 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1405 }
1406
1407 SwizzleRemap.clear();
1408 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1409 for (unsigned i = 0; i < 4; i++) {
1410 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1411 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1412 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1413 }
1414
1415 return BuildVector;
1416}
1417
1418
Tom Stellard75aadc22012-12-11 21:25:42 +00001419//===----------------------------------------------------------------------===//
1420// Custom DAG Optimizations
1421//===----------------------------------------------------------------------===//
1422
1423SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1424 DAGCombinerInfo &DCI) const {
1425 SelectionDAG &DAG = DCI.DAG;
1426
1427 switch (N->getOpcode()) {
1428 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1429 case ISD::FP_ROUND: {
1430 SDValue Arg = N->getOperand(0);
1431 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001432 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001433 Arg.getOperand(0));
1434 }
1435 break;
1436 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001437
1438 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1439 // (i32 select_cc f32, f32, -1, 0 cc)
1440 //
1441 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1442 // this to one of the SET*_DX10 instructions.
1443 case ISD::FP_TO_SINT: {
1444 SDValue FNeg = N->getOperand(0);
1445 if (FNeg.getOpcode() != ISD::FNEG) {
1446 return SDValue();
1447 }
1448 SDValue SelectCC = FNeg.getOperand(0);
1449 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1450 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1451 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1452 !isHWTrueValue(SelectCC.getOperand(2)) ||
1453 !isHWFalseValue(SelectCC.getOperand(3))) {
1454 return SDValue();
1455 }
1456
Andrew Trickef9de2a2013-05-25 02:42:55 +00001457 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001458 SelectCC.getOperand(0), // LHS
1459 SelectCC.getOperand(1), // RHS
1460 DAG.getConstant(-1, MVT::i32), // True
1461 DAG.getConstant(0, MVT::i32), // Flase
1462 SelectCC.getOperand(4)); // CC
1463
1464 break;
1465 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001466
1467 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1468 // => build_vector elt0, …, NewEltIdx, …, eltN
1469 case ISD::INSERT_VECTOR_ELT: {
1470 SDValue InVec = N->getOperand(0);
1471 SDValue InVal = N->getOperand(1);
1472 SDValue EltNo = N->getOperand(2);
1473 SDLoc dl(N);
1474
1475 // If the inserted element is an UNDEF, just use the input vector.
1476 if (InVal.getOpcode() == ISD::UNDEF)
1477 return InVec;
1478
1479 EVT VT = InVec.getValueType();
1480
1481 // If we can't generate a legal BUILD_VECTOR, exit
1482 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1483 return SDValue();
1484
1485 // Check that we know which element is being inserted
1486 if (!isa<ConstantSDNode>(EltNo))
1487 return SDValue();
1488 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1489
1490 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1491 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1492 // vector elements.
1493 SmallVector<SDValue, 8> Ops;
1494 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1495 Ops.append(InVec.getNode()->op_begin(),
1496 InVec.getNode()->op_end());
1497 } else if (InVec.getOpcode() == ISD::UNDEF) {
1498 unsigned NElts = VT.getVectorNumElements();
1499 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1500 } else {
1501 return SDValue();
1502 }
1503
1504 // Insert the element
1505 if (Elt < Ops.size()) {
1506 // All the operands of BUILD_VECTOR must have the same type;
1507 // we enforce that here.
1508 EVT OpVT = Ops[0].getValueType();
1509 if (InVal.getValueType() != OpVT)
1510 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1511 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1512 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1513 Ops[Elt] = InVal;
1514 }
1515
1516 // Return the new vector
1517 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1518 VT, &Ops[0], Ops.size());
1519 }
1520
Tom Stellard365366f2013-01-23 02:09:06 +00001521 // Extract_vec (Build_vector) generated by custom lowering
1522 // also needs to be customly combined
1523 case ISD::EXTRACT_VECTOR_ELT: {
1524 SDValue Arg = N->getOperand(0);
1525 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1526 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1527 unsigned Element = Const->getZExtValue();
1528 return Arg->getOperand(Element);
1529 }
1530 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001531 if (Arg.getOpcode() == ISD::BITCAST &&
1532 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1533 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1534 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001535 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001536 Arg->getOperand(0).getOperand(Element));
1537 }
1538 }
Tom Stellard365366f2013-01-23 02:09:06 +00001539 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001540
1541 case ISD::SELECT_CC: {
1542 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1543 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001544 //
1545 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1546 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001547 SDValue LHS = N->getOperand(0);
1548 if (LHS.getOpcode() != ISD::SELECT_CC) {
1549 return SDValue();
1550 }
1551
1552 SDValue RHS = N->getOperand(1);
1553 SDValue True = N->getOperand(2);
1554 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001555 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001556
1557 if (LHS.getOperand(2).getNode() != True.getNode() ||
1558 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001559 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001560 return SDValue();
1561 }
1562
Tom Stellard5e524892013-03-08 15:37:11 +00001563 switch (NCC) {
1564 default: return SDValue();
1565 case ISD::SETNE: return LHS;
1566 case ISD::SETEQ: {
1567 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1568 LHSCC = ISD::getSetCCInverse(LHSCC,
1569 LHS.getOperand(0).getValueType().isInteger());
Andrew Trickef9de2a2013-05-25 02:42:55 +00001570 return DAG.getSelectCC(SDLoc(N),
Tom Stellard5e524892013-03-08 15:37:11 +00001571 LHS.getOperand(0),
1572 LHS.getOperand(1),
1573 LHS.getOperand(2),
1574 LHS.getOperand(3),
1575 LHSCC);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001576 }
Tom Stellard5e524892013-03-08 15:37:11 +00001577 }
1578 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001579
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001580 case AMDGPUISD::EXPORT: {
1581 SDValue Arg = N->getOperand(1);
1582 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1583 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001584
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001585 SDValue NewArgs[8] = {
1586 N->getOperand(0), // Chain
1587 SDValue(),
1588 N->getOperand(2), // ArrayBase
1589 N->getOperand(3), // Type
1590 N->getOperand(4), // SWZ_X
1591 N->getOperand(5), // SWZ_Y
1592 N->getOperand(6), // SWZ_Z
1593 N->getOperand(7) // SWZ_W
1594 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001595 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001596 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001597 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001598 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001599 case AMDGPUISD::TEXTURE_FETCH: {
1600 SDValue Arg = N->getOperand(1);
1601 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1602 break;
1603
1604 SDValue NewArgs[19] = {
1605 N->getOperand(0),
1606 N->getOperand(1),
1607 N->getOperand(2),
1608 N->getOperand(3),
1609 N->getOperand(4),
1610 N->getOperand(5),
1611 N->getOperand(6),
1612 N->getOperand(7),
1613 N->getOperand(8),
1614 N->getOperand(9),
1615 N->getOperand(10),
1616 N->getOperand(11),
1617 N->getOperand(12),
1618 N->getOperand(13),
1619 N->getOperand(14),
1620 N->getOperand(15),
1621 N->getOperand(16),
1622 N->getOperand(17),
1623 N->getOperand(18),
1624 };
1625 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1626 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1627 NewArgs, 19);
1628 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001629 }
1630 return SDValue();
1631}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001632
1633static bool
1634FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001635 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001636 const R600InstrInfo *TII =
1637 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1638 if (!Src.isMachineOpcode())
1639 return false;
1640 switch (Src.getMachineOpcode()) {
1641 case AMDGPU::FNEG_R600:
1642 if (!Neg.getNode())
1643 return false;
1644 Src = Src.getOperand(0);
1645 Neg = DAG.getTargetConstant(1, MVT::i32);
1646 return true;
1647 case AMDGPU::FABS_R600:
1648 if (!Abs.getNode())
1649 return false;
1650 Src = Src.getOperand(0);
1651 Abs = DAG.getTargetConstant(1, MVT::i32);
1652 return true;
1653 case AMDGPU::CONST_COPY: {
1654 unsigned Opcode = ParentNode->getMachineOpcode();
1655 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1656
1657 if (!Sel.getNode())
1658 return false;
1659
1660 SDValue CstOffset = Src.getOperand(0);
1661 if (ParentNode->getValueType(0).isVector())
1662 return false;
1663
1664 // Gather constants values
1665 int SrcIndices[] = {
1666 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1667 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1668 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1669 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1670 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1671 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1672 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1673 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1674 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1675 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1676 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1677 };
1678 std::vector<unsigned> Consts;
1679 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1680 int OtherSrcIdx = SrcIndices[i];
1681 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1682 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1683 continue;
1684 if (HasDst) {
1685 OtherSrcIdx--;
1686 OtherSelIdx--;
1687 }
1688 if (RegisterSDNode *Reg =
1689 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1690 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1691 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1692 ParentNode->getOperand(OtherSelIdx));
1693 Consts.push_back(Cst->getZExtValue());
1694 }
1695 }
1696 }
1697
1698 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1699 Consts.push_back(Cst->getZExtValue());
1700 if (!TII->fitsConstReadLimitations(Consts)) {
1701 return false;
1702 }
1703
1704 Sel = CstOffset;
1705 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1706 return true;
1707 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001708 case AMDGPU::MOV_IMM_I32:
1709 case AMDGPU::MOV_IMM_F32: {
1710 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1711 uint64_t ImmValue = 0;
1712
1713
1714 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1715 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1716 float FloatValue = FPC->getValueAPF().convertToFloat();
1717 if (FloatValue == 0.0) {
1718 ImmReg = AMDGPU::ZERO;
1719 } else if (FloatValue == 0.5) {
1720 ImmReg = AMDGPU::HALF;
1721 } else if (FloatValue == 1.0) {
1722 ImmReg = AMDGPU::ONE;
1723 } else {
1724 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1725 }
1726 } else {
1727 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1728 uint64_t Value = C->getZExtValue();
1729 if (Value == 0) {
1730 ImmReg = AMDGPU::ZERO;
1731 } else if (Value == 1) {
1732 ImmReg = AMDGPU::ONE_INT;
1733 } else {
1734 ImmValue = Value;
1735 }
1736 }
1737
1738 // Check that we aren't already using an immediate.
1739 // XXX: It's possible for an instruction to have more than one
1740 // immediate operand, but this is not supported yet.
1741 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1742 if (!Imm.getNode())
1743 return false;
1744 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1745 assert(C);
1746 if (C->getZExtValue())
1747 return false;
1748 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1749 }
1750 Src = DAG.getRegister(ImmReg, MVT::i32);
1751 return true;
1752 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001753 default:
1754 return false;
1755 }
1756}
1757
1758
/// \brief Fold the instructions after selecting them
///
/// Walks the source operands of the selected machine node \p Node and asks
/// FoldOperand to absorb neg/abs modifiers, constant-buffer reads and
/// immediates directly into the instruction's operand slots.
///
/// \param Node the machine node produced by instruction selection.
/// \param DAG  the selection DAG (used to build the replacement node).
/// \returns a new machine node with the folded operands, or \p Node
///          unchanged when nothing could be folded.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp;  // Null SDValue: stands in for modifier slots the
                   // instruction does not have (FoldOperand rejects those).

  // Mutable copy of the operand list; FoldOperand edits entries in place.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
              I != E; ++I)
          Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight per-channel sources (src0/src1 x XYZW), each with its
    // own neg and abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // Indices from getOperandIdx count the dst operand; the SDNode
      // operand list does not, hence the "- 1" / decrement adjustments.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot here, so pass FakeOp for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // Operand 0 is not a foldable source; the sources appear at every
    // second operand after it (presumably (value, subreg-index) pairs —
    // matches the REG_SEQUENCE operand layout; confirm against docs).
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else {
    // Generic ALU instruction: up to three sources with neg modifiers and
    // (for src0/src1 only) abs modifiers.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1  // src2 has no abs modifier on R600 ALU instructions
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Same dst-operand index adjustment as above.
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}