//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +000093
94 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
95 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +000096 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
98 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
99 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000100 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
101 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
102
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000103 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000104 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000105 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000106 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000107 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
108 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000109
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setOperationAction(ISD::LOAD, MVT::i32, Custom);
111 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000112 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
113
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000115 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000116 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000117 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000118 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119
Michel Danzer49812b52013-07-10 16:37:07 +0000120 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
121
Tom Stellardb852af52013-03-08 15:37:03 +0000122 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000123 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000124 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000125}
126
/// Expand pseudo instructions that were tagged with "usesCustomInserter".
///
/// For most opcodes this emits one or more real machine instructions in front
/// of the pseudo \p MI and then erases \p MI at the bottom of the function.
/// A few paths return early WITHOUT erasing: an LDS_*_RET whose result is
/// actually used, an export that is neither last-of-its-type nor end-of-
/// program, and RETURN (which is only annotated, not replaced).
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // Result is live: keep the _RET form as-is (early return skips the
      // eraseFromParent() at the bottom).
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      // Copy all operands except the dead destination (operand 0).
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      // Not an LDS pseudo we know: defer to the common AMDGPU inserter.
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // Lower to a plain MOV carrying the clamp modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // Lower to a MOV with the source-absolute-value flag set.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // Lower to a MOV with the source-negate flag set.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the defining instruction of the masked register with the
    // write-mask flag; MASK_WRITE itself emits nothing.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // MOV from the constant file: select the constant via src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // EOP is set when the very next instruction is the function's RETURN.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-supplied derivatives: first load the H and V
    // gradients into two temporaries, then issue the gradient sample which
    // reads them as implicit uses.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Default source swizzle (X,Y,Z,W) and per-coordinate type flags;
    // adjusted below per texture target.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD but emits the shadow-compare variant of the sample.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers directly to JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Set the predicate bit from an FP zero-compare, then emit a
    // predicated jump that kills the predicate.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 form but uses the integer zero-compare opcode.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction value differs per opcode: 84 for EG, 40 for R600.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // Common exit: the pseudo has been replaced, remove it.
  MI->eraseFromParent();
  return BB;
}
503
504//===----------------------------------------------------------------------===//
505// Custom DAG Lowering Operations
506//===----------------------------------------------------------------------===//
507
/// Custom-lower the DAG nodes this target registered as Custom in the
/// constructor (trig, SELECT_CC, LOAD/STORE, GlobalAddress, and the R600
/// intrinsics).  Unhandled opcodes/intrinsics are forwarded to
/// AMDGPUTargetLowering::LowerOperation; returning the default SDValue()
/// at the bottom means "no replacement" for an intrinsic we ignored.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value into the physical output register and record it as a
      // live-out so RETURN keeps it alive (see EmitInstrWithCustomInserter).
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Read a shader input: mark the physical register live-in and copy
      // from it at the function entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      // Negative ijb: no barycentric register pair, load the interpolated
      // vector directly and extract the requested channel.
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      // Otherwise read the I/J barycentric pair from consecutive live-in
      // registers 2*ijb and 2*ijb+1.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // Slots 0/1 of a vec4 come from the XY pair, slots 2/3 from ZW.
      // NOTE: operand order (J before I) matches the instruction definition.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      // Interpolate a pair of channels; I/J come in as SDValue operands.
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      // Pack the two interpolated results into a v2f32.
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      // Map the intrinsic onto a TEXTURE_FETCH node; TextureOp encodes
      // which texture operation to perform.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // 4-component dot product: interleave the scalar elements of the two
      // vec4 operands as (a0,b0,a1,b1,a2,b2,a3,b3) for the DOT4 node.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Dispatch-dimension queries are read from the implicit-parameter
    // buffer at fixed dword offsets 0..8.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup ids live in T1.{X,Y,Z}, workitem ids in T0.{X,Y,Z}.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
760
761void R600TargetLowering::ReplaceNodeResults(SDNode *N,
762 SmallVectorImpl<SDValue> &Results,
763 SelectionDAG &DAG) const {
764 switch (N->getOpcode()) {
765 default: return;
766 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000767 return;
768 case ISD::LOAD: {
769 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
770 Results.push_back(SDValue(Node, 0));
771 Results.push_back(SDValue(Node, 1));
772 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
773 // function
774 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
775 return;
776 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000777 case ISD::STORE:
778 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
779 Results.push_back(SDValue(Node, 0));
780 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 }
782}
783
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000784SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
785 // On hw >= R700, COS/SIN input must be between -1. and 1.
786 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
787 EVT VT = Op.getValueType();
788 SDValue Arg = Op.getOperand(0);
789 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
790 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
791 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
792 DAG.getConstantFP(0.15915494309, MVT::f32)),
793 DAG.getConstantFP(0.5, MVT::f32)));
794 unsigned TrigNode;
795 switch (Op.getOpcode()) {
796 case ISD::FCOS:
797 TrigNode = AMDGPUISD::COS_HW;
798 break;
799 case ISD::FSIN:
800 TrigNode = AMDGPUISD::SIN_HW;
801 break;
802 default:
803 llvm_unreachable("Wrong trig opcode");
804 }
805 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
806 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
807 DAG.getConstantFP(-0.5, MVT::f32)));
808 if (Gen >= AMDGPUSubtarget::R700)
809 return TrigVal;
810 // On R600 hw, COS/SIN input must be between -Pi and Pi.
811 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
812 DAG.getConstantFP(3.14159265359, MVT::f32));
813}
814
Tom Stellard75aadc22012-12-11 21:25:42 +0000815SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
816 return DAG.getNode(
817 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000818 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 MVT::i1,
820 Op, DAG.getConstantFP(0.0f, MVT::f32),
821 DAG.getCondCode(ISD::SETNE)
822 );
823}
824
Tom Stellard75aadc22012-12-11 21:25:42 +0000825SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000826 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000827 unsigned DwordOffset) const {
828 unsigned ByteOffset = DwordOffset * 4;
829 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000830 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000831
832 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
833 assert(isInt<16>(ByteOffset));
834
835 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
836 DAG.getConstant(ByteOffset, MVT::i32), // PTR
837 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
838 false, false, false, 0);
839}
840
Tom Stellard75aadc22012-12-11 21:25:42 +0000841bool R600TargetLowering::isZero(SDValue Op) const {
842 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
843 return Cst->isNullValue();
844 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
845 return CstFP->isZero();
846 } else {
847 return false;
848 }
849}
850
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.  If True/False
  // hold the *inverted* hardware values, invert the condition code (or
  // invert it and swap LHS/RHS) so the SET* pattern above can still match.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    // NOTE: this local CCOpcode intentionally shadows the one above; only CC
    // itself carries the rewritten condition forward.
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the compare also exchanges which value is selected.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // Rewrite not-equal style condition codes as the inverted compare with
    // True/False exchanged; the selected value is unchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
992
Alp Tokercb402912014-01-24 17:20:08 +0000993/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000994/// convert these pointers to a register index. Each register holds
995/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
996/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
997/// for indirect addressing.
998SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
999 unsigned StackWidth,
1000 SelectionDAG &DAG) const {
1001 unsigned SRLPad;
1002 switch(StackWidth) {
1003 case 1:
1004 SRLPad = 2;
1005 break;
1006 case 2:
1007 SRLPad = 3;
1008 break;
1009 case 4:
1010 SRLPad = 4;
1011 break;
1012 default: llvm_unreachable("Invalid stack width");
1013 }
1014
Andrew Trickef9de2a2013-05-25 02:42:55 +00001015 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001016 DAG.getConstant(SRLPad, MVT::i32));
1017}
1018
1019void R600TargetLowering::getStackAddress(unsigned StackWidth,
1020 unsigned ElemIdx,
1021 unsigned &Channel,
1022 unsigned &PtrIncr) const {
1023 switch (StackWidth) {
1024 default:
1025 case 1:
1026 Channel = 0;
1027 if (ElemIdx > 0) {
1028 PtrIncr = 1;
1029 } else {
1030 PtrIncr = 0;
1031 }
1032 break;
1033 case 2:
1034 Channel = ElemIdx % 2;
1035 if (ElemIdx == 2) {
1036 PtrIncr = 1;
1037 } else {
1038 PtrIncr = 0;
1039 }
1040 break;
1041 case 4:
1042 Channel = ElemIdx;
1043 PtrIncr = 0;
1044 break;
1045 }
1046}
1047
Tom Stellard75aadc22012-12-11 21:25:42 +00001048SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001049 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001050 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1051 SDValue Chain = Op.getOperand(0);
1052 SDValue Value = Op.getOperand(1);
1053 SDValue Ptr = Op.getOperand(2);
1054
Tom Stellard2ffc3302013-08-26 15:05:44 +00001055 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001056 if (Result.getNode()) {
1057 return Result;
1058 }
1059
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001060 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1061 if (StoreNode->isTruncatingStore()) {
1062 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001063 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001064 EVT MemVT = StoreNode->getMemoryVT();
1065 SDValue MaskConstant;
1066 if (MemVT == MVT::i8) {
1067 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1068 } else {
1069 assert(MemVT == MVT::i16);
1070 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1071 }
1072 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1073 DAG.getConstant(2, MVT::i32));
1074 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1075 DAG.getConstant(0x00000003, VT));
1076 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1077 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1078 DAG.getConstant(3, VT));
1079 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1080 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1081 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1082 // vector instead.
1083 SDValue Src[4] = {
1084 ShiftedValue,
1085 DAG.getConstant(0, MVT::i32),
1086 DAG.getConstant(0, MVT::i32),
1087 Mask
1088 };
1089 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1090 SDValue Args[3] = { Chain, Input, DWordAddr };
1091 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1092 Op->getVTList(), Args, 3, MemVT,
1093 StoreNode->getMemOperand());
1094 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1095 Value.getValueType().bitsGE(MVT::i32)) {
1096 // Convert pointer from byte address to dword address.
1097 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1098 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1099 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001100
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001101 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001102 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001103 } else {
1104 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1105 }
1106 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001107 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001108 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001109
1110 EVT ValueVT = Value.getValueType();
1111
1112 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1113 return SDValue();
1114 }
1115
Tom Stellarde9373602014-01-22 19:24:14 +00001116 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1117 if (Ret.getNode()) {
1118 return Ret;
1119 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001120 // Lowering for indirect addressing
1121
1122 const MachineFunction &MF = DAG.getMachineFunction();
1123 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1124 getTargetMachine().getFrameLowering());
1125 unsigned StackWidth = TFL->getStackWidth(MF);
1126
1127 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1128
1129 if (ValueVT.isVector()) {
1130 unsigned NumElemVT = ValueVT.getVectorNumElements();
1131 EVT ElemVT = ValueVT.getVectorElementType();
1132 SDValue Stores[4];
1133
1134 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1135 "vector width in load");
1136
1137 for (unsigned i = 0; i < NumElemVT; ++i) {
1138 unsigned Channel, PtrIncr;
1139 getStackAddress(StackWidth, i, Channel, PtrIncr);
1140 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1141 DAG.getConstant(PtrIncr, MVT::i32));
1142 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1143 Value, DAG.getConstant(i, MVT::i32));
1144
1145 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1146 Chain, Elem, Ptr,
1147 DAG.getTargetConstant(Channel, MVT::i32));
1148 }
1149 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1150 } else {
1151 if (ValueVT == MVT::i8) {
1152 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1153 }
1154 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001155 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001156 }
1157
1158 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001159}
1160
Tom Stellard365366f2013-01-23 02:09:06 +00001161// return (512 + (kc_bank << 12)
1162static int
1163ConstantAddressBlock(unsigned AddressSpace) {
1164 switch (AddressSpace) {
1165 case AMDGPUAS::CONSTANT_BUFFER_0:
1166 return 512;
1167 case AMDGPUAS::CONSTANT_BUFFER_1:
1168 return 512 + 4096;
1169 case AMDGPUAS::CONSTANT_BUFFER_2:
1170 return 512 + 4096 * 2;
1171 case AMDGPUAS::CONSTANT_BUFFER_3:
1172 return 512 + 4096 * 3;
1173 case AMDGPUAS::CONSTANT_BUFFER_4:
1174 return 512 + 4096 * 4;
1175 case AMDGPUAS::CONSTANT_BUFFER_5:
1176 return 512 + 4096 * 5;
1177 case AMDGPUAS::CONSTANT_BUFFER_6:
1178 return 512 + 4096 * 6;
1179 case AMDGPUAS::CONSTANT_BUFFER_7:
1180 return 512 + 4096 * 7;
1181 case AMDGPUAS::CONSTANT_BUFFER_8:
1182 return 512 + 4096 * 8;
1183 case AMDGPUAS::CONSTANT_BUFFER_9:
1184 return 512 + 4096 * 9;
1185 case AMDGPUAS::CONSTANT_BUFFER_10:
1186 return 512 + 4096 * 10;
1187 case AMDGPUAS::CONSTANT_BUFFER_11:
1188 return 512 + 4096 * 11;
1189 case AMDGPUAS::CONSTANT_BUFFER_12:
1190 return 512 + 4096 * 12;
1191 case AMDGPUAS::CONSTANT_BUFFER_13:
1192 return 512 + 4096 * 13;
1193 case AMDGPUAS::CONSTANT_BUFFER_14:
1194 return 512 + 4096 * 14;
1195 case AMDGPUAS::CONSTANT_BUFFER_15:
1196 return 512 + 4096 * 15;
1197 default:
1198 return -1;
1199 }
1200}
1201
// Custom-lower an ISD::LOAD node.  In order of priority:
//  - defer to the generic AMDGPU lowering when it applies;
//  - split vector loads from local memory;
//  - fold constant-buffer loads into AMDGPUISD::CONST_ADDRESS nodes;
//  - expand SEXT loads into EXTLOAD + shl/sra (only CONSTANT_BUFFER_0
//    supports sign extension natively);
//  - scalarize private (stack) loads into AMDGPUISD::REGISTER_LOAD.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2];
    Ops[0] = Ret;
    Ops[1] = Chain;
    return DAG.getMergeValues(Ops, 2, DL);
  }


  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // A constant pointer can be folded to an absolute kcache position.
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads read lane 0 of the v4i32 result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend manually: zero/any-extend load, then shl + sra by the
    // number of bits above the memory width.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; PtrIncr accumulates across iterations
    // (see getStackAddress).
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001347
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Non-compute (graphics) shaders receive their inputs directly in vector
/// registers; compute kernels read their arguments from constant buffer 0,
/// after the 36-byte implicit-parameter block.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so memory offsets/types
  // below match the IR-level argument layout.
  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Graphics shaders: arguments arrive as live-in 128-bit registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);
    // 4 is the preferred alignment for
    // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1403
Matt Arsenault758659232013-05-18 00:21:46 +00001404EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001405 if (!VT.isVector()) return MVT::i32;
1406 return VT.changeVectorElementTypeToInteger();
1407}
1408
Benjamin Kramer193960c2013-06-11 13:32:25 +00001409static SDValue
1410CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
1411 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001412 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1413 assert(RemapSwizzle.empty());
1414 SDValue NewBldVec[4] = {
1415 VectorEntry.getOperand(0),
1416 VectorEntry.getOperand(1),
1417 VectorEntry.getOperand(2),
1418 VectorEntry.getOperand(3)
1419 };
1420
1421 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001422 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1423 // We mask write here to teach later passes that the ith element of this
1424 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1425 // break false dependencies and additionnaly make assembly easier to read.
1426 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001427 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1428 if (C->isZero()) {
1429 RemapSwizzle[i] = 4; // SEL_0
1430 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1431 } else if (C->isExactlyValue(1.0)) {
1432 RemapSwizzle[i] = 5; // SEL_1
1433 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1434 }
1435 }
1436
1437 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1438 continue;
1439 for (unsigned j = 0; j < i; j++) {
1440 if (NewBldVec[i] == NewBldVec[j]) {
1441 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1442 RemapSwizzle[i] = j;
1443 break;
1444 }
1445 }
1446 }
1447
1448 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1449 VectorEntry.getValueType(), NewBldVec, 4);
1450}
1451
Benjamin Kramer193960c2013-06-11 13:32:25 +00001452static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1453 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001454 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1455 assert(RemapSwizzle.empty());
1456 SDValue NewBldVec[4] = {
1457 VectorEntry.getOperand(0),
1458 VectorEntry.getOperand(1),
1459 VectorEntry.getOperand(2),
1460 VectorEntry.getOperand(3)
1461 };
1462 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001463 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001464 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001465 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1466 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1467 ->getZExtValue();
1468 if (i == Idx)
1469 isUnmovable[Idx] = true;
1470 }
1471 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001472
1473 for (unsigned i = 0; i < 4; i++) {
1474 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1475 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1476 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001477 if (isUnmovable[Idx])
1478 continue;
1479 // Swap i and Idx
1480 std::swap(NewBldVec[Idx], NewBldVec[i]);
1481 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1482 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001483 }
1484 }
1485
1486 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1487 VectorEntry.getValueType(), NewBldVec, 4);
1488}
1489
1490
1491SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1492SDValue Swz[4], SelectionDAG &DAG) const {
1493 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1494 // Old -> New swizzle values
1495 DenseMap<unsigned, unsigned> SwizzleRemap;
1496
1497 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1498 for (unsigned i = 0; i < 4; i++) {
1499 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1500 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1501 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1502 }
1503
1504 SwizzleRemap.clear();
1505 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1506 for (unsigned i = 0; i < 4; i++) {
1507 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1508 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1509 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1510 }
1511
1512 return BuildVector;
1513}
1514
1515
Tom Stellard75aadc22012-12-11 21:25:42 +00001516//===----------------------------------------------------------------------===//
1517// Custom DAG Optimizations
1518//===----------------------------------------------------------------------===//
1519
1520SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1521 DAGCombinerInfo &DCI) const {
1522 SelectionDAG &DAG = DCI.DAG;
1523
1524 switch (N->getOpcode()) {
1525 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1526 case ISD::FP_ROUND: {
1527 SDValue Arg = N->getOperand(0);
1528 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001529 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001530 Arg.getOperand(0));
1531 }
1532 break;
1533 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001534
1535 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1536 // (i32 select_cc f32, f32, -1, 0 cc)
1537 //
1538 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1539 // this to one of the SET*_DX10 instructions.
1540 case ISD::FP_TO_SINT: {
1541 SDValue FNeg = N->getOperand(0);
1542 if (FNeg.getOpcode() != ISD::FNEG) {
1543 return SDValue();
1544 }
1545 SDValue SelectCC = FNeg.getOperand(0);
1546 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1547 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1548 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1549 !isHWTrueValue(SelectCC.getOperand(2)) ||
1550 !isHWFalseValue(SelectCC.getOperand(3))) {
1551 return SDValue();
1552 }
1553
Andrew Trickef9de2a2013-05-25 02:42:55 +00001554 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001555 SelectCC.getOperand(0), // LHS
1556 SelectCC.getOperand(1), // RHS
1557 DAG.getConstant(-1, MVT::i32), // True
1558 DAG.getConstant(0, MVT::i32), // Flase
1559 SelectCC.getOperand(4)); // CC
1560
1561 break;
1562 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001563
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001564 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1565 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001566 case ISD::INSERT_VECTOR_ELT: {
1567 SDValue InVec = N->getOperand(0);
1568 SDValue InVal = N->getOperand(1);
1569 SDValue EltNo = N->getOperand(2);
1570 SDLoc dl(N);
1571
1572 // If the inserted element is an UNDEF, just use the input vector.
1573 if (InVal.getOpcode() == ISD::UNDEF)
1574 return InVec;
1575
1576 EVT VT = InVec.getValueType();
1577
1578 // If we can't generate a legal BUILD_VECTOR, exit
1579 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1580 return SDValue();
1581
1582 // Check that we know which element is being inserted
1583 if (!isa<ConstantSDNode>(EltNo))
1584 return SDValue();
1585 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1586
1587 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1588 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1589 // vector elements.
1590 SmallVector<SDValue, 8> Ops;
1591 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1592 Ops.append(InVec.getNode()->op_begin(),
1593 InVec.getNode()->op_end());
1594 } else if (InVec.getOpcode() == ISD::UNDEF) {
1595 unsigned NElts = VT.getVectorNumElements();
1596 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1597 } else {
1598 return SDValue();
1599 }
1600
1601 // Insert the element
1602 if (Elt < Ops.size()) {
1603 // All the operands of BUILD_VECTOR must have the same type;
1604 // we enforce that here.
1605 EVT OpVT = Ops[0].getValueType();
1606 if (InVal.getValueType() != OpVT)
1607 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1608 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1609 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1610 Ops[Elt] = InVal;
1611 }
1612
1613 // Return the new vector
1614 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1615 VT, &Ops[0], Ops.size());
1616 }
1617
Tom Stellard365366f2013-01-23 02:09:06 +00001618 // Extract_vec (Build_vector) generated by custom lowering
1619 // also needs to be customly combined
1620 case ISD::EXTRACT_VECTOR_ELT: {
1621 SDValue Arg = N->getOperand(0);
1622 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1623 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1624 unsigned Element = Const->getZExtValue();
1625 return Arg->getOperand(Element);
1626 }
1627 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001628 if (Arg.getOpcode() == ISD::BITCAST &&
1629 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1630 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1631 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001632 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001633 Arg->getOperand(0).getOperand(Element));
1634 }
1635 }
Tom Stellard365366f2013-01-23 02:09:06 +00001636 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001637
1638 case ISD::SELECT_CC: {
1639 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1640 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001641 //
1642 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1643 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001644 SDValue LHS = N->getOperand(0);
1645 if (LHS.getOpcode() != ISD::SELECT_CC) {
1646 return SDValue();
1647 }
1648
1649 SDValue RHS = N->getOperand(1);
1650 SDValue True = N->getOperand(2);
1651 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001652 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001653
1654 if (LHS.getOperand(2).getNode() != True.getNode() ||
1655 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001656 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001657 return SDValue();
1658 }
1659
Tom Stellard5e524892013-03-08 15:37:11 +00001660 switch (NCC) {
1661 default: return SDValue();
1662 case ISD::SETNE: return LHS;
1663 case ISD::SETEQ: {
1664 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1665 LHSCC = ISD::getSetCCInverse(LHSCC,
1666 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001667 if (DCI.isBeforeLegalizeOps() ||
1668 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1669 return DAG.getSelectCC(SDLoc(N),
1670 LHS.getOperand(0),
1671 LHS.getOperand(1),
1672 LHS.getOperand(2),
1673 LHS.getOperand(3),
1674 LHSCC);
1675 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001676 }
Tom Stellard5e524892013-03-08 15:37:11 +00001677 }
Tom Stellardcd428182013-09-28 02:50:38 +00001678 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001679 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001680
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001681 case AMDGPUISD::EXPORT: {
1682 SDValue Arg = N->getOperand(1);
1683 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1684 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001685
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001686 SDValue NewArgs[8] = {
1687 N->getOperand(0), // Chain
1688 SDValue(),
1689 N->getOperand(2), // ArrayBase
1690 N->getOperand(3), // Type
1691 N->getOperand(4), // SWZ_X
1692 N->getOperand(5), // SWZ_Y
1693 N->getOperand(6), // SWZ_Z
1694 N->getOperand(7) // SWZ_W
1695 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001696 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001697 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001698 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001699 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001700 case AMDGPUISD::TEXTURE_FETCH: {
1701 SDValue Arg = N->getOperand(1);
1702 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1703 break;
1704
1705 SDValue NewArgs[19] = {
1706 N->getOperand(0),
1707 N->getOperand(1),
1708 N->getOperand(2),
1709 N->getOperand(3),
1710 N->getOperand(4),
1711 N->getOperand(5),
1712 N->getOperand(6),
1713 N->getOperand(7),
1714 N->getOperand(8),
1715 N->getOperand(9),
1716 N->getOperand(10),
1717 N->getOperand(11),
1718 N->getOperand(12),
1719 N->getOperand(13),
1720 N->getOperand(14),
1721 N->getOperand(15),
1722 N->getOperand(16),
1723 N->getOperand(17),
1724 N->getOperand(18),
1725 };
1726 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1727 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1728 NewArgs, 19);
1729 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001730 }
1731 return SDValue();
1732}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001733
1734static bool
1735FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001736 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001737 const R600InstrInfo *TII =
1738 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1739 if (!Src.isMachineOpcode())
1740 return false;
1741 switch (Src.getMachineOpcode()) {
1742 case AMDGPU::FNEG_R600:
1743 if (!Neg.getNode())
1744 return false;
1745 Src = Src.getOperand(0);
1746 Neg = DAG.getTargetConstant(1, MVT::i32);
1747 return true;
1748 case AMDGPU::FABS_R600:
1749 if (!Abs.getNode())
1750 return false;
1751 Src = Src.getOperand(0);
1752 Abs = DAG.getTargetConstant(1, MVT::i32);
1753 return true;
1754 case AMDGPU::CONST_COPY: {
1755 unsigned Opcode = ParentNode->getMachineOpcode();
1756 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1757
1758 if (!Sel.getNode())
1759 return false;
1760
1761 SDValue CstOffset = Src.getOperand(0);
1762 if (ParentNode->getValueType(0).isVector())
1763 return false;
1764
1765 // Gather constants values
1766 int SrcIndices[] = {
1767 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1768 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1769 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1770 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1771 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1772 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1773 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1774 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1775 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1776 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1777 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1778 };
1779 std::vector<unsigned> Consts;
1780 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1781 int OtherSrcIdx = SrcIndices[i];
1782 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1783 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1784 continue;
1785 if (HasDst) {
1786 OtherSrcIdx--;
1787 OtherSelIdx--;
1788 }
1789 if (RegisterSDNode *Reg =
1790 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1791 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1792 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1793 ParentNode->getOperand(OtherSelIdx));
1794 Consts.push_back(Cst->getZExtValue());
1795 }
1796 }
1797 }
1798
1799 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1800 Consts.push_back(Cst->getZExtValue());
1801 if (!TII->fitsConstReadLimitations(Consts)) {
1802 return false;
1803 }
1804
1805 Sel = CstOffset;
1806 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1807 return true;
1808 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001809 case AMDGPU::MOV_IMM_I32:
1810 case AMDGPU::MOV_IMM_F32: {
1811 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1812 uint64_t ImmValue = 0;
1813
1814
1815 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1816 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1817 float FloatValue = FPC->getValueAPF().convertToFloat();
1818 if (FloatValue == 0.0) {
1819 ImmReg = AMDGPU::ZERO;
1820 } else if (FloatValue == 0.5) {
1821 ImmReg = AMDGPU::HALF;
1822 } else if (FloatValue == 1.0) {
1823 ImmReg = AMDGPU::ONE;
1824 } else {
1825 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1826 }
1827 } else {
1828 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1829 uint64_t Value = C->getZExtValue();
1830 if (Value == 0) {
1831 ImmReg = AMDGPU::ZERO;
1832 } else if (Value == 1) {
1833 ImmReg = AMDGPU::ONE_INT;
1834 } else {
1835 ImmValue = Value;
1836 }
1837 }
1838
1839 // Check that we aren't already using an immediate.
1840 // XXX: It's possible for an instruction to have more than one
1841 // immediate operand, but this is not supported yet.
1842 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1843 if (!Imm.getNode())
1844 return false;
1845 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1846 assert(C);
1847 if (C->getZExtValue())
1848 return false;
1849 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1850 }
1851 Src = DAG.getRegister(ImmReg, MVT::i32);
1852 return true;
1853 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001854 default:
1855 return false;
1856 }
1857}
1858
1859
1860/// \brief Fold the instructions after selecting them
1861SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1862 SelectionDAG &DAG) const {
1863 const R600InstrInfo *TII =
1864 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1865 if (!Node->isMachineOpcode())
1866 return Node;
1867 unsigned Opcode = Node->getMachineOpcode();
1868 SDValue FakeOp;
1869
1870 std::vector<SDValue> Ops;
1871 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1872 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001873 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001874
1875 if (Opcode == AMDGPU::DOT_4) {
1876 int OperandIdx[] = {
1877 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1878 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1879 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1880 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1881 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1882 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1883 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1884 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001885 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001886 int NegIdx[] = {
1887 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1888 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1889 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1890 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1891 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1892 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1893 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1894 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1895 };
1896 int AbsIdx[] = {
1897 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1898 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1899 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1900 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1901 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1902 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1903 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1904 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1905 };
1906 for (unsigned i = 0; i < 8; i++) {
1907 if (OperandIdx[i] < 0)
1908 return Node;
1909 SDValue &Src = Ops[OperandIdx[i] - 1];
1910 SDValue &Neg = Ops[NegIdx[i] - 1];
1911 SDValue &Abs = Ops[AbsIdx[i] - 1];
1912 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1913 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1914 if (HasDst)
1915 SelIdx--;
1916 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001917 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1918 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1919 }
1920 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1921 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1922 SDValue &Src = Ops[i];
1923 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001924 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1925 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001926 } else if (Opcode == AMDGPU::CLAMP_R600) {
1927 SDValue Src = Node->getOperand(0);
1928 if (!Src.isMachineOpcode() ||
1929 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1930 return Node;
1931 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1932 AMDGPU::OpName::clamp);
1933 if (ClampIdx < 0)
1934 return Node;
1935 std::vector<SDValue> Ops;
1936 unsigned NumOp = Src.getNumOperands();
1937 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001938 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001939 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1940 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1941 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001942 } else {
1943 if (!TII->hasInstrModifiers(Opcode))
1944 return Node;
1945 int OperandIdx[] = {
1946 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1947 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1948 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1949 };
1950 int NegIdx[] = {
1951 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1952 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1953 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1954 };
1955 int AbsIdx[] = {
1956 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1957 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1958 -1
1959 };
1960 for (unsigned i = 0; i < 3; i++) {
1961 if (OperandIdx[i] < 0)
1962 return Node;
1963 SDValue &Src = Ops[OperandIdx[i] - 1];
1964 SDValue &Neg = Ops[NegIdx[i] - 1];
1965 SDValue FakeAbs;
1966 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1967 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1968 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001969 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1970 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001971 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001972 ImmIdx--;
1973 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001974 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001975 SDValue &Imm = Ops[ImmIdx];
1976 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001977 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1978 }
1979 }
1980
1981 return Node;
1982}