blob: be810851a176c230cb7168da995081c32704e544 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +000093
94 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
95 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +000096 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
98 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
99 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000100 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
101 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
102
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000103 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000104 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000105 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000106 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000107 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
108 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000109
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setOperationAction(ISD::LOAD, MVT::i32, Custom);
111 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000112 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
113
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000115 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000116 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000117 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000118 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119
Michel Danzer49812b52013-07-10 16:37:07 +0000120 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
121
Tom Stellardb852af52013-03-08 15:37:03 +0000122 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000123 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000124 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000125}
126
127MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
128 MachineInstr * MI, MachineBasicBlock * BB) const {
129 MachineFunction * MF = BB->getParent();
130 MachineRegisterInfo &MRI = MF->getRegInfo();
131 MachineBasicBlock::iterator I = *MI;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000132 const R600InstrInfo *TII =
133 static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000134
135 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000136 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000137 // Replace LDS_*_RET instruction that don't have any uses with the
138 // equivalent LDS_*_NORET instruction.
139 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000140 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
141 assert(DstIdx != -1);
142 MachineInstrBuilder NewMI;
Tom Stellard8f9fc202013-11-15 00:12:45 +0000143 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
144 return BB;
145
146 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
147 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000148 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
149 NewMI.addOperand(MI->getOperand(i));
150 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000151 } else {
152 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
153 }
154 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000155 case AMDGPU::CLAMP_R600: {
156 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
157 AMDGPU::MOV,
158 MI->getOperand(0).getReg(),
159 MI->getOperand(1).getReg());
160 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
161 break;
162 }
163
164 case AMDGPU::FABS_R600: {
165 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
166 AMDGPU::MOV,
167 MI->getOperand(0).getReg(),
168 MI->getOperand(1).getReg());
169 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
170 break;
171 }
172
173 case AMDGPU::FNEG_R600: {
174 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
175 AMDGPU::MOV,
176 MI->getOperand(0).getReg(),
177 MI->getOperand(1).getReg());
178 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
179 break;
180 }
181
Tom Stellard75aadc22012-12-11 21:25:42 +0000182 case AMDGPU::MASK_WRITE: {
183 unsigned maskedRegister = MI->getOperand(0).getReg();
184 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
185 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
186 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
187 break;
188 }
189
190 case AMDGPU::MOV_IMM_F32:
191 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
192 MI->getOperand(1).getFPImm()->getValueAPF()
193 .bitcastToAPInt().getZExtValue());
194 break;
195 case AMDGPU::MOV_IMM_I32:
196 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
197 MI->getOperand(1).getImm());
198 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000199 case AMDGPU::CONST_COPY: {
200 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
201 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000202 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000203 MI->getOperand(1).getImm());
204 break;
205 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000206
207 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000208 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000209 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000210 unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard75aadc22012-12-11 21:25:42 +0000211
212 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
213 .addOperand(MI->getOperand(0))
214 .addOperand(MI->getOperand(1))
215 .addImm(EOP); // Set End of program bit
216 break;
217 }
218
Tom Stellard75aadc22012-12-11 21:25:42 +0000219 case AMDGPU::TXD: {
220 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
221 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000222 MachineOperand &RID = MI->getOperand(4);
223 MachineOperand &SID = MI->getOperand(5);
224 unsigned TextureId = MI->getOperand(6).getImm();
225 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
226 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000227
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000228 switch (TextureId) {
229 case 5: // Rect
230 CTX = CTY = 0;
231 break;
232 case 6: // Shadow1D
233 SrcW = SrcZ;
234 break;
235 case 7: // Shadow2D
236 SrcW = SrcZ;
237 break;
238 case 8: // ShadowRect
239 CTX = CTY = 0;
240 SrcW = SrcZ;
241 break;
242 case 9: // 1DArray
243 SrcZ = SrcY;
244 CTZ = 0;
245 break;
246 case 10: // 2DArray
247 CTZ = 0;
248 break;
249 case 11: // Shadow1DArray
250 SrcZ = SrcY;
251 CTZ = 0;
252 break;
253 case 12: // Shadow2DArray
254 CTZ = 0;
255 break;
256 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000257 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
258 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000259 .addImm(SrcX)
260 .addImm(SrcY)
261 .addImm(SrcZ)
262 .addImm(SrcW)
263 .addImm(0)
264 .addImm(0)
265 .addImm(0)
266 .addImm(0)
267 .addImm(1)
268 .addImm(2)
269 .addImm(3)
270 .addOperand(RID)
271 .addOperand(SID)
272 .addImm(CTX)
273 .addImm(CTY)
274 .addImm(CTZ)
275 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000276 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
277 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000278 .addImm(SrcX)
279 .addImm(SrcY)
280 .addImm(SrcZ)
281 .addImm(SrcW)
282 .addImm(0)
283 .addImm(0)
284 .addImm(0)
285 .addImm(0)
286 .addImm(1)
287 .addImm(2)
288 .addImm(3)
289 .addOperand(RID)
290 .addOperand(SID)
291 .addImm(CTX)
292 .addImm(CTY)
293 .addImm(CTZ)
294 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000295 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
296 .addOperand(MI->getOperand(0))
297 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000298 .addImm(SrcX)
299 .addImm(SrcY)
300 .addImm(SrcZ)
301 .addImm(SrcW)
302 .addImm(0)
303 .addImm(0)
304 .addImm(0)
305 .addImm(0)
306 .addImm(1)
307 .addImm(2)
308 .addImm(3)
309 .addOperand(RID)
310 .addOperand(SID)
311 .addImm(CTX)
312 .addImm(CTY)
313 .addImm(CTZ)
314 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000315 .addReg(T0, RegState::Implicit)
316 .addReg(T1, RegState::Implicit);
317 break;
318 }
319
320 case AMDGPU::TXD_SHADOW: {
321 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
322 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000323 MachineOperand &RID = MI->getOperand(4);
324 MachineOperand &SID = MI->getOperand(5);
325 unsigned TextureId = MI->getOperand(6).getImm();
326 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
327 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
328
329 switch (TextureId) {
330 case 5: // Rect
331 CTX = CTY = 0;
332 break;
333 case 6: // Shadow1D
334 SrcW = SrcZ;
335 break;
336 case 7: // Shadow2D
337 SrcW = SrcZ;
338 break;
339 case 8: // ShadowRect
340 CTX = CTY = 0;
341 SrcW = SrcZ;
342 break;
343 case 9: // 1DArray
344 SrcZ = SrcY;
345 CTZ = 0;
346 break;
347 case 10: // 2DArray
348 CTZ = 0;
349 break;
350 case 11: // Shadow1DArray
351 SrcZ = SrcY;
352 CTZ = 0;
353 break;
354 case 12: // Shadow2DArray
355 CTZ = 0;
356 break;
357 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000358
359 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
360 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000361 .addImm(SrcX)
362 .addImm(SrcY)
363 .addImm(SrcZ)
364 .addImm(SrcW)
365 .addImm(0)
366 .addImm(0)
367 .addImm(0)
368 .addImm(0)
369 .addImm(1)
370 .addImm(2)
371 .addImm(3)
372 .addOperand(RID)
373 .addOperand(SID)
374 .addImm(CTX)
375 .addImm(CTY)
376 .addImm(CTZ)
377 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000378 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
379 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000380 .addImm(SrcX)
381 .addImm(SrcY)
382 .addImm(SrcZ)
383 .addImm(SrcW)
384 .addImm(0)
385 .addImm(0)
386 .addImm(0)
387 .addImm(0)
388 .addImm(1)
389 .addImm(2)
390 .addImm(3)
391 .addOperand(RID)
392 .addOperand(SID)
393 .addImm(CTX)
394 .addImm(CTY)
395 .addImm(CTZ)
396 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000397 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
398 .addOperand(MI->getOperand(0))
399 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000400 .addImm(SrcX)
401 .addImm(SrcY)
402 .addImm(SrcZ)
403 .addImm(SrcW)
404 .addImm(0)
405 .addImm(0)
406 .addImm(0)
407 .addImm(0)
408 .addImm(1)
409 .addImm(2)
410 .addImm(3)
411 .addOperand(RID)
412 .addOperand(SID)
413 .addImm(CTX)
414 .addImm(CTY)
415 .addImm(CTZ)
416 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000417 .addReg(T0, RegState::Implicit)
418 .addReg(T1, RegState::Implicit);
419 break;
420 }
421
422 case AMDGPU::BRANCH:
423 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000424 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000425 break;
426
427 case AMDGPU::BRANCH_COND_f32: {
428 MachineInstr *NewMI =
429 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
430 AMDGPU::PREDICATE_BIT)
431 .addOperand(MI->getOperand(1))
432 .addImm(OPCODE_IS_NOT_ZERO)
433 .addImm(0); // Flags
434 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000435 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000436 .addOperand(MI->getOperand(0))
437 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
438 break;
439 }
440
441 case AMDGPU::BRANCH_COND_i32: {
442 MachineInstr *NewMI =
443 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
444 AMDGPU::PREDICATE_BIT)
445 .addOperand(MI->getOperand(1))
446 .addImm(OPCODE_IS_NOT_ZERO_INT)
447 .addImm(0); // Flags
448 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000449 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000450 .addOperand(MI->getOperand(0))
451 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
452 break;
453 }
454
Tom Stellard75aadc22012-12-11 21:25:42 +0000455 case AMDGPU::EG_ExportSwz:
456 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000457 // Instruction is left unmodified if its not the last one of its type
458 bool isLastInstructionOfItsType = true;
459 unsigned InstExportType = MI->getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000460 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000461 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000462 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000463 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
464 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
465 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
466 .getImm();
467 if (CurrentInstExportType == InstExportType) {
468 isLastInstructionOfItsType = false;
469 break;
470 }
471 }
472 }
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000473 bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000474 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000475 return BB;
476 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
477 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
478 .addOperand(MI->getOperand(0))
479 .addOperand(MI->getOperand(1))
480 .addOperand(MI->getOperand(2))
481 .addOperand(MI->getOperand(3))
482 .addOperand(MI->getOperand(4))
483 .addOperand(MI->getOperand(5))
484 .addOperand(MI->getOperand(6))
485 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000486 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000487 break;
488 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000489 case AMDGPU::RETURN: {
490 // RETURN instructions must have the live-out registers as implicit uses,
491 // otherwise they appear dead.
492 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
493 MachineInstrBuilder MIB(*MF, MI);
494 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
495 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
496 return BB;
497 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000498 }
499
500 MI->eraseFromParent();
501 return BB;
502}
503
504//===----------------------------------------------------------------------===//
505// Custom DAG Lowering Operations
506//===----------------------------------------------------------------------===//
507
Tom Stellard75aadc22012-12-11 21:25:42 +0000508SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000509 MachineFunction &MF = DAG.getMachineFunction();
510 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000511 switch (Op.getOpcode()) {
512 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000513 case ISD::FCOS:
514 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000515 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000516 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000517 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000518 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000519 case ISD::INTRINSIC_VOID: {
520 SDValue Chain = Op.getOperand(0);
521 unsigned IntrinsicID =
522 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
523 switch (IntrinsicID) {
524 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000525 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
526 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000527 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000528 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000529 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000530 case AMDGPUIntrinsic::R600_store_swizzle: {
531 const SDValue Args[8] = {
532 Chain,
533 Op.getOperand(2), // Export Value
534 Op.getOperand(3), // ArrayBase
535 Op.getOperand(4), // Type
536 DAG.getConstant(0, MVT::i32), // SWZ_X
537 DAG.getConstant(1, MVT::i32), // SWZ_Y
538 DAG.getConstant(2, MVT::i32), // SWZ_Z
539 DAG.getConstant(3, MVT::i32) // SWZ_W
540 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000541 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000542 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000543 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000544
Tom Stellard75aadc22012-12-11 21:25:42 +0000545 // default for switch(IntrinsicID)
546 default: break;
547 }
548 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
549 break;
550 }
551 case ISD::INTRINSIC_WO_CHAIN: {
552 unsigned IntrinsicID =
553 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
554 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000555 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000556 switch(IntrinsicID) {
557 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000558 case AMDGPUIntrinsic::R600_load_input: {
559 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
560 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
561 MachineFunction &MF = DAG.getMachineFunction();
562 MachineRegisterInfo &MRI = MF.getRegInfo();
563 MRI.addLiveIn(Reg);
564 return DAG.getCopyFromReg(DAG.getEntryNode(),
565 SDLoc(DAG.getEntryNode()), Reg, VT);
566 }
567
568 case AMDGPUIntrinsic::R600_interp_input: {
569 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
570 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
571 MachineSDNode *interp;
572 if (ijb < 0) {
573 const MachineFunction &MF = DAG.getMachineFunction();
574 const R600InstrInfo *TII =
575 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
576 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
577 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
578 return DAG.getTargetExtractSubreg(
579 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
580 DL, MVT::f32, SDValue(interp, 0));
581 }
582 MachineFunction &MF = DAG.getMachineFunction();
583 MachineRegisterInfo &MRI = MF.getRegInfo();
584 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
585 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
586 MRI.addLiveIn(RegisterI);
587 MRI.addLiveIn(RegisterJ);
588 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
589 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
590 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
591 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
592
593 if (slot % 4 < 2)
594 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
595 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
596 RegisterJNode, RegisterINode);
597 else
598 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
599 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
600 RegisterJNode, RegisterINode);
601 return SDValue(interp, slot % 2);
602 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000603 case AMDGPUIntrinsic::R600_interp_xy:
604 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000605 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000606 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000607 SDValue RegisterINode = Op.getOperand(2);
608 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000609
Vincent Lejeunef143af32013-11-11 22:10:24 +0000610 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000611 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000612 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000613 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000614 else
615 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000616 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000617 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000618 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
619 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000620 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000621 case AMDGPUIntrinsic::R600_tex:
622 case AMDGPUIntrinsic::R600_texc:
623 case AMDGPUIntrinsic::R600_txl:
624 case AMDGPUIntrinsic::R600_txlc:
625 case AMDGPUIntrinsic::R600_txb:
626 case AMDGPUIntrinsic::R600_txbc:
627 case AMDGPUIntrinsic::R600_txf:
628 case AMDGPUIntrinsic::R600_txq:
629 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000630 case AMDGPUIntrinsic::R600_ddy:
631 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000632 unsigned TextureOp;
633 switch (IntrinsicID) {
634 case AMDGPUIntrinsic::R600_tex:
635 TextureOp = 0;
636 break;
637 case AMDGPUIntrinsic::R600_texc:
638 TextureOp = 1;
639 break;
640 case AMDGPUIntrinsic::R600_txl:
641 TextureOp = 2;
642 break;
643 case AMDGPUIntrinsic::R600_txlc:
644 TextureOp = 3;
645 break;
646 case AMDGPUIntrinsic::R600_txb:
647 TextureOp = 4;
648 break;
649 case AMDGPUIntrinsic::R600_txbc:
650 TextureOp = 5;
651 break;
652 case AMDGPUIntrinsic::R600_txf:
653 TextureOp = 6;
654 break;
655 case AMDGPUIntrinsic::R600_txq:
656 TextureOp = 7;
657 break;
658 case AMDGPUIntrinsic::R600_ddx:
659 TextureOp = 8;
660 break;
661 case AMDGPUIntrinsic::R600_ddy:
662 TextureOp = 9;
663 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000664 case AMDGPUIntrinsic::R600_ldptr:
665 TextureOp = 10;
666 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000667 default:
668 llvm_unreachable("Unknow Texture Operation");
669 }
670
671 SDValue TexArgs[19] = {
672 DAG.getConstant(TextureOp, MVT::i32),
673 Op.getOperand(1),
674 DAG.getConstant(0, MVT::i32),
675 DAG.getConstant(1, MVT::i32),
676 DAG.getConstant(2, MVT::i32),
677 DAG.getConstant(3, MVT::i32),
678 Op.getOperand(2),
679 Op.getOperand(3),
680 Op.getOperand(4),
681 DAG.getConstant(0, MVT::i32),
682 DAG.getConstant(1, MVT::i32),
683 DAG.getConstant(2, MVT::i32),
684 DAG.getConstant(3, MVT::i32),
685 Op.getOperand(5),
686 Op.getOperand(6),
687 Op.getOperand(7),
688 Op.getOperand(8),
689 Op.getOperand(9),
690 Op.getOperand(10)
691 };
692 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
693 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000694 case AMDGPUIntrinsic::AMDGPU_dp4: {
695 SDValue Args[8] = {
696 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
697 DAG.getConstant(0, MVT::i32)),
698 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
699 DAG.getConstant(0, MVT::i32)),
700 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
701 DAG.getConstant(1, MVT::i32)),
702 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
703 DAG.getConstant(1, MVT::i32)),
704 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
705 DAG.getConstant(2, MVT::i32)),
706 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
707 DAG.getConstant(2, MVT::i32)),
708 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
709 DAG.getConstant(3, MVT::i32)),
710 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
711 DAG.getConstant(3, MVT::i32))
712 };
713 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
714 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000715
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000716 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000717 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000718 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000719 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000720 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000721 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000722 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000723 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000724 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000725 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000726 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000727 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000728 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000729 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000730 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000731 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000732 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000733 return LowerImplicitParameter(DAG, VT, DL, 8);
734
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000735 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000736 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
737 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000738 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000739 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
740 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000741 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000742 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
743 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000744 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000745 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
746 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000747 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000748 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
749 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000750 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000751 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
752 AMDGPU::T0_Z, VT);
753 }
754 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
755 break;
756 }
757 } // end switch(Op.getOpcode())
758 return SDValue();
759}
760
761void R600TargetLowering::ReplaceNodeResults(SDNode *N,
762 SmallVectorImpl<SDValue> &Results,
763 SelectionDAG &DAG) const {
764 switch (N->getOpcode()) {
Matt Arsenaultd125d742014-03-27 17:23:24 +0000765 default:
766 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
767 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000768 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000769 return;
770 case ISD::LOAD: {
771 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
772 Results.push_back(SDValue(Node, 0));
773 Results.push_back(SDValue(Node, 1));
774 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
775 // function
776 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
777 return;
778 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000779 case ISD::STORE:
780 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
781 Results.push_back(SDValue(Node, 0));
782 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 }
784}
785
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000786SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
787 // On hw >= R700, COS/SIN input must be between -1. and 1.
788 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
789 EVT VT = Op.getValueType();
790 SDValue Arg = Op.getOperand(0);
791 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
792 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
793 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
794 DAG.getConstantFP(0.15915494309, MVT::f32)),
795 DAG.getConstantFP(0.5, MVT::f32)));
796 unsigned TrigNode;
797 switch (Op.getOpcode()) {
798 case ISD::FCOS:
799 TrigNode = AMDGPUISD::COS_HW;
800 break;
801 case ISD::FSIN:
802 TrigNode = AMDGPUISD::SIN_HW;
803 break;
804 default:
805 llvm_unreachable("Wrong trig opcode");
806 }
807 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
808 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
809 DAG.getConstantFP(-0.5, MVT::f32)));
810 if (Gen >= AMDGPUSubtarget::R700)
811 return TrigVal;
812 // On R600 hw, COS/SIN input must be between -Pi and Pi.
813 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
814 DAG.getConstantFP(3.14159265359, MVT::f32));
815}
816
Tom Stellard75aadc22012-12-11 21:25:42 +0000817SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
818 return DAG.getNode(
819 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000820 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000821 MVT::i1,
822 Op, DAG.getConstantFP(0.0f, MVT::f32),
823 DAG.getCondCode(ISD::SETNE)
824 );
825}
826
Tom Stellard75aadc22012-12-11 21:25:42 +0000827SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000828 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000829 unsigned DwordOffset) const {
830 unsigned ByteOffset = DwordOffset * 4;
831 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000832 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000833
834 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
835 assert(isInt<16>(ByteOffset));
836
837 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
838 DAG.getConstant(ByteOffset, MVT::i32), // PTR
839 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
840 false, false, false, 0);
841}
842
Tom Stellard75aadc22012-12-11 21:25:42 +0000843bool R600TargetLowering::isZero(SDValue Op) const {
844 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
845 return Cst->isNullValue();
846 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
847 return CstFP->isZero();
848 } else {
849 return false;
850 }
851}
852
/// Custom lowering for ISD::SELECT_CC.
///
/// First tries to canonicalize the node into a form the native SET* or CND*
/// instructions can match (by swapping/inverting the condition and operands);
/// if no native form applies, the node is split into two SELECT_CC
/// operations that are individually supported.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are reversed relative to what SET* produces, invert the
  // condition (and, if the inverted condition is not legal, additionally
  // swap LHS/RHS with the swapped-inverted condition).
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the conditon and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping True/False.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form: rewrite x != 0 as x == 0 with the
    // True/False operands exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
994
Alp Tokercb402912014-01-24 17:20:08 +0000995/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000996/// convert these pointers to a register index. Each register holds
997/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
998/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
999/// for indirect addressing.
1000SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1001 unsigned StackWidth,
1002 SelectionDAG &DAG) const {
1003 unsigned SRLPad;
1004 switch(StackWidth) {
1005 case 1:
1006 SRLPad = 2;
1007 break;
1008 case 2:
1009 SRLPad = 3;
1010 break;
1011 case 4:
1012 SRLPad = 4;
1013 break;
1014 default: llvm_unreachable("Invalid stack width");
1015 }
1016
Andrew Trickef9de2a2013-05-25 02:42:55 +00001017 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001018 DAG.getConstant(SRLPad, MVT::i32));
1019}
1020
1021void R600TargetLowering::getStackAddress(unsigned StackWidth,
1022 unsigned ElemIdx,
1023 unsigned &Channel,
1024 unsigned &PtrIncr) const {
1025 switch (StackWidth) {
1026 default:
1027 case 1:
1028 Channel = 0;
1029 if (ElemIdx > 0) {
1030 PtrIncr = 1;
1031 } else {
1032 PtrIncr = 0;
1033 }
1034 break;
1035 case 2:
1036 Channel = ElemIdx % 2;
1037 if (ElemIdx == 2) {
1038 PtrIncr = 1;
1039 } else {
1040 PtrIncr = 0;
1041 }
1042 break;
1043 case 4:
1044 Channel = ElemIdx;
1045 PtrIncr = 0;
1046 break;
1047 }
1048}
1049
Tom Stellard75aadc22012-12-11 21:25:42 +00001050SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001051 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001052 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1053 SDValue Chain = Op.getOperand(0);
1054 SDValue Value = Op.getOperand(1);
1055 SDValue Ptr = Op.getOperand(2);
1056
Tom Stellard2ffc3302013-08-26 15:05:44 +00001057 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001058 if (Result.getNode()) {
1059 return Result;
1060 }
1061
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001062 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1063 if (StoreNode->isTruncatingStore()) {
1064 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001065 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001066 EVT MemVT = StoreNode->getMemoryVT();
1067 SDValue MaskConstant;
1068 if (MemVT == MVT::i8) {
1069 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1070 } else {
1071 assert(MemVT == MVT::i16);
1072 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1073 }
1074 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1075 DAG.getConstant(2, MVT::i32));
1076 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1077 DAG.getConstant(0x00000003, VT));
1078 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1079 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1080 DAG.getConstant(3, VT));
1081 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1082 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1083 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1084 // vector instead.
1085 SDValue Src[4] = {
1086 ShiftedValue,
1087 DAG.getConstant(0, MVT::i32),
1088 DAG.getConstant(0, MVT::i32),
1089 Mask
1090 };
1091 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1092 SDValue Args[3] = { Chain, Input, DWordAddr };
1093 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1094 Op->getVTList(), Args, 3, MemVT,
1095 StoreNode->getMemOperand());
1096 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1097 Value.getValueType().bitsGE(MVT::i32)) {
1098 // Convert pointer from byte address to dword address.
1099 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1100 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1101 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001102
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001103 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001104 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001105 } else {
1106 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1107 }
1108 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001109 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001110 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001111
1112 EVT ValueVT = Value.getValueType();
1113
1114 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1115 return SDValue();
1116 }
1117
Tom Stellarde9373602014-01-22 19:24:14 +00001118 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1119 if (Ret.getNode()) {
1120 return Ret;
1121 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001122 // Lowering for indirect addressing
1123
1124 const MachineFunction &MF = DAG.getMachineFunction();
1125 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1126 getTargetMachine().getFrameLowering());
1127 unsigned StackWidth = TFL->getStackWidth(MF);
1128
1129 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1130
1131 if (ValueVT.isVector()) {
1132 unsigned NumElemVT = ValueVT.getVectorNumElements();
1133 EVT ElemVT = ValueVT.getVectorElementType();
1134 SDValue Stores[4];
1135
1136 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1137 "vector width in load");
1138
1139 for (unsigned i = 0; i < NumElemVT; ++i) {
1140 unsigned Channel, PtrIncr;
1141 getStackAddress(StackWidth, i, Channel, PtrIncr);
1142 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1143 DAG.getConstant(PtrIncr, MVT::i32));
1144 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1145 Value, DAG.getConstant(i, MVT::i32));
1146
1147 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1148 Chain, Elem, Ptr,
1149 DAG.getTargetConstant(Channel, MVT::i32));
1150 }
1151 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1152 } else {
1153 if (ValueVT == MVT::i8) {
1154 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1155 }
1156 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001157 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001158 }
1159
1160 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001161}
1162
Tom Stellard365366f2013-01-23 02:09:06 +00001163// return (512 + (kc_bank << 12)
1164static int
1165ConstantAddressBlock(unsigned AddressSpace) {
1166 switch (AddressSpace) {
1167 case AMDGPUAS::CONSTANT_BUFFER_0:
1168 return 512;
1169 case AMDGPUAS::CONSTANT_BUFFER_1:
1170 return 512 + 4096;
1171 case AMDGPUAS::CONSTANT_BUFFER_2:
1172 return 512 + 4096 * 2;
1173 case AMDGPUAS::CONSTANT_BUFFER_3:
1174 return 512 + 4096 * 3;
1175 case AMDGPUAS::CONSTANT_BUFFER_4:
1176 return 512 + 4096 * 4;
1177 case AMDGPUAS::CONSTANT_BUFFER_5:
1178 return 512 + 4096 * 5;
1179 case AMDGPUAS::CONSTANT_BUFFER_6:
1180 return 512 + 4096 * 6;
1181 case AMDGPUAS::CONSTANT_BUFFER_7:
1182 return 512 + 4096 * 7;
1183 case AMDGPUAS::CONSTANT_BUFFER_8:
1184 return 512 + 4096 * 8;
1185 case AMDGPUAS::CONSTANT_BUFFER_9:
1186 return 512 + 4096 * 9;
1187 case AMDGPUAS::CONSTANT_BUFFER_10:
1188 return 512 + 4096 * 10;
1189 case AMDGPUAS::CONSTANT_BUFFER_11:
1190 return 512 + 4096 * 11;
1191 case AMDGPUAS::CONSTANT_BUFFER_12:
1192 return 512 + 4096 * 12;
1193 case AMDGPUAS::CONSTANT_BUFFER_13:
1194 return 512 + 4096 * 13;
1195 case AMDGPUAS::CONSTANT_BUFFER_14:
1196 return 512 + 4096 * 14;
1197 case AMDGPUAS::CONSTANT_BUFFER_15:
1198 return 512 + 4096 * 15;
1199 default:
1200 return -1;
1201 }
1202}
1203
/// Custom lowering for ISD::LOAD.
///
/// Handles, in order: loads the common AMDGPU lowering expands; vector loads
/// from local memory (split in half); loads from the constant buffers
/// (lowered to CONST_ADDRESS); sign-extending sub-dword loads (expanded to
/// extload + shl + sra); and private-memory loads (lowered to indirect
/// REGISTER_LOAD).  Everything else returns SDValue() for default handling.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the target-independent AMDGPU lowering the first shot.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, 2, DL);
  }


  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Constant-buffer loads.  ZEXTLOAD is allowed because constant-buffer data
  // is not sign extended; SEXTLOAD from CONSTANT_BUFFER_0 falls through to
  // the SEXTLOAD expansion below.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Build a vector from the loaded channels; scalar results take only
      // the first element (extracted below).
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // sextload(x) == sra(shl(extload(x), N), N) with N = bit-width difference.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Load each element from its channel/register slot; PtrIncr is relative
    // to the previous element, so Ptr advances incrementally.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad unused lanes with undef to form a full 4-element vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001351
Tom Stellard75aadc22012-12-11 21:25:42 +00001352/// XXX Only kernel functions are supported, so we can assume for now that
1353/// every function is a kernel function, but in the future we should use
1354/// separate calling conventions for kernel and non-kernel functions.
1355SDValue R600TargetLowering::LowerFormalArguments(
1356 SDValue Chain,
1357 CallingConv::ID CallConv,
1358 bool isVarArg,
1359 const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001360 SDLoc DL, SelectionDAG &DAG,
Tom Stellard75aadc22012-12-11 21:25:42 +00001361 SmallVectorImpl<SDValue> &InVals) const {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001362 SmallVector<CCValAssign, 16> ArgLocs;
1363 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1364 getTargetMachine(), ArgLocs, *DAG.getContext());
Vincent Lejeunef143af32013-11-11 22:10:24 +00001365 MachineFunction &MF = DAG.getMachineFunction();
1366 unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
Tom Stellardacfeebf2013-07-23 01:48:05 +00001367
Tom Stellardaf775432013-10-23 00:44:32 +00001368 SmallVector<ISD::InputArg, 8> LocalIns;
1369
1370 getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
1371 LocalIns);
1372
1373 AnalyzeFormalArguments(CCInfo, LocalIns);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001374
Tom Stellard1e803092013-07-23 01:48:18 +00001375 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
Tom Stellardacfeebf2013-07-23 01:48:05 +00001376 CCValAssign &VA = ArgLocs[i];
Tom Stellardaf775432013-10-23 00:44:32 +00001377 EVT VT = Ins[i].VT;
1378 EVT MemVT = LocalIns[i].VT;
Tom Stellard78e01292013-07-23 01:47:58 +00001379
Vincent Lejeunef143af32013-11-11 22:10:24 +00001380 if (ShaderType != ShaderType::COMPUTE) {
1381 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1382 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1383 InVals.push_back(Register);
1384 continue;
1385 }
1386
Tom Stellard75aadc22012-12-11 21:25:42 +00001387 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001388 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellardacfeebf2013-07-23 01:48:05 +00001389
Matt Arsenaultfae02982014-03-17 18:58:11 +00001390 // i64 isn't a legal type, so the register type used ends up as i32, which
1391 // isn't expected here. It attempts to create this sextload, but it ends up
1392 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1393 // for <1 x i64>.
1394
Tom Stellardacfeebf2013-07-23 01:48:05 +00001395 // The first 36 bytes of the input buffer contains information about
1396 // thread group and global sizes.
Matt Arsenaulte1f030c2014-04-11 20:59:54 +00001397
1398 // FIXME: This should really check the extload type, but the handling of
1399 // extload vecto parameters seems to be broken.
1400 //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1401 ISD::LoadExtType Ext = ISD::SEXTLOAD;
1402 SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
Tom Stellardaf775432013-10-23 00:44:32 +00001403 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
1404 MachinePointerInfo(UndefValue::get(PtrTy)),
1405 MemVT, false, false, 4);
Alp Tokercb402912014-01-24 17:20:08 +00001406 // 4 is the preferred alignment for
1407 // the CONSTANT memory space.
Tom Stellard75aadc22012-12-11 21:25:42 +00001408 InVals.push_back(Arg);
Tom Stellard75aadc22012-12-11 21:25:42 +00001409 }
1410 return Chain;
1411}
1412
Matt Arsenault758659232013-05-18 00:21:46 +00001413EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001414 if (!VT.isVector()) return MVT::i32;
1415 return VT.changeVectorElementTypeToInteger();
1416}
1417
Benjamin Kramer193960c2013-06-11 13:32:25 +00001418static SDValue
1419CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
1420 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001421 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1422 assert(RemapSwizzle.empty());
1423 SDValue NewBldVec[4] = {
1424 VectorEntry.getOperand(0),
1425 VectorEntry.getOperand(1),
1426 VectorEntry.getOperand(2),
1427 VectorEntry.getOperand(3)
1428 };
1429
1430 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001431 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1432 // We mask write here to teach later passes that the ith element of this
1433 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1434 // break false dependencies and additionnaly make assembly easier to read.
1435 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001436 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1437 if (C->isZero()) {
1438 RemapSwizzle[i] = 4; // SEL_0
1439 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1440 } else if (C->isExactlyValue(1.0)) {
1441 RemapSwizzle[i] = 5; // SEL_1
1442 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1443 }
1444 }
1445
1446 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1447 continue;
1448 for (unsigned j = 0; j < i; j++) {
1449 if (NewBldVec[i] == NewBldVec[j]) {
1450 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1451 RemapSwizzle[i] = j;
1452 break;
1453 }
1454 }
1455 }
1456
1457 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1458 VectorEntry.getValueType(), NewBldVec, 4);
1459}
1460
Benjamin Kramer193960c2013-06-11 13:32:25 +00001461static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1462 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001463 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1464 assert(RemapSwizzle.empty());
1465 SDValue NewBldVec[4] = {
1466 VectorEntry.getOperand(0),
1467 VectorEntry.getOperand(1),
1468 VectorEntry.getOperand(2),
1469 VectorEntry.getOperand(3)
1470 };
1471 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001472 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001473 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001474 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1475 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1476 ->getZExtValue();
1477 if (i == Idx)
1478 isUnmovable[Idx] = true;
1479 }
1480 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001481
1482 for (unsigned i = 0; i < 4; i++) {
1483 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1484 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1485 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001486 if (isUnmovable[Idx])
1487 continue;
1488 // Swap i and Idx
1489 std::swap(NewBldVec[Idx], NewBldVec[i]);
1490 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1491 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001492 }
1493 }
1494
1495 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1496 VectorEntry.getValueType(), NewBldVec, 4);
1497}
1498
1499
1500SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1501SDValue Swz[4], SelectionDAG &DAG) const {
1502 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1503 // Old -> New swizzle values
1504 DenseMap<unsigned, unsigned> SwizzleRemap;
1505
1506 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1507 for (unsigned i = 0; i < 4; i++) {
1508 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1509 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1510 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1511 }
1512
1513 SwizzleRemap.clear();
1514 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1515 for (unsigned i = 0; i < 4; i++) {
1516 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1517 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1518 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1519 }
1520
1521 return BuildVector;
1522}
1523
1524
Tom Stellard75aadc22012-12-11 21:25:42 +00001525//===----------------------------------------------------------------------===//
1526// Custom DAG Optimizations
1527//===----------------------------------------------------------------------===//
1528
1529SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1530 DAGCombinerInfo &DCI) const {
1531 SelectionDAG &DAG = DCI.DAG;
1532
1533 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001534 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001535 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1536 case ISD::FP_ROUND: {
1537 SDValue Arg = N->getOperand(0);
1538 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001539 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001540 Arg.getOperand(0));
1541 }
1542 break;
1543 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001544
1545 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1546 // (i32 select_cc f32, f32, -1, 0 cc)
1547 //
1548 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1549 // this to one of the SET*_DX10 instructions.
1550 case ISD::FP_TO_SINT: {
1551 SDValue FNeg = N->getOperand(0);
1552 if (FNeg.getOpcode() != ISD::FNEG) {
1553 return SDValue();
1554 }
1555 SDValue SelectCC = FNeg.getOperand(0);
1556 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1557 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1558 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1559 !isHWTrueValue(SelectCC.getOperand(2)) ||
1560 !isHWFalseValue(SelectCC.getOperand(3))) {
1561 return SDValue();
1562 }
1563
Andrew Trickef9de2a2013-05-25 02:42:55 +00001564 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001565 SelectCC.getOperand(0), // LHS
1566 SelectCC.getOperand(1), // RHS
1567 DAG.getConstant(-1, MVT::i32), // True
1568 DAG.getConstant(0, MVT::i32), // Flase
1569 SelectCC.getOperand(4)); // CC
1570
1571 break;
1572 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001573
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001574 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1575 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001576 case ISD::INSERT_VECTOR_ELT: {
1577 SDValue InVec = N->getOperand(0);
1578 SDValue InVal = N->getOperand(1);
1579 SDValue EltNo = N->getOperand(2);
1580 SDLoc dl(N);
1581
1582 // If the inserted element is an UNDEF, just use the input vector.
1583 if (InVal.getOpcode() == ISD::UNDEF)
1584 return InVec;
1585
1586 EVT VT = InVec.getValueType();
1587
1588 // If we can't generate a legal BUILD_VECTOR, exit
1589 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1590 return SDValue();
1591
1592 // Check that we know which element is being inserted
1593 if (!isa<ConstantSDNode>(EltNo))
1594 return SDValue();
1595 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1596
1597 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1598 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1599 // vector elements.
1600 SmallVector<SDValue, 8> Ops;
1601 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1602 Ops.append(InVec.getNode()->op_begin(),
1603 InVec.getNode()->op_end());
1604 } else if (InVec.getOpcode() == ISD::UNDEF) {
1605 unsigned NElts = VT.getVectorNumElements();
1606 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1607 } else {
1608 return SDValue();
1609 }
1610
1611 // Insert the element
1612 if (Elt < Ops.size()) {
1613 // All the operands of BUILD_VECTOR must have the same type;
1614 // we enforce that here.
1615 EVT OpVT = Ops[0].getValueType();
1616 if (InVal.getValueType() != OpVT)
1617 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1618 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1619 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1620 Ops[Elt] = InVal;
1621 }
1622
1623 // Return the new vector
1624 return DAG.getNode(ISD::BUILD_VECTOR, dl,
Matt Arsenault7939acd2014-04-07 16:44:24 +00001625 VT, Ops.data(), Ops.size());
Quentin Colombete2e05482013-07-30 00:27:16 +00001626 }
1627
Tom Stellard365366f2013-01-23 02:09:06 +00001628 // Extract_vec (Build_vector) generated by custom lowering
1629 // also needs to be customly combined
1630 case ISD::EXTRACT_VECTOR_ELT: {
1631 SDValue Arg = N->getOperand(0);
1632 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1633 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1634 unsigned Element = Const->getZExtValue();
1635 return Arg->getOperand(Element);
1636 }
1637 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001638 if (Arg.getOpcode() == ISD::BITCAST &&
1639 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1640 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1641 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001642 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001643 Arg->getOperand(0).getOperand(Element));
1644 }
1645 }
Tom Stellard365366f2013-01-23 02:09:06 +00001646 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001647
1648 case ISD::SELECT_CC: {
1649 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1650 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001651 //
1652 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1653 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001654 SDValue LHS = N->getOperand(0);
1655 if (LHS.getOpcode() != ISD::SELECT_CC) {
1656 return SDValue();
1657 }
1658
1659 SDValue RHS = N->getOperand(1);
1660 SDValue True = N->getOperand(2);
1661 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001662 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001663
1664 if (LHS.getOperand(2).getNode() != True.getNode() ||
1665 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001666 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001667 return SDValue();
1668 }
1669
Tom Stellard5e524892013-03-08 15:37:11 +00001670 switch (NCC) {
1671 default: return SDValue();
1672 case ISD::SETNE: return LHS;
1673 case ISD::SETEQ: {
1674 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1675 LHSCC = ISD::getSetCCInverse(LHSCC,
1676 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001677 if (DCI.isBeforeLegalizeOps() ||
1678 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1679 return DAG.getSelectCC(SDLoc(N),
1680 LHS.getOperand(0),
1681 LHS.getOperand(1),
1682 LHS.getOperand(2),
1683 LHS.getOperand(3),
1684 LHSCC);
1685 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001686 }
Tom Stellard5e524892013-03-08 15:37:11 +00001687 }
Tom Stellardcd428182013-09-28 02:50:38 +00001688 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001689 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001690
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001691 case AMDGPUISD::EXPORT: {
1692 SDValue Arg = N->getOperand(1);
1693 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1694 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001695
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001696 SDValue NewArgs[8] = {
1697 N->getOperand(0), // Chain
1698 SDValue(),
1699 N->getOperand(2), // ArrayBase
1700 N->getOperand(3), // Type
1701 N->getOperand(4), // SWZ_X
1702 N->getOperand(5), // SWZ_Y
1703 N->getOperand(6), // SWZ_Z
1704 N->getOperand(7) // SWZ_W
1705 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001706 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001707 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001708 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001709 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001710 case AMDGPUISD::TEXTURE_FETCH: {
1711 SDValue Arg = N->getOperand(1);
1712 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1713 break;
1714
1715 SDValue NewArgs[19] = {
1716 N->getOperand(0),
1717 N->getOperand(1),
1718 N->getOperand(2),
1719 N->getOperand(3),
1720 N->getOperand(4),
1721 N->getOperand(5),
1722 N->getOperand(6),
1723 N->getOperand(7),
1724 N->getOperand(8),
1725 N->getOperand(9),
1726 N->getOperand(10),
1727 N->getOperand(11),
1728 N->getOperand(12),
1729 N->getOperand(13),
1730 N->getOperand(14),
1731 N->getOperand(15),
1732 N->getOperand(16),
1733 N->getOperand(17),
1734 N->getOperand(18),
1735 };
1736 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1737 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1738 NewArgs, 19);
1739 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001740 }
1741 return SDValue();
1742}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001743
1744static bool
1745FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001746 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001747 const R600InstrInfo *TII =
1748 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1749 if (!Src.isMachineOpcode())
1750 return false;
1751 switch (Src.getMachineOpcode()) {
1752 case AMDGPU::FNEG_R600:
1753 if (!Neg.getNode())
1754 return false;
1755 Src = Src.getOperand(0);
1756 Neg = DAG.getTargetConstant(1, MVT::i32);
1757 return true;
1758 case AMDGPU::FABS_R600:
1759 if (!Abs.getNode())
1760 return false;
1761 Src = Src.getOperand(0);
1762 Abs = DAG.getTargetConstant(1, MVT::i32);
1763 return true;
1764 case AMDGPU::CONST_COPY: {
1765 unsigned Opcode = ParentNode->getMachineOpcode();
1766 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1767
1768 if (!Sel.getNode())
1769 return false;
1770
1771 SDValue CstOffset = Src.getOperand(0);
1772 if (ParentNode->getValueType(0).isVector())
1773 return false;
1774
1775 // Gather constants values
1776 int SrcIndices[] = {
1777 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1778 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1779 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1780 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1781 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1782 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1783 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1784 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1785 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1786 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1787 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1788 };
1789 std::vector<unsigned> Consts;
1790 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1791 int OtherSrcIdx = SrcIndices[i];
1792 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1793 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1794 continue;
1795 if (HasDst) {
1796 OtherSrcIdx--;
1797 OtherSelIdx--;
1798 }
1799 if (RegisterSDNode *Reg =
1800 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1801 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1802 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1803 ParentNode->getOperand(OtherSelIdx));
1804 Consts.push_back(Cst->getZExtValue());
1805 }
1806 }
1807 }
1808
1809 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1810 Consts.push_back(Cst->getZExtValue());
1811 if (!TII->fitsConstReadLimitations(Consts)) {
1812 return false;
1813 }
1814
1815 Sel = CstOffset;
1816 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1817 return true;
1818 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001819 case AMDGPU::MOV_IMM_I32:
1820 case AMDGPU::MOV_IMM_F32: {
1821 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1822 uint64_t ImmValue = 0;
1823
1824
1825 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1826 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1827 float FloatValue = FPC->getValueAPF().convertToFloat();
1828 if (FloatValue == 0.0) {
1829 ImmReg = AMDGPU::ZERO;
1830 } else if (FloatValue == 0.5) {
1831 ImmReg = AMDGPU::HALF;
1832 } else if (FloatValue == 1.0) {
1833 ImmReg = AMDGPU::ONE;
1834 } else {
1835 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1836 }
1837 } else {
1838 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1839 uint64_t Value = C->getZExtValue();
1840 if (Value == 0) {
1841 ImmReg = AMDGPU::ZERO;
1842 } else if (Value == 1) {
1843 ImmReg = AMDGPU::ONE_INT;
1844 } else {
1845 ImmValue = Value;
1846 }
1847 }
1848
1849 // Check that we aren't already using an immediate.
1850 // XXX: It's possible for an instruction to have more than one
1851 // immediate operand, but this is not supported yet.
1852 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1853 if (!Imm.getNode())
1854 return false;
1855 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1856 assert(C);
1857 if (C->getZExtValue())
1858 return false;
1859 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1860 }
1861 Src = DAG.getRegister(ImmReg, MVT::i32);
1862 return true;
1863 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001864 default:
1865 return false;
1866 }
1867}
1868
1869
/// \brief Fold the instructions after selecting them
///
/// Runs right after instruction selection and tries to fold the producers of
/// a machine node's source operands into the node itself via FoldOperand:
/// FNEG/FABS become neg/abs modifier bits, CONST_COPY becomes an ALU_CONST
/// read, and MOV_IMM_* becomes an inline constant or literal.  CLAMP_R600 is
/// handled separately by setting the clamp bit on the producing instruction.
/// Returns \p Node unchanged, or a new machine node with folded operands.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  // Only machine nodes carry the operand tables we index below.
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed for modifier slots an instruction doesn't have;
  // FoldOperand detects it via getNode() being null.
  SDValue FakeOp;

  // Working copy of the operand list.  FoldOperand mutates entries in place
  // through the references taken below; on success we rebuild the node from
  // this vector.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
      I != E; ++I)
          Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight independent source slots (src0/src1 x XYZW), each with
    // its own neg/abs modifier operands.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // Indices from the operand table count the def; SDNode operand lists
      // don't, hence the "- 1" adjustments here and the SelIdx-- below.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot, so pass FakeOp for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index); only the value
    // operands (odd positions, counting the leading reg-class id) can fold.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Instead of emitting a separate clamp, set the clamp bit on the
    // instruction that produces the value, when it supports modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // NOTE: this local Ops intentionally shadows the outer one; it holds the
    // *producer's* operands, which are what the rebuilt node needs.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
          Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources, each with neg and
    // (except src2) abs modifiers, plus a shared literal slot.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      // src2 has no abs modifier (AbsIdx == -1); hand FoldOperand a dummy.
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}