//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +000093
94 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
95 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +000096 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
98 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
99 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000100 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
101 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
102
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000103 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000104 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000105 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000106 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000107 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
108 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000109
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setOperationAction(ISD::LOAD, MVT::i32, Custom);
111 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000112 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
113
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000115 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000116 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000117 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000118 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119
Michel Danzer49812b52013-07-10 16:37:07 +0000120 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
121
Tom Stellardb852af52013-03-08 15:37:03 +0000122 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000123 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000124 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000125}
126
/// \brief Expand pseudo instructions marked usesCustomInserter.
///
/// \param MI the pseudo machine instruction to expand.
/// \param BB the basic block currently containing \p MI.
/// \returns the block where subsequent instructions should be inserted.
///
/// Each case builds the real machine instruction(s) in place of the pseudo;
/// the pseudo itself is erased at the bottom.  Cases that must keep the
/// original instruction (still-used LDS_*_RET, non-final exports, RETURN)
/// return early instead.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // The destination is still read somewhere: the _RET form must stay.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      // Copy all operands except the dead destination (operand 0).
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG become a MOV carrying the corresponding modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the instruction defining the masked register instead of emitting
    // anything: MASK_WRITE only annotates its input's defining instruction.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float constant via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy from the constant file: a MOV whose src0_sel selects the
    // constant-buffer slot given by operand 1.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is the function's RETURN, fold the
    // end-of-program bit into this write.
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: load the H and V gradients
    // into two temporaries, then issue the gradient sample that implicitly
    // reads them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type flags per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // Implicit uses keep the gradient loads live and ordered
            // before the sample.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but emits the shadow-compare sample
    // (TEX_SAMPLE_C_G) at the end.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Set the predicate bit from a float comparison against zero, then emit
    // the predicated jump.  MO_FLAG_PUSH makes PRED_X push the stack entry.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    // Only the final export (or the one right before RETURN) gets rewritten
    // with its control-flow instruction word and end-of-program bit.
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction word differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
503
504//===----------------------------------------------------------------------===//
505// Custom DAG Lowering Operations
506//===----------------------------------------------------------------------===//
507
/// \brief Custom-lower the DAG operations registered as Custom in the
/// constructor.
///
/// \param Op the node to lower.
/// \param DAG the selection DAG being built.
/// \returns the lowered value, or an empty SDValue when no replacement is
/// produced (unhandled intrinsics fall through to here).
///
/// Simple opcodes are forwarded to dedicated Lower* helpers; INTRINSIC_VOID
/// and INTRINSIC_WO_CHAIN dispatch on the intrinsic id themselves.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value into the output register and record it as a live-out
      // so the RETURN expansion can add it as an implicit use.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (x,y,z,w).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs arrive pre-loaded in T registers: mark the register
      // live-in and read it at the function entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative i/j base: constant (flat) interpolation, loaded as a
        // whole vector; extract the channel selected by slot.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      // Perspective interpolation: the i/j barycentrics live in a pair of
      // consecutive T registers that must be marked live-in.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // INTERP_PAIR_XY produces channels x/y, INTERP_PAIR_ZW channels z/w;
      // the result index (slot % 2) picks the channel within the pair.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      // Interpolate with explicitly supplied i/j values and return both
      // channels of the pair as a v2f32.
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      // Map the intrinsic to the numeric texture opcode carried as the
      // first operand of the TEXTURE_FETCH node.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Unpack both v4f32 operands into the 8 scalar inputs of DOT4.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Dispatch sizes (ngroups/global/local) are read from the implicit
    // parameter buffer at fixed dword offsets 0-8.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup ids live in T1.{x,y,z}, workitem ids in T0.{x,y,z}.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
760
/// Replace the illegal-typed results of \p N with custom-lowered nodes.
/// Anything not handled explicitly here is forwarded to the AMDGPU base
/// class implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    // LowerLOAD merges the loaded value (result 0) and the chain (result 1).
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    // Note: this is the last case of the switch, so the unbraced declaration
    // of Node here is legal (no later label can jump past its initializer).
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}
785
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000786SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
787 // On hw >= R700, COS/SIN input must be between -1. and 1.
788 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
789 EVT VT = Op.getValueType();
790 SDValue Arg = Op.getOperand(0);
791 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
792 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
793 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
794 DAG.getConstantFP(0.15915494309, MVT::f32)),
795 DAG.getConstantFP(0.5, MVT::f32)));
796 unsigned TrigNode;
797 switch (Op.getOpcode()) {
798 case ISD::FCOS:
799 TrigNode = AMDGPUISD::COS_HW;
800 break;
801 case ISD::FSIN:
802 TrigNode = AMDGPUISD::SIN_HW;
803 break;
804 default:
805 llvm_unreachable("Wrong trig opcode");
806 }
807 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
808 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
809 DAG.getConstantFP(-0.5, MVT::f32)));
810 if (Gen >= AMDGPUSubtarget::R700)
811 return TrigVal;
812 // On R600 hw, COS/SIN input must be between -Pi and Pi.
813 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
814 DAG.getConstantFP(3.14159265359, MVT::f32));
815}
816
Tom Stellard75aadc22012-12-11 21:25:42 +0000817SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
818 return DAG.getNode(
819 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000820 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000821 MVT::i1,
822 Op, DAG.getConstantFP(0.0f, MVT::f32),
823 DAG.getCondCode(ISD::SETNE)
824 );
825}
826
Tom Stellard75aadc22012-12-11 21:25:42 +0000827SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000828 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000829 unsigned DwordOffset) const {
830 unsigned ByteOffset = DwordOffset * 4;
831 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000832 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000833
834 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
835 assert(isInt<16>(ByteOffset));
836
837 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
838 DAG.getConstant(ByteOffset, MVT::i32), // PTR
839 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
840 false, false, false, 0);
841}
842
Tom Stellard75aadc22012-12-11 21:25:42 +0000843bool R600TargetLowering::isZero(SDValue Op) const {
844 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
845 return Cst->isNullValue();
846 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
847 return CstFP->isZero();
848 } else {
849 return false;
850 }
851}
852
/// Custom lowering for ISD::SELECT_CC.
///
/// Tries, in order, to massage the operands into a form matchable by the
/// hardware SET* instructions, then the CND* instructions, then a min/max
/// pattern; failing all of those, it expands into two chained SELECT_CC
/// nodes that each use a supported form.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand. If the condition
  // is inverted (True/False swapped), invert the condition code instead, or
  // invert and swap the comparison operands, whichever yields a legal code.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping True/False.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form: rewrite NE-style codes as their inverse
    // and swap the select arms to compensate.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
994
Alp Tokercb402912014-01-24 17:20:08 +0000995/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000996/// convert these pointers to a register index. Each register holds
997/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
998/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
999/// for indirect addressing.
1000SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1001 unsigned StackWidth,
1002 SelectionDAG &DAG) const {
1003 unsigned SRLPad;
1004 switch(StackWidth) {
1005 case 1:
1006 SRLPad = 2;
1007 break;
1008 case 2:
1009 SRLPad = 3;
1010 break;
1011 case 4:
1012 SRLPad = 4;
1013 break;
1014 default: llvm_unreachable("Invalid stack width");
1015 }
1016
Andrew Trickef9de2a2013-05-25 02:42:55 +00001017 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001018 DAG.getConstant(SRLPad, MVT::i32));
1019}
1020
/// Compute the sub-register channel and pointer increment used to address
/// element \p ElemIdx of a value accessed with the given \p StackWidth.
/// \p PtrIncr is an increment relative to the PREVIOUS element: callers
/// (LowerLOAD / LowerSTORE) add it to the running pointer before touching
/// each element, so it is non-zero only at the element where the access
/// crosses into the next register.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One element per register: always channel 0, advance one register for
    // every element after the first.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two elements per register: alternate channels 0/1, advance exactly
    // once, when entering the second register at element 2 (element 3 stays
    // in that register, so its relative increment is 0).
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // All four elements fit in one register, one per channel.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1049
Tom Stellard75aadc22012-12-11 21:25:42 +00001050SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001051 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001052 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1053 SDValue Chain = Op.getOperand(0);
1054 SDValue Value = Op.getOperand(1);
1055 SDValue Ptr = Op.getOperand(2);
1056
Tom Stellard2ffc3302013-08-26 15:05:44 +00001057 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001058 if (Result.getNode()) {
1059 return Result;
1060 }
1061
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001062 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1063 if (StoreNode->isTruncatingStore()) {
1064 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001065 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001066 EVT MemVT = StoreNode->getMemoryVT();
1067 SDValue MaskConstant;
1068 if (MemVT == MVT::i8) {
1069 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1070 } else {
1071 assert(MemVT == MVT::i16);
1072 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1073 }
1074 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1075 DAG.getConstant(2, MVT::i32));
1076 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1077 DAG.getConstant(0x00000003, VT));
1078 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1079 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1080 DAG.getConstant(3, VT));
1081 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1082 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1083 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1084 // vector instead.
1085 SDValue Src[4] = {
1086 ShiftedValue,
1087 DAG.getConstant(0, MVT::i32),
1088 DAG.getConstant(0, MVT::i32),
1089 Mask
1090 };
1091 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1092 SDValue Args[3] = { Chain, Input, DWordAddr };
1093 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1094 Op->getVTList(), Args, 3, MemVT,
1095 StoreNode->getMemOperand());
1096 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1097 Value.getValueType().bitsGE(MVT::i32)) {
1098 // Convert pointer from byte address to dword address.
1099 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1100 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1101 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001102
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001103 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001104 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001105 } else {
1106 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1107 }
1108 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001109 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001110 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001111
1112 EVT ValueVT = Value.getValueType();
1113
1114 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1115 return SDValue();
1116 }
1117
Tom Stellarde9373602014-01-22 19:24:14 +00001118 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1119 if (Ret.getNode()) {
1120 return Ret;
1121 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001122 // Lowering for indirect addressing
1123
1124 const MachineFunction &MF = DAG.getMachineFunction();
1125 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1126 getTargetMachine().getFrameLowering());
1127 unsigned StackWidth = TFL->getStackWidth(MF);
1128
1129 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1130
1131 if (ValueVT.isVector()) {
1132 unsigned NumElemVT = ValueVT.getVectorNumElements();
1133 EVT ElemVT = ValueVT.getVectorElementType();
1134 SDValue Stores[4];
1135
1136 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1137 "vector width in load");
1138
1139 for (unsigned i = 0; i < NumElemVT; ++i) {
1140 unsigned Channel, PtrIncr;
1141 getStackAddress(StackWidth, i, Channel, PtrIncr);
1142 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1143 DAG.getConstant(PtrIncr, MVT::i32));
1144 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1145 Value, DAG.getConstant(i, MVT::i32));
1146
1147 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1148 Chain, Elem, Ptr,
1149 DAG.getTargetConstant(Channel, MVT::i32));
1150 }
1151 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1152 } else {
1153 if (ValueVT == MVT::i8) {
1154 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1155 }
1156 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001157 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001158 }
1159
1160 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001161}
1162
Tom Stellard365366f2013-01-23 02:09:06 +00001163// return (512 + (kc_bank << 12)
1164static int
1165ConstantAddressBlock(unsigned AddressSpace) {
1166 switch (AddressSpace) {
1167 case AMDGPUAS::CONSTANT_BUFFER_0:
1168 return 512;
1169 case AMDGPUAS::CONSTANT_BUFFER_1:
1170 return 512 + 4096;
1171 case AMDGPUAS::CONSTANT_BUFFER_2:
1172 return 512 + 4096 * 2;
1173 case AMDGPUAS::CONSTANT_BUFFER_3:
1174 return 512 + 4096 * 3;
1175 case AMDGPUAS::CONSTANT_BUFFER_4:
1176 return 512 + 4096 * 4;
1177 case AMDGPUAS::CONSTANT_BUFFER_5:
1178 return 512 + 4096 * 5;
1179 case AMDGPUAS::CONSTANT_BUFFER_6:
1180 return 512 + 4096 * 6;
1181 case AMDGPUAS::CONSTANT_BUFFER_7:
1182 return 512 + 4096 * 7;
1183 case AMDGPUAS::CONSTANT_BUFFER_8:
1184 return 512 + 4096 * 8;
1185 case AMDGPUAS::CONSTANT_BUFFER_9:
1186 return 512 + 4096 * 9;
1187 case AMDGPUAS::CONSTANT_BUFFER_10:
1188 return 512 + 4096 * 10;
1189 case AMDGPUAS::CONSTANT_BUFFER_11:
1190 return 512 + 4096 * 11;
1191 case AMDGPUAS::CONSTANT_BUFFER_12:
1192 return 512 + 4096 * 12;
1193 case AMDGPUAS::CONSTANT_BUFFER_13:
1194 return 512 + 4096 * 13;
1195 case AMDGPUAS::CONSTANT_BUFFER_14:
1196 return 512 + 4096 * 14;
1197 case AMDGPUAS::CONSTANT_BUFFER_15:
1198 return 512 + 4096 * 15;
1199 default:
1200 return -1;
1201 }
1202}
1203
/// Custom lowering for ISD::LOAD. Always returns a merge of the loaded
/// value and the chain. Handles, in order: loads the AMDGPU base class can
/// lower, vector loads from local memory (via SplitVectorLoad),
/// constant-buffer loads (turned into CONST_ADDRESS nodes), sign-extending
/// loads (expanded to an extload plus shl/sra), and private (stack) loads
/// (lowered to REGISTER_LOAD for indirect addressing).
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the AMDGPU base class the first chance to lower this load.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2] = {
      Ret,
      Chain
    };
    return DAG.getMergeValues(Ops, 2, DL);
  }


  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Constant-buffer loads. Only non-extending and zero-extending loads can
  // use the kcache path (sign-extension is handled further down).
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads only need lane 0 of the v4i32 result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Sign-extend manually: extload the narrow value, then shift left and
    // arithmetic-shift right by the width difference.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte address into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element, advancing the pointer by the
    // relative increment getStackAddress computes for each element.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001351
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// For compute shaders each argument is loaded (sign-extended) from constant
/// buffer 0, starting at byte offset 36; for all other shader types the
/// arguments arrive in live-in 128-bit registers.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  // Recover the pre-legalization argument types, then run the calling
  // convention analysis over them to get each argument's location.
  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Non-compute shaders receive their inputs in registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);
    // 4 is the preferred alignment for
    // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1407
Matt Arsenault758659232013-05-18 00:21:46 +00001408EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001409 if (!VT.isVector()) return MVT::i32;
1410 return VT.changeVectorElementTypeToInteger();
1411}
1412
/// Canonicalize a BUILD_VECTOR used as a swizzle source: lanes that are
/// undef, constant 0.0, constant 1.0, or duplicates of an earlier lane are
/// replaced by undef, and \p RemapSwizzle records for each such lane either
/// a special selector (7 = mask-write, 4 = SEL_0, 5 = SEL_1) or the index
/// of the earlier lane it duplicates.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
      VectorEntry.getOperand(0),
      VectorEntry.getOperand(1),
      VectorEntry.getOperand(2),
      VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    // NOTE: this 'if' guards only the single RemapSwizzle assignment below
    // (the comment sits between the condition and its one statement).
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    // Constant 0.0 / 1.0 lanes can be produced by the SEL_0 / SEL_1
    // selectors instead of occupying a register lane.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: a lane equal to an earlier lane is dropped and its
    // swizzle redirected to that earlier lane.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1455
/// Try to place EXTRACT_VECTOR_ELT operands of the BUILD_VECTOR into the
/// lane matching their source element index, recording the resulting lane
/// permutation in \p RemapSwizzle (initialized to identity).
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
      VectorEntry.getOperand(0),
      VectorEntry.getOperand(1),
      VectorEntry.getOperand(2),
      VectorEntry.getOperand(3)
  };
  // Lanes already holding the extract of their own index must not be moved.
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  // NOTE(review): this performs at most ONE swap — the loop breaks after the
  // first movable extract element is repositioned. Presumably a single swap
  // per call was deemed sufficient here; confirm against callers if changing.
  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1493
1494
1495SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1496SDValue Swz[4], SelectionDAG &DAG) const {
1497 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1498 // Old -> New swizzle values
1499 DenseMap<unsigned, unsigned> SwizzleRemap;
1500
1501 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1502 for (unsigned i = 0; i < 4; i++) {
1503 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1504 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1505 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1506 }
1507
1508 SwizzleRemap.clear();
1509 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1510 for (unsigned i = 0; i < 4; i++) {
1511 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1512 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1513 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1514 }
1515
1516 return BuildVector;
1517}
1518
1519
Tom Stellard75aadc22012-12-11 21:25:42 +00001520//===----------------------------------------------------------------------===//
1521// Custom DAG Optimizations
1522//===----------------------------------------------------------------------===//
1523
1524SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1525 DAGCombinerInfo &DCI) const {
1526 SelectionDAG &DAG = DCI.DAG;
1527
1528 switch (N->getOpcode()) {
1529 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1530 case ISD::FP_ROUND: {
1531 SDValue Arg = N->getOperand(0);
1532 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001533 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001534 Arg.getOperand(0));
1535 }
1536 break;
1537 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001538
1539 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1540 // (i32 select_cc f32, f32, -1, 0 cc)
1541 //
1542 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1543 // this to one of the SET*_DX10 instructions.
1544 case ISD::FP_TO_SINT: {
1545 SDValue FNeg = N->getOperand(0);
1546 if (FNeg.getOpcode() != ISD::FNEG) {
1547 return SDValue();
1548 }
1549 SDValue SelectCC = FNeg.getOperand(0);
1550 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1551 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1552 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1553 !isHWTrueValue(SelectCC.getOperand(2)) ||
1554 !isHWFalseValue(SelectCC.getOperand(3))) {
1555 return SDValue();
1556 }
1557
Andrew Trickef9de2a2013-05-25 02:42:55 +00001558 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001559 SelectCC.getOperand(0), // LHS
1560 SelectCC.getOperand(1), // RHS
1561 DAG.getConstant(-1, MVT::i32), // True
1562 DAG.getConstant(0, MVT::i32), // Flase
1563 SelectCC.getOperand(4)); // CC
1564
1565 break;
1566 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001567
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001568 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1569 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001570 case ISD::INSERT_VECTOR_ELT: {
1571 SDValue InVec = N->getOperand(0);
1572 SDValue InVal = N->getOperand(1);
1573 SDValue EltNo = N->getOperand(2);
1574 SDLoc dl(N);
1575
1576 // If the inserted element is an UNDEF, just use the input vector.
1577 if (InVal.getOpcode() == ISD::UNDEF)
1578 return InVec;
1579
1580 EVT VT = InVec.getValueType();
1581
1582 // If we can't generate a legal BUILD_VECTOR, exit
1583 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1584 return SDValue();
1585
1586 // Check that we know which element is being inserted
1587 if (!isa<ConstantSDNode>(EltNo))
1588 return SDValue();
1589 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1590
1591 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1592 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1593 // vector elements.
1594 SmallVector<SDValue, 8> Ops;
1595 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1596 Ops.append(InVec.getNode()->op_begin(),
1597 InVec.getNode()->op_end());
1598 } else if (InVec.getOpcode() == ISD::UNDEF) {
1599 unsigned NElts = VT.getVectorNumElements();
1600 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1601 } else {
1602 return SDValue();
1603 }
1604
1605 // Insert the element
1606 if (Elt < Ops.size()) {
1607 // All the operands of BUILD_VECTOR must have the same type;
1608 // we enforce that here.
1609 EVT OpVT = Ops[0].getValueType();
1610 if (InVal.getValueType() != OpVT)
1611 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1612 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1613 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1614 Ops[Elt] = InVal;
1615 }
1616
1617 // Return the new vector
1618 return DAG.getNode(ISD::BUILD_VECTOR, dl,
Matt Arsenault7939acd2014-04-07 16:44:24 +00001619 VT, Ops.data(), Ops.size());
Quentin Colombete2e05482013-07-30 00:27:16 +00001620 }
1621
Tom Stellard365366f2013-01-23 02:09:06 +00001622 // Extract_vec (Build_vector) generated by custom lowering
1623 // also needs to be customly combined
1624 case ISD::EXTRACT_VECTOR_ELT: {
1625 SDValue Arg = N->getOperand(0);
1626 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1627 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1628 unsigned Element = Const->getZExtValue();
1629 return Arg->getOperand(Element);
1630 }
1631 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001632 if (Arg.getOpcode() == ISD::BITCAST &&
1633 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1634 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1635 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001636 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001637 Arg->getOperand(0).getOperand(Element));
1638 }
1639 }
Tom Stellard365366f2013-01-23 02:09:06 +00001640 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001641
1642 case ISD::SELECT_CC: {
1643 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1644 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001645 //
1646 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1647 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001648 SDValue LHS = N->getOperand(0);
1649 if (LHS.getOpcode() != ISD::SELECT_CC) {
1650 return SDValue();
1651 }
1652
1653 SDValue RHS = N->getOperand(1);
1654 SDValue True = N->getOperand(2);
1655 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001656 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001657
1658 if (LHS.getOperand(2).getNode() != True.getNode() ||
1659 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001660 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001661 return SDValue();
1662 }
1663
Tom Stellard5e524892013-03-08 15:37:11 +00001664 switch (NCC) {
1665 default: return SDValue();
1666 case ISD::SETNE: return LHS;
1667 case ISD::SETEQ: {
1668 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1669 LHSCC = ISD::getSetCCInverse(LHSCC,
1670 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001671 if (DCI.isBeforeLegalizeOps() ||
1672 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1673 return DAG.getSelectCC(SDLoc(N),
1674 LHS.getOperand(0),
1675 LHS.getOperand(1),
1676 LHS.getOperand(2),
1677 LHS.getOperand(3),
1678 LHSCC);
1679 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001680 }
Tom Stellard5e524892013-03-08 15:37:11 +00001681 }
Tom Stellardcd428182013-09-28 02:50:38 +00001682 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001683 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001684
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001685 case AMDGPUISD::EXPORT: {
1686 SDValue Arg = N->getOperand(1);
1687 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1688 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001689
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001690 SDValue NewArgs[8] = {
1691 N->getOperand(0), // Chain
1692 SDValue(),
1693 N->getOperand(2), // ArrayBase
1694 N->getOperand(3), // Type
1695 N->getOperand(4), // SWZ_X
1696 N->getOperand(5), // SWZ_Y
1697 N->getOperand(6), // SWZ_Z
1698 N->getOperand(7) // SWZ_W
1699 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001700 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001701 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001702 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001703 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001704 case AMDGPUISD::TEXTURE_FETCH: {
1705 SDValue Arg = N->getOperand(1);
1706 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1707 break;
1708
1709 SDValue NewArgs[19] = {
1710 N->getOperand(0),
1711 N->getOperand(1),
1712 N->getOperand(2),
1713 N->getOperand(3),
1714 N->getOperand(4),
1715 N->getOperand(5),
1716 N->getOperand(6),
1717 N->getOperand(7),
1718 N->getOperand(8),
1719 N->getOperand(9),
1720 N->getOperand(10),
1721 N->getOperand(11),
1722 N->getOperand(12),
1723 N->getOperand(13),
1724 N->getOperand(14),
1725 N->getOperand(15),
1726 N->getOperand(16),
1727 N->getOperand(17),
1728 N->getOperand(18),
1729 };
1730 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1731 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1732 NewArgs, 19);
1733 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001734 }
1735 return SDValue();
1736}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001737
1738static bool
1739FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001740 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001741 const R600InstrInfo *TII =
1742 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1743 if (!Src.isMachineOpcode())
1744 return false;
1745 switch (Src.getMachineOpcode()) {
1746 case AMDGPU::FNEG_R600:
1747 if (!Neg.getNode())
1748 return false;
1749 Src = Src.getOperand(0);
1750 Neg = DAG.getTargetConstant(1, MVT::i32);
1751 return true;
1752 case AMDGPU::FABS_R600:
1753 if (!Abs.getNode())
1754 return false;
1755 Src = Src.getOperand(0);
1756 Abs = DAG.getTargetConstant(1, MVT::i32);
1757 return true;
1758 case AMDGPU::CONST_COPY: {
1759 unsigned Opcode = ParentNode->getMachineOpcode();
1760 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1761
1762 if (!Sel.getNode())
1763 return false;
1764
1765 SDValue CstOffset = Src.getOperand(0);
1766 if (ParentNode->getValueType(0).isVector())
1767 return false;
1768
1769 // Gather constants values
1770 int SrcIndices[] = {
1771 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1772 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1773 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1774 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1775 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1776 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1777 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1778 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1779 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1780 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1781 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1782 };
1783 std::vector<unsigned> Consts;
1784 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1785 int OtherSrcIdx = SrcIndices[i];
1786 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1787 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1788 continue;
1789 if (HasDst) {
1790 OtherSrcIdx--;
1791 OtherSelIdx--;
1792 }
1793 if (RegisterSDNode *Reg =
1794 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1795 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1796 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1797 ParentNode->getOperand(OtherSelIdx));
1798 Consts.push_back(Cst->getZExtValue());
1799 }
1800 }
1801 }
1802
1803 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1804 Consts.push_back(Cst->getZExtValue());
1805 if (!TII->fitsConstReadLimitations(Consts)) {
1806 return false;
1807 }
1808
1809 Sel = CstOffset;
1810 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1811 return true;
1812 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001813 case AMDGPU::MOV_IMM_I32:
1814 case AMDGPU::MOV_IMM_F32: {
1815 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1816 uint64_t ImmValue = 0;
1817
1818
1819 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1820 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1821 float FloatValue = FPC->getValueAPF().convertToFloat();
1822 if (FloatValue == 0.0) {
1823 ImmReg = AMDGPU::ZERO;
1824 } else if (FloatValue == 0.5) {
1825 ImmReg = AMDGPU::HALF;
1826 } else if (FloatValue == 1.0) {
1827 ImmReg = AMDGPU::ONE;
1828 } else {
1829 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1830 }
1831 } else {
1832 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1833 uint64_t Value = C->getZExtValue();
1834 if (Value == 0) {
1835 ImmReg = AMDGPU::ZERO;
1836 } else if (Value == 1) {
1837 ImmReg = AMDGPU::ONE_INT;
1838 } else {
1839 ImmValue = Value;
1840 }
1841 }
1842
1843 // Check that we aren't already using an immediate.
1844 // XXX: It's possible for an instruction to have more than one
1845 // immediate operand, but this is not supported yet.
1846 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1847 if (!Imm.getNode())
1848 return false;
1849 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1850 assert(C);
1851 if (C->getZExtValue())
1852 return false;
1853 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1854 }
1855 Src = DAG.getRegister(ImmReg, MVT::i32);
1856 return true;
1857 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001858 default:
1859 return false;
1860 }
1861}
1862
1863
1864/// \brief Fold the instructions after selecting them
1865SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1866 SelectionDAG &DAG) const {
1867 const R600InstrInfo *TII =
1868 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1869 if (!Node->isMachineOpcode())
1870 return Node;
1871 unsigned Opcode = Node->getMachineOpcode();
1872 SDValue FakeOp;
1873
1874 std::vector<SDValue> Ops;
1875 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1876 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001877 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001878
1879 if (Opcode == AMDGPU::DOT_4) {
1880 int OperandIdx[] = {
1881 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1882 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1883 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1884 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1885 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1886 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1887 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1888 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001889 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001890 int NegIdx[] = {
1891 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1892 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1893 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1894 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1895 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1896 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1897 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1898 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1899 };
1900 int AbsIdx[] = {
1901 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1902 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1903 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1904 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1905 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1906 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1907 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1908 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1909 };
1910 for (unsigned i = 0; i < 8; i++) {
1911 if (OperandIdx[i] < 0)
1912 return Node;
1913 SDValue &Src = Ops[OperandIdx[i] - 1];
1914 SDValue &Neg = Ops[NegIdx[i] - 1];
1915 SDValue &Abs = Ops[AbsIdx[i] - 1];
1916 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1917 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1918 if (HasDst)
1919 SelIdx--;
1920 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001921 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1922 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1923 }
1924 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1925 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1926 SDValue &Src = Ops[i];
1927 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001928 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1929 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001930 } else if (Opcode == AMDGPU::CLAMP_R600) {
1931 SDValue Src = Node->getOperand(0);
1932 if (!Src.isMachineOpcode() ||
1933 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1934 return Node;
1935 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1936 AMDGPU::OpName::clamp);
1937 if (ClampIdx < 0)
1938 return Node;
1939 std::vector<SDValue> Ops;
1940 unsigned NumOp = Src.getNumOperands();
1941 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001942 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001943 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1944 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1945 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001946 } else {
1947 if (!TII->hasInstrModifiers(Opcode))
1948 return Node;
1949 int OperandIdx[] = {
1950 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1951 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1952 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1953 };
1954 int NegIdx[] = {
1955 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1956 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1957 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1958 };
1959 int AbsIdx[] = {
1960 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1961 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1962 -1
1963 };
1964 for (unsigned i = 0; i < 3; i++) {
1965 if (OperandIdx[i] < 0)
1966 return Node;
1967 SDValue &Src = Ops[OperandIdx[i] - 1];
1968 SDValue &Neg = Ops[NegIdx[i] - 1];
1969 SDValue FakeAbs;
1970 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1971 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1972 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001973 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1974 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001975 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001976 ImmIdx--;
1977 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001978 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001979 SDValue &Imm = Ops[ImmIdx];
1980 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001981 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1982 }
1983 }
1984
1985 return Node;
1986}