blob: 03feabe23e68283e12208d1ea854c9078c1e49ce [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +000093
94 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
95 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +000096 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
98 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
99 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000100 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
101 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
102
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000103 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000104 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000105 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000106 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000107 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
108 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000109
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setOperationAction(ISD::LOAD, MVT::i32, Custom);
111 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000112 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
113
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000115 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000116 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000117 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000118 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119
Michel Danzer49812b52013-07-10 16:37:07 +0000120 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
121
Tom Stellardb852af52013-03-08 15:37:03 +0000122 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000123 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000124 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000125}
126
/// Expand pseudo-instructions that were marked usesCustomInserter into real
/// machine instructions after instruction selection.
///
/// \param MI the pseudo-instruction to expand (erased on most paths).
/// \param BB the block containing \p MI; instructions are inserted before it.
/// \returns the block in which subsequent instructions should be inserted
///          (always \p BB here).
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // If the result is actually used, keep the _RET form untouched.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      // Rebuild as the NORET form, copying all operands except the dead dst
      // (operand 0).
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG pseudos become a MOV carrying the corresponding
  // instruction modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Mark the defining instruction of the masked register with the MASK
    // flag; MASK_WRITE itself emits nothing.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float constant via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // A read from the constant buffer: MOV from ALU_CONST with the constant
    // address placed in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the very next instruction is the function's RETURN, fold the
    // end-of-program bit into this write.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  // TXD: texture sample with explicit derivatives. Emits two gradient setup
  // instructions (H and V) followed by the gradient sample, all sharing the
  // same coordinate swizzle / coordinate-type operands.
  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);  // resource id
    MachineOperand &SID = MI->getOperand(5);  // sampler id
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;  // coordinate swizzle
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;      // coord-type (normalized?)

    // Adjust swizzle/coordinate-type per texture target. The numeric ids
    // (5=Rect, 6=Shadow1D, ... 12=Shadow2DArray) appear to follow the
    // frontend's texture-target enumeration — confirm against the producer.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    // The implicit uses of T0/T1 keep the gradient setup instructions alive
    // and ordered before the sample.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  // TXD_SHADOW: identical to TXD except the final sample is the
  // shadow-compare variant (TEX_SAMPLE_C_G).
  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  // Conditional branches: materialize the predicate with PRED_X, then emit
  // a predicated JUMP_COND killing PREDICATE_BIT.
  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // NOTE(review): 84 / 40 are the CF_INST export encodings for the
    // Evergreen vs R600 ISAs respectively — confirm against the ISA docs.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    // Note: RETURN is kept (not erased) — we only augmented its operands.
    return BB;
  }
  }

  // All cases that reach here replaced the pseudo; remove it.
  MI->eraseFromParent();
  return BB;
}
503
504//===----------------------------------------------------------------------===//
505// Custom DAG Lowering Operations
506//===----------------------------------------------------------------------===//
507
/// Custom-lower the DAG operations registered as Custom in the constructor.
///
/// Dispatches on the node's opcode; intrinsic opcodes (INTRINSIC_VOID /
/// INTRINSIC_WO_CHAIN) are further dispatched on the intrinsic id. Returns
/// the replacement SDValue, or an empty SDValue() when the default expansion
/// should proceed (the trailing `break`s fall through to that).
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    // Record the register as a function live-out and copy the value into it.
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    // Read a shader input: mark the TReg32 register live-in and copy from it
    // at function entry.
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      // Negative ijb: constant (flat) interpolation — load the parameter
      // directly and extract the requested channel.
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      // Otherwise interpolate using the I/J barycentric registers, which
      // arrive in consecutive TReg32 registers (2*ijb, 2*ijb+1).
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // INTERP_PAIR_* produce two channels; slot%4 picks XY vs ZW pair and
      // slot%2 picks which of the pair's two results to return.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    // Interpolate a full XY or ZW pair at once; I/J come in as operands and
    // the two results are packed into a v2f32.
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    // All texture intrinsics funnel into one TEXTURE_FETCH node whose first
    // operand encodes the texture operation.
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // Operand layout: op, coord, identity source swizzle (0..3),
      // resource/sampler ids, identity dest swizzle (0..3), offsets and
      // coordinate-type flags forwarded from the intrinsic.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    // 4-component dot product: interleave the extracted lanes of both
    // vectors as DOT4's eight scalar operands.
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Dispatch group sizes etc. are implicit kernel parameters, loaded by
    // slot index from the parameter buffer.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup ids live in T1.{X,Y,Z}; work-item ids in T0.{X,Y,Z}.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
760
761void R600TargetLowering::ReplaceNodeResults(SDNode *N,
762 SmallVectorImpl<SDValue> &Results,
763 SelectionDAG &DAG) const {
764 switch (N->getOpcode()) {
765 default: return;
766 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000767 return;
768 case ISD::LOAD: {
769 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
770 Results.push_back(SDValue(Node, 0));
771 Results.push_back(SDValue(Node, 1));
772 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
773 // function
774 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
775 return;
776 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000777 case ISD::STORE:
778 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
779 Results.push_back(SDValue(Node, 0));
780 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 }
782}
783
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000784SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
785 // On hw >= R700, COS/SIN input must be between -1. and 1.
786 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
787 EVT VT = Op.getValueType();
788 SDValue Arg = Op.getOperand(0);
789 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
790 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
791 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
792 DAG.getConstantFP(0.15915494309, MVT::f32)),
793 DAG.getConstantFP(0.5, MVT::f32)));
794 unsigned TrigNode;
795 switch (Op.getOpcode()) {
796 case ISD::FCOS:
797 TrigNode = AMDGPUISD::COS_HW;
798 break;
799 case ISD::FSIN:
800 TrigNode = AMDGPUISD::SIN_HW;
801 break;
802 default:
803 llvm_unreachable("Wrong trig opcode");
804 }
805 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
806 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
807 DAG.getConstantFP(-0.5, MVT::f32)));
808 if (Gen >= AMDGPUSubtarget::R700)
809 return TrigVal;
810 // On R600 hw, COS/SIN input must be between -Pi and Pi.
811 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
812 DAG.getConstantFP(3.14159265359, MVT::f32));
813}
814
Tom Stellard75aadc22012-12-11 21:25:42 +0000815SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
816 return DAG.getNode(
817 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000818 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 MVT::i1,
820 Op, DAG.getConstantFP(0.0f, MVT::f32),
821 DAG.getCondCode(ISD::SETNE)
822 );
823}
824
Tom Stellard75aadc22012-12-11 21:25:42 +0000825SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000826 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000827 unsigned DwordOffset) const {
828 unsigned ByteOffset = DwordOffset * 4;
829 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000830 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000831
832 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
833 assert(isInt<16>(ByteOffset));
834
835 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
836 DAG.getConstant(ByteOffset, MVT::i32), // PTR
837 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
838 false, false, false, 0);
839}
840
Tom Stellard75aadc22012-12-11 21:25:42 +0000841bool R600TargetLowering::isZero(SDValue Op) const {
842 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
843 return Cst->isNullValue();
844 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
845 return CstFP->isZero();
846 } else {
847 return false;
848 }
849}
850
/// Lower SELECT_CC into a form the R600 hardware can match, preferring the
/// native SET* and CND* instructions, then min/max, and finally a pair of
/// SELECT_CC nodes when nothing native applies.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are reversed relative to what SET* produces, invert the
  // condition (and possibly swap the compare operands too) so that the
  // hardware true value ends up in the True slot.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0,   f32, f32, cc_supported
  // select_cc i32, 0,   i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition requires swapping True/False as well.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form; rewrite SETNE-style codes as the inverse
    // condition with True/False exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations: first materialize the condition as a hardware
  // boolean, then select on that boolean being non-zero.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
992
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000993/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
994/// convert these pointers to a register index. Each register holds
995/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
996/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
997/// for indirect addressing.
998SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
999 unsigned StackWidth,
1000 SelectionDAG &DAG) const {
1001 unsigned SRLPad;
1002 switch(StackWidth) {
1003 case 1:
1004 SRLPad = 2;
1005 break;
1006 case 2:
1007 SRLPad = 3;
1008 break;
1009 case 4:
1010 SRLPad = 4;
1011 break;
1012 default: llvm_unreachable("Invalid stack width");
1013 }
1014
Andrew Trickef9de2a2013-05-25 02:42:55 +00001015 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001016 DAG.getConstant(SRLPad, MVT::i32));
1017}
1018
1019void R600TargetLowering::getStackAddress(unsigned StackWidth,
1020 unsigned ElemIdx,
1021 unsigned &Channel,
1022 unsigned &PtrIncr) const {
1023 switch (StackWidth) {
1024 default:
1025 case 1:
1026 Channel = 0;
1027 if (ElemIdx > 0) {
1028 PtrIncr = 1;
1029 } else {
1030 PtrIncr = 0;
1031 }
1032 break;
1033 case 2:
1034 Channel = ElemIdx % 2;
1035 if (ElemIdx == 2) {
1036 PtrIncr = 1;
1037 } else {
1038 PtrIncr = 0;
1039 }
1040 break;
1041 case 4:
1042 Channel = ElemIdx;
1043 PtrIncr = 0;
1044 break;
1045 }
1046}
1047
Tom Stellard75aadc22012-12-11 21:25:42 +00001048SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001049 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001050 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1051 SDValue Chain = Op.getOperand(0);
1052 SDValue Value = Op.getOperand(1);
1053 SDValue Ptr = Op.getOperand(2);
1054
Tom Stellard2ffc3302013-08-26 15:05:44 +00001055 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001056 if (Result.getNode()) {
1057 return Result;
1058 }
1059
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001060 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1061 if (StoreNode->isTruncatingStore()) {
1062 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001063 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001064 EVT MemVT = StoreNode->getMemoryVT();
1065 SDValue MaskConstant;
1066 if (MemVT == MVT::i8) {
1067 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1068 } else {
1069 assert(MemVT == MVT::i16);
1070 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1071 }
1072 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1073 DAG.getConstant(2, MVT::i32));
1074 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1075 DAG.getConstant(0x00000003, VT));
1076 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1077 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1078 DAG.getConstant(3, VT));
1079 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1080 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1081 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1082 // vector instead.
1083 SDValue Src[4] = {
1084 ShiftedValue,
1085 DAG.getConstant(0, MVT::i32),
1086 DAG.getConstant(0, MVT::i32),
1087 Mask
1088 };
1089 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1090 SDValue Args[3] = { Chain, Input, DWordAddr };
1091 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1092 Op->getVTList(), Args, 3, MemVT,
1093 StoreNode->getMemOperand());
1094 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1095 Value.getValueType().bitsGE(MVT::i32)) {
1096 // Convert pointer from byte address to dword address.
1097 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1098 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1099 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001100
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001101 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001102 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001103 } else {
1104 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1105 }
1106 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001107 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001108 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001109
1110 EVT ValueVT = Value.getValueType();
1111
1112 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1113 return SDValue();
1114 }
1115
Tom Stellarde9373602014-01-22 19:24:14 +00001116 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
1117 if (Ret.getNode()) {
1118 return Ret;
1119 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001120 // Lowering for indirect addressing
1121
1122 const MachineFunction &MF = DAG.getMachineFunction();
1123 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1124 getTargetMachine().getFrameLowering());
1125 unsigned StackWidth = TFL->getStackWidth(MF);
1126
1127 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1128
1129 if (ValueVT.isVector()) {
1130 unsigned NumElemVT = ValueVT.getVectorNumElements();
1131 EVT ElemVT = ValueVT.getVectorElementType();
1132 SDValue Stores[4];
1133
1134 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1135 "vector width in load");
1136
1137 for (unsigned i = 0; i < NumElemVT; ++i) {
1138 unsigned Channel, PtrIncr;
1139 getStackAddress(StackWidth, i, Channel, PtrIncr);
1140 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1141 DAG.getConstant(PtrIncr, MVT::i32));
1142 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1143 Value, DAG.getConstant(i, MVT::i32));
1144
1145 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1146 Chain, Elem, Ptr,
1147 DAG.getTargetConstant(Channel, MVT::i32));
1148 }
1149 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1150 } else {
1151 if (ValueVT == MVT::i8) {
1152 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1153 }
1154 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001155 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001156 }
1157
1158 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001159}
1160
Tom Stellard365366f2013-01-23 02:09:06 +00001161// return (512 + (kc_bank << 12)
1162static int
1163ConstantAddressBlock(unsigned AddressSpace) {
1164 switch (AddressSpace) {
1165 case AMDGPUAS::CONSTANT_BUFFER_0:
1166 return 512;
1167 case AMDGPUAS::CONSTANT_BUFFER_1:
1168 return 512 + 4096;
1169 case AMDGPUAS::CONSTANT_BUFFER_2:
1170 return 512 + 4096 * 2;
1171 case AMDGPUAS::CONSTANT_BUFFER_3:
1172 return 512 + 4096 * 3;
1173 case AMDGPUAS::CONSTANT_BUFFER_4:
1174 return 512 + 4096 * 4;
1175 case AMDGPUAS::CONSTANT_BUFFER_5:
1176 return 512 + 4096 * 5;
1177 case AMDGPUAS::CONSTANT_BUFFER_6:
1178 return 512 + 4096 * 6;
1179 case AMDGPUAS::CONSTANT_BUFFER_7:
1180 return 512 + 4096 * 7;
1181 case AMDGPUAS::CONSTANT_BUFFER_8:
1182 return 512 + 4096 * 8;
1183 case AMDGPUAS::CONSTANT_BUFFER_9:
1184 return 512 + 4096 * 9;
1185 case AMDGPUAS::CONSTANT_BUFFER_10:
1186 return 512 + 4096 * 10;
1187 case AMDGPUAS::CONSTANT_BUFFER_11:
1188 return 512 + 4096 * 11;
1189 case AMDGPUAS::CONSTANT_BUFFER_12:
1190 return 512 + 4096 * 12;
1191 case AMDGPUAS::CONSTANT_BUFFER_13:
1192 return 512 + 4096 * 13;
1193 case AMDGPUAS::CONSTANT_BUFFER_14:
1194 return 512 + 4096 * 14;
1195 case AMDGPUAS::CONSTANT_BUFFER_15:
1196 return 512 + 4096 * 15;
1197 default:
1198 return -1;
1199 }
1200}
1201
/// Custom-lower loads: try the generic AMDGPU lowering, split local-address
/// vector loads, fold constant-buffer loads into CONST_ADDRESS nodes, expand
/// unsupported sign-extending loads, and lower private-address loads to
/// indirect REGISTER_LOAD operations.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Give the generic AMDGPU lowering the first chance at this load.
  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
  if (Ret.getNode()) {
    SDValue Ops[2];
    Ops[0] = Ret;
    Ops[1] = Chain;
    return DAG.getMergeValues(Ops, 2, DL);
  }


  // Vector loads from local memory are split into scalar pieces.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Loads from a constant buffer (non- or zero-extending only) can be folded
  // into CONST_ADDRESS reads of the constant cache.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar results are built as a 4 x i32 vector and extracted below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      // Scalar load: take element 0 of the vector result.
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand SEXTLOAD as EXTLOAD followed by shift-left / arithmetic
    // shift-right to replicate the sign bit.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                  LoadNode->getPointerInfo(), MemVT,
                                  LoadNode->isVolatile(),
                                  LoadNode->isNonTemporal(),
                                  LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Everything below handles the private (stack) address space only.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Load each element from its channel/slot as computed by getStackAddress.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001347
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Non-compute shaders receive their arguments in registers; compute
/// kernels read theirs from constant buffer 0, after the 36-byte block of
/// implicit dispatch parameters.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the IR-level argument types (before legalization split them), so
  // each argument is assigned the memory layout the runtime actually uses.
  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Graphics shaders: arguments arrive as live-in 128-bit registers.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // Compute kernels: arguments are loaded from constant buffer 0.
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                                   AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);
                                 // 4 is the preferred alignment for
                                 // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1398
Matt Arsenault758659232013-05-18 00:21:46 +00001399EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001400 if (!VT.isVector()) return MVT::i32;
1401 return VT.changeVectorElementTypeToInteger();
1402}
1403
Benjamin Kramer193960c2013-06-11 13:32:25 +00001404static SDValue
1405CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
1406 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001407 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1408 assert(RemapSwizzle.empty());
1409 SDValue NewBldVec[4] = {
1410 VectorEntry.getOperand(0),
1411 VectorEntry.getOperand(1),
1412 VectorEntry.getOperand(2),
1413 VectorEntry.getOperand(3)
1414 };
1415
1416 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001417 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1418 // We mask write here to teach later passes that the ith element of this
1419 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1420 // break false dependencies and additionnaly make assembly easier to read.
1421 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001422 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1423 if (C->isZero()) {
1424 RemapSwizzle[i] = 4; // SEL_0
1425 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1426 } else if (C->isExactlyValue(1.0)) {
1427 RemapSwizzle[i] = 5; // SEL_1
1428 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1429 }
1430 }
1431
1432 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1433 continue;
1434 for (unsigned j = 0; j < i; j++) {
1435 if (NewBldVec[i] == NewBldVec[j]) {
1436 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1437 RemapSwizzle[i] = j;
1438 break;
1439 }
1440 }
1441 }
1442
1443 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1444 VectorEntry.getValueType(), NewBldVec, 4);
1445}
1446
Benjamin Kramer193960c2013-06-11 13:32:25 +00001447static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1448 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001449 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1450 assert(RemapSwizzle.empty());
1451 SDValue NewBldVec[4] = {
1452 VectorEntry.getOperand(0),
1453 VectorEntry.getOperand(1),
1454 VectorEntry.getOperand(2),
1455 VectorEntry.getOperand(3)
1456 };
1457 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001458 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001459 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001460 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1461 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1462 ->getZExtValue();
1463 if (i == Idx)
1464 isUnmovable[Idx] = true;
1465 }
1466 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001467
1468 for (unsigned i = 0; i < 4; i++) {
1469 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1470 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1471 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001472 if (isUnmovable[Idx])
1473 continue;
1474 // Swap i and Idx
1475 std::swap(NewBldVec[Idx], NewBldVec[i]);
1476 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1477 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001478 }
1479 }
1480
1481 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1482 VectorEntry.getValueType(), NewBldVec, 4);
1483}
1484
1485
1486SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1487SDValue Swz[4], SelectionDAG &DAG) const {
1488 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1489 // Old -> New swizzle values
1490 DenseMap<unsigned, unsigned> SwizzleRemap;
1491
1492 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1493 for (unsigned i = 0; i < 4; i++) {
1494 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1495 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1496 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1497 }
1498
1499 SwizzleRemap.clear();
1500 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1501 for (unsigned i = 0; i < 4; i++) {
1502 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1503 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1504 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1505 }
1506
1507 return BuildVector;
1508}
1509
1510
Tom Stellard75aadc22012-12-11 21:25:42 +00001511//===----------------------------------------------------------------------===//
1512// Custom DAG Optimizations
1513//===----------------------------------------------------------------------===//
1514
1515SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1516 DAGCombinerInfo &DCI) const {
1517 SelectionDAG &DAG = DCI.DAG;
1518
1519 switch (N->getOpcode()) {
1520 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1521 case ISD::FP_ROUND: {
1522 SDValue Arg = N->getOperand(0);
1523 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001524 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001525 Arg.getOperand(0));
1526 }
1527 break;
1528 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001529
1530 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1531 // (i32 select_cc f32, f32, -1, 0 cc)
1532 //
1533 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1534 // this to one of the SET*_DX10 instructions.
1535 case ISD::FP_TO_SINT: {
1536 SDValue FNeg = N->getOperand(0);
1537 if (FNeg.getOpcode() != ISD::FNEG) {
1538 return SDValue();
1539 }
1540 SDValue SelectCC = FNeg.getOperand(0);
1541 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1542 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1543 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1544 !isHWTrueValue(SelectCC.getOperand(2)) ||
1545 !isHWFalseValue(SelectCC.getOperand(3))) {
1546 return SDValue();
1547 }
1548
Andrew Trickef9de2a2013-05-25 02:42:55 +00001549 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001550 SelectCC.getOperand(0), // LHS
1551 SelectCC.getOperand(1), // RHS
1552 DAG.getConstant(-1, MVT::i32), // True
1553 DAG.getConstant(0, MVT::i32), // Flase
1554 SelectCC.getOperand(4)); // CC
1555
1556 break;
1557 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001558
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001559 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1560 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001561 case ISD::INSERT_VECTOR_ELT: {
1562 SDValue InVec = N->getOperand(0);
1563 SDValue InVal = N->getOperand(1);
1564 SDValue EltNo = N->getOperand(2);
1565 SDLoc dl(N);
1566
1567 // If the inserted element is an UNDEF, just use the input vector.
1568 if (InVal.getOpcode() == ISD::UNDEF)
1569 return InVec;
1570
1571 EVT VT = InVec.getValueType();
1572
1573 // If we can't generate a legal BUILD_VECTOR, exit
1574 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1575 return SDValue();
1576
1577 // Check that we know which element is being inserted
1578 if (!isa<ConstantSDNode>(EltNo))
1579 return SDValue();
1580 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1581
1582 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1583 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1584 // vector elements.
1585 SmallVector<SDValue, 8> Ops;
1586 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1587 Ops.append(InVec.getNode()->op_begin(),
1588 InVec.getNode()->op_end());
1589 } else if (InVec.getOpcode() == ISD::UNDEF) {
1590 unsigned NElts = VT.getVectorNumElements();
1591 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1592 } else {
1593 return SDValue();
1594 }
1595
1596 // Insert the element
1597 if (Elt < Ops.size()) {
1598 // All the operands of BUILD_VECTOR must have the same type;
1599 // we enforce that here.
1600 EVT OpVT = Ops[0].getValueType();
1601 if (InVal.getValueType() != OpVT)
1602 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1603 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1604 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1605 Ops[Elt] = InVal;
1606 }
1607
1608 // Return the new vector
1609 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1610 VT, &Ops[0], Ops.size());
1611 }
1612
Tom Stellard365366f2013-01-23 02:09:06 +00001613 // Extract_vec (Build_vector) generated by custom lowering
1614 // also needs to be customly combined
1615 case ISD::EXTRACT_VECTOR_ELT: {
1616 SDValue Arg = N->getOperand(0);
1617 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1618 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1619 unsigned Element = Const->getZExtValue();
1620 return Arg->getOperand(Element);
1621 }
1622 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001623 if (Arg.getOpcode() == ISD::BITCAST &&
1624 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1625 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1626 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001627 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001628 Arg->getOperand(0).getOperand(Element));
1629 }
1630 }
Tom Stellard365366f2013-01-23 02:09:06 +00001631 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001632
1633 case ISD::SELECT_CC: {
1634 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1635 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001636 //
1637 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1638 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001639 SDValue LHS = N->getOperand(0);
1640 if (LHS.getOpcode() != ISD::SELECT_CC) {
1641 return SDValue();
1642 }
1643
1644 SDValue RHS = N->getOperand(1);
1645 SDValue True = N->getOperand(2);
1646 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001647 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001648
1649 if (LHS.getOperand(2).getNode() != True.getNode() ||
1650 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001651 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001652 return SDValue();
1653 }
1654
Tom Stellard5e524892013-03-08 15:37:11 +00001655 switch (NCC) {
1656 default: return SDValue();
1657 case ISD::SETNE: return LHS;
1658 case ISD::SETEQ: {
1659 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1660 LHSCC = ISD::getSetCCInverse(LHSCC,
1661 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001662 if (DCI.isBeforeLegalizeOps() ||
1663 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1664 return DAG.getSelectCC(SDLoc(N),
1665 LHS.getOperand(0),
1666 LHS.getOperand(1),
1667 LHS.getOperand(2),
1668 LHS.getOperand(3),
1669 LHSCC);
1670 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001671 }
Tom Stellard5e524892013-03-08 15:37:11 +00001672 }
Tom Stellardcd428182013-09-28 02:50:38 +00001673 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001674 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001675
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001676 case AMDGPUISD::EXPORT: {
1677 SDValue Arg = N->getOperand(1);
1678 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1679 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001680
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001681 SDValue NewArgs[8] = {
1682 N->getOperand(0), // Chain
1683 SDValue(),
1684 N->getOperand(2), // ArrayBase
1685 N->getOperand(3), // Type
1686 N->getOperand(4), // SWZ_X
1687 N->getOperand(5), // SWZ_Y
1688 N->getOperand(6), // SWZ_Z
1689 N->getOperand(7) // SWZ_W
1690 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001691 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001692 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001693 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001694 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001695 case AMDGPUISD::TEXTURE_FETCH: {
1696 SDValue Arg = N->getOperand(1);
1697 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1698 break;
1699
1700 SDValue NewArgs[19] = {
1701 N->getOperand(0),
1702 N->getOperand(1),
1703 N->getOperand(2),
1704 N->getOperand(3),
1705 N->getOperand(4),
1706 N->getOperand(5),
1707 N->getOperand(6),
1708 N->getOperand(7),
1709 N->getOperand(8),
1710 N->getOperand(9),
1711 N->getOperand(10),
1712 N->getOperand(11),
1713 N->getOperand(12),
1714 N->getOperand(13),
1715 N->getOperand(14),
1716 N->getOperand(15),
1717 N->getOperand(16),
1718 N->getOperand(17),
1719 N->getOperand(18),
1720 };
1721 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1722 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1723 NewArgs, 19);
1724 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001725 }
1726 return SDValue();
1727}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001728
1729static bool
1730FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001731 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001732 const R600InstrInfo *TII =
1733 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1734 if (!Src.isMachineOpcode())
1735 return false;
1736 switch (Src.getMachineOpcode()) {
1737 case AMDGPU::FNEG_R600:
1738 if (!Neg.getNode())
1739 return false;
1740 Src = Src.getOperand(0);
1741 Neg = DAG.getTargetConstant(1, MVT::i32);
1742 return true;
1743 case AMDGPU::FABS_R600:
1744 if (!Abs.getNode())
1745 return false;
1746 Src = Src.getOperand(0);
1747 Abs = DAG.getTargetConstant(1, MVT::i32);
1748 return true;
1749 case AMDGPU::CONST_COPY: {
1750 unsigned Opcode = ParentNode->getMachineOpcode();
1751 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1752
1753 if (!Sel.getNode())
1754 return false;
1755
1756 SDValue CstOffset = Src.getOperand(0);
1757 if (ParentNode->getValueType(0).isVector())
1758 return false;
1759
1760 // Gather constants values
1761 int SrcIndices[] = {
1762 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1763 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1764 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1765 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1766 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1767 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1768 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1769 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1770 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1771 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1772 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1773 };
1774 std::vector<unsigned> Consts;
1775 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1776 int OtherSrcIdx = SrcIndices[i];
1777 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1778 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1779 continue;
1780 if (HasDst) {
1781 OtherSrcIdx--;
1782 OtherSelIdx--;
1783 }
1784 if (RegisterSDNode *Reg =
1785 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1786 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1787 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1788 ParentNode->getOperand(OtherSelIdx));
1789 Consts.push_back(Cst->getZExtValue());
1790 }
1791 }
1792 }
1793
1794 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1795 Consts.push_back(Cst->getZExtValue());
1796 if (!TII->fitsConstReadLimitations(Consts)) {
1797 return false;
1798 }
1799
1800 Sel = CstOffset;
1801 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1802 return true;
1803 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001804 case AMDGPU::MOV_IMM_I32:
1805 case AMDGPU::MOV_IMM_F32: {
1806 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1807 uint64_t ImmValue = 0;
1808
1809
1810 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1811 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1812 float FloatValue = FPC->getValueAPF().convertToFloat();
1813 if (FloatValue == 0.0) {
1814 ImmReg = AMDGPU::ZERO;
1815 } else if (FloatValue == 0.5) {
1816 ImmReg = AMDGPU::HALF;
1817 } else if (FloatValue == 1.0) {
1818 ImmReg = AMDGPU::ONE;
1819 } else {
1820 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1821 }
1822 } else {
1823 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1824 uint64_t Value = C->getZExtValue();
1825 if (Value == 0) {
1826 ImmReg = AMDGPU::ZERO;
1827 } else if (Value == 1) {
1828 ImmReg = AMDGPU::ONE_INT;
1829 } else {
1830 ImmValue = Value;
1831 }
1832 }
1833
1834 // Check that we aren't already using an immediate.
1835 // XXX: It's possible for an instruction to have more than one
1836 // immediate operand, but this is not supported yet.
1837 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1838 if (!Imm.getNode())
1839 return false;
1840 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1841 assert(C);
1842 if (C->getZExtValue())
1843 return false;
1844 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1845 }
1846 Src = DAG.getRegister(ImmReg, MVT::i32);
1847 return true;
1848 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001849 default:
1850 return false;
1851 }
1852}
1853
1854
/// \brief Fold the instructions after selecting them
///
/// Post-isel peephole: walks the source operands of \p Node and uses
/// FoldOperand to fold neg/abs modifiers, constant-buffer reads and inline
/// immediates directly into the instruction.  Also folds CLAMP_R600 into the
/// clamp modifier of its operand.  Returns a rebuilt machine node on the
/// first successful fold, otherwise the original \p Node.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  // Passed to FoldOperand in place of modifiers this instruction lacks;
  // its null node tells FoldOperand that fold is unavailable.
  SDValue FakeOp;

  // Mutable copy of the operand list; FoldOperand writes through references
  // into this vector, so a successful fold leaves Ops ready for getMachineNode.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
              I != E; ++I)
    Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources (src0_X..src1_W), each with its own
    // neg/abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The -1 adjusts MachineInstr operand indices (which include dst) to
      // SDNode operand indices (which do not).
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index); only the
    // values (odd positions after the chain) are candidates for folding.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold CLAMP_R600 into the clamp modifier of the instruction that
    // produces its operand, when that instruction supports modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // Intentionally shadows the outer Ops: we rebuild the *operand's*
    // instruction with clamp set, not this CLAMP_R600 node.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
      Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic single-slot ALU instruction: up to three sources, each with
    // neg, and the first two with abs.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      // src2 has no abs modifier; FakeAbs (null node) disables that fold.
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}