blob: 0fcb488672f848ce6843ef2a5803ec71c45d7bac [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +000093
94 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
95 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +000096 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
98 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
99 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000100 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
101 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
102
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000103 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000104 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000105 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000106 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000107 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
108 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000109
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setOperationAction(ISD::LOAD, MVT::i32, Custom);
111 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000112 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
113
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000115 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000116 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000117 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000118 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119
Michel Danzer49812b52013-07-10 16:37:07 +0000120 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
121
Tom Stellardb852af52013-03-08 15:37:03 +0000122 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000123 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000124 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000125}
126
/// \brief Expand pseudo instructions flagged with "usesCustomInserter" into
/// real R600 machine instructions.
///
/// The incoming pseudo \p MI is rewritten in place in \p BB and erased at the
/// bottom of the function; the cases that must keep \p MI alive (LDS _RET
/// instructions with live results, non-final exports, RETURN) return early
/// instead.  Returns the block into which following instructions should be
/// inserted.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // Result still has uses: keep the _RET form untouched.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      // Rebuild with the _NORET opcode, copying every operand except the
      // (dead) destination at index 0.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // CLAMP becomes a plain MOV with the clamp modifier bit set.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // FABS becomes a MOV with the source-absolute-value modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // FNEG becomes a MOV with the source-negate modifier.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Set the write-mask flag on the instruction that defines the masked
    // register; MASK_WRITE itself emits no code.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the float immediate via its IEEE bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy from the constant buffer: a MOV from ALU_CONST with the constant
    // index encoded in the src0_sel operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If this store is immediately followed by RETURN, fold the
    // end-of-program bit into it.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with user-supplied derivatives: load the horizontal and
    // vertical gradients into two temporaries, then issue the gradient
    // sample reading both implicitly.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    // Source swizzle and per-coordinate "coord type" bits, adjusted below
    // per texture target.
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            // The sample must not be scheduled before the gradient setup.
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but using the shadow-compare sample opcode
    // (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch: plain JUMP to the target block.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // f32 conditional branch: PRED_X computes the predicate (non-zero test)
    // with the push flag set, then JUMP_COND consumes PREDICATE_BIT.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as above, but with the integer non-zero comparison.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
            AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
           .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type
    // (operand 1).
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  // The pseudo has been fully replaced above; remove it.
  MI->eraseFromParent();
  return BB;
}
503
504//===----------------------------------------------------------------------===//
505// Custom DAG Lowering Operations
506//===----------------------------------------------------------------------===//
507
/// \brief Custom lowering hook: lower the SelectionDAG operations the
/// constructor marked as Custom, plus the R600/AMDGPU intrinsics.
///
/// Returns the replacement value, or an empty SDValue() when an intrinsic
/// case falls through without producing one.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    // Chained intrinsics: operand 0 is the chain, operand 1 the intrinsic id.
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Record the output register as a live-out (consumed later by the
      // RETURN custom inserter) and copy the value into it.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
          Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    // Chainless intrinsics: operand 0 is the intrinsic id.
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader input: mark the T-register live-in and read it at function
      // entry.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative i/j base: constant (flat) interpolation — load the vector
        // of values and extract the channel for this slot.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      // Perspective interpolation: i and j barycentrics live in a pair of
      // consecutive T-registers, which are function live-ins.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // XY and ZW channel pairs use distinct interpolation opcodes; the
      // result index selects the channel within the pair.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      // Interpolate one channel pair; i/j come in as explicit operands.
      // Both results are packed into a v2f32.
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      // All texture intrinsics share one TEXTURE_FETCH node shape; only the
      // operation code differs.
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      // Op code, coordinate, source/dest swizzles, resource/sampler ids,
      // coord-type flags and offsets — matching the TEXTURE_FETCH operand
      // layout.
      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Four-component dot product: extract each lane pair of the two v4f32
      // operands, interleaved as (a0,b0,a1,b1,...), for the DOT4 node.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Grid/group-size queries read implicit kernel parameters by slot index.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group and thread ids live in fixed physical registers (T1.* / T0.*).
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
760
761void R600TargetLowering::ReplaceNodeResults(SDNode *N,
762 SmallVectorImpl<SDValue> &Results,
763 SelectionDAG &DAG) const {
764 switch (N->getOpcode()) {
765 default: return;
766 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000767 return;
768 case ISD::LOAD: {
769 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
770 Results.push_back(SDValue(Node, 0));
771 Results.push_back(SDValue(Node, 1));
772 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
773 // function
774 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
775 return;
776 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000777 case ISD::STORE:
778 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
779 Results.push_back(SDValue(Node, 0));
780 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 }
782}
783
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000784SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
785 // On hw >= R700, COS/SIN input must be between -1. and 1.
786 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
787 EVT VT = Op.getValueType();
788 SDValue Arg = Op.getOperand(0);
789 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
790 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
791 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
792 DAG.getConstantFP(0.15915494309, MVT::f32)),
793 DAG.getConstantFP(0.5, MVT::f32)));
794 unsigned TrigNode;
795 switch (Op.getOpcode()) {
796 case ISD::FCOS:
797 TrigNode = AMDGPUISD::COS_HW;
798 break;
799 case ISD::FSIN:
800 TrigNode = AMDGPUISD::SIN_HW;
801 break;
802 default:
803 llvm_unreachable("Wrong trig opcode");
804 }
805 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
806 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
807 DAG.getConstantFP(-0.5, MVT::f32)));
808 if (Gen >= AMDGPUSubtarget::R700)
809 return TrigVal;
810 // On R600 hw, COS/SIN input must be between -Pi and Pi.
811 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
812 DAG.getConstantFP(3.14159265359, MVT::f32));
813}
814
Tom Stellard75aadc22012-12-11 21:25:42 +0000815SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
816 return DAG.getNode(
817 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000818 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 MVT::i1,
820 Op, DAG.getConstantFP(0.0f, MVT::f32),
821 DAG.getCondCode(ISD::SETNE)
822 );
823}
824
Tom Stellard75aadc22012-12-11 21:25:42 +0000825SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000826 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000827 unsigned DwordOffset) const {
828 unsigned ByteOffset = DwordOffset * 4;
829 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000830 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000831
832 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
833 assert(isInt<16>(ByteOffset));
834
835 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
836 DAG.getConstant(ByteOffset, MVT::i32), // PTR
837 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
838 false, false, false, 0);
839}
840
Tom Stellard75aadc22012-12-11 21:25:42 +0000841bool R600TargetLowering::isZero(SDValue Op) const {
842 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
843 return Cst->isNullValue();
844 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
845 return CstFP->isZero();
846 } else {
847 return false;
848 }
849}
850
851SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000852 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000853 EVT VT = Op.getValueType();
854
855 SDValue LHS = Op.getOperand(0);
856 SDValue RHS = Op.getOperand(1);
857 SDValue True = Op.getOperand(2);
858 SDValue False = Op.getOperand(3);
859 SDValue CC = Op.getOperand(4);
860 SDValue Temp;
861
862 // LHS and RHS are guaranteed to be the same value type
863 EVT CompareVT = LHS.getValueType();
864
865 // Check if we can lower this to a native operation.
866
Tom Stellard2add82d2013-03-08 15:37:09 +0000867 // Try to lower to a SET* instruction:
868 //
869 // SET* can match the following patterns:
870 //
Tom Stellardcd428182013-09-28 02:50:38 +0000871 // select_cc f32, f32, -1, 0, cc_supported
872 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
873 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000874 //
875
876 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +0000877 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
878 ISD::CondCode InverseCC =
879 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +0000880 if (isHWTrueValue(False) && isHWFalseValue(True)) {
881 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
882 std::swap(False, True);
883 CC = DAG.getCondCode(InverseCC);
884 } else {
885 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
886 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
887 std::swap(False, True);
888 std::swap(LHS, RHS);
889 CC = DAG.getCondCode(SwapInvCC);
890 }
891 }
Tom Stellard2add82d2013-03-08 15:37:09 +0000892 }
893
894 if (isHWTrueValue(True) && isHWFalseValue(False) &&
895 (CompareVT == VT || VT == MVT::i32)) {
896 // This can be matched by a SET* instruction.
897 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
898 }
899
Tom Stellard75aadc22012-12-11 21:25:42 +0000900 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000901 //
902 // CND* can match the following patterns:
903 //
Tom Stellardcd428182013-09-28 02:50:38 +0000904 // select_cc f32, 0.0, f32, f32, cc_supported
905 // select_cc f32, 0.0, i32, i32, cc_supported
906 // select_cc i32, 0, f32, f32, cc_supported
907 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000908 //
Tom Stellardcd428182013-09-28 02:50:38 +0000909
910 // Try to move the zero value to the RHS
911 if (isZero(LHS)) {
912 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
913 // Try swapping the operands
914 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
915 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
916 std::swap(LHS, RHS);
917 CC = DAG.getCondCode(CCSwapped);
918 } else {
919 // Try inverting the conditon and then swapping the operands
920 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
921 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
922 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
923 std::swap(True, False);
924 std::swap(LHS, RHS);
925 CC = DAG.getCondCode(CCSwapped);
926 }
927 }
928 }
929 if (isZero(RHS)) {
930 SDValue Cond = LHS;
931 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +0000932 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
933 if (CompareVT != VT) {
934 // Bitcast True / False to the correct types. This will end up being
935 // a nop, but it allows us to define only a single pattern in the
936 // .TD files for each CND* instruction rather than having to have
937 // one pattern for integer True/False and one for fp True/False
938 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
939 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
940 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000941
942 switch (CCOpcode) {
943 case ISD::SETONE:
944 case ISD::SETUNE:
945 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +0000946 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
947 Temp = True;
948 True = False;
949 False = Temp;
950 break;
951 default:
952 break;
953 }
954 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
955 Cond, Zero,
956 True, False,
957 DAG.getCondCode(CCOpcode));
958 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
959 }
960
Tom Stellard75aadc22012-12-11 21:25:42 +0000961
962 // Possible Min/Max pattern
963 SDValue MinMax = LowerMinMax(Op, DAG);
964 if (MinMax.getNode()) {
965 return MinMax;
966 }
967
968 // If we make it this for it means we have no native instructions to handle
969 // this SELECT_CC, so we must lower it.
970 SDValue HWTrue, HWFalse;
971
972 if (CompareVT == MVT::f32) {
973 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
974 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
975 } else if (CompareVT == MVT::i32) {
976 HWTrue = DAG.getConstant(-1, CompareVT);
977 HWFalse = DAG.getConstant(0, CompareVT);
978 }
979 else {
980 assert(!"Unhandled value type in LowerSELECT_CC");
981 }
982
983 // Lower this unsupported SELECT_CC into a combination of two supported
984 // SELECT_CC operations.
985 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
986
987 return DAG.getNode(ISD::SELECT_CC, DL, VT,
988 Cond, HWFalse,
989 True, False,
990 DAG.getCondCode(ISD::SETNE));
991}
992
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000993/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
994/// convert these pointers to a register index. Each register holds
995/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
996/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
997/// for indirect addressing.
998SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
999 unsigned StackWidth,
1000 SelectionDAG &DAG) const {
1001 unsigned SRLPad;
1002 switch(StackWidth) {
1003 case 1:
1004 SRLPad = 2;
1005 break;
1006 case 2:
1007 SRLPad = 3;
1008 break;
1009 case 4:
1010 SRLPad = 4;
1011 break;
1012 default: llvm_unreachable("Invalid stack width");
1013 }
1014
Andrew Trickef9de2a2013-05-25 02:42:55 +00001015 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001016 DAG.getConstant(SRLPad, MVT::i32));
1017}
1018
1019void R600TargetLowering::getStackAddress(unsigned StackWidth,
1020 unsigned ElemIdx,
1021 unsigned &Channel,
1022 unsigned &PtrIncr) const {
1023 switch (StackWidth) {
1024 default:
1025 case 1:
1026 Channel = 0;
1027 if (ElemIdx > 0) {
1028 PtrIncr = 1;
1029 } else {
1030 PtrIncr = 0;
1031 }
1032 break;
1033 case 2:
1034 Channel = ElemIdx % 2;
1035 if (ElemIdx == 2) {
1036 PtrIncr = 1;
1037 } else {
1038 PtrIncr = 0;
1039 }
1040 break;
1041 case 4:
1042 Channel = ElemIdx;
1043 PtrIncr = 0;
1044 break;
1045 }
1046}
1047
Tom Stellard75aadc22012-12-11 21:25:42 +00001048SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001049 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001050 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1051 SDValue Chain = Op.getOperand(0);
1052 SDValue Value = Op.getOperand(1);
1053 SDValue Ptr = Op.getOperand(2);
1054
Tom Stellard2ffc3302013-08-26 15:05:44 +00001055 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001056 if (Result.getNode()) {
1057 return Result;
1058 }
1059
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001060 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1061 if (StoreNode->isTruncatingStore()) {
1062 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001063 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001064 EVT MemVT = StoreNode->getMemoryVT();
1065 SDValue MaskConstant;
1066 if (MemVT == MVT::i8) {
1067 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1068 } else {
1069 assert(MemVT == MVT::i16);
1070 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1071 }
1072 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1073 DAG.getConstant(2, MVT::i32));
1074 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1075 DAG.getConstant(0x00000003, VT));
1076 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1077 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1078 DAG.getConstant(3, VT));
1079 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1080 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1081 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1082 // vector instead.
1083 SDValue Src[4] = {
1084 ShiftedValue,
1085 DAG.getConstant(0, MVT::i32),
1086 DAG.getConstant(0, MVT::i32),
1087 Mask
1088 };
1089 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1090 SDValue Args[3] = { Chain, Input, DWordAddr };
1091 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1092 Op->getVTList(), Args, 3, MemVT,
1093 StoreNode->getMemOperand());
1094 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1095 Value.getValueType().bitsGE(MVT::i32)) {
1096 // Convert pointer from byte address to dword address.
1097 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1098 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1099 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001100
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001101 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1102 assert(!"Truncated and indexed stores not supported yet");
1103 } else {
1104 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1105 }
1106 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001107 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001108 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001109
1110 EVT ValueVT = Value.getValueType();
1111
1112 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1113 return SDValue();
1114 }
1115
1116 // Lowering for indirect addressing
1117
1118 const MachineFunction &MF = DAG.getMachineFunction();
1119 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1120 getTargetMachine().getFrameLowering());
1121 unsigned StackWidth = TFL->getStackWidth(MF);
1122
1123 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1124
1125 if (ValueVT.isVector()) {
1126 unsigned NumElemVT = ValueVT.getVectorNumElements();
1127 EVT ElemVT = ValueVT.getVectorElementType();
1128 SDValue Stores[4];
1129
1130 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1131 "vector width in load");
1132
1133 for (unsigned i = 0; i < NumElemVT; ++i) {
1134 unsigned Channel, PtrIncr;
1135 getStackAddress(StackWidth, i, Channel, PtrIncr);
1136 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1137 DAG.getConstant(PtrIncr, MVT::i32));
1138 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1139 Value, DAG.getConstant(i, MVT::i32));
1140
1141 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1142 Chain, Elem, Ptr,
1143 DAG.getTargetConstant(Channel, MVT::i32));
1144 }
1145 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1146 } else {
1147 if (ValueVT == MVT::i8) {
1148 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1149 }
1150 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001151 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001152 }
1153
1154 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001155}
1156
Tom Stellard365366f2013-01-23 02:09:06 +00001157// return (512 + (kc_bank << 12)
1158static int
1159ConstantAddressBlock(unsigned AddressSpace) {
1160 switch (AddressSpace) {
1161 case AMDGPUAS::CONSTANT_BUFFER_0:
1162 return 512;
1163 case AMDGPUAS::CONSTANT_BUFFER_1:
1164 return 512 + 4096;
1165 case AMDGPUAS::CONSTANT_BUFFER_2:
1166 return 512 + 4096 * 2;
1167 case AMDGPUAS::CONSTANT_BUFFER_3:
1168 return 512 + 4096 * 3;
1169 case AMDGPUAS::CONSTANT_BUFFER_4:
1170 return 512 + 4096 * 4;
1171 case AMDGPUAS::CONSTANT_BUFFER_5:
1172 return 512 + 4096 * 5;
1173 case AMDGPUAS::CONSTANT_BUFFER_6:
1174 return 512 + 4096 * 6;
1175 case AMDGPUAS::CONSTANT_BUFFER_7:
1176 return 512 + 4096 * 7;
1177 case AMDGPUAS::CONSTANT_BUFFER_8:
1178 return 512 + 4096 * 8;
1179 case AMDGPUAS::CONSTANT_BUFFER_9:
1180 return 512 + 4096 * 9;
1181 case AMDGPUAS::CONSTANT_BUFFER_10:
1182 return 512 + 4096 * 10;
1183 case AMDGPUAS::CONSTANT_BUFFER_11:
1184 return 512 + 4096 * 11;
1185 case AMDGPUAS::CONSTANT_BUFFER_12:
1186 return 512 + 4096 * 12;
1187 case AMDGPUAS::CONSTANT_BUFFER_13:
1188 return 512 + 4096 * 13;
1189 case AMDGPUAS::CONSTANT_BUFFER_14:
1190 return 512 + 4096 * 14;
1191 case AMDGPUAS::CONSTANT_BUFFER_15:
1192 return 512 + 4096 * 15;
1193 default:
1194 return -1;
1195 }
1196}
1197
/// Custom lowering for ISD::LOAD. Handles (in order): vector loads from local
/// memory, loads from the constant-buffer address spaces, sign-extending
/// loads, and scalar/vector loads from the private (stack) address space.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Vector loads from local memory are split into smaller loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Loads from a constant buffer become CONST_ADDRESS nodes. Only
  // non-extending and zero-extending loads take this path.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // A constant pointer lets us fold the address; otherwise we keep the
    // pointer dynamic and index the kcache at runtime.
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Build a vector of the loaded slots; scalar results are extracted
      // from element 0 below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non constant ptr cant be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand SEXT load as: (sra (shl (any-extload x), amt), amt), where amt
    // moves the loaded value's sign bit into the top bit of the result type.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Only private (stack) loads are custom-lowered below.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Load each element with its own REGISTER_LOAD, padding the remaining
    // lanes (up to 4) with undef.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001334
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Compute-shader arguments are loaded from the kernel input buffer
/// (constant buffer 0, after a 36-byte header); other shader types receive
/// their inputs in live-in 128-bit registers.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so that memory types
  // (MemVT below) match what the runtime actually wrote into the buffer.
  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    // Non-compute shaders: each argument arrives in a live-in register.
    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)),
                           MemVT, false, false, 4);
                           // 4 is the preferred alignment for
                           // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1385
Matt Arsenault758659232013-05-18 00:21:46 +00001386EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001387 if (!VT.isVector()) return MVT::i32;
1388 return VT.changeVectorElementTypeToInteger();
1389}
1390
/// Rewrite a BUILD_VECTOR so that lanes expressible as swizzle selectors do
/// not consume a real operand: undef lanes become SEL_MASK_WRITE, constant
/// 0.0/1.0 lanes become SEL_0/SEL_1, and duplicate lanes are replaced by a
/// reference to the first occurrence. \p RemapSwizzle records, per lane, the
/// new selector; lanes left untouched are absent from the map (identity).
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    // Fold constant 0.0 / 1.0 lanes into the SEL_0 / SEL_1 selectors and
    // free up the operand slot by replacing it with undef.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    // Undef lanes (original, or freed above) are excluded from duplicate
    // detection so they are never picked up as a "first occurrence".
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: point this lane at the first earlier lane holding the
    // same value and free this operand slot.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1433
Benjamin Kramer193960c2013-06-11 13:32:25 +00001434static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1435 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001436 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1437 assert(RemapSwizzle.empty());
1438 SDValue NewBldVec[4] = {
1439 VectorEntry.getOperand(0),
1440 VectorEntry.getOperand(1),
1441 VectorEntry.getOperand(2),
1442 VectorEntry.getOperand(3)
1443 };
1444 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001445 for (unsigned i = 0; i < 4; i++)
1446 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001447
1448 for (unsigned i = 0; i < 4; i++) {
1449 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1450 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1451 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001452 if (i == Idx) {
1453 isUnmovable[Idx] = true;
1454 continue;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001455 }
Vincent Lejeune301beb82013-10-13 17:56:04 +00001456 if (isUnmovable[Idx])
1457 continue;
1458 // Swap i and Idx
1459 std::swap(NewBldVec[Idx], NewBldVec[i]);
1460 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1461 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001462 }
1463 }
1464
1465 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1466 VectorEntry.getValueType(), NewBldVec, 4);
1467}
1468
1469
1470SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1471SDValue Swz[4], SelectionDAG &DAG) const {
1472 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1473 // Old -> New swizzle values
1474 DenseMap<unsigned, unsigned> SwizzleRemap;
1475
1476 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1477 for (unsigned i = 0; i < 4; i++) {
1478 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1479 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1480 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1481 }
1482
1483 SwizzleRemap.clear();
1484 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1485 for (unsigned i = 0; i < 4; i++) {
1486 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1487 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1488 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1489 }
1490
1491 return BuildVector;
1492}
1493
1494
Tom Stellard75aadc22012-12-11 21:25:42 +00001495//===----------------------------------------------------------------------===//
1496// Custom DAG Optimizations
1497//===----------------------------------------------------------------------===//
1498
1499SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1500 DAGCombinerInfo &DCI) const {
1501 SelectionDAG &DAG = DCI.DAG;
1502
1503 switch (N->getOpcode()) {
1504 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1505 case ISD::FP_ROUND: {
1506 SDValue Arg = N->getOperand(0);
1507 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001508 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001509 Arg.getOperand(0));
1510 }
1511 break;
1512 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001513
1514 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1515 // (i32 select_cc f32, f32, -1, 0 cc)
1516 //
1517 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1518 // this to one of the SET*_DX10 instructions.
1519 case ISD::FP_TO_SINT: {
1520 SDValue FNeg = N->getOperand(0);
1521 if (FNeg.getOpcode() != ISD::FNEG) {
1522 return SDValue();
1523 }
1524 SDValue SelectCC = FNeg.getOperand(0);
1525 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1526 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1527 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1528 !isHWTrueValue(SelectCC.getOperand(2)) ||
1529 !isHWFalseValue(SelectCC.getOperand(3))) {
1530 return SDValue();
1531 }
1532
Andrew Trickef9de2a2013-05-25 02:42:55 +00001533 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001534 SelectCC.getOperand(0), // LHS
1535 SelectCC.getOperand(1), // RHS
1536 DAG.getConstant(-1, MVT::i32), // True
1537 DAG.getConstant(0, MVT::i32), // Flase
1538 SelectCC.getOperand(4)); // CC
1539
1540 break;
1541 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001542
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001543 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1544 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001545 case ISD::INSERT_VECTOR_ELT: {
1546 SDValue InVec = N->getOperand(0);
1547 SDValue InVal = N->getOperand(1);
1548 SDValue EltNo = N->getOperand(2);
1549 SDLoc dl(N);
1550
1551 // If the inserted element is an UNDEF, just use the input vector.
1552 if (InVal.getOpcode() == ISD::UNDEF)
1553 return InVec;
1554
1555 EVT VT = InVec.getValueType();
1556
1557 // If we can't generate a legal BUILD_VECTOR, exit
1558 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1559 return SDValue();
1560
1561 // Check that we know which element is being inserted
1562 if (!isa<ConstantSDNode>(EltNo))
1563 return SDValue();
1564 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1565
1566 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1567 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1568 // vector elements.
1569 SmallVector<SDValue, 8> Ops;
1570 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1571 Ops.append(InVec.getNode()->op_begin(),
1572 InVec.getNode()->op_end());
1573 } else if (InVec.getOpcode() == ISD::UNDEF) {
1574 unsigned NElts = VT.getVectorNumElements();
1575 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1576 } else {
1577 return SDValue();
1578 }
1579
1580 // Insert the element
1581 if (Elt < Ops.size()) {
1582 // All the operands of BUILD_VECTOR must have the same type;
1583 // we enforce that here.
1584 EVT OpVT = Ops[0].getValueType();
1585 if (InVal.getValueType() != OpVT)
1586 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1587 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1588 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1589 Ops[Elt] = InVal;
1590 }
1591
1592 // Return the new vector
1593 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1594 VT, &Ops[0], Ops.size());
1595 }
1596
Tom Stellard365366f2013-01-23 02:09:06 +00001597 // Extract_vec (Build_vector) generated by custom lowering
1598 // also needs to be customly combined
1599 case ISD::EXTRACT_VECTOR_ELT: {
1600 SDValue Arg = N->getOperand(0);
1601 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1602 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1603 unsigned Element = Const->getZExtValue();
1604 return Arg->getOperand(Element);
1605 }
1606 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001607 if (Arg.getOpcode() == ISD::BITCAST &&
1608 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1609 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1610 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001611 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001612 Arg->getOperand(0).getOperand(Element));
1613 }
1614 }
Tom Stellard365366f2013-01-23 02:09:06 +00001615 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001616
1617 case ISD::SELECT_CC: {
1618 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1619 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001620 //
1621 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1622 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001623 SDValue LHS = N->getOperand(0);
1624 if (LHS.getOpcode() != ISD::SELECT_CC) {
1625 return SDValue();
1626 }
1627
1628 SDValue RHS = N->getOperand(1);
1629 SDValue True = N->getOperand(2);
1630 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001631 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001632
1633 if (LHS.getOperand(2).getNode() != True.getNode() ||
1634 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001635 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001636 return SDValue();
1637 }
1638
Tom Stellard5e524892013-03-08 15:37:11 +00001639 switch (NCC) {
1640 default: return SDValue();
1641 case ISD::SETNE: return LHS;
1642 case ISD::SETEQ: {
1643 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1644 LHSCC = ISD::getSetCCInverse(LHSCC,
1645 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001646 if (DCI.isBeforeLegalizeOps() ||
1647 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1648 return DAG.getSelectCC(SDLoc(N),
1649 LHS.getOperand(0),
1650 LHS.getOperand(1),
1651 LHS.getOperand(2),
1652 LHS.getOperand(3),
1653 LHSCC);
1654 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001655 }
Tom Stellard5e524892013-03-08 15:37:11 +00001656 }
Tom Stellardcd428182013-09-28 02:50:38 +00001657 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001658 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001659
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001660 case AMDGPUISD::EXPORT: {
1661 SDValue Arg = N->getOperand(1);
1662 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1663 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001664
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001665 SDValue NewArgs[8] = {
1666 N->getOperand(0), // Chain
1667 SDValue(),
1668 N->getOperand(2), // ArrayBase
1669 N->getOperand(3), // Type
1670 N->getOperand(4), // SWZ_X
1671 N->getOperand(5), // SWZ_Y
1672 N->getOperand(6), // SWZ_Z
1673 N->getOperand(7) // SWZ_W
1674 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001675 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001676 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001677 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001678 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001679 case AMDGPUISD::TEXTURE_FETCH: {
1680 SDValue Arg = N->getOperand(1);
1681 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1682 break;
1683
1684 SDValue NewArgs[19] = {
1685 N->getOperand(0),
1686 N->getOperand(1),
1687 N->getOperand(2),
1688 N->getOperand(3),
1689 N->getOperand(4),
1690 N->getOperand(5),
1691 N->getOperand(6),
1692 N->getOperand(7),
1693 N->getOperand(8),
1694 N->getOperand(9),
1695 N->getOperand(10),
1696 N->getOperand(11),
1697 N->getOperand(12),
1698 N->getOperand(13),
1699 N->getOperand(14),
1700 N->getOperand(15),
1701 N->getOperand(16),
1702 N->getOperand(17),
1703 N->getOperand(18),
1704 };
1705 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1706 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1707 NewArgs, 19);
1708 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001709 }
1710 return SDValue();
1711}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001712
1713static bool
1714FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001715 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001716 const R600InstrInfo *TII =
1717 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1718 if (!Src.isMachineOpcode())
1719 return false;
1720 switch (Src.getMachineOpcode()) {
1721 case AMDGPU::FNEG_R600:
1722 if (!Neg.getNode())
1723 return false;
1724 Src = Src.getOperand(0);
1725 Neg = DAG.getTargetConstant(1, MVT::i32);
1726 return true;
1727 case AMDGPU::FABS_R600:
1728 if (!Abs.getNode())
1729 return false;
1730 Src = Src.getOperand(0);
1731 Abs = DAG.getTargetConstant(1, MVT::i32);
1732 return true;
1733 case AMDGPU::CONST_COPY: {
1734 unsigned Opcode = ParentNode->getMachineOpcode();
1735 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1736
1737 if (!Sel.getNode())
1738 return false;
1739
1740 SDValue CstOffset = Src.getOperand(0);
1741 if (ParentNode->getValueType(0).isVector())
1742 return false;
1743
1744 // Gather constants values
1745 int SrcIndices[] = {
1746 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1747 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1748 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1749 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1750 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1751 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1752 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1753 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1754 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1755 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1756 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1757 };
1758 std::vector<unsigned> Consts;
1759 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1760 int OtherSrcIdx = SrcIndices[i];
1761 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1762 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1763 continue;
1764 if (HasDst) {
1765 OtherSrcIdx--;
1766 OtherSelIdx--;
1767 }
1768 if (RegisterSDNode *Reg =
1769 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1770 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1771 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1772 ParentNode->getOperand(OtherSelIdx));
1773 Consts.push_back(Cst->getZExtValue());
1774 }
1775 }
1776 }
1777
1778 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1779 Consts.push_back(Cst->getZExtValue());
1780 if (!TII->fitsConstReadLimitations(Consts)) {
1781 return false;
1782 }
1783
1784 Sel = CstOffset;
1785 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1786 return true;
1787 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001788 case AMDGPU::MOV_IMM_I32:
1789 case AMDGPU::MOV_IMM_F32: {
1790 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1791 uint64_t ImmValue = 0;
1792
1793
1794 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1795 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1796 float FloatValue = FPC->getValueAPF().convertToFloat();
1797 if (FloatValue == 0.0) {
1798 ImmReg = AMDGPU::ZERO;
1799 } else if (FloatValue == 0.5) {
1800 ImmReg = AMDGPU::HALF;
1801 } else if (FloatValue == 1.0) {
1802 ImmReg = AMDGPU::ONE;
1803 } else {
1804 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1805 }
1806 } else {
1807 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1808 uint64_t Value = C->getZExtValue();
1809 if (Value == 0) {
1810 ImmReg = AMDGPU::ZERO;
1811 } else if (Value == 1) {
1812 ImmReg = AMDGPU::ONE_INT;
1813 } else {
1814 ImmValue = Value;
1815 }
1816 }
1817
1818 // Check that we aren't already using an immediate.
1819 // XXX: It's possible for an instruction to have more than one
1820 // immediate operand, but this is not supported yet.
1821 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1822 if (!Imm.getNode())
1823 return false;
1824 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1825 assert(C);
1826 if (C->getZExtValue())
1827 return false;
1828 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1829 }
1830 Src = DAG.getRegister(ImmReg, MVT::i32);
1831 return true;
1832 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001833 default:
1834 return false;
1835 }
1836}
1837
1838
/// \brief Fold the instructions after selecting them
///
/// Rebuilds \p Node's operand list and attempts to fold fneg/fabs/constant/
/// immediate producers into the instruction's source-modifier operands via
/// FoldOperand. Handles three shapes: DOT_4 (eight per-channel sources),
/// REG_SEQUENCE (fold each source register), and CLAMP_R600 (absorb the
/// clamp into the defining instruction); everything else takes the generic
/// three-source path. Returns a new machine node on success, or \p Node
/// unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp;  // Null SDValue: tells FoldOperand the modifier is absent.

  // Mutable copy of the operand list; FoldOperand edits entries in place.
  std::vector<SDValue> Ops;
  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
              I != E; ++I)
          Ops.push_back(*I);

  if (Opcode == AMDGPU::DOT_4) {
    // MI operand indices for the eight per-channel sources (X..W of src0
    // then src1), and their matching neg/abs modifier operands.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
        };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The "- 1" converts MI operand indices to SDNode operand indices,
      // which don't include the def — presumably DOT_4 always has a dst;
      // TODO confirm against the instruction definition.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 takes no literal operand here, so pass FakeOp for Imm.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (register, subreg index) after the
    // leading operand, hence the stride of 2 starting at 1.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the clamp into the instruction that produces the value, if that
    // instruction supports the clamp output modifier.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    // NOTE(review): this Ops intentionally shadows the outer Ops vector —
    // here we rebuild the *defining* instruction's operands, not Node's.
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for(unsigned i = 0; i < NumOp; ++i)
          Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
        Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources with neg/abs modifiers
    // (src2 has no abs operand).
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Shift MI indices down by one: SDNode operands exclude the def.
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}