//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +000093
94 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
95 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +000096 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
98 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
99 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000100 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
101 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
102
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000103 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000104 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000105 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000106 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000107 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
108 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000109
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setOperationAction(ISD::LOAD, MVT::i32, Custom);
111 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000112 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
113
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000115 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000116 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000117 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000118 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119
Michel Danzer49812b52013-07-10 16:37:07 +0000120 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
121
Tom Stellardb852af52013-03-08 15:37:03 +0000122 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000123 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000124 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000125}
126
// Expand pseudo instructions that require custom insertion at the
// MachineInstr level.  Each case replaces MI with one or more real R600
// machine instructions in BB; unless a case returns BB early, MI is erased
// at the bottom and BB is returned.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
    static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // LDS instructions with a dst operand: if the destination register is
    // actually used, re-emit the LDS op defining AMDGPU::OQAP and copy OQAP
    // into the original destination with a MOV; otherwise switch to the
    // no-return form of the opcode so no result is produced at all.
    if (TII->isLDSInstr(MI->getOpcode()) &&
        TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
                        AMDGPU::OQAP);
        TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                     MI->getOperand(0).getReg(),
                                     AMDGPU::OQAP);
      } else {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                        TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      }
      // Copy all remaining (non-dst) operands over to the new instruction.
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    // Lower to a plain MOV carrying the clamp output modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    // Lower to a MOV with the absolute-value source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    // Lower to a MOV with the negate source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // Instead of emitting anything, flag the instruction that defines the
    // masked register with MO_FLAG_MASK.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize the FP immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    // Copy out of the constant buffer: MOV from ALU_CONST, with the constant
    // selector stored in the src0_sel immediate operand.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // If the next instruction is the function's RETURN, fold the
    // end-of-program bit into this write.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: load the horizontal and
    // vertical gradients into temporaries via TEX_SET_GRADIENTS_{H,V}, then
    // emit TEX_SAMPLE_G which reads them implicitly (T0/T1 implicit uses).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust coordinate swizzle (Src*) and coordinate-type bits (CT*) for
    // the texture target encoded in TextureId.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD, but samples through the shadow-compare opcode
    // TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers to a plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: set PREDICATE_BIT with PRED_X (push flag set for
    // the stack-based control flow), then branch on it with JUMP_COND.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as the f32 variant, but compares with the integer form of the
    // is-not-zero predicate opcode.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    // Set the end-of-program bit if the next instruction is the RETURN.
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding: 84 for EG_ExportSwz, 40 for R600_ExportSwz.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
507
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

// Custom-lower the operations marked Custom in the constructor.  Returns the
// replacement SDValue, or SDValue() when an intrinsic case falls through the
// inner switches without producing a replacement.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Record the output register as a live-out (consumed by the RETURN
      // expansion in EmitInstrWithCustomInserter) and copy the value to it.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      // Build an EXPORT node with the identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs are pre-loaded into T registers: mark the register
      // live-in and copy it out of the entry node.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        // Negative i/j base: constant (flat) interpolation — load the whole
        // parameter vector and extract the channel for this slot.
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      // The I and J barycentrics live in consecutive pre-loaded T registers
      // (2*ijb and 2*ijb+1): mark them live-in and feed them to the
      // interpolation instruction.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      // INTERP_PAIR_XY computes channels 0/1, INTERP_PAIR_ZW channels 2/3;
      // each produces two f32 results, pick this slot's with slot % 2.
      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      // Like R600_interp_input, but the I/J values arrive as operands and
      // both interpolated channels are returned packed into a v2f32.
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      // Map the texture intrinsic to its TEXTURE_FETCH operation code and
      // build the 19-operand TEXTURE_FETCH node (the operand layout mirrors
      // the AMDGPUISD::TEXTURE_FETCH node definition).
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknow Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      // Expand the 4-component dot product into a scalar DOT4 node fed by
      // the interleaved extracted elements of both input vectors.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    // Grid/group dimensions live at consecutive implicit-parameter slots
    // 0-8, read via LowerImplicitParameter.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group ids are pre-loaded in T1.{X,Y,Z}, thread ids in T0.{X,Y,Z}.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
765
766void R600TargetLowering::ReplaceNodeResults(SDNode *N,
767 SmallVectorImpl<SDValue> &Results,
768 SelectionDAG &DAG) const {
769 switch (N->getOpcode()) {
770 default: return;
771 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000772 return;
773 case ISD::LOAD: {
774 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
775 Results.push_back(SDValue(Node, 0));
776 Results.push_back(SDValue(Node, 1));
777 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
778 // function
779 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
780 return;
781 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000782 case ISD::STORE:
783 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
784 Results.push_back(SDValue(Node, 0));
785 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000786 }
787}
788
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000789SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
790 // On hw >= R700, COS/SIN input must be between -1. and 1.
791 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
792 EVT VT = Op.getValueType();
793 SDValue Arg = Op.getOperand(0);
794 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
795 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
796 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
797 DAG.getConstantFP(0.15915494309, MVT::f32)),
798 DAG.getConstantFP(0.5, MVT::f32)));
799 unsigned TrigNode;
800 switch (Op.getOpcode()) {
801 case ISD::FCOS:
802 TrigNode = AMDGPUISD::COS_HW;
803 break;
804 case ISD::FSIN:
805 TrigNode = AMDGPUISD::SIN_HW;
806 break;
807 default:
808 llvm_unreachable("Wrong trig opcode");
809 }
810 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
811 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
812 DAG.getConstantFP(-0.5, MVT::f32)));
813 if (Gen >= AMDGPUSubtarget::R700)
814 return TrigVal;
815 // On R600 hw, COS/SIN input must be between -Pi and Pi.
816 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
817 DAG.getConstantFP(3.14159265359, MVT::f32));
818}
819
Tom Stellard75aadc22012-12-11 21:25:42 +0000820SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
821 return DAG.getNode(
822 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000823 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000824 MVT::i1,
825 Op, DAG.getConstantFP(0.0f, MVT::f32),
826 DAG.getCondCode(ISD::SETNE)
827 );
828}
829
Tom Stellard75aadc22012-12-11 21:25:42 +0000830SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000831 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000832 unsigned DwordOffset) const {
833 unsigned ByteOffset = DwordOffset * 4;
834 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000835 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000836
837 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
838 assert(isInt<16>(ByteOffset));
839
840 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
841 DAG.getConstant(ByteOffset, MVT::i32), // PTR
842 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
843 false, false, false, 0);
844}
845
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000846SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
847
848 MachineFunction &MF = DAG.getMachineFunction();
849 const AMDGPUFrameLowering *TFL =
850 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
851
852 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
853 assert(FIN);
854
855 unsigned FrameIndex = FIN->getIndex();
856 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
857 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
858}
859
Tom Stellard75aadc22012-12-11 21:25:42 +0000860bool R600TargetLowering::isZero(SDValue Op) const {
861 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
862 return Cst->isNullValue();
863 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
864 return CstFP->isZero();
865 } else {
866 return false;
867 }
868}
869
870SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000871 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000872 EVT VT = Op.getValueType();
873
874 SDValue LHS = Op.getOperand(0);
875 SDValue RHS = Op.getOperand(1);
876 SDValue True = Op.getOperand(2);
877 SDValue False = Op.getOperand(3);
878 SDValue CC = Op.getOperand(4);
879 SDValue Temp;
880
881 // LHS and RHS are guaranteed to be the same value type
882 EVT CompareVT = LHS.getValueType();
883
884 // Check if we can lower this to a native operation.
885
Tom Stellard2add82d2013-03-08 15:37:09 +0000886 // Try to lower to a SET* instruction:
887 //
888 // SET* can match the following patterns:
889 //
Tom Stellardcd428182013-09-28 02:50:38 +0000890 // select_cc f32, f32, -1, 0, cc_supported
891 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
892 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000893 //
894
895 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +0000896 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
897 ISD::CondCode InverseCC =
898 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +0000899 if (isHWTrueValue(False) && isHWFalseValue(True)) {
900 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
901 std::swap(False, True);
902 CC = DAG.getCondCode(InverseCC);
903 } else {
904 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
905 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
906 std::swap(False, True);
907 std::swap(LHS, RHS);
908 CC = DAG.getCondCode(SwapInvCC);
909 }
910 }
Tom Stellard2add82d2013-03-08 15:37:09 +0000911 }
912
913 if (isHWTrueValue(True) && isHWFalseValue(False) &&
914 (CompareVT == VT || VT == MVT::i32)) {
915 // This can be matched by a SET* instruction.
916 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
917 }
918
Tom Stellard75aadc22012-12-11 21:25:42 +0000919 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000920 //
921 // CND* can match the following patterns:
922 //
Tom Stellardcd428182013-09-28 02:50:38 +0000923 // select_cc f32, 0.0, f32, f32, cc_supported
924 // select_cc f32, 0.0, i32, i32, cc_supported
925 // select_cc i32, 0, f32, f32, cc_supported
926 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000927 //
Tom Stellardcd428182013-09-28 02:50:38 +0000928
929 // Try to move the zero value to the RHS
930 if (isZero(LHS)) {
931 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
932 // Try swapping the operands
933 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
934 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
935 std::swap(LHS, RHS);
936 CC = DAG.getCondCode(CCSwapped);
937 } else {
938 // Try inverting the conditon and then swapping the operands
939 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
940 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
941 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
942 std::swap(True, False);
943 std::swap(LHS, RHS);
944 CC = DAG.getCondCode(CCSwapped);
945 }
946 }
947 }
948 if (isZero(RHS)) {
949 SDValue Cond = LHS;
950 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +0000951 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
952 if (CompareVT != VT) {
953 // Bitcast True / False to the correct types. This will end up being
954 // a nop, but it allows us to define only a single pattern in the
955 // .TD files for each CND* instruction rather than having to have
956 // one pattern for integer True/False and one for fp True/False
957 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
958 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
959 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000960
961 switch (CCOpcode) {
962 case ISD::SETONE:
963 case ISD::SETUNE:
964 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +0000965 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
966 Temp = True;
967 True = False;
968 False = Temp;
969 break;
970 default:
971 break;
972 }
973 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
974 Cond, Zero,
975 True, False,
976 DAG.getCondCode(CCOpcode));
977 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
978 }
979
Tom Stellard75aadc22012-12-11 21:25:42 +0000980
981 // Possible Min/Max pattern
982 SDValue MinMax = LowerMinMax(Op, DAG);
983 if (MinMax.getNode()) {
984 return MinMax;
985 }
986
987 // If we make it this for it means we have no native instructions to handle
988 // this SELECT_CC, so we must lower it.
989 SDValue HWTrue, HWFalse;
990
991 if (CompareVT == MVT::f32) {
992 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
993 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
994 } else if (CompareVT == MVT::i32) {
995 HWTrue = DAG.getConstant(-1, CompareVT);
996 HWFalse = DAG.getConstant(0, CompareVT);
997 }
998 else {
999 assert(!"Unhandled value type in LowerSELECT_CC");
1000 }
1001
1002 // Lower this unsupported SELECT_CC into a combination of two supported
1003 // SELECT_CC operations.
1004 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1005
1006 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1007 Cond, HWFalse,
1008 True, False,
1009 DAG.getCondCode(ISD::SETNE));
1010}
1011
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001012/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
1013/// convert these pointers to a register index. Each register holds
1014/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1015/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1016/// for indirect addressing.
1017SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1018 unsigned StackWidth,
1019 SelectionDAG &DAG) const {
1020 unsigned SRLPad;
1021 switch(StackWidth) {
1022 case 1:
1023 SRLPad = 2;
1024 break;
1025 case 2:
1026 SRLPad = 3;
1027 break;
1028 case 4:
1029 SRLPad = 4;
1030 break;
1031 default: llvm_unreachable("Invalid stack width");
1032 }
1033
Andrew Trickef9de2a2013-05-25 02:42:55 +00001034 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001035 DAG.getConstant(SRLPad, MVT::i32));
1036}
1037
1038void R600TargetLowering::getStackAddress(unsigned StackWidth,
1039 unsigned ElemIdx,
1040 unsigned &Channel,
1041 unsigned &PtrIncr) const {
1042 switch (StackWidth) {
1043 default:
1044 case 1:
1045 Channel = 0;
1046 if (ElemIdx > 0) {
1047 PtrIncr = 1;
1048 } else {
1049 PtrIncr = 0;
1050 }
1051 break;
1052 case 2:
1053 Channel = ElemIdx % 2;
1054 if (ElemIdx == 2) {
1055 PtrIncr = 1;
1056 } else {
1057 PtrIncr = 0;
1058 }
1059 break;
1060 case 4:
1061 Channel = ElemIdx;
1062 PtrIncr = 0;
1063 break;
1064 }
1065}
1066
Tom Stellard75aadc22012-12-11 21:25:42 +00001067SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001068 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001069 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1070 SDValue Chain = Op.getOperand(0);
1071 SDValue Value = Op.getOperand(1);
1072 SDValue Ptr = Op.getOperand(2);
1073
Tom Stellard2ffc3302013-08-26 15:05:44 +00001074 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001075 if (Result.getNode()) {
1076 return Result;
1077 }
1078
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001079 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1080 if (StoreNode->isTruncatingStore()) {
1081 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001082 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001083 EVT MemVT = StoreNode->getMemoryVT();
1084 SDValue MaskConstant;
1085 if (MemVT == MVT::i8) {
1086 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1087 } else {
1088 assert(MemVT == MVT::i16);
1089 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1090 }
1091 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1092 DAG.getConstant(2, MVT::i32));
1093 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1094 DAG.getConstant(0x00000003, VT));
1095 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1096 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1097 DAG.getConstant(3, VT));
1098 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1099 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1100 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1101 // vector instead.
1102 SDValue Src[4] = {
1103 ShiftedValue,
1104 DAG.getConstant(0, MVT::i32),
1105 DAG.getConstant(0, MVT::i32),
1106 Mask
1107 };
1108 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1109 SDValue Args[3] = { Chain, Input, DWordAddr };
1110 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1111 Op->getVTList(), Args, 3, MemVT,
1112 StoreNode->getMemOperand());
1113 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1114 Value.getValueType().bitsGE(MVT::i32)) {
1115 // Convert pointer from byte address to dword address.
1116 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1117 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1118 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001119
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001120 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1121 assert(!"Truncated and indexed stores not supported yet");
1122 } else {
1123 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1124 }
1125 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001126 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001127 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001128
1129 EVT ValueVT = Value.getValueType();
1130
1131 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1132 return SDValue();
1133 }
1134
1135 // Lowering for indirect addressing
1136
1137 const MachineFunction &MF = DAG.getMachineFunction();
1138 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1139 getTargetMachine().getFrameLowering());
1140 unsigned StackWidth = TFL->getStackWidth(MF);
1141
1142 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1143
1144 if (ValueVT.isVector()) {
1145 unsigned NumElemVT = ValueVT.getVectorNumElements();
1146 EVT ElemVT = ValueVT.getVectorElementType();
1147 SDValue Stores[4];
1148
1149 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1150 "vector width in load");
1151
1152 for (unsigned i = 0; i < NumElemVT; ++i) {
1153 unsigned Channel, PtrIncr;
1154 getStackAddress(StackWidth, i, Channel, PtrIncr);
1155 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1156 DAG.getConstant(PtrIncr, MVT::i32));
1157 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1158 Value, DAG.getConstant(i, MVT::i32));
1159
1160 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1161 Chain, Elem, Ptr,
1162 DAG.getTargetConstant(Channel, MVT::i32));
1163 }
1164 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1165 } else {
1166 if (ValueVT == MVT::i8) {
1167 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1168 }
1169 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001170 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001171 }
1172
1173 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001174}
1175
Tom Stellard365366f2013-01-23 02:09:06 +00001176// return (512 + (kc_bank << 12)
1177static int
1178ConstantAddressBlock(unsigned AddressSpace) {
1179 switch (AddressSpace) {
1180 case AMDGPUAS::CONSTANT_BUFFER_0:
1181 return 512;
1182 case AMDGPUAS::CONSTANT_BUFFER_1:
1183 return 512 + 4096;
1184 case AMDGPUAS::CONSTANT_BUFFER_2:
1185 return 512 + 4096 * 2;
1186 case AMDGPUAS::CONSTANT_BUFFER_3:
1187 return 512 + 4096 * 3;
1188 case AMDGPUAS::CONSTANT_BUFFER_4:
1189 return 512 + 4096 * 4;
1190 case AMDGPUAS::CONSTANT_BUFFER_5:
1191 return 512 + 4096 * 5;
1192 case AMDGPUAS::CONSTANT_BUFFER_6:
1193 return 512 + 4096 * 6;
1194 case AMDGPUAS::CONSTANT_BUFFER_7:
1195 return 512 + 4096 * 7;
1196 case AMDGPUAS::CONSTANT_BUFFER_8:
1197 return 512 + 4096 * 8;
1198 case AMDGPUAS::CONSTANT_BUFFER_9:
1199 return 512 + 4096 * 9;
1200 case AMDGPUAS::CONSTANT_BUFFER_10:
1201 return 512 + 4096 * 10;
1202 case AMDGPUAS::CONSTANT_BUFFER_11:
1203 return 512 + 4096 * 11;
1204 case AMDGPUAS::CONSTANT_BUFFER_12:
1205 return 512 + 4096 * 12;
1206 case AMDGPUAS::CONSTANT_BUFFER_13:
1207 return 512 + 4096 * 13;
1208 case AMDGPUAS::CONSTANT_BUFFER_14:
1209 return 512 + 4096 * 14;
1210 case AMDGPUAS::CONSTANT_BUFFER_15:
1211 return 512 + 4096 * 15;
1212 default:
1213 return -1;
1214 }
1215}
1216
/// Custom lowering for ISD::LOAD.
///
/// Handles, in order:
///  * vector loads from LOCAL_ADDRESS, split into smaller loads;
///  * non-extending / zero-extending loads from a constant buffer, turned
///    into AMDGPUISD::CONST_ADDRESS nodes;
///  * sign-extending loads, expanded to EXTLOAD + SHL + SRA;
///  * PRIVATE_ADDRESS loads, lowered to REGISTER_LOAD for indirect
///    addressing.
/// Anything else returns SDValue() so the generic legalizer expands it.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // A constant pointer lets us fold the kc_bank offset into each lane's
    // address; otherwise the bank is passed as a separate operand below.
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non-constant ptr can't be folded, keep it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads read a whole slot; extract element 0 of the result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as EXTLOAD followed by shift-left / arithmetic-shift-right to
    // replicate the sign bit across the destination width.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress yields the channel and
    // the incremental pointer bump for each element.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the result out to a 4-element vector with undefs.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001353
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Compute-shader arguments are read from the input constant buffer
/// (CONSTANT_BUFFER_0, starting at byte 36); other shader types receive
/// their arguments in live-in registers assigned by the calling convention.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types: MemVT below is the type the
  // argument has in memory, which may be narrower than the register type VT.
  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    if (ShaderType != ShaderType::COMPUTE) {
      // Non-compute shaders: arguments arrive in registers; mark the assigned
      // register live-in and copy from it.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    // SEXTLOAD matches how the runtime uploads the data (see the SEXT note in
    // LowerLOAD: constant-buffer data is sign extended on upload).
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 MemVT, false, false, 4);
                                 // 4 is the preferred alignment for
                                 // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1404
Matt Arsenault758659232013-05-18 00:21:46 +00001405EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001406 if (!VT.isVector()) return MVT::i32;
1407 return VT.changeVectorElementTypeToInteger();
1408}
1409
/// Rewrite a BUILD_VECTOR so trivially-swizzlable lanes become undef.
///
/// Lanes that are undef, constant 0.0, constant 1.0, or duplicates of an
/// earlier lane are replaced by UNDEF in the returned BUILD_VECTOR, and
/// \p RemapSwizzle records the hardware swizzle select to use instead
/// (7 = mask write, 4 = SEL_0, 5 = SEL_1, or the index of the duplicated
/// lane).  \p RemapSwizzle must be empty on entry; lanes not inserted into
/// it keep their original selects.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // Deduplicate: if this lane repeats an earlier lane, point the swizzle
    // at the earlier lane and drop this one.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1452
Benjamin Kramer193960c2013-06-11 13:32:25 +00001453static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1454 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001455 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1456 assert(RemapSwizzle.empty());
1457 SDValue NewBldVec[4] = {
1458 VectorEntry.getOperand(0),
1459 VectorEntry.getOperand(1),
1460 VectorEntry.getOperand(2),
1461 VectorEntry.getOperand(3)
1462 };
1463 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001464 for (unsigned i = 0; i < 4; i++)
1465 RemapSwizzle[i] = i;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001466
1467 for (unsigned i = 0; i < 4; i++) {
1468 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1469 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1470 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001471 if (i == Idx) {
1472 isUnmovable[Idx] = true;
1473 continue;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001474 }
Vincent Lejeune301beb82013-10-13 17:56:04 +00001475 if (isUnmovable[Idx])
1476 continue;
1477 // Swap i and Idx
1478 std::swap(NewBldVec[Idx], NewBldVec[i]);
1479 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1480 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001481 }
1482 }
1483
1484 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1485 VectorEntry.getValueType(), NewBldVec, 4);
1486}
1487
1488
1489SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1490SDValue Swz[4], SelectionDAG &DAG) const {
1491 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1492 // Old -> New swizzle values
1493 DenseMap<unsigned, unsigned> SwizzleRemap;
1494
1495 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1496 for (unsigned i = 0; i < 4; i++) {
1497 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1498 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1499 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1500 }
1501
1502 SwizzleRemap.clear();
1503 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1504 for (unsigned i = 0; i < 4; i++) {
1505 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1506 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1507 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1508 }
1509
1510 return BuildVector;
1511}
1512
1513
Tom Stellard75aadc22012-12-11 21:25:42 +00001514//===----------------------------------------------------------------------===//
1515// Custom DAG Optimizations
1516//===----------------------------------------------------------------------===//
1517
1518SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1519 DAGCombinerInfo &DCI) const {
1520 SelectionDAG &DAG = DCI.DAG;
1521
1522 switch (N->getOpcode()) {
1523 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1524 case ISD::FP_ROUND: {
1525 SDValue Arg = N->getOperand(0);
1526 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001527 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001528 Arg.getOperand(0));
1529 }
1530 break;
1531 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001532
1533 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1534 // (i32 select_cc f32, f32, -1, 0 cc)
1535 //
1536 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1537 // this to one of the SET*_DX10 instructions.
1538 case ISD::FP_TO_SINT: {
1539 SDValue FNeg = N->getOperand(0);
1540 if (FNeg.getOpcode() != ISD::FNEG) {
1541 return SDValue();
1542 }
1543 SDValue SelectCC = FNeg.getOperand(0);
1544 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1545 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1546 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1547 !isHWTrueValue(SelectCC.getOperand(2)) ||
1548 !isHWFalseValue(SelectCC.getOperand(3))) {
1549 return SDValue();
1550 }
1551
Andrew Trickef9de2a2013-05-25 02:42:55 +00001552 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001553 SelectCC.getOperand(0), // LHS
1554 SelectCC.getOperand(1), // RHS
1555 DAG.getConstant(-1, MVT::i32), // True
1556 DAG.getConstant(0, MVT::i32), // Flase
1557 SelectCC.getOperand(4)); // CC
1558
1559 break;
1560 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001561
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001562 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1563 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001564 case ISD::INSERT_VECTOR_ELT: {
1565 SDValue InVec = N->getOperand(0);
1566 SDValue InVal = N->getOperand(1);
1567 SDValue EltNo = N->getOperand(2);
1568 SDLoc dl(N);
1569
1570 // If the inserted element is an UNDEF, just use the input vector.
1571 if (InVal.getOpcode() == ISD::UNDEF)
1572 return InVec;
1573
1574 EVT VT = InVec.getValueType();
1575
1576 // If we can't generate a legal BUILD_VECTOR, exit
1577 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1578 return SDValue();
1579
1580 // Check that we know which element is being inserted
1581 if (!isa<ConstantSDNode>(EltNo))
1582 return SDValue();
1583 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1584
1585 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1586 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1587 // vector elements.
1588 SmallVector<SDValue, 8> Ops;
1589 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1590 Ops.append(InVec.getNode()->op_begin(),
1591 InVec.getNode()->op_end());
1592 } else if (InVec.getOpcode() == ISD::UNDEF) {
1593 unsigned NElts = VT.getVectorNumElements();
1594 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1595 } else {
1596 return SDValue();
1597 }
1598
1599 // Insert the element
1600 if (Elt < Ops.size()) {
1601 // All the operands of BUILD_VECTOR must have the same type;
1602 // we enforce that here.
1603 EVT OpVT = Ops[0].getValueType();
1604 if (InVal.getValueType() != OpVT)
1605 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1606 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1607 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1608 Ops[Elt] = InVal;
1609 }
1610
1611 // Return the new vector
1612 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1613 VT, &Ops[0], Ops.size());
1614 }
1615
Tom Stellard365366f2013-01-23 02:09:06 +00001616 // Extract_vec (Build_vector) generated by custom lowering
1617 // also needs to be customly combined
1618 case ISD::EXTRACT_VECTOR_ELT: {
1619 SDValue Arg = N->getOperand(0);
1620 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1621 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1622 unsigned Element = Const->getZExtValue();
1623 return Arg->getOperand(Element);
1624 }
1625 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001626 if (Arg.getOpcode() == ISD::BITCAST &&
1627 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1628 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1629 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001630 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001631 Arg->getOperand(0).getOperand(Element));
1632 }
1633 }
Tom Stellard365366f2013-01-23 02:09:06 +00001634 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001635
1636 case ISD::SELECT_CC: {
1637 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1638 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001639 //
1640 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1641 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001642 SDValue LHS = N->getOperand(0);
1643 if (LHS.getOpcode() != ISD::SELECT_CC) {
1644 return SDValue();
1645 }
1646
1647 SDValue RHS = N->getOperand(1);
1648 SDValue True = N->getOperand(2);
1649 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001650 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001651
1652 if (LHS.getOperand(2).getNode() != True.getNode() ||
1653 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001654 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001655 return SDValue();
1656 }
1657
Tom Stellard5e524892013-03-08 15:37:11 +00001658 switch (NCC) {
1659 default: return SDValue();
1660 case ISD::SETNE: return LHS;
1661 case ISD::SETEQ: {
1662 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1663 LHSCC = ISD::getSetCCInverse(LHSCC,
1664 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001665 if (DCI.isBeforeLegalizeOps() ||
1666 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1667 return DAG.getSelectCC(SDLoc(N),
1668 LHS.getOperand(0),
1669 LHS.getOperand(1),
1670 LHS.getOperand(2),
1671 LHS.getOperand(3),
1672 LHSCC);
1673 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001674 }
Tom Stellard5e524892013-03-08 15:37:11 +00001675 }
Tom Stellardcd428182013-09-28 02:50:38 +00001676 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001677 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001678
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001679 case AMDGPUISD::EXPORT: {
1680 SDValue Arg = N->getOperand(1);
1681 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1682 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001683
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001684 SDValue NewArgs[8] = {
1685 N->getOperand(0), // Chain
1686 SDValue(),
1687 N->getOperand(2), // ArrayBase
1688 N->getOperand(3), // Type
1689 N->getOperand(4), // SWZ_X
1690 N->getOperand(5), // SWZ_Y
1691 N->getOperand(6), // SWZ_Z
1692 N->getOperand(7) // SWZ_W
1693 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001694 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001695 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001696 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001697 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001698 case AMDGPUISD::TEXTURE_FETCH: {
1699 SDValue Arg = N->getOperand(1);
1700 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1701 break;
1702
1703 SDValue NewArgs[19] = {
1704 N->getOperand(0),
1705 N->getOperand(1),
1706 N->getOperand(2),
1707 N->getOperand(3),
1708 N->getOperand(4),
1709 N->getOperand(5),
1710 N->getOperand(6),
1711 N->getOperand(7),
1712 N->getOperand(8),
1713 N->getOperand(9),
1714 N->getOperand(10),
1715 N->getOperand(11),
1716 N->getOperand(12),
1717 N->getOperand(13),
1718 N->getOperand(14),
1719 N->getOperand(15),
1720 N->getOperand(16),
1721 N->getOperand(17),
1722 N->getOperand(18),
1723 };
1724 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1725 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1726 NewArgs, 19);
1727 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001728 }
1729 return SDValue();
1730}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001731
1732static bool
1733FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001734 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001735 const R600InstrInfo *TII =
1736 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1737 if (!Src.isMachineOpcode())
1738 return false;
1739 switch (Src.getMachineOpcode()) {
1740 case AMDGPU::FNEG_R600:
1741 if (!Neg.getNode())
1742 return false;
1743 Src = Src.getOperand(0);
1744 Neg = DAG.getTargetConstant(1, MVT::i32);
1745 return true;
1746 case AMDGPU::FABS_R600:
1747 if (!Abs.getNode())
1748 return false;
1749 Src = Src.getOperand(0);
1750 Abs = DAG.getTargetConstant(1, MVT::i32);
1751 return true;
1752 case AMDGPU::CONST_COPY: {
1753 unsigned Opcode = ParentNode->getMachineOpcode();
1754 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1755
1756 if (!Sel.getNode())
1757 return false;
1758
1759 SDValue CstOffset = Src.getOperand(0);
1760 if (ParentNode->getValueType(0).isVector())
1761 return false;
1762
1763 // Gather constants values
1764 int SrcIndices[] = {
1765 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1766 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1767 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1768 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1769 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1770 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1771 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1772 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1773 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1774 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1775 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1776 };
1777 std::vector<unsigned> Consts;
1778 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1779 int OtherSrcIdx = SrcIndices[i];
1780 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1781 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1782 continue;
1783 if (HasDst) {
1784 OtherSrcIdx--;
1785 OtherSelIdx--;
1786 }
1787 if (RegisterSDNode *Reg =
1788 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1789 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1790 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1791 ParentNode->getOperand(OtherSelIdx));
1792 Consts.push_back(Cst->getZExtValue());
1793 }
1794 }
1795 }
1796
1797 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1798 Consts.push_back(Cst->getZExtValue());
1799 if (!TII->fitsConstReadLimitations(Consts)) {
1800 return false;
1801 }
1802
1803 Sel = CstOffset;
1804 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1805 return true;
1806 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001807 case AMDGPU::MOV_IMM_I32:
1808 case AMDGPU::MOV_IMM_F32: {
1809 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1810 uint64_t ImmValue = 0;
1811
1812
1813 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1814 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1815 float FloatValue = FPC->getValueAPF().convertToFloat();
1816 if (FloatValue == 0.0) {
1817 ImmReg = AMDGPU::ZERO;
1818 } else if (FloatValue == 0.5) {
1819 ImmReg = AMDGPU::HALF;
1820 } else if (FloatValue == 1.0) {
1821 ImmReg = AMDGPU::ONE;
1822 } else {
1823 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1824 }
1825 } else {
1826 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1827 uint64_t Value = C->getZExtValue();
1828 if (Value == 0) {
1829 ImmReg = AMDGPU::ZERO;
1830 } else if (Value == 1) {
1831 ImmReg = AMDGPU::ONE_INT;
1832 } else {
1833 ImmValue = Value;
1834 }
1835 }
1836
1837 // Check that we aren't already using an immediate.
1838 // XXX: It's possible for an instruction to have more than one
1839 // immediate operand, but this is not supported yet.
1840 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1841 if (!Imm.getNode())
1842 return false;
1843 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1844 assert(C);
1845 if (C->getZExtValue())
1846 return false;
1847 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1848 }
1849 Src = DAG.getRegister(ImmReg, MVT::i32);
1850 return true;
1851 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001852 default:
1853 return false;
1854 }
1855}
1856
1857
1858/// \brief Fold the instructions after selecting them
1859SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1860 SelectionDAG &DAG) const {
1861 const R600InstrInfo *TII =
1862 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1863 if (!Node->isMachineOpcode())
1864 return Node;
1865 unsigned Opcode = Node->getMachineOpcode();
1866 SDValue FakeOp;
1867
1868 std::vector<SDValue> Ops;
1869 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1870 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001871 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001872
1873 if (Opcode == AMDGPU::DOT_4) {
1874 int OperandIdx[] = {
1875 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1876 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1877 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1878 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1879 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1880 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1881 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1882 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001883 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001884 int NegIdx[] = {
1885 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1886 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1887 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1888 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1889 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1890 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1891 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1892 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1893 };
1894 int AbsIdx[] = {
1895 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1896 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1897 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1898 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1899 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1900 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1901 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1902 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1903 };
1904 for (unsigned i = 0; i < 8; i++) {
1905 if (OperandIdx[i] < 0)
1906 return Node;
1907 SDValue &Src = Ops[OperandIdx[i] - 1];
1908 SDValue &Neg = Ops[NegIdx[i] - 1];
1909 SDValue &Abs = Ops[AbsIdx[i] - 1];
1910 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1911 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1912 if (HasDst)
1913 SelIdx--;
1914 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001915 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1916 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1917 }
1918 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1919 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1920 SDValue &Src = Ops[i];
1921 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001922 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1923 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001924 } else if (Opcode == AMDGPU::CLAMP_R600) {
1925 SDValue Src = Node->getOperand(0);
1926 if (!Src.isMachineOpcode() ||
1927 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1928 return Node;
1929 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1930 AMDGPU::OpName::clamp);
1931 if (ClampIdx < 0)
1932 return Node;
1933 std::vector<SDValue> Ops;
1934 unsigned NumOp = Src.getNumOperands();
1935 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001936 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001937 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1938 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1939 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001940 } else {
1941 if (!TII->hasInstrModifiers(Opcode))
1942 return Node;
1943 int OperandIdx[] = {
1944 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1945 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1946 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1947 };
1948 int NegIdx[] = {
1949 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1950 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1951 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1952 };
1953 int AbsIdx[] = {
1954 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1955 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1956 -1
1957 };
1958 for (unsigned i = 0; i < 3; i++) {
1959 if (OperandIdx[i] < 0)
1960 return Node;
1961 SDValue &Src = Ops[OperandIdx[i] - 1];
1962 SDValue &Neg = Ops[NegIdx[i] - 1];
1963 SDValue FakeAbs;
1964 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1965 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1966 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001967 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1968 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001969 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001970 ImmIdx--;
1971 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001972 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001973 SDValue &Imm = Ops[ImmIdx];
1974 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001975 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1976 }
1977 }
1978
1979 return Node;
1980}