blob: 8d71919704dff96b40a192c847ce534c909a591b [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000019#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000024#include "llvm/IR/Argument.h"
25#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000026
27using namespace llvm;
28
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellard0351ea22013-09-28 02:50:50 +000041 // Set condition code actions
42 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000044 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000045 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000046 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
47 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000048 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
49 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
50 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
51 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000052 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
53 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
54
55 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
56 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
57 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
58 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
59
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000060 setOperationAction(ISD::FCOS, MVT::f32, Custom);
61 setOperationAction(ISD::FSIN, MVT::f32, Custom);
62
Tom Stellard75aadc22012-12-11 21:25:42 +000063 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000064 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000065
Tom Stellard492ebea2013-03-08 15:37:07 +000066 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
67 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000068
69 setOperationAction(ISD::FSUB, MVT::f32, Expand);
70
71 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
72 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
73 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000074
Tom Stellard75aadc22012-12-11 21:25:42 +000075 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
76 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
77
Tom Stellarde8f9f282013-03-08 15:37:05 +000078 setOperationAction(ISD::SETCC, MVT::i32, Expand);
79 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000080 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
81
Tom Stellard53f2f902013-09-05 18:38:03 +000082 setOperationAction(ISD::SELECT, MVT::i32, Expand);
83 setOperationAction(ISD::SELECT, MVT::f32, Expand);
84 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
85 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
86 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
87 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000088
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000089 // Legalize loads and stores to the private address space.
90 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000091 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000092 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +000093
94 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
95 // spaces, so it is custom lowered to handle those where it isn't.
Tom Stellard1e803092013-07-23 01:48:18 +000096 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
97 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
98 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
99 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Matt Arsenault00a0d6f2013-11-13 02:39:07 +0000100 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
101 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
102
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000103 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000104 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000105 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000106 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +0000107 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
108 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000109
Tom Stellard365366f2013-01-23 02:09:06 +0000110 setOperationAction(ISD::LOAD, MVT::i32, Custom);
111 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000112 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
113
Tom Stellard75aadc22012-12-11 21:25:42 +0000114 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000115 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000116 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000117 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000118 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000119
Michel Danzer49812b52013-07-10 16:37:07 +0000120 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
121
Tom Stellardb852af52013-03-08 15:37:03 +0000122 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000123 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000124 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000125}
126
127MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
128 MachineInstr * MI, MachineBasicBlock * BB) const {
129 MachineFunction * MF = BB->getParent();
130 MachineRegisterInfo &MRI = MF->getRegInfo();
131 MachineBasicBlock::iterator I = *MI;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000132 const R600InstrInfo *TII =
133 static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000134
135 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000136 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000137 // Replace LDS_*_RET instruction that don't have any uses with the
138 // equivalent LDS_*_NORET instruction.
139 if (TII->isLDSRetInstr(MI->getOpcode())) {
Tom Stellard13c68ef2013-09-05 18:38:09 +0000140 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
141 assert(DstIdx != -1);
142 MachineInstrBuilder NewMI;
Tom Stellard8f9fc202013-11-15 00:12:45 +0000143 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
144 return BB;
145
146 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
147 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000148 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
149 NewMI.addOperand(MI->getOperand(i));
150 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000151 } else {
152 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
153 }
154 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000155 case AMDGPU::CLAMP_R600: {
156 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
157 AMDGPU::MOV,
158 MI->getOperand(0).getReg(),
159 MI->getOperand(1).getReg());
160 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
161 break;
162 }
163
164 case AMDGPU::FABS_R600: {
165 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
166 AMDGPU::MOV,
167 MI->getOperand(0).getReg(),
168 MI->getOperand(1).getReg());
169 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
170 break;
171 }
172
173 case AMDGPU::FNEG_R600: {
174 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
175 AMDGPU::MOV,
176 MI->getOperand(0).getReg(),
177 MI->getOperand(1).getReg());
178 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
179 break;
180 }
181
Tom Stellard75aadc22012-12-11 21:25:42 +0000182 case AMDGPU::MASK_WRITE: {
183 unsigned maskedRegister = MI->getOperand(0).getReg();
184 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
185 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
186 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
187 break;
188 }
189
190 case AMDGPU::MOV_IMM_F32:
191 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
192 MI->getOperand(1).getFPImm()->getValueAPF()
193 .bitcastToAPInt().getZExtValue());
194 break;
195 case AMDGPU::MOV_IMM_I32:
196 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
197 MI->getOperand(1).getImm());
198 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000199 case AMDGPU::CONST_COPY: {
200 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
201 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000202 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000203 MI->getOperand(1).getImm());
204 break;
205 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000206
207 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000208 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000209 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
210 unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
211
212 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
213 .addOperand(MI->getOperand(0))
214 .addOperand(MI->getOperand(1))
215 .addImm(EOP); // Set End of program bit
216 break;
217 }
218
Tom Stellard75aadc22012-12-11 21:25:42 +0000219 case AMDGPU::TXD: {
220 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
221 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000222 MachineOperand &RID = MI->getOperand(4);
223 MachineOperand &SID = MI->getOperand(5);
224 unsigned TextureId = MI->getOperand(6).getImm();
225 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
226 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000227
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000228 switch (TextureId) {
229 case 5: // Rect
230 CTX = CTY = 0;
231 break;
232 case 6: // Shadow1D
233 SrcW = SrcZ;
234 break;
235 case 7: // Shadow2D
236 SrcW = SrcZ;
237 break;
238 case 8: // ShadowRect
239 CTX = CTY = 0;
240 SrcW = SrcZ;
241 break;
242 case 9: // 1DArray
243 SrcZ = SrcY;
244 CTZ = 0;
245 break;
246 case 10: // 2DArray
247 CTZ = 0;
248 break;
249 case 11: // Shadow1DArray
250 SrcZ = SrcY;
251 CTZ = 0;
252 break;
253 case 12: // Shadow2DArray
254 CTZ = 0;
255 break;
256 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000257 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
258 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000259 .addImm(SrcX)
260 .addImm(SrcY)
261 .addImm(SrcZ)
262 .addImm(SrcW)
263 .addImm(0)
264 .addImm(0)
265 .addImm(0)
266 .addImm(0)
267 .addImm(1)
268 .addImm(2)
269 .addImm(3)
270 .addOperand(RID)
271 .addOperand(SID)
272 .addImm(CTX)
273 .addImm(CTY)
274 .addImm(CTZ)
275 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000276 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
277 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000278 .addImm(SrcX)
279 .addImm(SrcY)
280 .addImm(SrcZ)
281 .addImm(SrcW)
282 .addImm(0)
283 .addImm(0)
284 .addImm(0)
285 .addImm(0)
286 .addImm(1)
287 .addImm(2)
288 .addImm(3)
289 .addOperand(RID)
290 .addOperand(SID)
291 .addImm(CTX)
292 .addImm(CTY)
293 .addImm(CTZ)
294 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000295 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
296 .addOperand(MI->getOperand(0))
297 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000298 .addImm(SrcX)
299 .addImm(SrcY)
300 .addImm(SrcZ)
301 .addImm(SrcW)
302 .addImm(0)
303 .addImm(0)
304 .addImm(0)
305 .addImm(0)
306 .addImm(1)
307 .addImm(2)
308 .addImm(3)
309 .addOperand(RID)
310 .addOperand(SID)
311 .addImm(CTX)
312 .addImm(CTY)
313 .addImm(CTZ)
314 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000315 .addReg(T0, RegState::Implicit)
316 .addReg(T1, RegState::Implicit);
317 break;
318 }
319
320 case AMDGPU::TXD_SHADOW: {
321 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
322 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000323 MachineOperand &RID = MI->getOperand(4);
324 MachineOperand &SID = MI->getOperand(5);
325 unsigned TextureId = MI->getOperand(6).getImm();
326 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
327 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
328
329 switch (TextureId) {
330 case 5: // Rect
331 CTX = CTY = 0;
332 break;
333 case 6: // Shadow1D
334 SrcW = SrcZ;
335 break;
336 case 7: // Shadow2D
337 SrcW = SrcZ;
338 break;
339 case 8: // ShadowRect
340 CTX = CTY = 0;
341 SrcW = SrcZ;
342 break;
343 case 9: // 1DArray
344 SrcZ = SrcY;
345 CTZ = 0;
346 break;
347 case 10: // 2DArray
348 CTZ = 0;
349 break;
350 case 11: // Shadow1DArray
351 SrcZ = SrcY;
352 CTZ = 0;
353 break;
354 case 12: // Shadow2DArray
355 CTZ = 0;
356 break;
357 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000358
359 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
360 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000361 .addImm(SrcX)
362 .addImm(SrcY)
363 .addImm(SrcZ)
364 .addImm(SrcW)
365 .addImm(0)
366 .addImm(0)
367 .addImm(0)
368 .addImm(0)
369 .addImm(1)
370 .addImm(2)
371 .addImm(3)
372 .addOperand(RID)
373 .addOperand(SID)
374 .addImm(CTX)
375 .addImm(CTY)
376 .addImm(CTZ)
377 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000378 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
379 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000380 .addImm(SrcX)
381 .addImm(SrcY)
382 .addImm(SrcZ)
383 .addImm(SrcW)
384 .addImm(0)
385 .addImm(0)
386 .addImm(0)
387 .addImm(0)
388 .addImm(1)
389 .addImm(2)
390 .addImm(3)
391 .addOperand(RID)
392 .addOperand(SID)
393 .addImm(CTX)
394 .addImm(CTY)
395 .addImm(CTZ)
396 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000397 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
398 .addOperand(MI->getOperand(0))
399 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000400 .addImm(SrcX)
401 .addImm(SrcY)
402 .addImm(SrcZ)
403 .addImm(SrcW)
404 .addImm(0)
405 .addImm(0)
406 .addImm(0)
407 .addImm(0)
408 .addImm(1)
409 .addImm(2)
410 .addImm(3)
411 .addOperand(RID)
412 .addOperand(SID)
413 .addImm(CTX)
414 .addImm(CTY)
415 .addImm(CTZ)
416 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000417 .addReg(T0, RegState::Implicit)
418 .addReg(T1, RegState::Implicit);
419 break;
420 }
421
422 case AMDGPU::BRANCH:
423 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000424 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000425 break;
426
427 case AMDGPU::BRANCH_COND_f32: {
428 MachineInstr *NewMI =
429 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
430 AMDGPU::PREDICATE_BIT)
431 .addOperand(MI->getOperand(1))
432 .addImm(OPCODE_IS_NOT_ZERO)
433 .addImm(0); // Flags
434 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000435 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000436 .addOperand(MI->getOperand(0))
437 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
438 break;
439 }
440
441 case AMDGPU::BRANCH_COND_i32: {
442 MachineInstr *NewMI =
443 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
444 AMDGPU::PREDICATE_BIT)
445 .addOperand(MI->getOperand(1))
446 .addImm(OPCODE_IS_NOT_ZERO_INT)
447 .addImm(0); // Flags
448 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000449 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000450 .addOperand(MI->getOperand(0))
451 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
452 break;
453 }
454
Tom Stellard75aadc22012-12-11 21:25:42 +0000455 case AMDGPU::EG_ExportSwz:
456 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000457 // Instruction is left unmodified if its not the last one of its type
458 bool isLastInstructionOfItsType = true;
459 unsigned InstExportType = MI->getOperand(1).getImm();
460 for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
461 EndBlock = BB->end(); NextExportInst != EndBlock;
462 NextExportInst = llvm::next(NextExportInst)) {
463 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
464 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
465 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
466 .getImm();
467 if (CurrentInstExportType == InstExportType) {
468 isLastInstructionOfItsType = false;
469 break;
470 }
471 }
472 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000473 bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000474 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000475 return BB;
476 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
477 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
478 .addOperand(MI->getOperand(0))
479 .addOperand(MI->getOperand(1))
480 .addOperand(MI->getOperand(2))
481 .addOperand(MI->getOperand(3))
482 .addOperand(MI->getOperand(4))
483 .addOperand(MI->getOperand(5))
484 .addOperand(MI->getOperand(6))
485 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000486 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000487 break;
488 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000489 case AMDGPU::RETURN: {
490 // RETURN instructions must have the live-out registers as implicit uses,
491 // otherwise they appear dead.
492 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
493 MachineInstrBuilder MIB(*MF, MI);
494 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
495 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
496 return BB;
497 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000498 }
499
500 MI->eraseFromParent();
501 return BB;
502}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

Tom Stellard75aadc22012-12-11 21:25:42 +0000508SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000509 MachineFunction &MF = DAG.getMachineFunction();
510 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000511 switch (Op.getOpcode()) {
512 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000513 case ISD::FCOS:
514 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000515 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000516 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000517 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000518 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000519 case ISD::INTRINSIC_VOID: {
520 SDValue Chain = Op.getOperand(0);
521 unsigned IntrinsicID =
522 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
523 switch (IntrinsicID) {
524 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000525 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
526 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000527 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000528 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000529 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000530 case AMDGPUIntrinsic::R600_store_swizzle: {
531 const SDValue Args[8] = {
532 Chain,
533 Op.getOperand(2), // Export Value
534 Op.getOperand(3), // ArrayBase
535 Op.getOperand(4), // Type
536 DAG.getConstant(0, MVT::i32), // SWZ_X
537 DAG.getConstant(1, MVT::i32), // SWZ_Y
538 DAG.getConstant(2, MVT::i32), // SWZ_Z
539 DAG.getConstant(3, MVT::i32) // SWZ_W
540 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000541 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000542 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000543 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000544
Tom Stellard75aadc22012-12-11 21:25:42 +0000545 // default for switch(IntrinsicID)
546 default: break;
547 }
548 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
549 break;
550 }
551 case ISD::INTRINSIC_WO_CHAIN: {
552 unsigned IntrinsicID =
553 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
554 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000555 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000556 switch(IntrinsicID) {
557 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneaee3a102013-11-12 16:26:47 +0000558 case AMDGPUIntrinsic::R600_load_input: {
559 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
560 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
561 MachineFunction &MF = DAG.getMachineFunction();
562 MachineRegisterInfo &MRI = MF.getRegInfo();
563 MRI.addLiveIn(Reg);
564 return DAG.getCopyFromReg(DAG.getEntryNode(),
565 SDLoc(DAG.getEntryNode()), Reg, VT);
566 }
567
568 case AMDGPUIntrinsic::R600_interp_input: {
569 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
570 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
571 MachineSDNode *interp;
572 if (ijb < 0) {
573 const MachineFunction &MF = DAG.getMachineFunction();
574 const R600InstrInfo *TII =
575 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
576 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
577 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
578 return DAG.getTargetExtractSubreg(
579 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
580 DL, MVT::f32, SDValue(interp, 0));
581 }
582 MachineFunction &MF = DAG.getMachineFunction();
583 MachineRegisterInfo &MRI = MF.getRegInfo();
584 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
585 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
586 MRI.addLiveIn(RegisterI);
587 MRI.addLiveIn(RegisterJ);
588 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
589 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
590 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
591 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
592
593 if (slot % 4 < 2)
594 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
595 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
596 RegisterJNode, RegisterINode);
597 else
598 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
599 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
600 RegisterJNode, RegisterINode);
601 return SDValue(interp, slot % 2);
602 }
Vincent Lejeunef143af32013-11-11 22:10:24 +0000603 case AMDGPUIntrinsic::R600_interp_xy:
604 case AMDGPUIntrinsic::R600_interp_zw: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000605 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000606 MachineSDNode *interp;
Vincent Lejeunef143af32013-11-11 22:10:24 +0000607 SDValue RegisterINode = Op.getOperand(2);
608 SDValue RegisterJNode = Op.getOperand(3);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000609
Vincent Lejeunef143af32013-11-11 22:10:24 +0000610 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
Tom Stellard41afe6a2013-02-05 17:09:14 +0000611 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000612 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000613 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000614 else
615 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
Vincent Lejeunef143af32013-11-11 22:10:24 +0000616 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000617 RegisterJNode, RegisterINode);
Vincent Lejeunef143af32013-11-11 22:10:24 +0000618 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
619 SDValue(interp, 0), SDValue(interp, 1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000620 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000621 case AMDGPUIntrinsic::R600_tex:
622 case AMDGPUIntrinsic::R600_texc:
623 case AMDGPUIntrinsic::R600_txl:
624 case AMDGPUIntrinsic::R600_txlc:
625 case AMDGPUIntrinsic::R600_txb:
626 case AMDGPUIntrinsic::R600_txbc:
627 case AMDGPUIntrinsic::R600_txf:
628 case AMDGPUIntrinsic::R600_txq:
629 case AMDGPUIntrinsic::R600_ddx:
Vincent Lejeune6df39432013-10-02 16:00:33 +0000630 case AMDGPUIntrinsic::R600_ddy:
631 case AMDGPUIntrinsic::R600_ldptr: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000632 unsigned TextureOp;
633 switch (IntrinsicID) {
634 case AMDGPUIntrinsic::R600_tex:
635 TextureOp = 0;
636 break;
637 case AMDGPUIntrinsic::R600_texc:
638 TextureOp = 1;
639 break;
640 case AMDGPUIntrinsic::R600_txl:
641 TextureOp = 2;
642 break;
643 case AMDGPUIntrinsic::R600_txlc:
644 TextureOp = 3;
645 break;
646 case AMDGPUIntrinsic::R600_txb:
647 TextureOp = 4;
648 break;
649 case AMDGPUIntrinsic::R600_txbc:
650 TextureOp = 5;
651 break;
652 case AMDGPUIntrinsic::R600_txf:
653 TextureOp = 6;
654 break;
655 case AMDGPUIntrinsic::R600_txq:
656 TextureOp = 7;
657 break;
658 case AMDGPUIntrinsic::R600_ddx:
659 TextureOp = 8;
660 break;
661 case AMDGPUIntrinsic::R600_ddy:
662 TextureOp = 9;
663 break;
Vincent Lejeune6df39432013-10-02 16:00:33 +0000664 case AMDGPUIntrinsic::R600_ldptr:
665 TextureOp = 10;
666 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000667 default:
668 llvm_unreachable("Unknow Texture Operation");
669 }
670
671 SDValue TexArgs[19] = {
672 DAG.getConstant(TextureOp, MVT::i32),
673 Op.getOperand(1),
674 DAG.getConstant(0, MVT::i32),
675 DAG.getConstant(1, MVT::i32),
676 DAG.getConstant(2, MVT::i32),
677 DAG.getConstant(3, MVT::i32),
678 Op.getOperand(2),
679 Op.getOperand(3),
680 Op.getOperand(4),
681 DAG.getConstant(0, MVT::i32),
682 DAG.getConstant(1, MVT::i32),
683 DAG.getConstant(2, MVT::i32),
684 DAG.getConstant(3, MVT::i32),
685 Op.getOperand(5),
686 Op.getOperand(6),
687 Op.getOperand(7),
688 Op.getOperand(8),
689 Op.getOperand(9),
690 Op.getOperand(10)
691 };
692 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
693 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000694 case AMDGPUIntrinsic::AMDGPU_dp4: {
695 SDValue Args[8] = {
696 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
697 DAG.getConstant(0, MVT::i32)),
698 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
699 DAG.getConstant(0, MVT::i32)),
700 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
701 DAG.getConstant(1, MVT::i32)),
702 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
703 DAG.getConstant(1, MVT::i32)),
704 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
705 DAG.getConstant(2, MVT::i32)),
706 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
707 DAG.getConstant(2, MVT::i32)),
708 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
709 DAG.getConstant(3, MVT::i32)),
710 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
711 DAG.getConstant(3, MVT::i32))
712 };
713 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
714 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000715
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000716 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000717 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000718 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000719 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000720 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000721 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000722 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000723 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000724 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000725 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000726 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000727 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000728 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000729 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000730 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000731 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000732 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000733 return LowerImplicitParameter(DAG, VT, DL, 8);
734
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000735 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000736 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
737 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000738 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000739 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
740 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000741 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000742 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
743 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000744 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000745 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
746 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000747 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000748 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
749 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000750 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000751 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
752 AMDGPU::T0_Z, VT);
753 }
754 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
755 break;
756 }
757 } // end switch(Op.getOpcode())
758 return SDValue();
759}
760
/// Legalizer hook: produce replacement values for nodes whose results the
/// default legalizer cannot handle.  Only FP_TO_UINT, LOAD and STORE need
/// custom result replacement on R600.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    // Push both the loaded value and the output chain of the new node.
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}
783
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000784SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
785 // On hw >= R700, COS/SIN input must be between -1. and 1.
786 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
787 EVT VT = Op.getValueType();
788 SDValue Arg = Op.getOperand(0);
789 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
790 DAG.getNode(ISD::FADD, SDLoc(Op), VT,
791 DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
792 DAG.getConstantFP(0.15915494309, MVT::f32)),
793 DAG.getConstantFP(0.5, MVT::f32)));
794 unsigned TrigNode;
795 switch (Op.getOpcode()) {
796 case ISD::FCOS:
797 TrigNode = AMDGPUISD::COS_HW;
798 break;
799 case ISD::FSIN:
800 TrigNode = AMDGPUISD::SIN_HW;
801 break;
802 default:
803 llvm_unreachable("Wrong trig opcode");
804 }
805 SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
806 DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
807 DAG.getConstantFP(-0.5, MVT::f32)));
808 if (Gen >= AMDGPUSubtarget::R700)
809 return TrigVal;
810 // On R600 hw, COS/SIN input must be between -Pi and Pi.
811 return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
812 DAG.getConstantFP(3.14159265359, MVT::f32));
813}
814
Tom Stellard75aadc22012-12-11 21:25:42 +0000815SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
816 return DAG.getNode(
817 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000818 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000819 MVT::i1,
820 Op, DAG.getConstantFP(0.0f, MVT::f32),
821 DAG.getCondCode(ISD::SETNE)
822 );
823}
824
Tom Stellard75aadc22012-12-11 21:25:42 +0000825SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000826 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000827 unsigned DwordOffset) const {
828 unsigned ByteOffset = DwordOffset * 4;
829 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000830 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000831
832 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
833 assert(isInt<16>(ByteOffset));
834
835 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
836 DAG.getConstant(ByteOffset, MVT::i32), // PTR
837 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
838 false, false, false, 0);
839}
840
Tom Stellard75aadc22012-12-11 21:25:42 +0000841bool R600TargetLowering::isZero(SDValue Op) const {
842 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
843 return Cst->isNullValue();
844 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
845 return CstFP->isZero();
846 } else {
847 return false;
848 }
849}
850
/// Lower SELECT_CC for R600.  Tries, in order: (1) canonicalize so the
/// select can be matched by a SET* instruction, (2) canonicalize so it can
/// be matched by a CND* instruction (compare against zero), (3) a min/max
/// pattern, and finally (4) split the unsupported select into two supported
/// SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.  If the operands
  // are reversed (hw-true in the False slot), invert — and if necessary also
  // swap — the condition code so a legal one is used.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping True and False.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no not-equal form: rewrite "!= 0" selects as the inverse
    // condition with True/False exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
992
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000993/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
994/// convert these pointers to a register index. Each register holds
995/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
996/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
997/// for indirect addressing.
998SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
999 unsigned StackWidth,
1000 SelectionDAG &DAG) const {
1001 unsigned SRLPad;
1002 switch(StackWidth) {
1003 case 1:
1004 SRLPad = 2;
1005 break;
1006 case 2:
1007 SRLPad = 3;
1008 break;
1009 case 4:
1010 SRLPad = 4;
1011 break;
1012 default: llvm_unreachable("Invalid stack width");
1013 }
1014
Andrew Trickef9de2a2013-05-25 02:42:55 +00001015 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001016 DAG.getConstant(SRLPad, MVT::i32));
1017}
1018
1019void R600TargetLowering::getStackAddress(unsigned StackWidth,
1020 unsigned ElemIdx,
1021 unsigned &Channel,
1022 unsigned &PtrIncr) const {
1023 switch (StackWidth) {
1024 default:
1025 case 1:
1026 Channel = 0;
1027 if (ElemIdx > 0) {
1028 PtrIncr = 1;
1029 } else {
1030 PtrIncr = 0;
1031 }
1032 break;
1033 case 2:
1034 Channel = ElemIdx % 2;
1035 if (ElemIdx == 2) {
1036 PtrIncr = 1;
1037 } else {
1038 PtrIncr = 0;
1039 }
1040 break;
1041 case 4:
1042 Channel = ElemIdx;
1043 PtrIncr = 0;
1044 break;
1045 }
1046}
1047
/// Lower STORE for R600.  After giving the generic AMDGPU lowering a chance,
/// handles: (1) truncating i8/i16 global stores via the STORE_MSKOR
/// read-modify-write node, (2) dword-aligned global stores by converting the
/// pointer to a dword address, and (3) private (stack) stores via
/// REGISTER_STORE with indirect addressing.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  // Let the common AMDGPU lowering handle the node first, if it can.
  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      // Split the byte pointer into the containing dword address and the
      // byte position within that dword.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      // Shift the truncated value and its mask into position inside the
      // dword (byte index * 8 bits).
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, 3, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  // Only private (stack) stores are handled below.
  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Store each element through its own REGISTER_STORE, advancing the
    // register index / channel per getStackAddress, then merge the chains.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    // Scalar path: i8 values are widened to i32 before the register store.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
1156
Tom Stellard365366f2013-01-23 02:09:06 +00001157// return (512 + (kc_bank << 12)
1158static int
1159ConstantAddressBlock(unsigned AddressSpace) {
1160 switch (AddressSpace) {
1161 case AMDGPUAS::CONSTANT_BUFFER_0:
1162 return 512;
1163 case AMDGPUAS::CONSTANT_BUFFER_1:
1164 return 512 + 4096;
1165 case AMDGPUAS::CONSTANT_BUFFER_2:
1166 return 512 + 4096 * 2;
1167 case AMDGPUAS::CONSTANT_BUFFER_3:
1168 return 512 + 4096 * 3;
1169 case AMDGPUAS::CONSTANT_BUFFER_4:
1170 return 512 + 4096 * 4;
1171 case AMDGPUAS::CONSTANT_BUFFER_5:
1172 return 512 + 4096 * 5;
1173 case AMDGPUAS::CONSTANT_BUFFER_6:
1174 return 512 + 4096 * 6;
1175 case AMDGPUAS::CONSTANT_BUFFER_7:
1176 return 512 + 4096 * 7;
1177 case AMDGPUAS::CONSTANT_BUFFER_8:
1178 return 512 + 4096 * 8;
1179 case AMDGPUAS::CONSTANT_BUFFER_9:
1180 return 512 + 4096 * 9;
1181 case AMDGPUAS::CONSTANT_BUFFER_10:
1182 return 512 + 4096 * 10;
1183 case AMDGPUAS::CONSTANT_BUFFER_11:
1184 return 512 + 4096 * 11;
1185 case AMDGPUAS::CONSTANT_BUFFER_12:
1186 return 512 + 4096 * 12;
1187 case AMDGPUAS::CONSTANT_BUFFER_13:
1188 return 512 + 4096 * 13;
1189 case AMDGPUAS::CONSTANT_BUFFER_14:
1190 return 512 + 4096 * 14;
1191 case AMDGPUAS::CONSTANT_BUFFER_15:
1192 return 512 + 4096 * 15;
1193 default:
1194 return -1;
1195 }
1196}
1197
/// Lower LOAD for R600.  Handles, in order: vector loads from local memory
/// (split into smaller loads), loads from the constant buffers (folded into
/// CONST_ADDRESS nodes), SEXT loads (expanded to an extload plus shl/sra,
/// since only CONSTANT_BUFFER_0 sign-extends in hardware), and private
/// (stack) loads via REGISTER_LOAD with indirect addressing.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar results are built as a v4i32 and the first element is
      // extracted below; vector results keep their own width.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Expand as: extload, then shift left/shift right arithmetic to
    // sign-extend the narrow value in-register.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // Only private (stack) loads are handled below.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // Load each element through its own REGISTER_LOAD, padding the result
    // out to a 4-element vector with undefs.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001334
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Non-compute shaders receive their arguments in 128-bit registers;
/// compute kernels read theirs from constant buffer 0, past the 36-byte
/// block of implicit parameters.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument types so the memory type of each
  // load matches the original IR argument.
  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;       // Legalized type of the argument value.
    EVT MemVT = LocalIns[i].VT; // Type the argument occupies in memory.

    if (ShaderType != ShaderType::COMPUTE) {
      // Graphics shaders: arguments arrive as live-in 128-bit registers.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)),
                           MemVT, false, false, 4);
                           // 4 is the prefered alignment for
                           // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1385
Matt Arsenault758659232013-05-18 00:21:46 +00001386EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001387 if (!VT.isVector()) return MVT::i32;
1388 return VT.changeVectorElementTypeToInteger();
1389}
1390
/// Rewrite the operands of a BUILD_VECTOR feeding a swizzle so that constant
/// 0.0/1.0 elements, undef elements and duplicated elements are expressed
/// through swizzle selectors instead of occupying register lanes.  The
/// lane -> selector mapping is recorded in \p RemapSwizzle.
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      // Constant zero and one have dedicated swizzle selectors, so the lane
      // itself can be freed (turned into undef).
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // If this element duplicates an earlier lane, point the swizzle at the
    // earlier lane and free this one.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}
1433
Benjamin Kramer193960c2013-06-11 13:32:25 +00001434static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1435 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001436 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1437 assert(RemapSwizzle.empty());
1438 SDValue NewBldVec[4] = {
1439 VectorEntry.getOperand(0),
1440 VectorEntry.getOperand(1),
1441 VectorEntry.getOperand(2),
1442 VectorEntry.getOperand(3)
1443 };
1444 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001445 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001446 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001447 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1448 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1449 ->getZExtValue();
1450 if (i == Idx)
1451 isUnmovable[Idx] = true;
1452 }
1453 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001454
1455 for (unsigned i = 0; i < 4; i++) {
1456 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1457 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1458 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001459 if (isUnmovable[Idx])
1460 continue;
1461 // Swap i and Idx
1462 std::swap(NewBldVec[Idx], NewBldVec[i]);
1463 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1464 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001465 }
1466 }
1467
1468 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1469 VectorEntry.getValueType(), NewBldVec, 4);
1470}
1471
1472
1473SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1474SDValue Swz[4], SelectionDAG &DAG) const {
1475 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1476 // Old -> New swizzle values
1477 DenseMap<unsigned, unsigned> SwizzleRemap;
1478
1479 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1480 for (unsigned i = 0; i < 4; i++) {
1481 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1482 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1483 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1484 }
1485
1486 SwizzleRemap.clear();
1487 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1488 for (unsigned i = 0; i < 4; i++) {
1489 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1490 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1491 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1492 }
1493
1494 return BuildVector;
1495}
1496
1497
Tom Stellard75aadc22012-12-11 21:25:42 +00001498//===----------------------------------------------------------------------===//
1499// Custom DAG Optimizations
1500//===----------------------------------------------------------------------===//
1501
1502SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1503 DAGCombinerInfo &DCI) const {
1504 SelectionDAG &DAG = DCI.DAG;
1505
1506 switch (N->getOpcode()) {
1507 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1508 case ISD::FP_ROUND: {
1509 SDValue Arg = N->getOperand(0);
1510 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001511 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001512 Arg.getOperand(0));
1513 }
1514 break;
1515 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001516
1517 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1518 // (i32 select_cc f32, f32, -1, 0 cc)
1519 //
1520 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1521 // this to one of the SET*_DX10 instructions.
1522 case ISD::FP_TO_SINT: {
1523 SDValue FNeg = N->getOperand(0);
1524 if (FNeg.getOpcode() != ISD::FNEG) {
1525 return SDValue();
1526 }
1527 SDValue SelectCC = FNeg.getOperand(0);
1528 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1529 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1530 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1531 !isHWTrueValue(SelectCC.getOperand(2)) ||
1532 !isHWFalseValue(SelectCC.getOperand(3))) {
1533 return SDValue();
1534 }
1535
Andrew Trickef9de2a2013-05-25 02:42:55 +00001536 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001537 SelectCC.getOperand(0), // LHS
1538 SelectCC.getOperand(1), // RHS
1539 DAG.getConstant(-1, MVT::i32), // True
1540 DAG.getConstant(0, MVT::i32), // Flase
1541 SelectCC.getOperand(4)); // CC
1542
1543 break;
1544 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001545
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001546 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1547 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001548 case ISD::INSERT_VECTOR_ELT: {
1549 SDValue InVec = N->getOperand(0);
1550 SDValue InVal = N->getOperand(1);
1551 SDValue EltNo = N->getOperand(2);
1552 SDLoc dl(N);
1553
1554 // If the inserted element is an UNDEF, just use the input vector.
1555 if (InVal.getOpcode() == ISD::UNDEF)
1556 return InVec;
1557
1558 EVT VT = InVec.getValueType();
1559
1560 // If we can't generate a legal BUILD_VECTOR, exit
1561 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1562 return SDValue();
1563
1564 // Check that we know which element is being inserted
1565 if (!isa<ConstantSDNode>(EltNo))
1566 return SDValue();
1567 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1568
1569 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1570 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1571 // vector elements.
1572 SmallVector<SDValue, 8> Ops;
1573 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1574 Ops.append(InVec.getNode()->op_begin(),
1575 InVec.getNode()->op_end());
1576 } else if (InVec.getOpcode() == ISD::UNDEF) {
1577 unsigned NElts = VT.getVectorNumElements();
1578 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1579 } else {
1580 return SDValue();
1581 }
1582
1583 // Insert the element
1584 if (Elt < Ops.size()) {
1585 // All the operands of BUILD_VECTOR must have the same type;
1586 // we enforce that here.
1587 EVT OpVT = Ops[0].getValueType();
1588 if (InVal.getValueType() != OpVT)
1589 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1590 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1591 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1592 Ops[Elt] = InVal;
1593 }
1594
1595 // Return the new vector
1596 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1597 VT, &Ops[0], Ops.size());
1598 }
1599
Tom Stellard365366f2013-01-23 02:09:06 +00001600 // Extract_vec (Build_vector) generated by custom lowering
1601 // also needs to be customly combined
1602 case ISD::EXTRACT_VECTOR_ELT: {
1603 SDValue Arg = N->getOperand(0);
1604 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1605 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1606 unsigned Element = Const->getZExtValue();
1607 return Arg->getOperand(Element);
1608 }
1609 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001610 if (Arg.getOpcode() == ISD::BITCAST &&
1611 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1612 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1613 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001614 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001615 Arg->getOperand(0).getOperand(Element));
1616 }
1617 }
Tom Stellard365366f2013-01-23 02:09:06 +00001618 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001619
1620 case ISD::SELECT_CC: {
1621 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1622 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001623 //
1624 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1625 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001626 SDValue LHS = N->getOperand(0);
1627 if (LHS.getOpcode() != ISD::SELECT_CC) {
1628 return SDValue();
1629 }
1630
1631 SDValue RHS = N->getOperand(1);
1632 SDValue True = N->getOperand(2);
1633 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001634 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001635
1636 if (LHS.getOperand(2).getNode() != True.getNode() ||
1637 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001638 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001639 return SDValue();
1640 }
1641
Tom Stellard5e524892013-03-08 15:37:11 +00001642 switch (NCC) {
1643 default: return SDValue();
1644 case ISD::SETNE: return LHS;
1645 case ISD::SETEQ: {
1646 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1647 LHSCC = ISD::getSetCCInverse(LHSCC,
1648 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001649 if (DCI.isBeforeLegalizeOps() ||
1650 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1651 return DAG.getSelectCC(SDLoc(N),
1652 LHS.getOperand(0),
1653 LHS.getOperand(1),
1654 LHS.getOperand(2),
1655 LHS.getOperand(3),
1656 LHSCC);
1657 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001658 }
Tom Stellard5e524892013-03-08 15:37:11 +00001659 }
Tom Stellardcd428182013-09-28 02:50:38 +00001660 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001661 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001662
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001663 case AMDGPUISD::EXPORT: {
1664 SDValue Arg = N->getOperand(1);
1665 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1666 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001667
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001668 SDValue NewArgs[8] = {
1669 N->getOperand(0), // Chain
1670 SDValue(),
1671 N->getOperand(2), // ArrayBase
1672 N->getOperand(3), // Type
1673 N->getOperand(4), // SWZ_X
1674 N->getOperand(5), // SWZ_Y
1675 N->getOperand(6), // SWZ_Z
1676 N->getOperand(7) // SWZ_W
1677 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001678 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001679 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001680 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001681 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001682 case AMDGPUISD::TEXTURE_FETCH: {
1683 SDValue Arg = N->getOperand(1);
1684 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1685 break;
1686
1687 SDValue NewArgs[19] = {
1688 N->getOperand(0),
1689 N->getOperand(1),
1690 N->getOperand(2),
1691 N->getOperand(3),
1692 N->getOperand(4),
1693 N->getOperand(5),
1694 N->getOperand(6),
1695 N->getOperand(7),
1696 N->getOperand(8),
1697 N->getOperand(9),
1698 N->getOperand(10),
1699 N->getOperand(11),
1700 N->getOperand(12),
1701 N->getOperand(13),
1702 N->getOperand(14),
1703 N->getOperand(15),
1704 N->getOperand(16),
1705 N->getOperand(17),
1706 N->getOperand(18),
1707 };
1708 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1709 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1710 NewArgs, 19);
1711 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001712 }
1713 return SDValue();
1714}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001715
1716static bool
1717FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001718 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001719 const R600InstrInfo *TII =
1720 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1721 if (!Src.isMachineOpcode())
1722 return false;
1723 switch (Src.getMachineOpcode()) {
1724 case AMDGPU::FNEG_R600:
1725 if (!Neg.getNode())
1726 return false;
1727 Src = Src.getOperand(0);
1728 Neg = DAG.getTargetConstant(1, MVT::i32);
1729 return true;
1730 case AMDGPU::FABS_R600:
1731 if (!Abs.getNode())
1732 return false;
1733 Src = Src.getOperand(0);
1734 Abs = DAG.getTargetConstant(1, MVT::i32);
1735 return true;
1736 case AMDGPU::CONST_COPY: {
1737 unsigned Opcode = ParentNode->getMachineOpcode();
1738 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1739
1740 if (!Sel.getNode())
1741 return false;
1742
1743 SDValue CstOffset = Src.getOperand(0);
1744 if (ParentNode->getValueType(0).isVector())
1745 return false;
1746
1747 // Gather constants values
1748 int SrcIndices[] = {
1749 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1750 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1751 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1752 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1753 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1754 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1755 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1756 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1757 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1758 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1759 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1760 };
1761 std::vector<unsigned> Consts;
1762 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1763 int OtherSrcIdx = SrcIndices[i];
1764 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1765 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1766 continue;
1767 if (HasDst) {
1768 OtherSrcIdx--;
1769 OtherSelIdx--;
1770 }
1771 if (RegisterSDNode *Reg =
1772 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1773 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1774 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1775 ParentNode->getOperand(OtherSelIdx));
1776 Consts.push_back(Cst->getZExtValue());
1777 }
1778 }
1779 }
1780
1781 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1782 Consts.push_back(Cst->getZExtValue());
1783 if (!TII->fitsConstReadLimitations(Consts)) {
1784 return false;
1785 }
1786
1787 Sel = CstOffset;
1788 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1789 return true;
1790 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001791 case AMDGPU::MOV_IMM_I32:
1792 case AMDGPU::MOV_IMM_F32: {
1793 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1794 uint64_t ImmValue = 0;
1795
1796
1797 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1798 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1799 float FloatValue = FPC->getValueAPF().convertToFloat();
1800 if (FloatValue == 0.0) {
1801 ImmReg = AMDGPU::ZERO;
1802 } else if (FloatValue == 0.5) {
1803 ImmReg = AMDGPU::HALF;
1804 } else if (FloatValue == 1.0) {
1805 ImmReg = AMDGPU::ONE;
1806 } else {
1807 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1808 }
1809 } else {
1810 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1811 uint64_t Value = C->getZExtValue();
1812 if (Value == 0) {
1813 ImmReg = AMDGPU::ZERO;
1814 } else if (Value == 1) {
1815 ImmReg = AMDGPU::ONE_INT;
1816 } else {
1817 ImmValue = Value;
1818 }
1819 }
1820
1821 // Check that we aren't already using an immediate.
1822 // XXX: It's possible for an instruction to have more than one
1823 // immediate operand, but this is not supported yet.
1824 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1825 if (!Imm.getNode())
1826 return false;
1827 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1828 assert(C);
1829 if (C->getZExtValue())
1830 return false;
1831 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1832 }
1833 Src = DAG.getRegister(ImmReg, MVT::i32);
1834 return true;
1835 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001836 default:
1837 return false;
1838 }
1839}
1840
1841
1842/// \brief Fold the instructions after selecting them
1843SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1844 SelectionDAG &DAG) const {
1845 const R600InstrInfo *TII =
1846 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1847 if (!Node->isMachineOpcode())
1848 return Node;
1849 unsigned Opcode = Node->getMachineOpcode();
1850 SDValue FakeOp;
1851
1852 std::vector<SDValue> Ops;
1853 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1854 I != E; ++I)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001855 Ops.push_back(*I);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001856
1857 if (Opcode == AMDGPU::DOT_4) {
1858 int OperandIdx[] = {
1859 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1860 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1861 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1862 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1863 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1864 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1865 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1866 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001867 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001868 int NegIdx[] = {
1869 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1870 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1871 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1872 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1873 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1874 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1875 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1876 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1877 };
1878 int AbsIdx[] = {
1879 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1880 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1881 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1882 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1883 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1884 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1885 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1886 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1887 };
1888 for (unsigned i = 0; i < 8; i++) {
1889 if (OperandIdx[i] < 0)
1890 return Node;
1891 SDValue &Src = Ops[OperandIdx[i] - 1];
1892 SDValue &Neg = Ops[NegIdx[i] - 1];
1893 SDValue &Abs = Ops[AbsIdx[i] - 1];
1894 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1895 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1896 if (HasDst)
1897 SelIdx--;
1898 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001899 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1900 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1901 }
1902 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1903 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1904 SDValue &Src = Ops[i];
1905 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001906 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1907 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001908 } else if (Opcode == AMDGPU::CLAMP_R600) {
1909 SDValue Src = Node->getOperand(0);
1910 if (!Src.isMachineOpcode() ||
1911 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1912 return Node;
1913 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1914 AMDGPU::OpName::clamp);
1915 if (ClampIdx < 0)
1916 return Node;
1917 std::vector<SDValue> Ops;
1918 unsigned NumOp = Src.getNumOperands();
1919 for(unsigned i = 0; i < NumOp; ++i)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00001920 Ops.push_back(Src.getOperand(i));
Vincent Lejeune0167a312013-09-12 23:45:00 +00001921 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1922 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1923 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001924 } else {
1925 if (!TII->hasInstrModifiers(Opcode))
1926 return Node;
1927 int OperandIdx[] = {
1928 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1929 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1930 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1931 };
1932 int NegIdx[] = {
1933 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1934 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1935 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1936 };
1937 int AbsIdx[] = {
1938 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1939 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1940 -1
1941 };
1942 for (unsigned i = 0; i < 3; i++) {
1943 if (OperandIdx[i] < 0)
1944 return Node;
1945 SDValue &Src = Ops[OperandIdx[i] - 1];
1946 SDValue &Neg = Ops[NegIdx[i] - 1];
1947 SDValue FakeAbs;
1948 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1949 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1950 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001951 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1952 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001953 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001954 ImmIdx--;
1955 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001956 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001957 SDValue &Imm = Ops[ImmIdx];
1958 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001959 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1960 }
1961 }
1962
1963 return Node;
1964}