blob: 5e9048a7019c446054ff86257c9b72738335e9c7 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;
29R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000030 AMDGPUTargetLowering(TM),
31 Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000032 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
33 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
34 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
35 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000036 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
37 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
38
Tom Stellard75aadc22012-12-11 21:25:42 +000039 computeRegisterProperties();
40
Tom Stellardcd428182013-09-28 02:50:38 +000041 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
42 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
43 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
44 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
45 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
46 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
47
48 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
49 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
50 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
51 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
52
Vincent Lejeuneb55940c2013-07-09 15:03:11 +000053 setOperationAction(ISD::FCOS, MVT::f32, Custom);
54 setOperationAction(ISD::FSIN, MVT::f32, Custom);
55
Tom Stellard75aadc22012-12-11 21:25:42 +000056 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +000057 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000058
Tom Stellard492ebea2013-03-08 15:37:07 +000059 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
60 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000061
62 setOperationAction(ISD::FSUB, MVT::f32, Expand);
63
64 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
65 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
66 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000067
Tom Stellard75aadc22012-12-11 21:25:42 +000068 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
69 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
70
Tom Stellarde8f9f282013-03-08 15:37:05 +000071 setOperationAction(ISD::SETCC, MVT::i32, Expand);
72 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000073 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
74
Tom Stellard53f2f902013-09-05 18:38:03 +000075 setOperationAction(ISD::SELECT, MVT::i32, Expand);
76 setOperationAction(ISD::SELECT, MVT::f32, Expand);
77 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
78 setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
79 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
80 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +000081
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000082 // Legalize loads and stores to the private address space.
83 setOperationAction(ISD::LOAD, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000084 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000085 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard1e803092013-07-23 01:48:18 +000086 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
87 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
88 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
89 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000090 setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000091 setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard0344cdf2013-08-01 15:23:42 +000092 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000093 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
Tom Stellardd3ee8c12013-08-16 01:12:06 +000094 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
95 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +000096
Tom Stellard365366f2013-01-23 02:09:06 +000097 setOperationAction(ISD::LOAD, MVT::i32, Custom);
98 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000099 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
100
Tom Stellard75aadc22012-12-11 21:25:42 +0000101 setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellarde06163a2013-02-07 14:02:35 +0000102 setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard365366f2013-01-23 02:09:06 +0000103 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellarde06163a2013-02-07 14:02:35 +0000104 setTargetDAGCombine(ISD::SELECT_CC);
Quentin Colombete2e05482013-07-30 00:27:16 +0000105 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000106
Michel Danzer49812b52013-07-10 16:37:07 +0000107 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
108
Tom Stellardb852af52013-03-08 15:37:03 +0000109 setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard87047f62013-04-24 23:56:18 +0000110 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
Tom Stellardfc455472013-08-12 22:33:21 +0000111 setSchedulingPreference(Sched::Source);
Tom Stellard75aadc22012-12-11 21:25:42 +0000112}
113
114MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
115 MachineInstr * MI, MachineBasicBlock * BB) const {
116 MachineFunction * MF = BB->getParent();
117 MachineRegisterInfo &MRI = MF->getRegInfo();
118 MachineBasicBlock::iterator I = *MI;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000119 const R600InstrInfo *TII =
120 static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000121
122 switch (MI->getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000123 default:
Tom Stellard13c68ef2013-09-05 18:38:09 +0000124 if (TII->isLDSInstr(MI->getOpcode()) &&
125 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
126 int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
127 assert(DstIdx != -1);
128 MachineInstrBuilder NewMI;
129 if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
130 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
131 AMDGPU::OQAP);
132 TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
133 MI->getOperand(0).getReg(),
134 AMDGPU::OQAP);
135 } else {
136 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
137 TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
138 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000139 for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
140 NewMI.addOperand(MI->getOperand(i));
141 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000142 } else {
143 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
144 }
145 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000146 case AMDGPU::CLAMP_R600: {
147 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
148 AMDGPU::MOV,
149 MI->getOperand(0).getReg(),
150 MI->getOperand(1).getReg());
151 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
152 break;
153 }
154
155 case AMDGPU::FABS_R600: {
156 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
157 AMDGPU::MOV,
158 MI->getOperand(0).getReg(),
159 MI->getOperand(1).getReg());
160 TII->addFlag(NewMI, 0, MO_FLAG_ABS);
161 break;
162 }
163
164 case AMDGPU::FNEG_R600: {
165 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
166 AMDGPU::MOV,
167 MI->getOperand(0).getReg(),
168 MI->getOperand(1).getReg());
169 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
170 break;
171 }
172
Tom Stellard75aadc22012-12-11 21:25:42 +0000173 case AMDGPU::MASK_WRITE: {
174 unsigned maskedRegister = MI->getOperand(0).getReg();
175 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
176 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
177 TII->addFlag(defInstr, 0, MO_FLAG_MASK);
178 break;
179 }
180
181 case AMDGPU::MOV_IMM_F32:
182 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
183 MI->getOperand(1).getFPImm()->getValueAPF()
184 .bitcastToAPInt().getZExtValue());
185 break;
186 case AMDGPU::MOV_IMM_I32:
187 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
188 MI->getOperand(1).getImm());
189 break;
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000190 case AMDGPU::CONST_COPY: {
191 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
192 MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
Tom Stellard02661d92013-06-25 21:22:18 +0000193 TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000194 MI->getOperand(1).getImm());
195 break;
196 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000197
198 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000199 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000200 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
201 unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
202
203 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
204 .addOperand(MI->getOperand(0))
205 .addOperand(MI->getOperand(1))
206 .addImm(EOP); // Set End of program bit
207 break;
208 }
209
Tom Stellard75aadc22012-12-11 21:25:42 +0000210 case AMDGPU::TXD: {
211 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
212 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000213 MachineOperand &RID = MI->getOperand(4);
214 MachineOperand &SID = MI->getOperand(5);
215 unsigned TextureId = MI->getOperand(6).getImm();
216 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
217 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000218
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000219 switch (TextureId) {
220 case 5: // Rect
221 CTX = CTY = 0;
222 break;
223 case 6: // Shadow1D
224 SrcW = SrcZ;
225 break;
226 case 7: // Shadow2D
227 SrcW = SrcZ;
228 break;
229 case 8: // ShadowRect
230 CTX = CTY = 0;
231 SrcW = SrcZ;
232 break;
233 case 9: // 1DArray
234 SrcZ = SrcY;
235 CTZ = 0;
236 break;
237 case 10: // 2DArray
238 CTZ = 0;
239 break;
240 case 11: // Shadow1DArray
241 SrcZ = SrcY;
242 CTZ = 0;
243 break;
244 case 12: // Shadow2DArray
245 CTZ = 0;
246 break;
247 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000248 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
249 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000250 .addImm(SrcX)
251 .addImm(SrcY)
252 .addImm(SrcZ)
253 .addImm(SrcW)
254 .addImm(0)
255 .addImm(0)
256 .addImm(0)
257 .addImm(0)
258 .addImm(1)
259 .addImm(2)
260 .addImm(3)
261 .addOperand(RID)
262 .addOperand(SID)
263 .addImm(CTX)
264 .addImm(CTY)
265 .addImm(CTZ)
266 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000267 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
268 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000269 .addImm(SrcX)
270 .addImm(SrcY)
271 .addImm(SrcZ)
272 .addImm(SrcW)
273 .addImm(0)
274 .addImm(0)
275 .addImm(0)
276 .addImm(0)
277 .addImm(1)
278 .addImm(2)
279 .addImm(3)
280 .addOperand(RID)
281 .addOperand(SID)
282 .addImm(CTX)
283 .addImm(CTY)
284 .addImm(CTZ)
285 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000286 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
287 .addOperand(MI->getOperand(0))
288 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000289 .addImm(SrcX)
290 .addImm(SrcY)
291 .addImm(SrcZ)
292 .addImm(SrcW)
293 .addImm(0)
294 .addImm(0)
295 .addImm(0)
296 .addImm(0)
297 .addImm(1)
298 .addImm(2)
299 .addImm(3)
300 .addOperand(RID)
301 .addOperand(SID)
302 .addImm(CTX)
303 .addImm(CTY)
304 .addImm(CTZ)
305 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000306 .addReg(T0, RegState::Implicit)
307 .addReg(T1, RegState::Implicit);
308 break;
309 }
310
311 case AMDGPU::TXD_SHADOW: {
312 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
313 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000314 MachineOperand &RID = MI->getOperand(4);
315 MachineOperand &SID = MI->getOperand(5);
316 unsigned TextureId = MI->getOperand(6).getImm();
317 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
318 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
319
320 switch (TextureId) {
321 case 5: // Rect
322 CTX = CTY = 0;
323 break;
324 case 6: // Shadow1D
325 SrcW = SrcZ;
326 break;
327 case 7: // Shadow2D
328 SrcW = SrcZ;
329 break;
330 case 8: // ShadowRect
331 CTX = CTY = 0;
332 SrcW = SrcZ;
333 break;
334 case 9: // 1DArray
335 SrcZ = SrcY;
336 CTZ = 0;
337 break;
338 case 10: // 2DArray
339 CTZ = 0;
340 break;
341 case 11: // Shadow1DArray
342 SrcZ = SrcY;
343 CTZ = 0;
344 break;
345 case 12: // Shadow2DArray
346 CTZ = 0;
347 break;
348 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000349
350 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
351 .addOperand(MI->getOperand(3))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000352 .addImm(SrcX)
353 .addImm(SrcY)
354 .addImm(SrcZ)
355 .addImm(SrcW)
356 .addImm(0)
357 .addImm(0)
358 .addImm(0)
359 .addImm(0)
360 .addImm(1)
361 .addImm(2)
362 .addImm(3)
363 .addOperand(RID)
364 .addOperand(SID)
365 .addImm(CTX)
366 .addImm(CTY)
367 .addImm(CTZ)
368 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000369 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
370 .addOperand(MI->getOperand(2))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000371 .addImm(SrcX)
372 .addImm(SrcY)
373 .addImm(SrcZ)
374 .addImm(SrcW)
375 .addImm(0)
376 .addImm(0)
377 .addImm(0)
378 .addImm(0)
379 .addImm(1)
380 .addImm(2)
381 .addImm(3)
382 .addOperand(RID)
383 .addOperand(SID)
384 .addImm(CTX)
385 .addImm(CTY)
386 .addImm(CTZ)
387 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000388 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
389 .addOperand(MI->getOperand(0))
390 .addOperand(MI->getOperand(1))
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000391 .addImm(SrcX)
392 .addImm(SrcY)
393 .addImm(SrcZ)
394 .addImm(SrcW)
395 .addImm(0)
396 .addImm(0)
397 .addImm(0)
398 .addImm(0)
399 .addImm(1)
400 .addImm(2)
401 .addImm(3)
402 .addOperand(RID)
403 .addOperand(SID)
404 .addImm(CTX)
405 .addImm(CTY)
406 .addImm(CTZ)
407 .addImm(CTW)
Tom Stellard75aadc22012-12-11 21:25:42 +0000408 .addReg(T0, RegState::Implicit)
409 .addReg(T1, RegState::Implicit);
410 break;
411 }
412
413 case AMDGPU::BRANCH:
414 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000415 .addOperand(MI->getOperand(0));
Tom Stellard75aadc22012-12-11 21:25:42 +0000416 break;
417
418 case AMDGPU::BRANCH_COND_f32: {
419 MachineInstr *NewMI =
420 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
421 AMDGPU::PREDICATE_BIT)
422 .addOperand(MI->getOperand(1))
423 .addImm(OPCODE_IS_NOT_ZERO)
424 .addImm(0); // Flags
425 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000426 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000427 .addOperand(MI->getOperand(0))
428 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
429 break;
430 }
431
432 case AMDGPU::BRANCH_COND_i32: {
433 MachineInstr *NewMI =
434 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
435 AMDGPU::PREDICATE_BIT)
436 .addOperand(MI->getOperand(1))
437 .addImm(OPCODE_IS_NOT_ZERO_INT)
438 .addImm(0); // Flags
439 TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000440 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Tom Stellard75aadc22012-12-11 21:25:42 +0000441 .addOperand(MI->getOperand(0))
442 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
443 break;
444 }
445
Tom Stellard75aadc22012-12-11 21:25:42 +0000446 case AMDGPU::EG_ExportSwz:
447 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000448 // Instruction is left unmodified if its not the last one of its type
449 bool isLastInstructionOfItsType = true;
450 unsigned InstExportType = MI->getOperand(1).getImm();
451 for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
452 EndBlock = BB->end(); NextExportInst != EndBlock;
453 NextExportInst = llvm::next(NextExportInst)) {
454 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
455 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
456 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
457 .getImm();
458 if (CurrentInstExportType == InstExportType) {
459 isLastInstructionOfItsType = false;
460 break;
461 }
462 }
463 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000464 bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
Tom Stellard6f1b8652013-01-23 21:39:49 +0000465 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000466 return BB;
467 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
468 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
469 .addOperand(MI->getOperand(0))
470 .addOperand(MI->getOperand(1))
471 .addOperand(MI->getOperand(2))
472 .addOperand(MI->getOperand(3))
473 .addOperand(MI->getOperand(4))
474 .addOperand(MI->getOperand(5))
475 .addOperand(MI->getOperand(6))
476 .addImm(CfInst)
Tom Stellard6f1b8652013-01-23 21:39:49 +0000477 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000478 break;
479 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000480 case AMDGPU::RETURN: {
481 // RETURN instructions must have the live-out registers as implicit uses,
482 // otherwise they appear dead.
483 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
484 MachineInstrBuilder MIB(*MF, MI);
485 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
486 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
487 return BB;
488 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000489 }
490
491 MI->eraseFromParent();
492 return BB;
493}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

Tom Stellard75aadc22012-12-11 21:25:42 +0000499SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000500 MachineFunction &MF = DAG.getMachineFunction();
501 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000502 switch (Op.getOpcode()) {
503 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000504 case ISD::FCOS:
505 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000506 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000507 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard365366f2013-01-23 02:09:06 +0000508 case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000509 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000510 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000511 case ISD::INTRINSIC_VOID: {
512 SDValue Chain = Op.getOperand(0);
513 unsigned IntrinsicID =
514 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
515 switch (IntrinsicID) {
516 case AMDGPUIntrinsic::AMDGPU_store_output: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000517 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
518 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000519 MFI->LiveOuts.push_back(Reg);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000520 return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
Tom Stellard75aadc22012-12-11 21:25:42 +0000521 }
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000522 case AMDGPUIntrinsic::R600_store_swizzle: {
523 const SDValue Args[8] = {
524 Chain,
525 Op.getOperand(2), // Export Value
526 Op.getOperand(3), // ArrayBase
527 Op.getOperand(4), // Type
528 DAG.getConstant(0, MVT::i32), // SWZ_X
529 DAG.getConstant(1, MVT::i32), // SWZ_Y
530 DAG.getConstant(2, MVT::i32), // SWZ_Z
531 DAG.getConstant(3, MVT::i32) // SWZ_W
532 };
Andrew Trickef9de2a2013-05-25 02:42:55 +0000533 return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000534 Args, 8);
Tom Stellard75aadc22012-12-11 21:25:42 +0000535 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000536
Tom Stellard75aadc22012-12-11 21:25:42 +0000537 // default for switch(IntrinsicID)
538 default: break;
539 }
540 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
541 break;
542 }
543 case ISD::INTRINSIC_WO_CHAIN: {
544 unsigned IntrinsicID =
545 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
546 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000547 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000548 switch(IntrinsicID) {
549 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
550 case AMDGPUIntrinsic::R600_load_input: {
551 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
552 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Vincent Lejeuned3fcb502013-05-17 16:51:06 +0000553 MachineFunction &MF = DAG.getMachineFunction();
554 MachineRegisterInfo &MRI = MF.getRegInfo();
555 MRI.addLiveIn(Reg);
556 return DAG.getCopyFromReg(DAG.getEntryNode(),
Andrew Trickef9de2a2013-05-25 02:42:55 +0000557 SDLoc(DAG.getEntryNode()), Reg, VT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000558 }
Tom Stellard41afe6a2013-02-05 17:09:14 +0000559
560 case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard75aadc22012-12-11 21:25:42 +0000561 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard41afe6a2013-02-05 17:09:14 +0000562 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
563 MachineSDNode *interp;
564 if (ijb < 0) {
Bill Wendling37e9adb2013-06-07 20:28:55 +0000565 const MachineFunction &MF = DAG.getMachineFunction();
566 const R600InstrInfo *TII =
567 static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
Tom Stellard41afe6a2013-02-05 17:09:14 +0000568 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
569 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
570 return DAG.getTargetExtractSubreg(
571 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
572 DL, MVT::f32, SDValue(interp, 0));
573 }
574
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000575 MachineFunction &MF = DAG.getMachineFunction();
576 MachineRegisterInfo &MRI = MF.getRegInfo();
577 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
578 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
579 MRI.addLiveIn(RegisterI);
580 MRI.addLiveIn(RegisterJ);
581 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
582 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
583 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
584 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
585
Tom Stellard41afe6a2013-02-05 17:09:14 +0000586 if (slot % 4 < 2)
587 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
588 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000589 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000590 else
591 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
592 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
Vincent Lejeunea09873d2013-06-03 15:44:16 +0000593 RegisterJNode, RegisterINode);
Tom Stellard41afe6a2013-02-05 17:09:14 +0000594 return SDValue(interp, slot % 2);
Tom Stellard75aadc22012-12-11 21:25:42 +0000595 }
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000596 case AMDGPUIntrinsic::R600_tex:
597 case AMDGPUIntrinsic::R600_texc:
598 case AMDGPUIntrinsic::R600_txl:
599 case AMDGPUIntrinsic::R600_txlc:
600 case AMDGPUIntrinsic::R600_txb:
601 case AMDGPUIntrinsic::R600_txbc:
602 case AMDGPUIntrinsic::R600_txf:
603 case AMDGPUIntrinsic::R600_txq:
604 case AMDGPUIntrinsic::R600_ddx:
605 case AMDGPUIntrinsic::R600_ddy: {
606 unsigned TextureOp;
607 switch (IntrinsicID) {
608 case AMDGPUIntrinsic::R600_tex:
609 TextureOp = 0;
610 break;
611 case AMDGPUIntrinsic::R600_texc:
612 TextureOp = 1;
613 break;
614 case AMDGPUIntrinsic::R600_txl:
615 TextureOp = 2;
616 break;
617 case AMDGPUIntrinsic::R600_txlc:
618 TextureOp = 3;
619 break;
620 case AMDGPUIntrinsic::R600_txb:
621 TextureOp = 4;
622 break;
623 case AMDGPUIntrinsic::R600_txbc:
624 TextureOp = 5;
625 break;
626 case AMDGPUIntrinsic::R600_txf:
627 TextureOp = 6;
628 break;
629 case AMDGPUIntrinsic::R600_txq:
630 TextureOp = 7;
631 break;
632 case AMDGPUIntrinsic::R600_ddx:
633 TextureOp = 8;
634 break;
635 case AMDGPUIntrinsic::R600_ddy:
636 TextureOp = 9;
637 break;
638 default:
639 llvm_unreachable("Unknow Texture Operation");
640 }
641
642 SDValue TexArgs[19] = {
643 DAG.getConstant(TextureOp, MVT::i32),
644 Op.getOperand(1),
645 DAG.getConstant(0, MVT::i32),
646 DAG.getConstant(1, MVT::i32),
647 DAG.getConstant(2, MVT::i32),
648 DAG.getConstant(3, MVT::i32),
649 Op.getOperand(2),
650 Op.getOperand(3),
651 Op.getOperand(4),
652 DAG.getConstant(0, MVT::i32),
653 DAG.getConstant(1, MVT::i32),
654 DAG.getConstant(2, MVT::i32),
655 DAG.getConstant(3, MVT::i32),
656 Op.getOperand(5),
657 Op.getOperand(6),
658 Op.getOperand(7),
659 Op.getOperand(8),
660 Op.getOperand(9),
661 Op.getOperand(10)
662 };
663 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
664 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000665 case AMDGPUIntrinsic::AMDGPU_dp4: {
666 SDValue Args[8] = {
667 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
668 DAG.getConstant(0, MVT::i32)),
669 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
670 DAG.getConstant(0, MVT::i32)),
671 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
672 DAG.getConstant(1, MVT::i32)),
673 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
674 DAG.getConstant(1, MVT::i32)),
675 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
676 DAG.getConstant(2, MVT::i32)),
677 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
678 DAG.getConstant(2, MVT::i32)),
679 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
680 DAG.getConstant(3, MVT::i32)),
681 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
682 DAG.getConstant(3, MVT::i32))
683 };
684 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
685 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000686
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000687 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000688 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000689 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000690 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000691 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000692 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000693 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000694 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000695 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000696 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000697 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000698 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000699 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000700 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000701 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000702 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000703 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000704 return LowerImplicitParameter(DAG, VT, DL, 8);
705
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000706 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000707 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
708 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000709 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000710 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
711 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000712 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000713 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
714 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000715 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000716 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
717 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000718 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000719 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
720 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000721 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000722 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
723 AMDGPU::T0_Z, VT);
724 }
725 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
726 break;
727 }
728 } // end switch(Op.getOpcode())
729 return SDValue();
730}
731
732void R600TargetLowering::ReplaceNodeResults(SDNode *N,
733 SmallVectorImpl<SDValue> &Results,
734 SelectionDAG &DAG) const {
735 switch (N->getOpcode()) {
736 default: return;
737 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard365366f2013-01-23 02:09:06 +0000738 return;
739 case ISD::LOAD: {
740 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
741 Results.push_back(SDValue(Node, 0));
742 Results.push_back(SDValue(Node, 1));
743 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
744 // function
745 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
746 return;
747 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000748 case ISD::STORE:
749 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
750 Results.push_back(SDValue(Node, 0));
751 return;
Tom Stellard75aadc22012-12-11 21:25:42 +0000752 }
753}
754
/// Lower FSIN/FCOS to the hardware SIN_HW/COS_HW nodes, rescaling the
/// argument into the input range the hardware expects.
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  // 0.15915494309 ~= 1 / (2 * Pi): convert radians to revolutions, then add
  // 0.5 so that FRACT maps the result into [0, 1).
  // NOTE(review): the constants are created as MVT::f32 while the FMUL/FADD
  // nodes use VT — presumably callers only reach here with VT == f32; confirm.
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
          DAG.getConstantFP(0.15915494309, MVT::f32)),
        DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  // Re-center the fractional part from [0, 1) into [-0.5, 0.5) before
  // feeding the hardware trig node.
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
        DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  // Scale the normalized value back up by Pi for the older generation.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}
785
Tom Stellard75aadc22012-12-11 21:25:42 +0000786SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
787 return DAG.getNode(
788 ISD::SETCC,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000789 SDLoc(Op),
Tom Stellard75aadc22012-12-11 21:25:42 +0000790 MVT::i1,
791 Op, DAG.getConstantFP(0.0f, MVT::f32),
792 DAG.getCondCode(ISD::SETNE)
793 );
794}
795
Tom Stellard75aadc22012-12-11 21:25:42 +0000796SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Andrew Trickef9de2a2013-05-25 02:42:55 +0000797 SDLoc DL,
Tom Stellard75aadc22012-12-11 21:25:42 +0000798 unsigned DwordOffset) const {
799 unsigned ByteOffset = DwordOffset * 4;
800 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +0000801 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +0000802
803 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
804 assert(isInt<16>(ByteOffset));
805
806 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
807 DAG.getConstant(ByteOffset, MVT::i32), // PTR
808 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
809 false, false, false, 0);
810}
811
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000812SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
813
814 MachineFunction &MF = DAG.getMachineFunction();
815 const AMDGPUFrameLowering *TFL =
816 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
817
818 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
819 assert(FIN);
820
821 unsigned FrameIndex = FIN->getIndex();
822 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
823 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
824}
825
Tom Stellard75aadc22012-12-11 21:25:42 +0000826bool R600TargetLowering::isZero(SDValue Op) const {
827 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
828 return Cst->isNullValue();
829 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
830 return CstFP->isZero();
831 } else {
832 return false;
833 }
834}
835
836SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000837 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000838 EVT VT = Op.getValueType();
839
840 SDValue LHS = Op.getOperand(0);
841 SDValue RHS = Op.getOperand(1);
842 SDValue True = Op.getOperand(2);
843 SDValue False = Op.getOperand(3);
844 SDValue CC = Op.getOperand(4);
845 SDValue Temp;
846
847 // LHS and RHS are guaranteed to be the same value type
848 EVT CompareVT = LHS.getValueType();
849
850 // Check if we can lower this to a native operation.
851
Tom Stellard2add82d2013-03-08 15:37:09 +0000852 // Try to lower to a SET* instruction:
853 //
854 // SET* can match the following patterns:
855 //
Tom Stellardcd428182013-09-28 02:50:38 +0000856 // select_cc f32, f32, -1, 0, cc_supported
857 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
858 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000859 //
860
861 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +0000862 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
863 ISD::CondCode InverseCC =
864 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +0000865 if (isHWTrueValue(False) && isHWFalseValue(True)) {
866 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
867 std::swap(False, True);
868 CC = DAG.getCondCode(InverseCC);
869 } else {
870 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
871 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
872 std::swap(False, True);
873 std::swap(LHS, RHS);
874 CC = DAG.getCondCode(SwapInvCC);
875 }
876 }
Tom Stellard2add82d2013-03-08 15:37:09 +0000877 }
878
879 if (isHWTrueValue(True) && isHWFalseValue(False) &&
880 (CompareVT == VT || VT == MVT::i32)) {
881 // This can be matched by a SET* instruction.
882 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
883 }
884
Tom Stellard75aadc22012-12-11 21:25:42 +0000885 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +0000886 //
887 // CND* can match the following patterns:
888 //
Tom Stellardcd428182013-09-28 02:50:38 +0000889 // select_cc f32, 0.0, f32, f32, cc_supported
890 // select_cc f32, 0.0, i32, i32, cc_supported
891 // select_cc i32, 0, f32, f32, cc_supported
892 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +0000893 //
Tom Stellardcd428182013-09-28 02:50:38 +0000894
895 // Try to move the zero value to the RHS
896 if (isZero(LHS)) {
897 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
898 // Try swapping the operands
899 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
900 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
901 std::swap(LHS, RHS);
902 CC = DAG.getCondCode(CCSwapped);
903 } else {
904 // Try inverting the conditon and then swapping the operands
905 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
906 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
907 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
908 std::swap(True, False);
909 std::swap(LHS, RHS);
910 CC = DAG.getCondCode(CCSwapped);
911 }
912 }
913 }
914 if (isZero(RHS)) {
915 SDValue Cond = LHS;
916 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +0000917 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
918 if (CompareVT != VT) {
919 // Bitcast True / False to the correct types. This will end up being
920 // a nop, but it allows us to define only a single pattern in the
921 // .TD files for each CND* instruction rather than having to have
922 // one pattern for integer True/False and one for fp True/False
923 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
924 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
925 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000926
927 switch (CCOpcode) {
928 case ISD::SETONE:
929 case ISD::SETUNE:
930 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +0000931 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
932 Temp = True;
933 True = False;
934 False = Temp;
935 break;
936 default:
937 break;
938 }
939 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
940 Cond, Zero,
941 True, False,
942 DAG.getCondCode(CCOpcode));
943 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
944 }
945
Tom Stellard75aadc22012-12-11 21:25:42 +0000946
947 // Possible Min/Max pattern
948 SDValue MinMax = LowerMinMax(Op, DAG);
949 if (MinMax.getNode()) {
950 return MinMax;
951 }
952
953 // If we make it this for it means we have no native instructions to handle
954 // this SELECT_CC, so we must lower it.
955 SDValue HWTrue, HWFalse;
956
957 if (CompareVT == MVT::f32) {
958 HWTrue = DAG.getConstantFP(1.0f, CompareVT);
959 HWFalse = DAG.getConstantFP(0.0f, CompareVT);
960 } else if (CompareVT == MVT::i32) {
961 HWTrue = DAG.getConstant(-1, CompareVT);
962 HWFalse = DAG.getConstant(0, CompareVT);
963 }
964 else {
965 assert(!"Unhandled value type in LowerSELECT_CC");
966 }
967
968 // Lower this unsupported SELECT_CC into a combination of two supported
969 // SELECT_CC operations.
970 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
971
972 return DAG.getNode(ISD::SELECT_CC, DL, VT,
973 Cond, HWFalse,
974 True, False,
975 DAG.getCondCode(ISD::SETNE));
976}
977
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000978/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
979/// convert these pointers to a register index. Each register holds
980/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
981/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
982/// for indirect addressing.
983SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
984 unsigned StackWidth,
985 SelectionDAG &DAG) const {
986 unsigned SRLPad;
987 switch(StackWidth) {
988 case 1:
989 SRLPad = 2;
990 break;
991 case 2:
992 SRLPad = 3;
993 break;
994 case 4:
995 SRLPad = 4;
996 break;
997 default: llvm_unreachable("Invalid stack width");
998 }
999
Andrew Trickef9de2a2013-05-25 02:42:55 +00001000 return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001001 DAG.getConstant(SRLPad, MVT::i32));
1002}
1003
1004void R600TargetLowering::getStackAddress(unsigned StackWidth,
1005 unsigned ElemIdx,
1006 unsigned &Channel,
1007 unsigned &PtrIncr) const {
1008 switch (StackWidth) {
1009 default:
1010 case 1:
1011 Channel = 0;
1012 if (ElemIdx > 0) {
1013 PtrIncr = 1;
1014 } else {
1015 PtrIncr = 0;
1016 }
1017 break;
1018 case 2:
1019 Channel = ElemIdx % 2;
1020 if (ElemIdx == 2) {
1021 PtrIncr = 1;
1022 } else {
1023 PtrIncr = 0;
1024 }
1025 break;
1026 case 4:
1027 Channel = ElemIdx;
1028 PtrIncr = 0;
1029 break;
1030 }
1031}
1032
Tom Stellard75aadc22012-12-11 21:25:42 +00001033SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001034 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001035 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1036 SDValue Chain = Op.getOperand(0);
1037 SDValue Value = Op.getOperand(1);
1038 SDValue Ptr = Op.getOperand(2);
1039
Tom Stellard2ffc3302013-08-26 15:05:44 +00001040 SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
Tom Stellardfbab8272013-08-16 01:12:11 +00001041 if (Result.getNode()) {
1042 return Result;
1043 }
1044
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001045 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
1046 if (StoreNode->isTruncatingStore()) {
1047 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001048 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001049 EVT MemVT = StoreNode->getMemoryVT();
1050 SDValue MaskConstant;
1051 if (MemVT == MVT::i8) {
1052 MaskConstant = DAG.getConstant(0xFF, MVT::i32);
1053 } else {
1054 assert(MemVT == MVT::i16);
1055 MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
1056 }
1057 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
1058 DAG.getConstant(2, MVT::i32));
1059 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
1060 DAG.getConstant(0x00000003, VT));
1061 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1062 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1063 DAG.getConstant(3, VT));
1064 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1065 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1066 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1067 // vector instead.
1068 SDValue Src[4] = {
1069 ShiftedValue,
1070 DAG.getConstant(0, MVT::i32),
1071 DAG.getConstant(0, MVT::i32),
1072 Mask
1073 };
1074 SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
1075 SDValue Args[3] = { Chain, Input, DWordAddr };
1076 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1077 Op->getVTList(), Args, 3, MemVT,
1078 StoreNode->getMemOperand());
1079 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
1080 Value.getValueType().bitsGE(MVT::i32)) {
1081 // Convert pointer from byte address to dword address.
1082 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1083 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
1084 Ptr, DAG.getConstant(2, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001085
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001086 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1087 assert(!"Truncated and indexed stores not supported yet");
1088 } else {
1089 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1090 }
1091 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001092 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001093 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001094
1095 EVT ValueVT = Value.getValueType();
1096
1097 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1098 return SDValue();
1099 }
1100
1101 // Lowering for indirect addressing
1102
1103 const MachineFunction &MF = DAG.getMachineFunction();
1104 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1105 getTargetMachine().getFrameLowering());
1106 unsigned StackWidth = TFL->getStackWidth(MF);
1107
1108 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1109
1110 if (ValueVT.isVector()) {
1111 unsigned NumElemVT = ValueVT.getVectorNumElements();
1112 EVT ElemVT = ValueVT.getVectorElementType();
1113 SDValue Stores[4];
1114
1115 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1116 "vector width in load");
1117
1118 for (unsigned i = 0; i < NumElemVT; ++i) {
1119 unsigned Channel, PtrIncr;
1120 getStackAddress(StackWidth, i, Channel, PtrIncr);
1121 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1122 DAG.getConstant(PtrIncr, MVT::i32));
1123 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
1124 Value, DAG.getConstant(i, MVT::i32));
1125
1126 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1127 Chain, Elem, Ptr,
1128 DAG.getTargetConstant(Channel, MVT::i32));
1129 }
1130 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
1131 } else {
1132 if (ValueVT == MVT::i8) {
1133 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1134 }
1135 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
NAKAMURA Takumi18ca09c2013-05-22 06:37:25 +00001136 DAG.getTargetConstant(0, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001137 }
1138
1139 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001140}
1141
// Map a constant-buffer address space to its base address:
// 512 + (kc_bank << 12), i.e. 512 + 4096 * kc_bank.
// Returns -1 for any address space that is not a constant buffer.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1182
/// Custom lowering for ISD::LOAD.
///
/// Handles, in order: vector loads from local memory (split in two),
/// constant-buffer loads (folded to CONST_ADDRESS nodes when the pointer is
/// constant), sign-extending loads outside constant buffer 0 (expanded to
/// extload + shl/sra), and private (stack) loads via REGISTER_LOAD.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  // Vector loads from local memory are split into two smaller loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    // Only a pointer known to be constant can be folded into a direct
    // CONST_ADDRESS with an absolute position.
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // non constant ptr can't be folded, keep it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    // Scalar loads take element 0 of the 4-wide constant fetch.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer. However SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Emit a plain extending load, then sign-extend in registers via
    // shift-left / arithmetic-shift-right by the width difference.
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; register index / channel advance
    // according to the stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    // Pad the remaining lanes with undef to form a full 4-wide vector.
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001317
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Each formal argument is materialized as a load from constant buffer 0,
/// at 36 bytes plus the offset assigned by the calling-convention analysis.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // Assign a buffer offset to each incoming argument.
  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)), false,
                           false, false, 4); // 4 is the preferred alignment for
                                             // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}
1352
Matt Arsenault758659232013-05-18 00:21:46 +00001353EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard75aadc22012-12-11 21:25:42 +00001354 if (!VT.isVector()) return MVT::i32;
1355 return VT.changeVectorElementTypeToInteger();
1356}
1357
/// Rewrite a 4-wide BUILD_VECTOR so lanes that are constant 0.0 / 1.0 or
/// duplicates of an earlier lane become undef, recording how each lane should
/// be re-selected in \p RemapSwizzle (4 = SEL_0, 5 = SEL_1, otherwise the
/// index of the surviving duplicate lane).
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    // Constant 0.0 / 1.0 lanes can come from the hardware's SEL_0 / SEL_1
    // swizzle selectors instead of occupying a vector lane.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    // A lane equal to an earlier lane is folded onto it: the earlier lane's
    // index becomes this lane's swizzle selector.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1395
/// Move BUILD_VECTOR operands that are EXTRACT_VECTOR_ELT nodes into the lane
/// matching the element they extract, so the whole BUILD_VECTOR can become a
/// plain swizzle of the source vector.  The lane permutation performed is
/// recorded in \p RemapSwizzle (initialized here to the identity).
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      // Once a lane holds the extract of its own element index it must not
      // be displaced by a later swap.
      if (!isUnmovable[Idx]) {
        // Swap i and Idx
        std::swap(NewBldVec[Idx], NewBldVec[i]);
        // NOTE(review): the map is updated through a double indirection
        // (RemapSwizzle[RemapSwizzle[...]]) to track lanes already moved by
        // earlier swaps — presumably intentional; verify against the
        // consumers of the remap in OptimizeSwizzle.
        std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
      }
      isUnmovable[Idx] = true;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
      VectorEntry.getValueType(), NewBldVec, 4);
}
1426
1427
1428SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
1429SDValue Swz[4], SelectionDAG &DAG) const {
1430 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1431 // Old -> New swizzle values
1432 DenseMap<unsigned, unsigned> SwizzleRemap;
1433
1434 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1435 for (unsigned i = 0; i < 4; i++) {
1436 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1437 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1438 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1439 }
1440
1441 SwizzleRemap.clear();
1442 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1443 for (unsigned i = 0; i < 4; i++) {
1444 unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
1445 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1446 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
1447 }
1448
1449 return BuildVector;
1450}
1451
1452
Tom Stellard75aadc22012-12-11 21:25:42 +00001453//===----------------------------------------------------------------------===//
1454// Custom DAG Optimizations
1455//===----------------------------------------------------------------------===//
1456
1457SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1458 DAGCombinerInfo &DCI) const {
1459 SelectionDAG &DAG = DCI.DAG;
1460
1461 switch (N->getOpcode()) {
1462 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1463 case ISD::FP_ROUND: {
1464 SDValue Arg = N->getOperand(0);
1465 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001466 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001467 Arg.getOperand(0));
1468 }
1469 break;
1470 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001471
1472 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1473 // (i32 select_cc f32, f32, -1, 0 cc)
1474 //
1475 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1476 // this to one of the SET*_DX10 instructions.
1477 case ISD::FP_TO_SINT: {
1478 SDValue FNeg = N->getOperand(0);
1479 if (FNeg.getOpcode() != ISD::FNEG) {
1480 return SDValue();
1481 }
1482 SDValue SelectCC = FNeg.getOperand(0);
1483 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1484 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1485 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1486 !isHWTrueValue(SelectCC.getOperand(2)) ||
1487 !isHWFalseValue(SelectCC.getOperand(3))) {
1488 return SDValue();
1489 }
1490
Andrew Trickef9de2a2013-05-25 02:42:55 +00001491 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001492 SelectCC.getOperand(0), // LHS
1493 SelectCC.getOperand(1), // RHS
1494 DAG.getConstant(-1, MVT::i32), // True
1495 DAG.getConstant(0, MVT::i32), // Flase
1496 SelectCC.getOperand(4)); // CC
1497
1498 break;
1499 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001500
1501 // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
1502 // => build_vector elt0, …, NewEltIdx, …, eltN
1503 case ISD::INSERT_VECTOR_ELT: {
1504 SDValue InVec = N->getOperand(0);
1505 SDValue InVal = N->getOperand(1);
1506 SDValue EltNo = N->getOperand(2);
1507 SDLoc dl(N);
1508
1509 // If the inserted element is an UNDEF, just use the input vector.
1510 if (InVal.getOpcode() == ISD::UNDEF)
1511 return InVec;
1512
1513 EVT VT = InVec.getValueType();
1514
1515 // If we can't generate a legal BUILD_VECTOR, exit
1516 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1517 return SDValue();
1518
1519 // Check that we know which element is being inserted
1520 if (!isa<ConstantSDNode>(EltNo))
1521 return SDValue();
1522 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1523
1524 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1525 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1526 // vector elements.
1527 SmallVector<SDValue, 8> Ops;
1528 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1529 Ops.append(InVec.getNode()->op_begin(),
1530 InVec.getNode()->op_end());
1531 } else if (InVec.getOpcode() == ISD::UNDEF) {
1532 unsigned NElts = VT.getVectorNumElements();
1533 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1534 } else {
1535 return SDValue();
1536 }
1537
1538 // Insert the element
1539 if (Elt < Ops.size()) {
1540 // All the operands of BUILD_VECTOR must have the same type;
1541 // we enforce that here.
1542 EVT OpVT = Ops[0].getValueType();
1543 if (InVal.getValueType() != OpVT)
1544 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1545 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1546 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1547 Ops[Elt] = InVal;
1548 }
1549
1550 // Return the new vector
1551 return DAG.getNode(ISD::BUILD_VECTOR, dl,
1552 VT, &Ops[0], Ops.size());
1553 }
1554
Tom Stellard365366f2013-01-23 02:09:06 +00001555 // Extract_vec (Build_vector) generated by custom lowering
1556 // also needs to be combined here with a custom combine
1557 case ISD::EXTRACT_VECTOR_ELT: {
1558 SDValue Arg = N->getOperand(0);
1559 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1560 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1561 unsigned Element = Const->getZExtValue();
1562 return Arg->getOperand(Element);
1563 }
1564 }
Tom Stellarddd04c832013-01-31 22:11:53 +00001565 if (Arg.getOpcode() == ISD::BITCAST &&
1566 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1567 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1568 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001569 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00001570 Arg->getOperand(0).getOperand(Element));
1571 }
1572 }
Tom Stellard365366f2013-01-23 02:09:06 +00001573 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001574
1575 case ISD::SELECT_CC: {
1576 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1577 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00001578 //
1579 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1580 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00001581 SDValue LHS = N->getOperand(0);
1582 if (LHS.getOpcode() != ISD::SELECT_CC) {
1583 return SDValue();
1584 }
1585
1586 SDValue RHS = N->getOperand(1);
1587 SDValue True = N->getOperand(2);
1588 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00001589 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00001590
1591 if (LHS.getOperand(2).getNode() != True.getNode() ||
1592 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00001593 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00001594 return SDValue();
1595 }
1596
Tom Stellard5e524892013-03-08 15:37:11 +00001597 switch (NCC) {
1598 default: return SDValue();
1599 case ISD::SETNE: return LHS;
1600 case ISD::SETEQ: {
1601 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1602 LHSCC = ISD::getSetCCInverse(LHSCC,
1603 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00001604 if (DCI.isBeforeLegalizeOps() ||
1605 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1606 return DAG.getSelectCC(SDLoc(N),
1607 LHS.getOperand(0),
1608 LHS.getOperand(1),
1609 LHS.getOperand(2),
1610 LHS.getOperand(3),
1611 LHSCC);
1612 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001613 }
Tom Stellard5e524892013-03-08 15:37:11 +00001614 }
Tom Stellardcd428182013-09-28 02:50:38 +00001615 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00001616 }
Tom Stellardfbab8272013-08-16 01:12:11 +00001617
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001618 case AMDGPUISD::EXPORT: {
1619 SDValue Arg = N->getOperand(1);
1620 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1621 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001622
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001623 SDValue NewArgs[8] = {
1624 N->getOperand(0), // Chain
1625 SDValue(),
1626 N->getOperand(2), // ArrayBase
1627 N->getOperand(3), // Type
1628 N->getOperand(4), // SWZ_X
1629 N->getOperand(5), // SWZ_Y
1630 N->getOperand(6), // SWZ_Z
1631 N->getOperand(7) // SWZ_W
1632 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00001633 SDLoc DL(N);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001634 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
Vincent Lejeuned80bc152013-02-14 16:55:06 +00001635 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellarde06163a2013-02-07 14:02:35 +00001636 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001637 case AMDGPUISD::TEXTURE_FETCH: {
1638 SDValue Arg = N->getOperand(1);
1639 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1640 break;
1641
1642 SDValue NewArgs[19] = {
1643 N->getOperand(0),
1644 N->getOperand(1),
1645 N->getOperand(2),
1646 N->getOperand(3),
1647 N->getOperand(4),
1648 N->getOperand(5),
1649 N->getOperand(6),
1650 N->getOperand(7),
1651 N->getOperand(8),
1652 N->getOperand(9),
1653 N->getOperand(10),
1654 N->getOperand(11),
1655 N->getOperand(12),
1656 N->getOperand(13),
1657 N->getOperand(14),
1658 N->getOperand(15),
1659 N->getOperand(16),
1660 N->getOperand(17),
1661 N->getOperand(18),
1662 };
1663 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
1664 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
1665 NewArgs, 19);
1666 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001667 }
1668 return SDValue();
1669}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001670
1671static bool
1672FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001673 SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001674 const R600InstrInfo *TII =
1675 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1676 if (!Src.isMachineOpcode())
1677 return false;
1678 switch (Src.getMachineOpcode()) {
1679 case AMDGPU::FNEG_R600:
1680 if (!Neg.getNode())
1681 return false;
1682 Src = Src.getOperand(0);
1683 Neg = DAG.getTargetConstant(1, MVT::i32);
1684 return true;
1685 case AMDGPU::FABS_R600:
1686 if (!Abs.getNode())
1687 return false;
1688 Src = Src.getOperand(0);
1689 Abs = DAG.getTargetConstant(1, MVT::i32);
1690 return true;
1691 case AMDGPU::CONST_COPY: {
1692 unsigned Opcode = ParentNode->getMachineOpcode();
1693 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1694
1695 if (!Sel.getNode())
1696 return false;
1697
1698 SDValue CstOffset = Src.getOperand(0);
1699 if (ParentNode->getValueType(0).isVector())
1700 return false;
1701
1702 // Gather constants values
1703 int SrcIndices[] = {
1704 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1705 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1706 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1707 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1708 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1709 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1710 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1711 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1712 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1713 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1714 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1715 };
1716 std::vector<unsigned> Consts;
1717 for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
1718 int OtherSrcIdx = SrcIndices[i];
1719 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1720 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1721 continue;
1722 if (HasDst) {
1723 OtherSrcIdx--;
1724 OtherSelIdx--;
1725 }
1726 if (RegisterSDNode *Reg =
1727 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1728 if (Reg->getReg() == AMDGPU::ALU_CONST) {
1729 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
1730 ParentNode->getOperand(OtherSelIdx));
1731 Consts.push_back(Cst->getZExtValue());
1732 }
1733 }
1734 }
1735
1736 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
1737 Consts.push_back(Cst->getZExtValue());
1738 if (!TII->fitsConstReadLimitations(Consts)) {
1739 return false;
1740 }
1741
1742 Sel = CstOffset;
1743 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
1744 return true;
1745 }
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001746 case AMDGPU::MOV_IMM_I32:
1747 case AMDGPU::MOV_IMM_F32: {
1748 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
1749 uint64_t ImmValue = 0;
1750
1751
1752 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
1753 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
1754 float FloatValue = FPC->getValueAPF().convertToFloat();
1755 if (FloatValue == 0.0) {
1756 ImmReg = AMDGPU::ZERO;
1757 } else if (FloatValue == 0.5) {
1758 ImmReg = AMDGPU::HALF;
1759 } else if (FloatValue == 1.0) {
1760 ImmReg = AMDGPU::ONE;
1761 } else {
1762 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
1763 }
1764 } else {
1765 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
1766 uint64_t Value = C->getZExtValue();
1767 if (Value == 0) {
1768 ImmReg = AMDGPU::ZERO;
1769 } else if (Value == 1) {
1770 ImmReg = AMDGPU::ONE_INT;
1771 } else {
1772 ImmValue = Value;
1773 }
1774 }
1775
1776 // Check that we aren't already using an immediate.
1777 // XXX: It's possible for an instruction to have more than one
1778 // immediate operand, but this is not supported yet.
1779 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
1780 if (!Imm.getNode())
1781 return false;
1782 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
1783 assert(C);
1784 if (C->getZExtValue())
1785 return false;
1786 Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
1787 }
1788 Src = DAG.getRegister(ImmReg, MVT::i32);
1789 return true;
1790 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001791 default:
1792 return false;
1793 }
1794}
1795
1796
1797/// \brief Fold the instructions after selecting them
1798SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1799 SelectionDAG &DAG) const {
1800 const R600InstrInfo *TII =
1801 static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1802 if (!Node->isMachineOpcode())
1803 return Node;
1804 unsigned Opcode = Node->getMachineOpcode();
1805 SDValue FakeOp;
1806
1807 std::vector<SDValue> Ops;
1808 for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1809 I != E; ++I)
1810 Ops.push_back(*I);
1811
1812 if (Opcode == AMDGPU::DOT_4) {
1813 int OperandIdx[] = {
1814 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1815 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1816 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1817 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1818 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1819 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1820 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1821 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1822 };
1823 int NegIdx[] = {
1824 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1825 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1826 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1827 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1828 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1829 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1830 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1831 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1832 };
1833 int AbsIdx[] = {
1834 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1835 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1836 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1837 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1838 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1839 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1840 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1841 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1842 };
1843 for (unsigned i = 0; i < 8; i++) {
1844 if (OperandIdx[i] < 0)
1845 return Node;
1846 SDValue &Src = Ops[OperandIdx[i] - 1];
1847 SDValue &Neg = Ops[NegIdx[i] - 1];
1848 SDValue &Abs = Ops[AbsIdx[i] - 1];
1849 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1850 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1851 if (HasDst)
1852 SelIdx--;
1853 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001854 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1855 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1856 }
1857 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
1858 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1859 SDValue &Src = Ops[i];
1860 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001861 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1862 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00001863 } else if (Opcode == AMDGPU::CLAMP_R600) {
1864 SDValue Src = Node->getOperand(0);
1865 if (!Src.isMachineOpcode() ||
1866 !TII->hasInstrModifiers(Src.getMachineOpcode()))
1867 return Node;
1868 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1869 AMDGPU::OpName::clamp);
1870 if (ClampIdx < 0)
1871 return Node;
1872 std::vector<SDValue> Ops;
1873 unsigned NumOp = Src.getNumOperands();
1874 for(unsigned i = 0; i < NumOp; ++i)
1875 Ops.push_back(Src.getOperand(i));
1876 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1877 return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1878 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001879 } else {
1880 if (!TII->hasInstrModifiers(Opcode))
1881 return Node;
1882 int OperandIdx[] = {
1883 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1884 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1885 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1886 };
1887 int NegIdx[] = {
1888 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1889 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1890 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1891 };
1892 int AbsIdx[] = {
1893 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1894 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1895 -1
1896 };
1897 for (unsigned i = 0; i < 3; i++) {
1898 if (OperandIdx[i] < 0)
1899 return Node;
1900 SDValue &Src = Ops[OperandIdx[i] - 1];
1901 SDValue &Neg = Ops[NegIdx[i] - 1];
1902 SDValue FakeAbs;
1903 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1904 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1905 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001906 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1907 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001908 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001909 ImmIdx--;
1910 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001911 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00001912 SDValue &Imm = Ops[ImmIdx];
1913 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00001914 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1915 }
1916 }
1917
1918 return Node;
1919}