//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

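  // R600's SET* ALU instructions appear to provide only the EQ/GT/GE/NE
  // comparison forms, so the LT/LE condition codes below are marked Expand
  // and legalized by swapping operands, e.g. (a < b) becomes (b > a).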
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4f32, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::Source);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
    static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
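    // LDS reads on R600 return their result through the OQAP (LDS output
    // queue) register: the LDS instruction is re-emitted as a def of OQAP,
    // followed by a MOV of OQAP into the virtual destination register. If
    // the result is unused, the no-return form of the opcode is used instead.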
    if (TII->isLDSInstr(MI->getOpcode()) &&
        TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
                        AMDGPU::OQAP);
        TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                     MI->getOperand(0).getReg(),
                                     AMDGPU::OQAP);
      } else {
        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                        TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      }
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
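    // Src* select the coordinate swizzles and CT* the per-coordinate type
    // flags (presumably 1 = normalized, 0 = unnormalized) fed to the TEX
    // instructions below. The switch adjusts them per texture target, e.g.
    // shadow targets move the compare value into W and rect targets use
    // unnormalized X/Y.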

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0));
      break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it is not the last one of its
    // type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
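    // 84 and 40 are believed to be the CF EXPORT_DONE instruction encodings
    // for Evergreen and R600 respectively.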
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace the Chain value inside
    // CustomWidenLowerNode.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE: {
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
  }
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1.0 and 1.0.
  // Thus we lower them to TRIG(FRACT(x / 2Pi + 0.5) - 0.5).
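  // For example, with x = 3*Pi/2: x/(2*Pi) + 0.5 = 1.25, FRACT(1.25) = 0.25,
  // and 0.25 - 0.5 = -0.25, i.e. -0.25 of a revolution, consistent with
  // cos(3*Pi/2) = 0 if COS_HW/SIN_HW take their operand in revolutions.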
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
          DAG.getConstantFP(0.15915494309, MVT::f32)),
        DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
        DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
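  // DwordOffset indexes the implicit-parameter block that precedes the kernel
  // arguments in CONSTANT_BUFFER_0; per the intrinsic lowering above, offsets
  // 0-2 hold ngroups.{x,y,z}, 3-5 global_size.{x,y,z} and 6-8
  // local_size.{x,y,z}.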
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                      AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
   static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());

  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
  assert(FIN);

  unsigned FrameIndex = FIN->getIndex();
  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1,  0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1,  0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True) &&
      isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
    std::swap(False, True);
    CC = DAG.getCondCode(InverseCC);
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0,   f32, f32, cc_supported
  // select_cc i32, 0,   i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS.
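  // For example, select_cc 0, x, t, f, setlt cannot be matched directly (the
  // zero is on the LHS), but swapping the operands and the condition gives
  // select_cc x, 0, t, f, setgt, which a CNDGT can match.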
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands.
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands.
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False.
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Possible Min/Max pattern.
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

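  // For example, with StackWidth == 2 each register row covers
  // 2 sub-registers x 4 bytes = 8 bytes, so byte address 24 maps to
  // register index 24 >> 3 = 3.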
  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
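  // PtrIncr is the increment relative to the previous element rather than an
  // absolute row offset, because the caller accumulates it into the pointer.
  // E.g. with StackWidth == 2, elements 0-3 yield (Channel, PtrIncr) =
  // (0,0), (1,0), (0,1), (1,0): the row pointer advances once, at element 2.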
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
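      // For example, an i8 store of value V to byte address 6 produces
      // DWordAddr = 6 >> 2 = 1, ByteIndex = 6 & 3 = 2, Shift = 2 << 3 = 16,
      // ShiftedValue = (V & 0xFF) << 16 and Mask = 0xFF << 16; STORE_MSKOR
      // is then expected to merge mem[1] = (mem[1] & ~Mask) | ShiftedValue.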
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, 3, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        assert(!"Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing.

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// Returns 512 + (kc_bank << 12) for the given constant-buffer address space,
// or -1 if it is not a constant buffer.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the const position encoded with the following formula:
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and
        // then divide by 4 at the ISel step.
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4f32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign-extended when it is uploaded
  // to the buffer. However SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing.
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
                                         getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)), false,
                           false, false, 4); // 4 is the preferred alignment for
                                             // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
   if (!VT.isVector()) return MVT::i32;
   return VT.changeVectorElementTypeToInteger();
}

static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
1355 SDValue NewBldVec[4] = {
1356 VectorEntry.getOperand(0),
1357 VectorEntry.getOperand(1),
1358 VectorEntry.getOperand(2),
1359 VectorEntry.getOperand(3)
1360 };
1361
1362 for (unsigned i = 0; i < 4; i++) {
1363 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1364 if (C->isZero()) {
1365 RemapSwizzle[i] = 4; // SEL_0
1366 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1367 } else if (C->isExactlyValue(1.0)) {
1368 RemapSwizzle[i] = 5; // SEL_1
1369 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1370 }
1371 }
1372
1373 if (NewBldVec[i].getOpcode() == ISD::UNDEF)
1374 continue;
1375 for (unsigned j = 0; j < i; j++) {
1376 if (NewBldVec[i] == NewBldVec[j]) {
1377 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1378 RemapSwizzle[i] = j;
1379 break;
1380 }
1381 }
1382 }
1383
1384 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
1385 VectorEntry.getValueType(), NewBldVec, 4);
1386}
1387
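// ReorganizeVector tries to move each EXTRACT_VECTOR_ELT operand of a
// BUILD_VECTOR into the lane it is extracted from, so the element can be read
// in place and the reordering is expressed purely through the swizzle values
// recorded in RemapSwizzle.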
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (!isUnmovable[Idx]) {
        // Swap i and Idx
        std::swap(NewBldVec[Idx], NewBldVec[i]);
        std::swap(RemapSwizzle[RemapSwizzle[Idx]],
                  RemapSwizzle[RemapSwizzle[i]]);
      }
      isUnmovable[Idx] = true;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}

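// OptimizeSwizzle applies both rewrites above to a BUILD_VECTOR that feeds an
// instruction with explicit swizzle operands (EXPORT, TEXTURE_FETCH): it first
// compacts constant and duplicated channels, then reorders extracted elements,
// updating the caller's four swizzle operands in Swz after each pass.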
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  return BuildVector;
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
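  // The DX10 variants produce the integer masks -1 (true) and 0 (false)
  // rather than 1.0f and 0.0f, which is why the combined select_cc below
  // selects between -1 and 0.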
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC
  }

  // insert_vector_elt (build_vector elt0, ..., eltN), InVal, idx
  // => build_vector elt0, ..., InVal, ..., eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can
    // essentially be converted to a BUILD_VECTOR). Fill in the Ops vector
    // with the vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
                    DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
                    DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl,
                       VT, &Ops[0], Ops.size());
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be combined here.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    // Don't fall through into the SELECT_CC combine below; it reads operands
    // this node does not have.
    break;
  }

  case ISD::SELECT_CC: {
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //      selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //      selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

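  // Both EXPORT and TEXTURE_FETCH carry explicit swizzle operands. When their
  // vector input is a BUILD_VECTOR we can often shrink or reorder it and fold
  // the difference into those swizzles (see OptimizeSwizzle above).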
  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18)
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
                       NewArgs, 19);
  }
  }
  return SDValue();
}

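// FoldOperand tries to fold a source operand's defining node into the operand
// fields of ParentNode after instruction selection: FNEG_R600/FABS_R600
// become the neg/abs input modifiers, CONST_COPY becomes an ALU_CONST read
// with its select index, and MOV_IMM_* becomes either an inline constant
// register or the instruction's ALU_LITERAL_X slot. Returns true (and updates
// the SDValue references) if the fold is possible.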
static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Src.isMachineOpcode())
    return false;
  switch (Src.getMachineOpcode()) {
  case AMDGPU::FNEG_R600:
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::FABS_R600:
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather the constant values already read by the parent node so we can
    // check the hardware's constant-read limitations.
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
      int OtherSrcIdx = SrcIndices[i];
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
              dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == AMDGPU::ALU_CONST) {
          ConstantSDNode *Cst = cast<ConstantSDNode>(
              ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
    return true;
  }
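  // Immediates: 0.0, 0.5 and 1.0 (and integer 0/1) have dedicated inline
  // constant registers (ZERO, HALF, ONE, ONE_INT); any other value must go
  // through the instruction's single ALU_LITERAL_X slot.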
  case AMDGPU::MOV_IMM_I32:
  case AMDGPU::MOV_IMM_F32: {
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;

    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (Value == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = cast<ConstantSDNode>(Imm);
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}

/// \brief Fold the instructions after selecting them
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Node->isMachineOpcode())
    return Node;
  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp;

  std::vector<SDValue> Ops;
  for (SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
       I != E; ++I)
    Ops.push_back(*I);

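  // DOT_4 reads two four-channel vectors, so there are eight sources, each
  // with its own neg/abs/sel modifier operands; try to fold every source's
  // defining instruction into those fields.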
  if (Opcode == AMDGPU::DOT_4) {
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
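    // CLAMP_R600 is a pseudo that clamps its input to [0.0, 1.0]. If the
    // instruction that produced the input supports the clamp output modifier,
    // set that bit on a copy of it and drop the pseudo entirely.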
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
                                      AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    std::vector<SDValue> Ops;
    unsigned NumOp = Src.getNumOperands();
    for (unsigned i = 0; i < NumOp; ++i)
      Ops.push_back(Src.getOperand(i));
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
                              Node->getVTList(), Ops);
  } else {
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}