blob: 3ba4d4a038d9a2c8ea4e74192072a30d08d962f6 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Matt Arsenault43e92fe2016-06-24 06:30:11 +000033R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
34 const R600Subtarget &STI)
Eric Christopher7792e322015-01-30 23:24:40 +000035 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard75aadc22012-12-11 21:25:42 +000037 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000038 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
39 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
Matt Arsenault71e66762016-05-21 02:27:49 +000040 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
41 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000042
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Matt Arsenault71e66762016-05-21 02:27:49 +000045 // Legalize loads and stores to the private address space.
46 setOperationAction(ISD::LOAD, MVT::i32, Custom);
47 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
48 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
49
50 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
51 // spaces, so it is custom lowered to handle those where it isn't.
52 for (MVT VT : MVT::integer_valuetypes()) {
53 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
54 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
55 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
56
57 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
58 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
59 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
60
61 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
62 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
63 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
64 }
65
Matt Arsenaultd1097a32016-06-02 19:54:26 +000066 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
67 setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
68 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
69 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
70
71 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
72 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
73 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
74
75
Matt Arsenault71e66762016-05-21 02:27:49 +000076 setOperationAction(ISD::STORE, MVT::i8, Custom);
77 setOperationAction(ISD::STORE, MVT::i32, Custom);
78 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
79 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
80
81 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
82 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
83
Matt Arsenaultd1097a32016-06-02 19:54:26 +000084 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
85 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
86 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
87
Tom Stellard0351ea22013-09-28 02:50:50 +000088 // Set condition code actions
89 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
90 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000091 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000092 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000093 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
94 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000095 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
96 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
97 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
98 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000099 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
100 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
101
102 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
103 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
104 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
105 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
106
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000107 setOperationAction(ISD::FCOS, MVT::f32, Custom);
108 setOperationAction(ISD::FSIN, MVT::f32, Custom);
109
Tom Stellard75aadc22012-12-11 21:25:42 +0000110 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000111 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000112
Tom Stellard492ebea2013-03-08 15:37:07 +0000113 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
114 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +0000115 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000116
117 setOperationAction(ISD::FSUB, MVT::f32, Expand);
118
Tom Stellard75aadc22012-12-11 21:25:42 +0000119 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
120 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
121
Tom Stellarde8f9f282013-03-08 15:37:05 +0000122 setOperationAction(ISD::SETCC, MVT::i32, Expand);
123 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000124 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000125 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
126 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000127
Tom Stellard53f2f902013-09-05 18:38:03 +0000128 setOperationAction(ISD::SELECT, MVT::i32, Expand);
129 setOperationAction(ISD::SELECT, MVT::f32, Expand);
130 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +0000131 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000132
Jan Vesely808fff52015-04-30 17:15:56 +0000133 // ADD, SUB overflow.
134 // TODO: turn these into Legal?
135 if (Subtarget->hasCARRY())
136 setOperationAction(ISD::UADDO, MVT::i32, Custom);
137
138 if (Subtarget->hasBORROW())
139 setOperationAction(ISD::USUBO, MVT::i32, Custom);
140
Matt Arsenault4e466652014-04-16 01:41:30 +0000141 // Expand sign extension of vectors
142 if (!Subtarget->hasBFE())
143 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
144
145 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
146 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
147
148 if (!Subtarget->hasBFE())
149 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
150 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
151 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
152
153 if (!Subtarget->hasBFE())
154 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
155 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
156 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
157
158 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
159 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
160 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
161
162 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
163
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000164 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
165
Tom Stellard880a80a2014-06-17 16:53:14 +0000166 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
167 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
168 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
169 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
170
171 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
172 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
173 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
174 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
175
Jan Vesely25f36272014-06-18 12:27:13 +0000176 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
177 // to be Legal/Custom in order to avoid library calls.
178 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000179 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000180 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000181
Michel Danzer49812b52013-07-10 16:37:07 +0000182 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
183
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000184 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
185 for (MVT VT : ScalarIntVTs) {
186 setOperationAction(ISD::ADDC, VT, Expand);
187 setOperationAction(ISD::SUBC, VT, Expand);
188 setOperationAction(ISD::ADDE, VT, Expand);
189 setOperationAction(ISD::SUBE, VT, Expand);
190 }
191
Tom Stellardfc455472013-08-12 22:33:21 +0000192 setSchedulingPreference(Sched::Source);
Matt Arsenault71e66762016-05-21 02:27:49 +0000193
194
195 setTargetDAGCombine(ISD::FP_ROUND);
196 setTargetDAGCombine(ISD::FP_TO_SINT);
197 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
198 setTargetDAGCombine(ISD::SELECT_CC);
199 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000200}
201
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000202const R600Subtarget *R600TargetLowering::getSubtarget() const {
203 return static_cast<const R600Subtarget *>(Subtarget);
204}
205
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000206static inline bool isEOP(MachineBasicBlock::iterator I) {
207 return std::next(I)->getOpcode() == AMDGPU::RETURN;
208}
209
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000210MachineBasicBlock *
211R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
212 MachineBasicBlock *BB) const {
Tom Stellard75aadc22012-12-11 21:25:42 +0000213 MachineFunction * MF = BB->getParent();
214 MachineRegisterInfo &MRI = MF->getRegInfo();
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000215 MachineBasicBlock::iterator I = MI;
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000216 const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
Tom Stellard75aadc22012-12-11 21:25:42 +0000217
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000218 switch (MI.getOpcode()) {
Tom Stellardc6f4a292013-08-26 15:05:59 +0000219 default:
Tom Stellard8f9fc202013-11-15 00:12:45 +0000220 // Replace LDS_*_RET instruction that don't have any uses with the
221 // equivalent LDS_*_NORET instruction.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000222 if (TII->isLDSRetInstr(MI.getOpcode())) {
223 int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
Tom Stellard13c68ef2013-09-05 18:38:09 +0000224 assert(DstIdx != -1);
225 MachineInstrBuilder NewMI;
Aaron Watry1885e532014-09-11 15:02:54 +0000226 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
227 // LDS_1A2D support and remove this special case.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000228 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
229 MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
Tom Stellard8f9fc202013-11-15 00:12:45 +0000230 return BB;
231
232 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000233 TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
234 for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
235 NewMI.addOperand(MI.getOperand(i));
Tom Stellardc6f4a292013-08-26 15:05:59 +0000236 }
Tom Stellardc6f4a292013-08-26 15:05:59 +0000237 } else {
238 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
239 }
240 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000241 case AMDGPU::CLAMP_R600: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000242 MachineInstr *NewMI = TII->buildDefaultInstruction(
243 *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
244 MI.getOperand(1).getReg());
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000245 TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000246 break;
247 }
248
249 case AMDGPU::FABS_R600: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000250 MachineInstr *NewMI = TII->buildDefaultInstruction(
251 *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
252 MI.getOperand(1).getReg());
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000253 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
Tom Stellard75aadc22012-12-11 21:25:42 +0000254 break;
255 }
256
257 case AMDGPU::FNEG_R600: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000258 MachineInstr *NewMI = TII->buildDefaultInstruction(
259 *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
260 MI.getOperand(1).getReg());
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000261 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000262 break;
263 }
264
Tom Stellard75aadc22012-12-11 21:25:42 +0000265 case AMDGPU::MASK_WRITE: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000266 unsigned maskedRegister = MI.getOperand(0).getReg();
Tom Stellard75aadc22012-12-11 21:25:42 +0000267 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
268 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000269 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
Tom Stellard75aadc22012-12-11 21:25:42 +0000270 break;
271 }
272
273 case AMDGPU::MOV_IMM_F32:
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000274 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
275 .getFPImm()
276 ->getValueAPF()
277 .bitcastToAPInt()
278 .getZExtValue());
Tom Stellard75aadc22012-12-11 21:25:42 +0000279 break;
280 case AMDGPU::MOV_IMM_I32:
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000281 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
282 MI.getOperand(1).getImm());
Tom Stellard75aadc22012-12-11 21:25:42 +0000283 break;
Jan Veselyf97de002016-05-13 20:39:29 +0000284 case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
285 //TODO: Perhaps combine this instruction with the next if possible
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000286 auto MIB = TII->buildDefaultInstruction(
287 *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
Jan Veselyf97de002016-05-13 20:39:29 +0000288 int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
289 //TODO: Ugh this is rather ugly
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000290 MIB->getOperand(Idx) = MI.getOperand(1);
Jan Veselyf97de002016-05-13 20:39:29 +0000291 break;
292 }
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000293 case AMDGPU::CONST_COPY: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000294 MachineInstr *NewMI = TII->buildDefaultInstruction(
295 *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000296 TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000297 MI.getOperand(1).getImm());
Vincent Lejeune0b72f102013-03-05 15:04:55 +0000298 break;
299 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000300
301 case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
Tom Stellard0344cdf2013-08-01 15:23:42 +0000302 case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
Tom Stellard75aadc22012-12-11 21:25:42 +0000303 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000304 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
305 .addOperand(MI.getOperand(0))
306 .addOperand(MI.getOperand(1))
307 .addImm(isEOP(I)); // Set End of program bit
Tom Stellard75aadc22012-12-11 21:25:42 +0000308 break;
309 }
Tom Stellarde0e582c2015-10-01 17:51:34 +0000310 case AMDGPU::RAT_STORE_TYPED_eg: {
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000311 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
312 .addOperand(MI.getOperand(0))
313 .addOperand(MI.getOperand(1))
314 .addOperand(MI.getOperand(2))
315 .addImm(isEOP(I)); // Set End of program bit
Tom Stellarde0e582c2015-10-01 17:51:34 +0000316 break;
317 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000318
Tom Stellard75aadc22012-12-11 21:25:42 +0000319 case AMDGPU::TXD: {
320 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
321 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000322 MachineOperand &RID = MI.getOperand(4);
323 MachineOperand &SID = MI.getOperand(5);
324 unsigned TextureId = MI.getOperand(6).getImm();
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000325 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
326 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000327
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000328 switch (TextureId) {
329 case 5: // Rect
330 CTX = CTY = 0;
331 break;
332 case 6: // Shadow1D
333 SrcW = SrcZ;
334 break;
335 case 7: // Shadow2D
336 SrcW = SrcZ;
337 break;
338 case 8: // ShadowRect
339 CTX = CTY = 0;
340 SrcW = SrcZ;
341 break;
342 case 9: // 1DArray
343 SrcZ = SrcY;
344 CTZ = 0;
345 break;
346 case 10: // 2DArray
347 CTZ = 0;
348 break;
349 case 11: // Shadow1DArray
350 SrcZ = SrcY;
351 CTZ = 0;
352 break;
353 case 12: // Shadow2DArray
354 CTZ = 0;
355 break;
356 }
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000357 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
358 T0)
359 .addOperand(MI.getOperand(3))
360 .addImm(SrcX)
361 .addImm(SrcY)
362 .addImm(SrcZ)
363 .addImm(SrcW)
364 .addImm(0)
365 .addImm(0)
366 .addImm(0)
367 .addImm(0)
368 .addImm(1)
369 .addImm(2)
370 .addImm(3)
371 .addOperand(RID)
372 .addOperand(SID)
373 .addImm(CTX)
374 .addImm(CTY)
375 .addImm(CTZ)
376 .addImm(CTW);
377 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
378 T1)
379 .addOperand(MI.getOperand(2))
380 .addImm(SrcX)
381 .addImm(SrcY)
382 .addImm(SrcZ)
383 .addImm(SrcW)
384 .addImm(0)
385 .addImm(0)
386 .addImm(0)
387 .addImm(0)
388 .addImm(1)
389 .addImm(2)
390 .addImm(3)
391 .addOperand(RID)
392 .addOperand(SID)
393 .addImm(CTX)
394 .addImm(CTY)
395 .addImm(CTZ)
396 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000397 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000398 .addOperand(MI.getOperand(0))
399 .addOperand(MI.getOperand(1))
400 .addImm(SrcX)
401 .addImm(SrcY)
402 .addImm(SrcZ)
403 .addImm(SrcW)
404 .addImm(0)
405 .addImm(0)
406 .addImm(0)
407 .addImm(0)
408 .addImm(1)
409 .addImm(2)
410 .addImm(3)
411 .addOperand(RID)
412 .addOperand(SID)
413 .addImm(CTX)
414 .addImm(CTY)
415 .addImm(CTZ)
416 .addImm(CTW)
417 .addReg(T0, RegState::Implicit)
418 .addReg(T1, RegState::Implicit);
Tom Stellard75aadc22012-12-11 21:25:42 +0000419 break;
420 }
421
422 case AMDGPU::TXD_SHADOW: {
423 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
424 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000425 MachineOperand &RID = MI.getOperand(4);
426 MachineOperand &SID = MI.getOperand(5);
427 unsigned TextureId = MI.getOperand(6).getImm();
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000428 unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
429 unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
430
431 switch (TextureId) {
432 case 5: // Rect
433 CTX = CTY = 0;
434 break;
435 case 6: // Shadow1D
436 SrcW = SrcZ;
437 break;
438 case 7: // Shadow2D
439 SrcW = SrcZ;
440 break;
441 case 8: // ShadowRect
442 CTX = CTY = 0;
443 SrcW = SrcZ;
444 break;
445 case 9: // 1DArray
446 SrcZ = SrcY;
447 CTZ = 0;
448 break;
449 case 10: // 2DArray
450 CTZ = 0;
451 break;
452 case 11: // Shadow1DArray
453 SrcZ = SrcY;
454 CTZ = 0;
455 break;
456 case 12: // Shadow2DArray
457 CTZ = 0;
458 break;
459 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000460
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000461 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
462 T0)
463 .addOperand(MI.getOperand(3))
464 .addImm(SrcX)
465 .addImm(SrcY)
466 .addImm(SrcZ)
467 .addImm(SrcW)
468 .addImm(0)
469 .addImm(0)
470 .addImm(0)
471 .addImm(0)
472 .addImm(1)
473 .addImm(2)
474 .addImm(3)
475 .addOperand(RID)
476 .addOperand(SID)
477 .addImm(CTX)
478 .addImm(CTY)
479 .addImm(CTZ)
480 .addImm(CTW);
481 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
482 T1)
483 .addOperand(MI.getOperand(2))
484 .addImm(SrcX)
485 .addImm(SrcY)
486 .addImm(SrcZ)
487 .addImm(SrcW)
488 .addImm(0)
489 .addImm(0)
490 .addImm(0)
491 .addImm(0)
492 .addImm(1)
493 .addImm(2)
494 .addImm(3)
495 .addOperand(RID)
496 .addOperand(SID)
497 .addImm(CTX)
498 .addImm(CTY)
499 .addImm(CTZ)
500 .addImm(CTW);
Tom Stellard75aadc22012-12-11 21:25:42 +0000501 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000502 .addOperand(MI.getOperand(0))
503 .addOperand(MI.getOperand(1))
504 .addImm(SrcX)
505 .addImm(SrcY)
506 .addImm(SrcZ)
507 .addImm(SrcW)
508 .addImm(0)
509 .addImm(0)
510 .addImm(0)
511 .addImm(0)
512 .addImm(1)
513 .addImm(2)
514 .addImm(3)
515 .addOperand(RID)
516 .addOperand(SID)
517 .addImm(CTX)
518 .addImm(CTY)
519 .addImm(CTZ)
520 .addImm(CTW)
521 .addReg(T0, RegState::Implicit)
522 .addReg(T1, RegState::Implicit);
Tom Stellard75aadc22012-12-11 21:25:42 +0000523 break;
524 }
525
526 case AMDGPU::BRANCH:
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000527 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
528 .addOperand(MI.getOperand(0));
529 break;
Tom Stellard75aadc22012-12-11 21:25:42 +0000530
531 case AMDGPU::BRANCH_COND_f32: {
532 MachineInstr *NewMI =
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000533 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
534 AMDGPU::PREDICATE_BIT)
535 .addOperand(MI.getOperand(1))
536 .addImm(OPCODE_IS_NOT_ZERO)
537 .addImm(0); // Flags
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000538 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000539 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000540 .addOperand(MI.getOperand(0))
541 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
Tom Stellard75aadc22012-12-11 21:25:42 +0000542 break;
543 }
544
545 case AMDGPU::BRANCH_COND_i32: {
546 MachineInstr *NewMI =
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000547 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
548 AMDGPU::PREDICATE_BIT)
549 .addOperand(MI.getOperand(1))
Tom Stellard75aadc22012-12-11 21:25:42 +0000550 .addImm(OPCODE_IS_NOT_ZERO_INT)
551 .addImm(0); // Flags
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000552 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
Vincent Lejeunee5ecf102013-03-11 18:15:06 +0000553 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000554 .addOperand(MI.getOperand(0))
555 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
Tom Stellard75aadc22012-12-11 21:25:42 +0000556 break;
557 }
558
Tom Stellard75aadc22012-12-11 21:25:42 +0000559 case AMDGPU::EG_ExportSwz:
560 case AMDGPU::R600_ExportSwz: {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000561 // Instruction is left unmodified if its not the last one of its type
562 bool isLastInstructionOfItsType = true;
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000563 unsigned InstExportType = MI.getOperand(1).getImm();
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000564 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
Tom Stellard6f1b8652013-01-23 21:39:49 +0000565 EndBlock = BB->end(); NextExportInst != EndBlock;
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +0000566 NextExportInst = std::next(NextExportInst)) {
Tom Stellard6f1b8652013-01-23 21:39:49 +0000567 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
568 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
569 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
570 .getImm();
571 if (CurrentInstExportType == InstExportType) {
572 isLastInstructionOfItsType = false;
573 break;
574 }
575 }
576 }
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000577 bool EOP = isEOP(I);
Tom Stellard6f1b8652013-01-23 21:39:49 +0000578 if (!EOP && !isLastInstructionOfItsType)
Tom Stellard75aadc22012-12-11 21:25:42 +0000579 return BB;
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000580 unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
581 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
582 .addOperand(MI.getOperand(0))
583 .addOperand(MI.getOperand(1))
584 .addOperand(MI.getOperand(2))
585 .addOperand(MI.getOperand(3))
586 .addOperand(MI.getOperand(4))
587 .addOperand(MI.getOperand(5))
588 .addOperand(MI.getOperand(6))
589 .addImm(CfInst)
590 .addImm(EOP);
Tom Stellard75aadc22012-12-11 21:25:42 +0000591 break;
592 }
Jakob Stoklund Olesenfdc37672013-02-05 17:53:52 +0000593 case AMDGPU::RETURN: {
594 // RETURN instructions must have the live-out registers as implicit uses,
595 // otherwise they appear dead.
596 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
597 MachineInstrBuilder MIB(*MF, MI);
598 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
599 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
600 return BB;
601 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000602 }
603
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +0000604 MI.eraseFromParent();
Tom Stellard75aadc22012-12-11 21:25:42 +0000605 return BB;
606}
607
608//===----------------------------------------------------------------------===//
609// Custom DAG Lowering Operations
610//===----------------------------------------------------------------------===//
611
Tom Stellard75aadc22012-12-11 21:25:42 +0000612SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000613 MachineFunction &MF = DAG.getMachineFunction();
614 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000615 switch (Op.getOpcode()) {
616 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000617 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
618 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000619 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000620 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000621 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000622 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
623 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000624 case ISD::FCOS:
625 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000626 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000628 case ISD::LOAD: {
629 SDValue Result = LowerLOAD(Op, DAG);
630 assert((!Result.getNode() ||
631 Result.getNode()->getNumValues() == 2) &&
632 "Load should return a value and a chain");
633 return Result;
634 }
635
Matt Arsenault1d555c42014-06-23 18:00:55 +0000636 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000637 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Matt Arsenault81d06012016-03-07 21:10:13 +0000638 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000639 case ISD::INTRINSIC_VOID: {
640 SDValue Chain = Op.getOperand(0);
641 unsigned IntrinsicID =
642 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
643 switch (IntrinsicID) {
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000644 case AMDGPUIntrinsic::R600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000645 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000646 const SDValue Args[8] = {
647 Chain,
648 Op.getOperand(2), // Export Value
649 Op.getOperand(3), // ArrayBase
650 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000651 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
652 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
653 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
654 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000655 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000656 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000657 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000658
Tom Stellard75aadc22012-12-11 21:25:42 +0000659 // default for switch(IntrinsicID)
660 default: break;
661 }
662 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
663 break;
664 }
665 case ISD::INTRINSIC_WO_CHAIN: {
666 unsigned IntrinsicID =
667 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
668 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000669 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000670 switch(IntrinsicID) {
671 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Matt Arsenault59bd3012016-01-22 19:00:09 +0000672 case AMDGPUIntrinsic::r600_tex:
673 case AMDGPUIntrinsic::r600_texc:
674 case AMDGPUIntrinsic::r600_txl:
675 case AMDGPUIntrinsic::r600_txlc:
676 case AMDGPUIntrinsic::r600_txb:
677 case AMDGPUIntrinsic::r600_txbc:
678 case AMDGPUIntrinsic::r600_txf:
679 case AMDGPUIntrinsic::r600_txq:
680 case AMDGPUIntrinsic::r600_ddx:
Matt Arsenault648e4222016-07-14 05:23:23 +0000681 case AMDGPUIntrinsic::r600_ddy: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000682 unsigned TextureOp;
683 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000684 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000685 TextureOp = 0;
686 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000687 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000688 TextureOp = 1;
689 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000690 case AMDGPUIntrinsic::r600_txl:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000691 TextureOp = 2;
692 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000693 case AMDGPUIntrinsic::r600_txlc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000694 TextureOp = 3;
695 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000696 case AMDGPUIntrinsic::r600_txb:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000697 TextureOp = 4;
698 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000699 case AMDGPUIntrinsic::r600_txbc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000700 TextureOp = 5;
701 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000702 case AMDGPUIntrinsic::r600_txf:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000703 TextureOp = 6;
704 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000705 case AMDGPUIntrinsic::r600_txq:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000706 TextureOp = 7;
707 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000708 case AMDGPUIntrinsic::r600_ddx:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000709 TextureOp = 8;
710 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000711 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000712 TextureOp = 9;
713 break;
714 default:
715 llvm_unreachable("Unknow Texture Operation");
716 }
717
718 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000719 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000720 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000721 DAG.getConstant(0, DL, MVT::i32),
722 DAG.getConstant(1, DL, MVT::i32),
723 DAG.getConstant(2, DL, MVT::i32),
724 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000725 Op.getOperand(2),
726 Op.getOperand(3),
727 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000728 DAG.getConstant(0, DL, MVT::i32),
729 DAG.getConstant(1, DL, MVT::i32),
730 DAG.getConstant(2, DL, MVT::i32),
731 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000732 Op.getOperand(5),
733 Op.getOperand(6),
734 Op.getOperand(7),
735 Op.getOperand(8),
736 Op.getOperand(9),
737 Op.getOperand(10)
738 };
Craig Topper48d114b2014-04-26 18:35:24 +0000739 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000740 }
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000741 case AMDGPUIntrinsic::AMDGPU_dp4: {
742 SDValue Args[8] = {
743 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000744 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000745 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000746 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000747 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000748 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000749 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000750 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000751 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000752 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000753 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000754 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000755 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000756 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000757 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000758 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000759 };
Craig Topper48d114b2014-04-26 18:35:24 +0000760 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000761 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000762
Jan Vesely2fa28c32016-07-10 21:20:29 +0000763 case Intrinsic::r600_implicitarg_ptr: {
764 MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
765 uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
766 return DAG.getConstant(ByteOffset, DL, PtrVT);
767 }
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000768 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000769 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000770 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000771 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000772 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000773 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000774 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000775 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000776 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000777 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000778 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000779 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000780 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000781 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000782 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000783 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000784 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000785 return LowerImplicitParameter(DAG, VT, DL, 8);
786
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000787 case Intrinsic::r600_read_workdim:
788 case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
Tom Stellarddcb9f092015-07-09 21:20:37 +0000789 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
790 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
791 }
Jan Veselye5121f32014-10-14 20:05:26 +0000792
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000793 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000794 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
795 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000796 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000797 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
798 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000799 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000800 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
801 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000802 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000803 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
804 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000805 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000806 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
807 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000808 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000809 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
810 AMDGPU::T0_Z, VT);
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000811
812 // FIXME: Should be renamed to r600 prefix
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000813 case AMDGPUIntrinsic::AMDGPU_rsq_clamped:
Matt Arsenault79963e82016-02-13 01:03:00 +0000814 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000815
816 case Intrinsic::r600_rsq:
Matt Arsenault0c3e2332016-01-26 04:14:16 +0000817 case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
Matt Arsenault257d48d2014-06-24 22:13:39 +0000818 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
819 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000820 }
821 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
822 break;
823 }
824 } // end switch(Op.getOpcode())
825 return SDValue();
826}
827
/// Replace the illegal-typed results of \p N with legal equivalents and append
/// them to \p Results. Anything not handled here is delegated to the common
/// AMDGPU implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // An i1 result only needs a "is the input non-zero" compare.
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // LowerSDIVREM produces both quotient and remainder; push both results.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // LowerUDIVREM64 appends its results to the vector itself.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
863
Tom Stellard880a80a2014-06-17 16:53:14 +0000864SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
865 SDValue Vector) const {
866
867 SDLoc DL(Vector);
868 EVT VecVT = Vector.getValueType();
869 EVT EltVT = VecVT.getVectorElementType();
870 SmallVector<SDValue, 8> Args;
871
872 for (unsigned i = 0, e = VecVT.getVectorNumElements();
873 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000874 Args.push_back(DAG.getNode(
875 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
876 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000877 }
878
879 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
880}
881
882SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
883 SelectionDAG &DAG) const {
884
885 SDLoc DL(Op);
886 SDValue Vector = Op.getOperand(0);
887 SDValue Index = Op.getOperand(1);
888
889 if (isa<ConstantSDNode>(Index) ||
890 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
891 return Op;
892
893 Vector = vectorToVerticalVector(DAG, Vector);
894 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
895 Vector, Index);
896}
897
898SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
899 SelectionDAG &DAG) const {
900 SDLoc DL(Op);
901 SDValue Vector = Op.getOperand(0);
902 SDValue Value = Op.getOperand(1);
903 SDValue Index = Op.getOperand(2);
904
905 if (isa<ConstantSDNode>(Index) ||
906 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
907 return Op;
908
909 Vector = vectorToVerticalVector(DAG, Vector);
910 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
911 Vector, Value, Index);
912 return vectorToVerticalVector(DAG, Insert);
913}
914
Tom Stellard27233b72016-05-02 18:05:17 +0000915SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
916 SDValue Op,
917 SelectionDAG &DAG) const {
918
919 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
920 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
921 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
922
923 const DataLayout &DL = DAG.getDataLayout();
924 const GlobalValue *GV = GSD->getGlobal();
Tom Stellard27233b72016-05-02 18:05:17 +0000925 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
926
Jan Veselyf97de002016-05-13 20:39:29 +0000927 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
928 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
Tom Stellard27233b72016-05-02 18:05:17 +0000929}
930
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000931SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
932 // On hw >= R700, COS/SIN input must be between -1. and 1.
933 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
934 EVT VT = Op.getValueType();
935 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000936 SDLoc DL(Op);
Sanjay Patela2607012015-09-16 16:31:21 +0000937
938 // TODO: Should this propagate fast-math-flags?
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000939 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
940 DAG.getNode(ISD::FADD, DL, VT,
941 DAG.getNode(ISD::FMUL, DL, VT, Arg,
942 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
943 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000944 unsigned TrigNode;
945 switch (Op.getOpcode()) {
946 case ISD::FCOS:
947 TrigNode = AMDGPUISD::COS_HW;
948 break;
949 case ISD::FSIN:
950 TrigNode = AMDGPUISD::SIN_HW;
951 break;
952 default:
953 llvm_unreachable("Wrong trig opcode");
954 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000955 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
956 DAG.getNode(ISD::FADD, DL, VT, FractPart,
957 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000958 if (Gen >= R600Subtarget::R700)
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000959 return TrigVal;
960 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000961 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
962 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000963}
964
Jan Vesely25f36272014-06-18 12:27:13 +0000965SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
966 SDLoc DL(Op);
967 EVT VT = Op.getValueType();
968
969 SDValue Lo = Op.getOperand(0);
970 SDValue Hi = Op.getOperand(1);
971 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000972 SDValue Zero = DAG.getConstant(0, DL, VT);
973 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000974
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000975 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
976 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely25f36272014-06-18 12:27:13 +0000977 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
978 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
979
980 // The dance around Width1 is necessary for 0 special case.
981 // Without it the CompShift might be 32, producing incorrect results in
982 // Overflow. So we do the shift in two steps, the alternative is to
983 // add a conditional to filter the special case.
984
985 SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
986 Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
987
988 SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
989 HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
990 SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
991
992 SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
993 SDValue LoBig = Zero;
994
995 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
996 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
997
998 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
999}
1000
Jan Vesely900ff2e2014-06-18 12:27:15 +00001001SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
1002 SDLoc DL(Op);
1003 EVT VT = Op.getValueType();
1004
1005 SDValue Lo = Op.getOperand(0);
1006 SDValue Hi = Op.getOperand(1);
1007 SDValue Shift = Op.getOperand(2);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001008 SDValue Zero = DAG.getConstant(0, DL, VT);
1009 SDValue One = DAG.getConstant(1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001010
Jan Veselyecf51332014-06-18 12:27:17 +00001011 const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
1012
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001013 SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
1014 SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001015 SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
1016 SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
1017
1018 // The dance around Width1 is necessary for 0 special case.
1019 // Without it the CompShift might be 32, producing incorrect results in
1020 // Overflow. So we do the shift in two steps, the alternative is to
1021 // add a conditional to filter the special case.
1022
1023 SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
1024 Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
1025
Jan Veselyecf51332014-06-18 12:27:17 +00001026 SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
Jan Vesely900ff2e2014-06-18 12:27:15 +00001027 SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
1028 LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
1029
Jan Veselyecf51332014-06-18 12:27:17 +00001030 SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
1031 SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
Jan Vesely900ff2e2014-06-18 12:27:15 +00001032
1033 Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
1034 Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
1035
1036 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
1037}
1038
Jan Vesely808fff52015-04-30 17:15:56 +00001039SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1040 unsigned mainop, unsigned ovf) const {
1041 SDLoc DL(Op);
1042 EVT VT = Op.getValueType();
1043
1044 SDValue Lo = Op.getOperand(0);
1045 SDValue Hi = Op.getOperand(1);
1046
1047 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1048 // Extend sign.
1049 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1050 DAG.getValueType(MVT::i1));
1051
1052 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1053
1054 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1055}
1056
Tom Stellard75aadc22012-12-11 21:25:42 +00001057SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001058 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001059 return DAG.getNode(
1060 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001061 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001062 MVT::i1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001063 Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
Tom Stellard75aadc22012-12-11 21:25:42 +00001064 DAG.getCondCode(ISD::SETNE)
1065 );
1066}
1067
Tom Stellard75aadc22012-12-11 21:25:42 +00001068SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001069 const SDLoc &DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001070 unsigned DwordOffset) const {
1071 unsigned ByteOffset = DwordOffset * 4;
1072 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001073 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001074
1075 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1076 assert(isInt<16>(ByteOffset));
1077
1078 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001079 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Tom Stellard75aadc22012-12-11 21:25:42 +00001080 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
1081 false, false, false, 0);
1082}
1083
Tom Stellard75aadc22012-12-11 21:25:42 +00001084bool R600TargetLowering::isZero(SDValue Op) const {
1085 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1086 return Cst->isNullValue();
1087 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1088 return CstFP->isZero();
1089 } else {
1090 return false;
1091 }
1092}
1093
Matt Arsenault6b6a2c32016-03-11 08:00:27 +00001094bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
1095 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1096 return CFP->isExactlyValue(1.0);
1097 }
1098 return isAllOnesConstant(Op);
1099}
1100
1101bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
1102 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1103 return CFP->getValueAPF().isZero();
1104 }
1105 return isNullConstant(Op);
1106}
1107
Tom Stellard75aadc22012-12-11 21:25:42 +00001108SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001109 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001110 EVT VT = Op.getValueType();
1111
1112 SDValue LHS = Op.getOperand(0);
1113 SDValue RHS = Op.getOperand(1);
1114 SDValue True = Op.getOperand(2);
1115 SDValue False = Op.getOperand(3);
1116 SDValue CC = Op.getOperand(4);
1117 SDValue Temp;
1118
Matt Arsenault1e3a4eb2014-12-12 02:30:37 +00001119 if (VT == MVT::f32) {
1120 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
1121 SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
1122 if (MinMax)
1123 return MinMax;
1124 }
1125
Tom Stellard75aadc22012-12-11 21:25:42 +00001126 // LHS and RHS are guaranteed to be the same value type
1127 EVT CompareVT = LHS.getValueType();
1128
1129 // Check if we can lower this to a native operation.
1130
Tom Stellard2add82d2013-03-08 15:37:09 +00001131 // Try to lower to a SET* instruction:
1132 //
1133 // SET* can match the following patterns:
1134 //
Tom Stellardcd428182013-09-28 02:50:38 +00001135 // select_cc f32, f32, -1, 0, cc_supported
1136 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
1137 // select_cc i32, i32, -1, 0, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001138 //
1139
1140 // Move hardware True/False values to the correct operand.
Tom Stellardcd428182013-09-28 02:50:38 +00001141 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1142 ISD::CondCode InverseCC =
1143 ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Tom Stellard5694d302013-09-28 02:50:43 +00001144 if (isHWTrueValue(False) && isHWFalseValue(True)) {
1145 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
1146 std::swap(False, True);
1147 CC = DAG.getCondCode(InverseCC);
1148 } else {
1149 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
1150 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
1151 std::swap(False, True);
1152 std::swap(LHS, RHS);
1153 CC = DAG.getCondCode(SwapInvCC);
1154 }
1155 }
Tom Stellard2add82d2013-03-08 15:37:09 +00001156 }
1157
1158 if (isHWTrueValue(True) && isHWFalseValue(False) &&
1159 (CompareVT == VT || VT == MVT::i32)) {
1160 // This can be matched by a SET* instruction.
1161 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
1162 }
1163
Tom Stellard75aadc22012-12-11 21:25:42 +00001164 // Try to lower to a CND* instruction:
Tom Stellard2add82d2013-03-08 15:37:09 +00001165 //
1166 // CND* can match the following patterns:
1167 //
Tom Stellardcd428182013-09-28 02:50:38 +00001168 // select_cc f32, 0.0, f32, f32, cc_supported
1169 // select_cc f32, 0.0, i32, i32, cc_supported
1170 // select_cc i32, 0, f32, f32, cc_supported
1171 // select_cc i32, 0, i32, i32, cc_supported
Tom Stellard2add82d2013-03-08 15:37:09 +00001172 //
Tom Stellardcd428182013-09-28 02:50:38 +00001173
1174 // Try to move the zero value to the RHS
1175 if (isZero(LHS)) {
1176 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1177 // Try swapping the operands
1178 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
1179 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1180 std::swap(LHS, RHS);
1181 CC = DAG.getCondCode(CCSwapped);
1182 } else {
1183 // Try inverting the conditon and then swapping the operands
1184 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
1185 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
1186 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1187 std::swap(True, False);
1188 std::swap(LHS, RHS);
1189 CC = DAG.getCondCode(CCSwapped);
1190 }
1191 }
1192 }
1193 if (isZero(RHS)) {
1194 SDValue Cond = LHS;
1195 SDValue Zero = RHS;
Tom Stellard75aadc22012-12-11 21:25:42 +00001196 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1197 if (CompareVT != VT) {
1198 // Bitcast True / False to the correct types. This will end up being
1199 // a nop, but it allows us to define only a single pattern in the
1200 // .TD files for each CND* instruction rather than having to have
1201 // one pattern for integer True/False and one for fp True/False
1202 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
1203 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
1204 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001205
1206 switch (CCOpcode) {
1207 case ISD::SETONE:
1208 case ISD::SETUNE:
1209 case ISD::SETNE:
Tom Stellard75aadc22012-12-11 21:25:42 +00001210 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
1211 Temp = True;
1212 True = False;
1213 False = Temp;
1214 break;
1215 default:
1216 break;
1217 }
1218 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
1219 Cond, Zero,
1220 True, False,
1221 DAG.getCondCode(CCOpcode));
1222 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
1223 }
1224
Tom Stellard75aadc22012-12-11 21:25:42 +00001225 // If we make it this for it means we have no native instructions to handle
1226 // this SELECT_CC, so we must lower it.
1227 SDValue HWTrue, HWFalse;
1228
1229 if (CompareVT == MVT::f32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001230 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
1231 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001232 } else if (CompareVT == MVT::i32) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001233 HWTrue = DAG.getConstant(-1, DL, CompareVT);
1234 HWFalse = DAG.getConstant(0, DL, CompareVT);
Tom Stellard75aadc22012-12-11 21:25:42 +00001235 }
1236 else {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001237 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
Tom Stellard75aadc22012-12-11 21:25:42 +00001238 }
1239
1240 // Lower this unsupported SELECT_CC into a combination of two supported
1241 // SELECT_CC operations.
1242 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1243
1244 return DAG.getNode(ISD::SELECT_CC, DL, VT,
1245 Cond, HWFalse,
1246 True, False,
1247 DAG.getCondCode(ISD::SETNE));
1248}
1249
Alp Tokercb402912014-01-24 17:20:08 +00001250/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001251/// convert these pointers to a register index. Each register holds
1252/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1253/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1254/// for indirect addressing.
1255SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1256 unsigned StackWidth,
1257 SelectionDAG &DAG) const {
1258 unsigned SRLPad;
1259 switch(StackWidth) {
1260 case 1:
1261 SRLPad = 2;
1262 break;
1263 case 2:
1264 SRLPad = 3;
1265 break;
1266 case 4:
1267 SRLPad = 4;
1268 break;
1269 default: llvm_unreachable("Invalid stack width");
1270 }
1271
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001272 SDLoc DL(Ptr);
1273 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1274 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001275}
1276
1277void R600TargetLowering::getStackAddress(unsigned StackWidth,
1278 unsigned ElemIdx,
1279 unsigned &Channel,
1280 unsigned &PtrIncr) const {
1281 switch (StackWidth) {
1282 default:
1283 case 1:
1284 Channel = 0;
1285 if (ElemIdx > 0) {
1286 PtrIncr = 1;
1287 } else {
1288 PtrIncr = 0;
1289 }
1290 break;
1291 case 2:
1292 Channel = ElemIdx % 2;
1293 if (ElemIdx == 2) {
1294 PtrIncr = 1;
1295 } else {
1296 PtrIncr = 0;
1297 }
1298 break;
1299 case 4:
1300 Channel = ElemIdx;
1301 PtrIncr = 0;
1302 break;
1303 }
1304}
1305
Matt Arsenault95245662016-02-11 05:32:46 +00001306SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1307 SelectionDAG &DAG) const {
1308 SDLoc DL(Store);
Tom Stellard75aadc22012-12-11 21:25:42 +00001309
Matt Arsenault95245662016-02-11 05:32:46 +00001310 unsigned Mask = 0;
1311 if (Store->getMemoryVT() == MVT::i8) {
1312 Mask = 0xff;
1313 } else if (Store->getMemoryVT() == MVT::i16) {
1314 Mask = 0xffff;
1315 }
1316
1317 SDValue Chain = Store->getChain();
1318 SDValue BasePtr = Store->getBasePtr();
1319 EVT MemVT = Store->getMemoryVT();
1320
1321 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
1322 DAG.getConstant(2, DL, MVT::i32));
1323 SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
1324 Chain, Ptr,
1325 DAG.getTargetConstant(0, DL, MVT::i32));
1326
1327 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
1328 DAG.getConstant(0x3, DL, MVT::i32));
1329
1330 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1331 DAG.getConstant(3, DL, MVT::i32));
1332
1333 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1334 Store->getValue());
1335
1336 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1337
1338 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1339 MaskedValue, ShiftAmt);
1340
1341 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
1342 DAG.getConstant(Mask, DL, MVT::i32),
1343 ShiftAmt);
1344 DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
1345 DAG.getConstant(0xffffffff, DL, MVT::i32));
1346 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1347
1348 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1349 return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1350 Chain, Value, Ptr,
1351 DAG.getTargetConstant(0, DL, MVT::i32));
1352}
1353
/// Custom lowering for ISD::STORE on R600.
///
/// Dispatches, in order:
///  * vector stores to LOCAL/PRIVATE address spaces — split into per-element
///    stores;
///  * GLOBAL truncating stores of i8/i16 — emitted as a STORE_MSKOR
///    (masked-OR read-modify-write on the containing dword);
///  * other GLOBAL stores of >= 32-bit values — pointer rewritten from a byte
///    address to a dword address;
///  * PRIVATE stores narrower than 32 bits — lowerPrivateTruncStore;
///  * remaining PRIVATE stores — indirect addressing via REGISTER_STORE.
/// Anything else returns SDValue() so the generic legalizer handles it.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  // Let the generic vector-store merging run first.
  if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
    return Result;

  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();
  SDValue Value = StoreNode->getValue();
  EVT ValueVT = Value.getValueType();

  // Vector stores to LDS/private have no native support; scalarize them.
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
      ValueVT.isVector()) {
    return SplitVectorStore(Op, DAG);
  }

  SDLoc DL(Op);
  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Sub-dword global store: emit a masked-OR store (STORE_MSKOR) that
      // merges the narrow value into the containing 32-bit dword.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      // Mask covering the bits actually stored (i8 or i16).
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
      // Dword address = byte address >> 2; byte-within-dword = addr & 3.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      // Bit offset of the target byte within the dword (byte index * 8).
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               ValueVT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // Beyond this point only private (scratch) stores are handled here.
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  EVT MemVT = StoreNode->getMemoryVT();
  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index for REGISTER_STORE.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_STORE per element, stepping the register index as
    // dictated by the stack layout (getStackAddress).
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    // Join all the element stores into a single chain.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    // Scalar case: i8 values are widened to i32 before the register store.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  }

  return Chain;
}
1467
Tom Stellard365366f2013-01-23 02:09:06 +00001468// return (512 + (kc_bank << 12)
1469static int
1470ConstantAddressBlock(unsigned AddressSpace) {
1471 switch (AddressSpace) {
1472 case AMDGPUAS::CONSTANT_BUFFER_0:
1473 return 512;
1474 case AMDGPUAS::CONSTANT_BUFFER_1:
1475 return 512 + 4096;
1476 case AMDGPUAS::CONSTANT_BUFFER_2:
1477 return 512 + 4096 * 2;
1478 case AMDGPUAS::CONSTANT_BUFFER_3:
1479 return 512 + 4096 * 3;
1480 case AMDGPUAS::CONSTANT_BUFFER_4:
1481 return 512 + 4096 * 4;
1482 case AMDGPUAS::CONSTANT_BUFFER_5:
1483 return 512 + 4096 * 5;
1484 case AMDGPUAS::CONSTANT_BUFFER_6:
1485 return 512 + 4096 * 6;
1486 case AMDGPUAS::CONSTANT_BUFFER_7:
1487 return 512 + 4096 * 7;
1488 case AMDGPUAS::CONSTANT_BUFFER_8:
1489 return 512 + 4096 * 8;
1490 case AMDGPUAS::CONSTANT_BUFFER_9:
1491 return 512 + 4096 * 9;
1492 case AMDGPUAS::CONSTANT_BUFFER_10:
1493 return 512 + 4096 * 10;
1494 case AMDGPUAS::CONSTANT_BUFFER_11:
1495 return 512 + 4096 * 11;
1496 case AMDGPUAS::CONSTANT_BUFFER_12:
1497 return 512 + 4096 * 12;
1498 case AMDGPUAS::CONSTANT_BUFFER_13:
1499 return 512 + 4096 * 13;
1500 case AMDGPUAS::CONSTANT_BUFFER_14:
1501 return 512 + 4096 * 14;
1502 case AMDGPUAS::CONSTANT_BUFFER_15:
1503 return 512 + 4096 * 15;
1504 default:
1505 return -1;
1506 }
1507}
1508
/// Lower a sub-dword extending load (i8/i16 SEXTLOAD/ZEXTLOAD) from the
/// private address space.
///
/// There is no native byte/short load from scratch, so the containing 32-bit
/// register is loaded with REGISTER_LOAD, the target byte(s) are shifted down
/// to bit 0, and the upper bits are then sign- or zero-extended according to
/// the load's extension type. Returns the merged {value, chain} pair.
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();

  // Private-address extending load of size < 32 bits: emulate with a
  // register load plus a (1- or 2-)byte extract.

  // Get Register holding the target (dword address = byte address >> 2).
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
                            DAG.getConstant(2, DL, MVT::i32));
  // Load the Register.
  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
                            Load->getChain(),
                            Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32),
                            Op.getOperand(2));

  // Get offset within the register (byte index = address & 3).
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                Load->getBasePtr(),
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift the loaded dword right so the target byte lands at bit 0.
  Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  // ... ones (sign-extend for SEXTLOAD).
  if (ExtType == ISD::SEXTLOAD) {
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);

    SDValue Ops[] = {
      DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
      Load->getChain()
    };

    return DAG.getMergeValues(Ops, DL);
  }

  // ... or zeros (ZEXTLOAD / EXTLOAD).
  SDValue Ops[] = {
    DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
    Load->getChain()
  };

  return DAG.getMergeValues(Ops, DL);
}
1564
/// Custom lowering for ISD::LOAD on R600.
///
/// Handles, in order:
///  * private extending loads narrower than 32 bits (lowerPrivateExtLoad);
///  * vector loads from LDS (scalarized);
///  * loads from the constant-buffer address spaces, folded into
///    CONST_ADDRESS nodes (with the kc_bank offset baked into the pointer
///    when it is constant);
///  * SEXT loads from unsupported address spaces, expanded into an EXTLOAD
///    plus SIGN_EXTEND_INREG;
///  * private loads via indirect-addressing REGISTER_LOAD.
/// Everything else returns SDValue() for default handling.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  // Sub-dword extending loads from scratch need byte-extract emulation.
  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  // Vector loads from LDS are emulated one element at a time.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      scalarizeVectorLoad(LoadNode, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  // Constant-buffer data is sign extended at upload time, so ZEXTLOAD cannot
  // be folded here; NON_EXTLOAD and ZEXTLOAD of full dwords are fine.
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
                      DAG.getConstant(LoadNode->getAddressSpace() -
                                      AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // Scalar load: keep only element 0 of the const fetch.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  SDValue LoweredLoad;

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    // Re-apply the sign extension on the loaded value.
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Only private (scratch) loads are lowered below.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index for REGISTER_LOAD.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT <= 4);
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // Emit one REGISTER_LOAD per element, stepping the register index as
    // dictated by the stack layout (getStackAddress).
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    // Reassemble the elements into a vector of the requested width.
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
    LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001708
Matt Arsenault1d555c42014-06-23 18:00:55 +00001709SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1710 SDValue Chain = Op.getOperand(0);
1711 SDValue Cond = Op.getOperand(1);
1712 SDValue Jump = Op.getOperand(2);
1713
1714 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1715 Chain, Jump, Cond);
1716}
1717
Matt Arsenault81d06012016-03-07 21:10:13 +00001718SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1719 SelectionDAG &DAG) const {
1720 MachineFunction &MF = DAG.getMachineFunction();
Matt Arsenault43e92fe2016-06-24 06:30:11 +00001721 const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
Matt Arsenault81d06012016-03-07 21:10:13 +00001722
1723 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1724
1725 unsigned FrameIndex = FIN->getIndex();
1726 unsigned IgnoredFrameReg;
1727 unsigned Offset =
1728 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1729 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1730 Op.getValueType());
1731}
1732
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Shader arguments arrive in R600_Reg128 live-in registers; kernel arguments
/// are loaded from CONSTANT_BUFFER_0, whose first 36 bytes are reserved for
/// dispatch information (thread group / global sizes).
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Recover the pre-legalization argument list before assigning locations.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Shader arguments live in registers, not the constant buffer.
    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    // Skip the 36-byte dispatch-info header at the start of the buffer.
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the next argument would start, for runtime metadata.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1809
Mehdi Amini44ede332015-07-09 02:09:04 +00001810EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1811 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001812 if (!VT.isVector())
1813 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001814 return VT.changeVectorElementTypeToInteger();
1815}
1816
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001817bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1818 unsigned AddrSpace,
1819 unsigned Align,
1820 bool *IsFast) const {
1821 if (IsFast)
1822 *IsFast = false;
1823
1824 if (!VT.isSimple() || VT == MVT::Other)
1825 return false;
1826
1827 if (VT.bitsLT(MVT::i32))
1828 return false;
1829
1830 // TODO: This is a rough estimate.
1831 if (IsFast)
1832 *IsFast = true;
1833
1834 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1835}
1836
/// Rewrite a 4-element BUILD_VECTOR so that constant 0.0/1.0 elements and
/// duplicate elements become undef, recording how each lane should then be
/// selected in \p RemapSwizzle (R600 swizzle selectors: 4 = SEL_0, 5 = SEL_1,
/// 7 = SEL_MASK_WRITE, 0-3 = source lane). The caller patches its swizzle
/// operands with the map afterwards.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      // Constant 0.0 / 1.0 lanes can come from the hardware selectors
      // instead of a register, so the element itself becomes undef.
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    // A lane equal to an earlier lane can reuse that lane's slot via the
    // swizzle, freeing this one.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
1879
Benjamin Kramer193960c2013-06-11 13:32:25 +00001880static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1881 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001882 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1883 assert(RemapSwizzle.empty());
1884 SDValue NewBldVec[4] = {
1885 VectorEntry.getOperand(0),
1886 VectorEntry.getOperand(1),
1887 VectorEntry.getOperand(2),
1888 VectorEntry.getOperand(3)
1889 };
1890 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001891 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001892 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001893 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1894 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1895 ->getZExtValue();
1896 if (i == Idx)
1897 isUnmovable[Idx] = true;
1898 }
1899 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001900
1901 for (unsigned i = 0; i < 4; i++) {
1902 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1903 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1904 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001905 if (isUnmovable[Idx])
1906 continue;
1907 // Swap i and Idx
1908 std::swap(NewBldVec[Idx], NewBldVec[i]);
1909 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1910 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001911 }
1912 }
1913
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001914 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1915 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001916}
1917
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001918SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1919 SelectionDAG &DAG,
1920 const SDLoc &DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001921 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1922 // Old -> New swizzle values
1923 DenseMap<unsigned, unsigned> SwizzleRemap;
1924
1925 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1926 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001927 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001928 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001929 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001930 }
1931
1932 SwizzleRemap.clear();
1933 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1934 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001935 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001936 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001937 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001938 }
1939
1940 return BuildVector;
1941}
1942
1943
Tom Stellard75aadc22012-12-11 21:25:42 +00001944//===----------------------------------------------------------------------===//
1945// Custom DAG Optimizations
1946//===----------------------------------------------------------------------===//
1947
1948SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1949 DAGCombinerInfo &DCI) const {
1950 SelectionDAG &DAG = DCI.DAG;
1951
1952 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001953 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001954 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1955 case ISD::FP_ROUND: {
1956 SDValue Arg = N->getOperand(0);
1957 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001958 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001959 Arg.getOperand(0));
1960 }
1961 break;
1962 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001963
1964 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1965 // (i32 select_cc f32, f32, -1, 0 cc)
1966 //
1967 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1968 // this to one of the SET*_DX10 instructions.
1969 case ISD::FP_TO_SINT: {
1970 SDValue FNeg = N->getOperand(0);
1971 if (FNeg.getOpcode() != ISD::FNEG) {
1972 return SDValue();
1973 }
1974 SDValue SelectCC = FNeg.getOperand(0);
1975 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1976 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1977 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1978 !isHWTrueValue(SelectCC.getOperand(2)) ||
1979 !isHWFalseValue(SelectCC.getOperand(3))) {
1980 return SDValue();
1981 }
1982
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001983 SDLoc dl(N);
1984 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001985 SelectCC.getOperand(0), // LHS
1986 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001987 DAG.getConstant(-1, dl, MVT::i32), // True
1988 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001989 SelectCC.getOperand(4)); // CC
1990
1991 break;
1992 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001993
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001994 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1995 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001996 case ISD::INSERT_VECTOR_ELT: {
1997 SDValue InVec = N->getOperand(0);
1998 SDValue InVal = N->getOperand(1);
1999 SDValue EltNo = N->getOperand(2);
2000 SDLoc dl(N);
2001
2002 // If the inserted element is an UNDEF, just use the input vector.
Sanjay Patel57195842016-03-14 17:28:46 +00002003 if (InVal.isUndef())
Quentin Colombete2e05482013-07-30 00:27:16 +00002004 return InVec;
2005
2006 EVT VT = InVec.getValueType();
2007
2008 // If we can't generate a legal BUILD_VECTOR, exit
2009 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
2010 return SDValue();
2011
2012 // Check that we know which element is being inserted
2013 if (!isa<ConstantSDNode>(EltNo))
2014 return SDValue();
2015 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
2016
2017 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
2018 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
2019 // vector elements.
2020 SmallVector<SDValue, 8> Ops;
2021 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
2022 Ops.append(InVec.getNode()->op_begin(),
2023 InVec.getNode()->op_end());
Sanjay Patel57195842016-03-14 17:28:46 +00002024 } else if (InVec.isUndef()) {
Quentin Colombete2e05482013-07-30 00:27:16 +00002025 unsigned NElts = VT.getVectorNumElements();
2026 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
2027 } else {
2028 return SDValue();
2029 }
2030
2031 // Insert the element
2032 if (Elt < Ops.size()) {
2033 // All the operands of BUILD_VECTOR must have the same type;
2034 // we enforce that here.
2035 EVT OpVT = Ops[0].getValueType();
2036 if (InVal.getValueType() != OpVT)
2037 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2038 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2039 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2040 Ops[Elt] = InVal;
2041 }
2042
2043 // Return the new vector
Ahmed Bougacha128f8732016-04-26 21:15:30 +00002044 return DAG.getBuildVector(VT, dl, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002045 }
2046
Tom Stellard365366f2013-01-23 02:09:06 +00002047 // Extract_vec (Build_vector) generated by custom lowering
2048 // also needs to be customly combined
2049 case ISD::EXTRACT_VECTOR_ELT: {
2050 SDValue Arg = N->getOperand(0);
2051 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2052 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2053 unsigned Element = Const->getZExtValue();
2054 return Arg->getOperand(Element);
2055 }
2056 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002057 if (Arg.getOpcode() == ISD::BITCAST &&
2058 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2059 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2060 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002061 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002062 Arg->getOperand(0).getOperand(Element));
2063 }
2064 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002065 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002066 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002067
2068 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002069 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002070 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002071 return Ret;
2072
Tom Stellarde06163a2013-02-07 14:02:35 +00002073 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2074 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002075 //
2076 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2077 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002078 SDValue LHS = N->getOperand(0);
2079 if (LHS.getOpcode() != ISD::SELECT_CC) {
2080 return SDValue();
2081 }
2082
2083 SDValue RHS = N->getOperand(1);
2084 SDValue True = N->getOperand(2);
2085 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002086 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002087
2088 if (LHS.getOperand(2).getNode() != True.getNode() ||
2089 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002090 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002091 return SDValue();
2092 }
2093
Tom Stellard5e524892013-03-08 15:37:11 +00002094 switch (NCC) {
2095 default: return SDValue();
2096 case ISD::SETNE: return LHS;
2097 case ISD::SETEQ: {
2098 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2099 LHSCC = ISD::getSetCCInverse(LHSCC,
2100 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002101 if (DCI.isBeforeLegalizeOps() ||
2102 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2103 return DAG.getSelectCC(SDLoc(N),
2104 LHS.getOperand(0),
2105 LHS.getOperand(1),
2106 LHS.getOperand(2),
2107 LHS.getOperand(3),
2108 LHSCC);
2109 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002110 }
Tom Stellard5e524892013-03-08 15:37:11 +00002111 }
Tom Stellardcd428182013-09-28 02:50:38 +00002112 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002113 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002114
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002115 case AMDGPUISD::EXPORT: {
2116 SDValue Arg = N->getOperand(1);
2117 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2118 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002119
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002120 SDValue NewArgs[8] = {
2121 N->getOperand(0), // Chain
2122 SDValue(),
2123 N->getOperand(2), // ArrayBase
2124 N->getOperand(3), // Type
2125 N->getOperand(4), // SWZ_X
2126 N->getOperand(5), // SWZ_Y
2127 N->getOperand(6), // SWZ_Z
2128 N->getOperand(7) // SWZ_W
2129 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002130 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002131 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002132 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002133 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002134 case AMDGPUISD::TEXTURE_FETCH: {
2135 SDValue Arg = N->getOperand(1);
2136 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2137 break;
2138
2139 SDValue NewArgs[19] = {
2140 N->getOperand(0),
2141 N->getOperand(1),
2142 N->getOperand(2),
2143 N->getOperand(3),
2144 N->getOperand(4),
2145 N->getOperand(5),
2146 N->getOperand(6),
2147 N->getOperand(7),
2148 N->getOperand(8),
2149 N->getOperand(9),
2150 N->getOperand(10),
2151 N->getOperand(11),
2152 N->getOperand(12),
2153 N->getOperand(13),
2154 N->getOperand(14),
2155 N->getOperand(15),
2156 N->getOperand(16),
2157 N->getOperand(17),
2158 N->getOperand(18),
2159 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002160 SDLoc DL(N);
2161 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2162 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002163 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002164 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002165
2166 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002167}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002168
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002169bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2170 SDValue &Src, SDValue &Neg, SDValue &Abs,
2171 SDValue &Sel, SDValue &Imm,
2172 SelectionDAG &DAG) const {
2173 const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002174 if (!Src.isMachineOpcode())
2175 return false;
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002176
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002177 switch (Src.getMachineOpcode()) {
2178 case AMDGPU::FNEG_R600:
2179 if (!Neg.getNode())
2180 return false;
2181 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002182 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002183 return true;
2184 case AMDGPU::FABS_R600:
2185 if (!Abs.getNode())
2186 return false;
2187 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002188 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002189 return true;
2190 case AMDGPU::CONST_COPY: {
2191 unsigned Opcode = ParentNode->getMachineOpcode();
2192 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2193
2194 if (!Sel.getNode())
2195 return false;
2196
2197 SDValue CstOffset = Src.getOperand(0);
2198 if (ParentNode->getValueType(0).isVector())
2199 return false;
2200
2201 // Gather constants values
2202 int SrcIndices[] = {
2203 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2204 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2205 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2206 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2207 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2208 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2209 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2210 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2211 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2212 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2213 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2214 };
2215 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002216 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002217 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2218 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2219 continue;
2220 if (HasDst) {
2221 OtherSrcIdx--;
2222 OtherSelIdx--;
2223 }
2224 if (RegisterSDNode *Reg =
2225 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2226 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002227 ConstantSDNode *Cst
2228 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002229 Consts.push_back(Cst->getZExtValue());
2230 }
2231 }
2232 }
2233
Matt Arsenault37c12d72014-05-12 20:42:57 +00002234 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002235 Consts.push_back(Cst->getZExtValue());
2236 if (!TII->fitsConstReadLimitations(Consts)) {
2237 return false;
2238 }
2239
2240 Sel = CstOffset;
2241 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2242 return true;
2243 }
Jan Vesely16800392016-05-13 20:39:31 +00002244 case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2245 // Check if the Imm slot is used. Taken from below.
2246 if (cast<ConstantSDNode>(Imm)->getZExtValue())
2247 return false;
2248 Imm = Src.getOperand(0);
2249 Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2250 return true;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002251 case AMDGPU::MOV_IMM_I32:
2252 case AMDGPU::MOV_IMM_F32: {
2253 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2254 uint64_t ImmValue = 0;
2255
2256
2257 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2258 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2259 float FloatValue = FPC->getValueAPF().convertToFloat();
2260 if (FloatValue == 0.0) {
2261 ImmReg = AMDGPU::ZERO;
2262 } else if (FloatValue == 0.5) {
2263 ImmReg = AMDGPU::HALF;
2264 } else if (FloatValue == 1.0) {
2265 ImmReg = AMDGPU::ONE;
2266 } else {
2267 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2268 }
2269 } else {
2270 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2271 uint64_t Value = C->getZExtValue();
2272 if (Value == 0) {
2273 ImmReg = AMDGPU::ZERO;
2274 } else if (Value == 1) {
2275 ImmReg = AMDGPU::ONE_INT;
2276 } else {
2277 ImmValue = Value;
2278 }
2279 }
2280
2281 // Check that we aren't already using an immediate.
2282 // XXX: It's possible for an instruction to have more than one
2283 // immediate operand, but this is not supported yet.
2284 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2285 if (!Imm.getNode())
2286 return false;
2287 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2288 assert(C);
2289 if (C->getZExtValue())
2290 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002291 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002292 }
2293 Src = DAG.getRegister(ImmReg, MVT::i32);
2294 return true;
2295 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002296 default:
2297 return false;
2298 }
2299}
2300
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002301/// \brief Fold the instructions after selecting them
2302SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2303 SelectionDAG &DAG) const {
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002304 const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002305 if (!Node->isMachineOpcode())
2306 return Node;
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002307
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002308 unsigned Opcode = Node->getMachineOpcode();
2309 SDValue FakeOp;
2310
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002311 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002312
2313 if (Opcode == AMDGPU::DOT_4) {
2314 int OperandIdx[] = {
2315 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2316 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2317 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2318 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2319 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2320 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2321 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2322 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002323 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002324 int NegIdx[] = {
2325 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2326 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2327 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2328 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2329 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2330 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2331 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2332 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2333 };
2334 int AbsIdx[] = {
2335 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2336 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2337 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2338 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2339 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2340 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2341 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2342 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2343 };
2344 for (unsigned i = 0; i < 8; i++) {
2345 if (OperandIdx[i] < 0)
2346 return Node;
2347 SDValue &Src = Ops[OperandIdx[i] - 1];
2348 SDValue &Neg = Ops[NegIdx[i] - 1];
2349 SDValue &Abs = Ops[AbsIdx[i] - 1];
2350 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2351 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2352 if (HasDst)
2353 SelIdx--;
2354 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002355 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2356 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2357 }
2358 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2359 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2360 SDValue &Src = Ops[i];
2361 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002362 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2363 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002364 } else if (Opcode == AMDGPU::CLAMP_R600) {
2365 SDValue Src = Node->getOperand(0);
2366 if (!Src.isMachineOpcode() ||
2367 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2368 return Node;
2369 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2370 AMDGPU::OpName::clamp);
2371 if (ClampIdx < 0)
2372 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002373 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002374 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002375 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2376 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2377 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002378 } else {
2379 if (!TII->hasInstrModifiers(Opcode))
2380 return Node;
2381 int OperandIdx[] = {
2382 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2383 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2384 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2385 };
2386 int NegIdx[] = {
2387 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2388 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2389 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2390 };
2391 int AbsIdx[] = {
2392 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2393 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2394 -1
2395 };
2396 for (unsigned i = 0; i < 3; i++) {
2397 if (OperandIdx[i] < 0)
2398 return Node;
2399 SDValue &Src = Ops[OperandIdx[i] - 1];
2400 SDValue &Neg = Ops[NegIdx[i] - 1];
2401 SDValue FakeAbs;
2402 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2403 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2404 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002405 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2406 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002407 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002408 ImmIdx--;
2409 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002410 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002411 SDValue &Imm = Ops[ImmIdx];
2412 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002413 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2414 }
2415 }
2416
2417 return Node;
2418}