blob: 8ccd176930a61bf3b533e8e01b04807409a8f8bf [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Custom DAG lowering for R600
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
Matt Arsenault43e92fe2016-06-24 06:30:11 +000033R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
34 const R600Subtarget &STI)
Eric Christopher7792e322015-01-30 23:24:40 +000035 : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
Tom Stellard75aadc22012-12-11 21:25:42 +000036 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard75aadc22012-12-11 21:25:42 +000037 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000038 addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
39 addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
Matt Arsenault71e66762016-05-21 02:27:49 +000040 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
41 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
Tom Stellard0344cdf2013-08-01 15:23:42 +000042
Eric Christopher23a3a7c2015-02-26 00:00:24 +000043 computeRegisterProperties(STI.getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +000044
Matt Arsenault71e66762016-05-21 02:27:49 +000045 // Legalize loads and stores to the private address space.
46 setOperationAction(ISD::LOAD, MVT::i32, Custom);
47 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
48 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
49
50 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
51 // spaces, so it is custom lowered to handle those where it isn't.
52 for (MVT VT : MVT::integer_valuetypes()) {
53 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
54 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
55 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
56
57 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
58 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
59 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
60
61 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
62 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
63 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
64 }
65
Matt Arsenaultd1097a32016-06-02 19:54:26 +000066 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
67 setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
68 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
69 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
70
71 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
72 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
73 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
74
75
Matt Arsenault71e66762016-05-21 02:27:49 +000076 setOperationAction(ISD::STORE, MVT::i8, Custom);
77 setOperationAction(ISD::STORE, MVT::i32, Custom);
78 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
79 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
80
81 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
82 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
83
Matt Arsenaultd1097a32016-06-02 19:54:26 +000084 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
85 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
86 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
87
Tom Stellard0351ea22013-09-28 02:50:50 +000088 // Set condition code actions
89 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
90 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000091 setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000092 setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000093 setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
94 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
Tom Stellard0351ea22013-09-28 02:50:50 +000095 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
96 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
97 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
98 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
Tom Stellardcd428182013-09-28 02:50:38 +000099 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
100 setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
101
102 setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
103 setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
104 setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
105 setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
106
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000107 setOperationAction(ISD::FCOS, MVT::f32, Custom);
108 setOperationAction(ISD::FSIN, MVT::f32, Custom);
109
Tom Stellard75aadc22012-12-11 21:25:42 +0000110 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
Tom Stellard0344cdf2013-08-01 15:23:42 +0000111 setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000112
Tom Stellard492ebea2013-03-08 15:37:07 +0000113 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
114 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
Matt Arsenault1d555c42014-06-23 18:00:55 +0000115 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000116
117 setOperationAction(ISD::FSUB, MVT::f32, Expand);
118
Tom Stellard75aadc22012-12-11 21:25:42 +0000119 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
120 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
121
Tom Stellarde8f9f282013-03-08 15:37:05 +0000122 setOperationAction(ISD::SETCC, MVT::i32, Expand);
123 setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000124 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
Matt Arsenault7fb961f2016-07-22 17:01:21 +0000125 setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
Jan Vesely2cb62ce2014-07-10 22:40:21 +0000126 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
127 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
Tom Stellard75aadc22012-12-11 21:25:42 +0000128
Tom Stellard53f2f902013-09-05 18:38:03 +0000129 setOperationAction(ISD::SELECT, MVT::i32, Expand);
130 setOperationAction(ISD::SELECT, MVT::f32, Expand);
131 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
Tom Stellard53f2f902013-09-05 18:38:03 +0000132 setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
Tom Stellard75aadc22012-12-11 21:25:42 +0000133
Jan Vesely808fff52015-04-30 17:15:56 +0000134 // ADD, SUB overflow.
135 // TODO: turn these into Legal?
136 if (Subtarget->hasCARRY())
137 setOperationAction(ISD::UADDO, MVT::i32, Custom);
138
139 if (Subtarget->hasBORROW())
140 setOperationAction(ISD::USUBO, MVT::i32, Custom);
141
Matt Arsenault4e466652014-04-16 01:41:30 +0000142 // Expand sign extension of vectors
143 if (!Subtarget->hasBFE())
144 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
145
146 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
147 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
148
149 if (!Subtarget->hasBFE())
150 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
151 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
152 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
153
154 if (!Subtarget->hasBFE())
155 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
156 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
157 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
158
159 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
160 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
161 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
162
163 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
164
Tom Stellardf3b2a1e2013-02-06 17:32:29 +0000165 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
166
Tom Stellard880a80a2014-06-17 16:53:14 +0000167 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
168 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
169 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
170 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
171
172 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
173 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
174 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
175 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
176
Jan Vesely25f36272014-06-18 12:27:13 +0000177 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
178 // to be Legal/Custom in order to avoid library calls.
179 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
Jan Vesely900ff2e2014-06-18 12:27:15 +0000180 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
Jan Veselyecf51332014-06-18 12:27:17 +0000181 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
Jan Vesely25f36272014-06-18 12:27:13 +0000182
Michel Danzer49812b52013-07-10 16:37:07 +0000183 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
184
Matt Arsenaultc4d3d3a2014-06-23 18:00:49 +0000185 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
186 for (MVT VT : ScalarIntVTs) {
187 setOperationAction(ISD::ADDC, VT, Expand);
188 setOperationAction(ISD::SUBC, VT, Expand);
189 setOperationAction(ISD::ADDE, VT, Expand);
190 setOperationAction(ISD::SUBE, VT, Expand);
191 }
192
Tom Stellardfc455472013-08-12 22:33:21 +0000193 setSchedulingPreference(Sched::Source);
Matt Arsenault71e66762016-05-21 02:27:49 +0000194
195
196 setTargetDAGCombine(ISD::FP_ROUND);
197 setTargetDAGCombine(ISD::FP_TO_SINT);
198 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
199 setTargetDAGCombine(ISD::SELECT_CC);
200 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
Tom Stellard75aadc22012-12-11 21:25:42 +0000201}
202
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000203const R600Subtarget *R600TargetLowering::getSubtarget() const {
204 return static_cast<const R600Subtarget *>(Subtarget);
205}
206
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000207static inline bool isEOP(MachineBasicBlock::iterator I) {
208 return std::next(I)->getOpcode() == AMDGPU::RETURN;
209}
210
/// Expand pseudo-instructions that were marked usesCustomInserter into real
/// machine instructions at the insertion point of \p MI, then erase \p MI.
/// Returns the (unchanged) basic block; returns early without erasing when
/// the pseudo must be kept as-is (see the LDS, export and RETURN cases).
MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      // Keep the RET form when the result is actually used (or for CMPST,
      // which has no NORET mapping); returning here skips eraseFromParent().
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Build the NORET twin, copying every operand except the dead dst.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  // CLAMP/FABS/FNEG become a plain MOV carrying the matching modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // No instruction is emitted; the MASK flag is attached to the defining
    // instruction of the masked register instead.
    unsigned maskedRegister = MI.getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Reinterpret the float payload as its raw 32-bit pattern for the
    // literal slot.
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                            .getFPImm()
                                                            ->getValueAPF()
                                                            .bitcastToAPInt()
                                                            .getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;
  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    // Emit a MOV from ALU_LITERAL_X, then patch the literal operand with the
    // global-address operand of the pseudo.
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }
  case AMDGPU::CONST_COPY: {
    // MOV from the constant file; the constant slot is selected through the
    // src0_sel immediate.
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // Re-emit the store with the EOP bit appended.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addOperand(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Gradient sample: load H/V gradients into two temp 128-bit regs, then
    // emit the TEX_SAMPLE_G that implicitly uses them.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI.getOperand(4);
    MachineOperand &SID = MI.getOperand(5);
    unsigned TextureId = MI.getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle / coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
            T0)
        .addOperand(MI.getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
            T1)
        .addOperand(MI.getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same as TXD above but samples with the shadow-compare variant
    // (TEX_SAMPLE_C_G).
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI.getOperand(4);
    MachineOperand &SID = MI.getOperand(5);
    unsigned TextureId = MI.getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
            T0)
        .addOperand(MI.getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
            T1)
        .addOperand(MI.getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI.getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Materialize the f32 condition into PREDICATE_BIT via PRED_X (with the
    // PUSH flag), then emit the predicated jump.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI.getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch above.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI.getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    // Scan the rest of the block for a later export of the same type.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF_INST value differs between Evergreen (84) and R600 (40) encodings.
    unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addOperand(MI.getOperand(2))
        .addOperand(MI.getOperand(3))
        .addOperand(MI.getOperand(4))
        .addOperand(MI.getOperand(5))
        .addOperand(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    // RETURN is kept (not erased) — it was modified in place.
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}
608
609//===----------------------------------------------------------------------===//
610// Custom DAG Lowering Operations
611//===----------------------------------------------------------------------===//
612
Tom Stellard75aadc22012-12-11 21:25:42 +0000613SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000614 MachineFunction &MF = DAG.getMachineFunction();
615 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000616 switch (Op.getOpcode()) {
617 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000618 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
619 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000620 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000621 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000622 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000623 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
624 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000625 case ISD::FCOS:
626 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000627 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000628 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000629 case ISD::LOAD: {
630 SDValue Result = LowerLOAD(Op, DAG);
631 assert((!Result.getNode() ||
632 Result.getNode()->getNumValues() == 2) &&
633 "Load should return a value and a chain");
634 return Result;
635 }
636
Matt Arsenault1d555c42014-06-23 18:00:55 +0000637 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000638 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Matt Arsenault81d06012016-03-07 21:10:13 +0000639 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000640 case ISD::INTRINSIC_VOID: {
641 SDValue Chain = Op.getOperand(0);
642 unsigned IntrinsicID =
643 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
644 switch (IntrinsicID) {
Matt Arsenault82e5e1e2016-07-15 21:27:08 +0000645 case AMDGPUIntrinsic::r600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000646 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000647 const SDValue Args[8] = {
648 Chain,
649 Op.getOperand(2), // Export Value
650 Op.getOperand(3), // ArrayBase
651 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000652 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
653 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
654 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
655 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000656 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000657 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000658 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000659
Tom Stellard75aadc22012-12-11 21:25:42 +0000660 // default for switch(IntrinsicID)
661 default: break;
662 }
663 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
664 break;
665 }
666 case ISD::INTRINSIC_WO_CHAIN: {
667 unsigned IntrinsicID =
668 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
669 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000670 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000671 switch(IntrinsicID) {
672 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Matt Arsenault59bd3012016-01-22 19:00:09 +0000673 case AMDGPUIntrinsic::r600_tex:
674 case AMDGPUIntrinsic::r600_texc:
675 case AMDGPUIntrinsic::r600_txl:
676 case AMDGPUIntrinsic::r600_txlc:
677 case AMDGPUIntrinsic::r600_txb:
678 case AMDGPUIntrinsic::r600_txbc:
679 case AMDGPUIntrinsic::r600_txf:
680 case AMDGPUIntrinsic::r600_txq:
681 case AMDGPUIntrinsic::r600_ddx:
Matt Arsenault648e4222016-07-14 05:23:23 +0000682 case AMDGPUIntrinsic::r600_ddy: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000683 unsigned TextureOp;
684 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000685 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000686 TextureOp = 0;
687 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000688 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000689 TextureOp = 1;
690 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000691 case AMDGPUIntrinsic::r600_txl:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000692 TextureOp = 2;
693 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000694 case AMDGPUIntrinsic::r600_txlc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000695 TextureOp = 3;
696 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000697 case AMDGPUIntrinsic::r600_txb:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000698 TextureOp = 4;
699 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000700 case AMDGPUIntrinsic::r600_txbc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000701 TextureOp = 5;
702 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000703 case AMDGPUIntrinsic::r600_txf:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000704 TextureOp = 6;
705 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000706 case AMDGPUIntrinsic::r600_txq:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000707 TextureOp = 7;
708 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000709 case AMDGPUIntrinsic::r600_ddx:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000710 TextureOp = 8;
711 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000712 case AMDGPUIntrinsic::r600_ddy:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000713 TextureOp = 9;
714 break;
715 default:
716 llvm_unreachable("Unknow Texture Operation");
717 }
718
719 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000720 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000721 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000722 DAG.getConstant(0, DL, MVT::i32),
723 DAG.getConstant(1, DL, MVT::i32),
724 DAG.getConstant(2, DL, MVT::i32),
725 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000726 Op.getOperand(2),
727 Op.getOperand(3),
728 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000729 DAG.getConstant(0, DL, MVT::i32),
730 DAG.getConstant(1, DL, MVT::i32),
731 DAG.getConstant(2, DL, MVT::i32),
732 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000733 Op.getOperand(5),
734 Op.getOperand(6),
735 Op.getOperand(7),
736 Op.getOperand(8),
737 Op.getOperand(9),
738 Op.getOperand(10)
739 };
Craig Topper48d114b2014-04-26 18:35:24 +0000740 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000741 }
Matt Arsenaultca7f5702016-07-14 05:47:17 +0000742 case AMDGPUIntrinsic::r600_dot4: {
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000743 SDValue Args[8] = {
744 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000745 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000746 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000747 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000748 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000749 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000750 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000751 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000752 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000753 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000754 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000755 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000756 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000757 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000758 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000759 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000760 };
Craig Topper48d114b2014-04-26 18:35:24 +0000761 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000762 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000763
Jan Vesely2fa28c32016-07-10 21:20:29 +0000764 case Intrinsic::r600_implicitarg_ptr: {
765 MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
766 uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
767 return DAG.getConstant(ByteOffset, DL, PtrVT);
768 }
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000769 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000770 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000771 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000772 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000773 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000774 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000775 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000776 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000777 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000778 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000779 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000780 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000781 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000782 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000783 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000784 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000785 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000786 return LowerImplicitParameter(DAG, VT, DL, 8);
787
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000788 case Intrinsic::r600_read_workdim:
789 case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
Tom Stellarddcb9f092015-07-09 21:20:37 +0000790 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
791 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
792 }
Jan Veselye5121f32014-10-14 20:05:26 +0000793
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000794 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000795 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
796 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000797 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000798 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
799 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000800 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000801 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
802 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000803 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000804 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
805 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000806 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000807 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
808 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000809 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000810 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
811 AMDGPU::T0_Z, VT);
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000812
Matt Arsenault09b2c4a2016-07-15 21:26:52 +0000813 case Intrinsic::r600_recipsqrt_ieee:
814 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000815
Matt Arsenault09b2c4a2016-07-15 21:26:52 +0000816 case Intrinsic::r600_recipsqrt_clamped:
817 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000818 }
Matt Arsenault09b2c4a2016-07-15 21:26:52 +0000819
Tom Stellard75aadc22012-12-11 21:25:42 +0000820 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
821 break;
822 }
823 } // end switch(Op.getOpcode())
824 return SDValue();
825}
826
/// Custom-legalize illegal result types for the nodes below, appending the
/// replacement value(s) to \p Results. Anything not handled here is delegated
/// to the common AMDGPU implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // i1 results get a dedicated lowering (a SETCC against 1.0f).
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    // i1 results get a dedicated lowering (a SETCC against -1.0f).
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    // Use the target-independent expansion when it succeeds; otherwise push
    // nothing for this node.
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    // LowerSDIVREM produces a two-result node: quotient and remainder.
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    // LowerUDIVREM64 appends its replacement values directly to Results.
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
867
Tom Stellard880a80a2014-06-17 16:53:14 +0000868SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
869 SDValue Vector) const {
870
871 SDLoc DL(Vector);
872 EVT VecVT = Vector.getValueType();
873 EVT EltVT = VecVT.getVectorElementType();
874 SmallVector<SDValue, 8> Args;
875
876 for (unsigned i = 0, e = VecVT.getVectorNumElements();
877 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000878 Args.push_back(DAG.getNode(
879 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
880 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000881 }
882
883 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
884}
885
886SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
887 SelectionDAG &DAG) const {
888
889 SDLoc DL(Op);
890 SDValue Vector = Op.getOperand(0);
891 SDValue Index = Op.getOperand(1);
892
893 if (isa<ConstantSDNode>(Index) ||
894 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
895 return Op;
896
897 Vector = vectorToVerticalVector(DAG, Vector);
898 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
899 Vector, Index);
900}
901
902SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
903 SelectionDAG &DAG) const {
904 SDLoc DL(Op);
905 SDValue Vector = Op.getOperand(0);
906 SDValue Value = Op.getOperand(1);
907 SDValue Index = Op.getOperand(2);
908
909 if (isa<ConstantSDNode>(Index) ||
910 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
911 return Op;
912
913 Vector = vectorToVerticalVector(DAG, Vector);
914 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
915 Vector, Value, Index);
916 return vectorToVerticalVector(DAG, Insert);
917}
918
Tom Stellard27233b72016-05-02 18:05:17 +0000919SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
920 SDValue Op,
921 SelectionDAG &DAG) const {
922
923 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
924 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
925 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
926
927 const DataLayout &DL = DAG.getDataLayout();
928 const GlobalValue *GV = GSD->getGlobal();
Tom Stellard27233b72016-05-02 18:05:17 +0000929 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
930
Jan Veselyf97de002016-05-13 20:39:29 +0000931 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
932 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
Tom Stellard27233b72016-05-02 18:05:17 +0000933}
934
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000935SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
936 // On hw >= R700, COS/SIN input must be between -1. and 1.
937 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
938 EVT VT = Op.getValueType();
939 SDValue Arg = Op.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000940 SDLoc DL(Op);
Sanjay Patela2607012015-09-16 16:31:21 +0000941
942 // TODO: Should this propagate fast-math-flags?
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000943 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
944 DAG.getNode(ISD::FADD, DL, VT,
945 DAG.getNode(ISD::FMUL, DL, VT, Arg,
946 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
947 DAG.getConstantFP(0.5, DL, MVT::f32)));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000948 unsigned TrigNode;
949 switch (Op.getOpcode()) {
950 case ISD::FCOS:
951 TrigNode = AMDGPUISD::COS_HW;
952 break;
953 case ISD::FSIN:
954 TrigNode = AMDGPUISD::SIN_HW;
955 break;
956 default:
957 llvm_unreachable("Wrong trig opcode");
958 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000959 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
960 DAG.getNode(ISD::FADD, DL, VT, FractPart,
961 DAG.getConstantFP(-0.5, DL, MVT::f32)));
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000962 if (Gen >= R600Subtarget::R700)
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000963 return TrigVal;
964 // On R600 hw, COS/SIN input must be between -Pi and Pi.
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000965 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
966 DAG.getConstantFP(3.14159265359, DL, MVT::f32));
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000967}
968
/// Lower SHL_PARTS (double-width shift left of a {Lo, Hi} pair by a variable
/// amount) into single-width shifts and selects.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift is used when Shift >= Width (the whole Lo word moves into Hi).
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  // CompShift = (Width - 1) - Shift: amount by which Lo bits spill into Hi.
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits of Lo that overflow into Hi, shifted in two steps so the total
  // amount (Width - Shift) never reaches the full bit width.
  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  // Result for Shift < Width.
  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  // Result for Shift >= Width: Lo is fully shifted into Hi, Lo becomes 0.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  // Select between the two cases based on Shift < Width (unsigned).
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1004
/// Lower SRL_PARTS / SRA_PARTS (double-width right shift of a {Lo, Hi} pair
/// by a variable amount) into single-width shifts and selects.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  // Arithmetic (sign-propagating) variant vs. logical variant.
  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift is used when Shift >= Width (the whole Hi word moves into Lo).
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  // CompShift = (Width - 1) - Shift: amount by which Hi bits spill into Lo.
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits of Hi that flow down into Lo, shifted in two steps so the total
  // amount (Width - Shift) never reaches the full bit width.
  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Result for Shift < Width.
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Result for Shift >= Width: Hi shifts fully into Lo; Hi becomes either
  // the replicated sign bit (SRA) or zero (SRL).
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  // Select between the two cases based on Shift < Width (unsigned).
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1042
Jan Vesely808fff52015-04-30 17:15:56 +00001043SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1044 unsigned mainop, unsigned ovf) const {
1045 SDLoc DL(Op);
1046 EVT VT = Op.getValueType();
1047
1048 SDValue Lo = Op.getOperand(0);
1049 SDValue Hi = Op.getOperand(1);
1050
1051 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1052 // Extend sign.
1053 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1054 DAG.getValueType(MVT::i1));
1055
1056 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1057
1058 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1059}
1060
Matt Arsenault7fb961f2016-07-22 17:01:21 +00001061SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001062 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001063 return DAG.getNode(
1064 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001065 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001066 MVT::i1,
Matt Arsenault7fb961f2016-07-22 17:01:21 +00001067 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
1068 DAG.getCondCode(ISD::SETEQ));
1069}
1070
1071SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
1072 SDLoc DL(Op);
1073 return DAG.getNode(
1074 ISD::SETCC,
1075 DL,
1076 MVT::i1,
1077 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
1078 DAG.getCondCode(ISD::SETEQ));
Tom Stellard75aadc22012-12-11 21:25:42 +00001079}
1080
Tom Stellard75aadc22012-12-11 21:25:42 +00001081SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001082 const SDLoc &DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001083 unsigned DwordOffset) const {
1084 unsigned ByteOffset = DwordOffset * 4;
1085 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
Tom Stellard1e803092013-07-23 01:48:18 +00001086 AMDGPUAS::CONSTANT_BUFFER_0);
Tom Stellard75aadc22012-12-11 21:25:42 +00001087
1088 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
1089 assert(isInt<16>(ByteOffset));
1090
1091 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001092 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
Justin Lebar9c375812016-07-15 18:27:10 +00001093 MachinePointerInfo(ConstantPointerNull::get(PtrType)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001094}
1095
Tom Stellard75aadc22012-12-11 21:25:42 +00001096bool R600TargetLowering::isZero(SDValue Op) const {
1097 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1098 return Cst->isNullValue();
1099 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1100 return CstFP->isZero();
1101 } else {
1102 return false;
1103 }
1104}
1105
Matt Arsenault6b6a2c32016-03-11 08:00:27 +00001106bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
1107 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1108 return CFP->isExactlyValue(1.0);
1109 }
1110 return isAllOnesConstant(Op);
1111}
1112
1113bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
1114 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1115 return CFP->getValueAPF().isZero();
1116 }
1117 return isNullConstant(Op);
1118}
1119
/// Lower SELECT_CC into forms the R600 SET* / CND* instructions can match,
/// rewriting the condition code and swapping operands as needed; falls back
/// to a two-step SELECT_CC expansion when no native form applies.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // First see if the whole select folds to a legacy fmin/fmax pattern.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand: if the select
  // is "false-value, true-value" swapped, invert (and possibly swap) the
  // condition so True really selects the hardware true value.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      // Inverse is not legal by itself; try the swapped-operand form of it.
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    // NOTE: this CCOpcode intentionally shadows the outer one; from here on
    // only the current value of CC matters.
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        // Inverting the condition also requires swapping the select arms.
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" forms; rewrite them as the inverted condition
    // with swapped select arms.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1261
Alp Tokercb402912014-01-24 17:20:08 +00001262/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001263/// convert these pointers to a register index. Each register holds
1264/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1265/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1266/// for indirect addressing.
1267SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1268 unsigned StackWidth,
1269 SelectionDAG &DAG) const {
1270 unsigned SRLPad;
1271 switch(StackWidth) {
1272 case 1:
1273 SRLPad = 2;
1274 break;
1275 case 2:
1276 SRLPad = 3;
1277 break;
1278 case 4:
1279 SRLPad = 4;
1280 break;
1281 default: llvm_unreachable("Invalid stack width");
1282 }
1283
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001284 SDLoc DL(Ptr);
1285 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1286 DAG.getConstant(SRLPad, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001287}
1288
/// Map a vector element index \p ElemIdx to a register sub-channel
/// (\p Channel) and a pointer increment (\p PtrIncr), given how many of the
/// four sub-registers the stack uses per slot (\p StackWidth).
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One channel per slot: every element beyond the first moves to the
    // next register.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two channels per slot: elements alternate between channels 0 and 1.
    Channel = ElemIdx % 2;
    // NOTE(review): only ElemIdx == 2 bumps the pointer here, so ElemIdx == 3
    // yields PtrIncr = 0 — presumably callers never pass 3 with width 2, or
    // the increment is accumulated elsewhere; confirm against callers.
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four channels per slot: the element index is the channel directly.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
1317
/// Lower an i8/i16 truncating store to private memory as a read-modify-write
/// of the containing 32-bit register word: load the dword, clear the target
/// byte/halfword lane, OR in the new value, and store the dword back.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);

  // Lane mask for the stored width (0xff for i8, 0xffff for i16).
  unsigned Mask = 0;
  if (Store->getMemoryVT() == MVT::i8) {
    Mask = 0xff;
  } else if (Store->getMemoryVT() == MVT::i16) {
    Mask = 0xffff;
  }

  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  EVT MemVT = Store->getMemoryVT();

  // Dword-align the pointer (byte address / 4) and load the existing word.
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
                            DAG.getConstant(2, DL, MVT::i32));
  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                            Chain, Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32));

  // Byte offset within the dword (0-3)...
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // ...converted to a bit shift (offset * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Keep only the low MemVT bits of the value being stored.
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Position the new value at its lane within the dword.
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Build the inverse lane mask and clear the target lane in the old word.
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                DAG.getConstant(Mask, DL, MVT::i32),
                                ShiftAmt);
  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
                        DAG.getConstant(0xffffffff, DL, MVT::i32));
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Merge the new lane in and store the full dword back.
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                     Chain, Value, Ptr,
                     DAG.getTargetConstant(0, DL, MVT::i32));
}
1365
/// Custom lowering for ISD::STORE on R600.
///
/// Handles, in order:
///  * stores that MergeVectorStore can combine,
///  * vector stores to LOCAL/PRIVATE (split into scalar stores),
///  * global truncating stores (emitted as a masked-OR STORE_MSKOR),
///  * global dword-or-wider stores (pointer rewritten to a dword address),
///  * private stores, via either lowerPrivateTruncStore (sub-dword) or
///    REGISTER_STORE-based indirect addressing.
/// Returns SDValue() for address spaces left to the default legalizer.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  // Let the generic vector-store merger handle it first if it can.
  if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
    return Result;

  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();
  SDValue Value = StoreNode->getValue();
  EVT ValueVT = Value.getValueType();

  // Vector stores to local/private memory are scalarized.
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
      ValueVT.isVector()) {
    return SplitVectorStore(Op, DAG);
  }

  SDLoc DL(Op);
  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      // Global i8/i16 stores: build a read-modify-write via STORE_MSKOR.
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      // Mask covering just the stored byte/halfword.
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
      // Dword-aligned address (byte address / 4) and the byte offset within
      // that dword.
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      // Bit shift = byte offset * 8.
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               ValueVT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        // Re-emit the store with the rewritten (dword) pointer.
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // Everything below handles only private (scratch) stores; other address
  // spaces fall back to the default expansion.
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  EVT MemVT = StoreNode->getMemoryVT();
  // Sub-dword private stores need a read-modify-write sequence.
  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index for REGISTER_STORE.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    // Emit one REGISTER_STORE per element, then tie them together with a
    // TokenFactor so the chain reflects all of them.
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      // Advance the register index as dictated by the stack layout.
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    // Scalar case: i8 is widened to i32 before the register store.
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  }

  return Chain;
}
1479
Tom Stellard365366f2013-01-23 02:09:06 +00001480// return (512 + (kc_bank << 12)
1481static int
1482ConstantAddressBlock(unsigned AddressSpace) {
1483 switch (AddressSpace) {
1484 case AMDGPUAS::CONSTANT_BUFFER_0:
1485 return 512;
1486 case AMDGPUAS::CONSTANT_BUFFER_1:
1487 return 512 + 4096;
1488 case AMDGPUAS::CONSTANT_BUFFER_2:
1489 return 512 + 4096 * 2;
1490 case AMDGPUAS::CONSTANT_BUFFER_3:
1491 return 512 + 4096 * 3;
1492 case AMDGPUAS::CONSTANT_BUFFER_4:
1493 return 512 + 4096 * 4;
1494 case AMDGPUAS::CONSTANT_BUFFER_5:
1495 return 512 + 4096 * 5;
1496 case AMDGPUAS::CONSTANT_BUFFER_6:
1497 return 512 + 4096 * 6;
1498 case AMDGPUAS::CONSTANT_BUFFER_7:
1499 return 512 + 4096 * 7;
1500 case AMDGPUAS::CONSTANT_BUFFER_8:
1501 return 512 + 4096 * 8;
1502 case AMDGPUAS::CONSTANT_BUFFER_9:
1503 return 512 + 4096 * 9;
1504 case AMDGPUAS::CONSTANT_BUFFER_10:
1505 return 512 + 4096 * 10;
1506 case AMDGPUAS::CONSTANT_BUFFER_11:
1507 return 512 + 4096 * 11;
1508 case AMDGPUAS::CONSTANT_BUFFER_12:
1509 return 512 + 4096 * 12;
1510 case AMDGPUAS::CONSTANT_BUFFER_13:
1511 return 512 + 4096 * 13;
1512 case AMDGPUAS::CONSTANT_BUFFER_14:
1513 return 512 + 4096 * 14;
1514 case AMDGPUAS::CONSTANT_BUFFER_15:
1515 return 512 + 4096 * 15;
1516 default:
1517 return -1;
1518 }
1519}
1520
Matt Arsenault6dfda962016-02-10 18:21:39 +00001521SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1522 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001523 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001524 LoadSDNode *Load = cast<LoadSDNode>(Op);
1525 ISD::LoadExtType ExtType = Load->getExtensionType();
1526 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001527
Matt Arsenault6dfda962016-02-10 18:21:39 +00001528 // <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
1529 // register (2-)byte extract.
1530
1531 // Get Register holding the target.
1532 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1533 DAG.getConstant(2, DL, MVT::i32));
1534 // Load the Register.
1535 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1536 Load->getChain(),
1537 Ptr,
1538 DAG.getTargetConstant(0, DL, MVT::i32),
1539 Op.getOperand(2));
1540
1541 // Get offset within the register.
1542 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1543 Load->getBasePtr(),
1544 DAG.getConstant(0x3, DL, MVT::i32));
1545
1546 // Bit offset of target byte (byteIdx * 8).
1547 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1548 DAG.getConstant(3, DL, MVT::i32));
1549
1550 // Shift to the right.
1551 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1552
1553 // Eliminate the upper bits by setting them to ...
1554 EVT MemEltVT = MemVT.getScalarType();
1555
1556 // ... ones.
1557 if (ExtType == ISD::SEXTLOAD) {
1558 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1559
1560 SDValue Ops[] = {
1561 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1562 Load->getChain()
1563 };
1564
1565 return DAG.getMergeValues(Ops, DL);
1566 }
1567
1568 // ... or zeros.
1569 SDValue Ops[] = {
1570 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1571 Load->getChain()
1572 };
1573
1574 return DAG.getMergeValues(Ops, DL);
1575}
1576
/// Custom lowering for ISD::LOAD on R600.
///
/// Handles, in order:
///  * sub-dword extloads from private memory (lowerPrivateExtLoad),
///  * vector loads from local memory (scalarized),
///  * loads from the constant-buffer address spaces (folded into
///    CONST_ADDRESS nodes when the pointer is constant),
///  * SEXT loads (expanded to EXTLOAD + SIGN_EXTEND_INREG),
///  * private loads via REGISTER_LOAD indirect addressing.
/// Returns SDValue() for anything else, deferring to the default legalizer.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  // Sub-dword private extloads need byte-extraction from a 32-bit register.
  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  // Vector loads from local memory are scalarized.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      scalarizeVectorLoad(LoadNode, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Constant-buffer loads. ZEXTLOAD is allowed alongside NON_EXTLOAD here;
  // SEXTLOAD falls through to the expansion below.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // A constant pointer lets us fold the address directly into a
    // per-channel CONST_ADDRESS node.
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar results still go through a v4i32 build and extract element 0
      // below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
                      DAG.getConstant(LoadNode->getAddressSpace() -
                                      AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // Scalar load: take channel 0 of the vector result.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  SDValue LoweredLoad;

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    // Rebuild as an any-extending load, then sign-extend in-register.
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Only private loads are handled below; everything else uses the default
  // expansion.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Turn the byte pointer into a register index for REGISTER_LOAD.
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    // One REGISTER_LOAD per element, then rebuild the vector.
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT <= 4);
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      // Advance the register index as dictated by the stack layout.
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
    LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001717
Matt Arsenault1d555c42014-06-23 18:00:55 +00001718SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1719 SDValue Chain = Op.getOperand(0);
1720 SDValue Cond = Op.getOperand(1);
1721 SDValue Jump = Op.getOperand(2);
1722
1723 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1724 Chain, Jump, Cond);
1725}
1726
Matt Arsenault81d06012016-03-07 21:10:13 +00001727SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1728 SelectionDAG &DAG) const {
1729 MachineFunction &MF = DAG.getMachineFunction();
Matt Arsenault43e92fe2016-06-24 06:30:11 +00001730 const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
Matt Arsenault81d06012016-03-07 21:10:13 +00001731
1732 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1733
1734 unsigned FrameIndex = FIN->getIndex();
1735 unsigned IgnoredFrameReg;
1736 unsigned Offset =
1737 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1738 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1739 Op.getValueType());
1740}
1741
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Shader arguments arrive as live-in registers; kernel arguments are
/// loaded from the CONSTANT_BUFFER_0 input buffer, whose first 36 bytes
/// hold thread-group/global-size information, so argument data starts at
/// byte offset 36.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  // Locations are computed from the original (pre-legalization) argument
  // list, not from Ins.
  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Shader arguments come in as live-in registers, not buffer loads.
    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    // Offset of this part relative to the start of its original argument;
    // used only for the MachinePointerInfo below.
    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    // Absolute offset in the input buffer: skip the 36-byte header.
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
        MemVT, /* Alignment = */ 4,
        MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the next implicit/appended argument would start.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1818
Mehdi Amini44ede332015-07-09 02:09:04 +00001819EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1820 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001821 if (!VT.isVector())
1822 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001823 return VT.changeVectorElementTypeToInteger();
1824}
1825
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001826bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1827 unsigned AddrSpace,
1828 unsigned Align,
1829 bool *IsFast) const {
1830 if (IsFast)
1831 *IsFast = false;
1832
1833 if (!VT.isSimple() || VT == MVT::Other)
1834 return false;
1835
1836 if (VT.bitsLT(MVT::i32))
1837 return false;
1838
1839 // TODO: This is a rough estimate.
1840 if (IsFast)
1841 *IsFast = true;
1842
1843 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1844}
1845
/// Canonicalize a 4-element BUILD_VECTOR for swizzling: elements that can be
/// produced by a swizzle selector (undef, 0.0, 1.0, or a duplicate of an
/// earlier lane) are replaced with undef, and the selector that reproduces
/// them is recorded in \p RemapSwizzle (old lane -> new selector).
/// \pre \p RemapSwizzle is empty; \p VectorEntry is a BUILD_VECTOR.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionnaly make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    // Constant 0.0 / 1.0 lanes map to the hardware SEL_0 / SEL_1 selectors.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    // Deduplicate: a lane equal to an earlier lane j is replaced by undef
    // and remapped to select lane j.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
1888
Benjamin Kramer193960c2013-06-11 13:32:25 +00001889static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1890 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001891 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1892 assert(RemapSwizzle.empty());
1893 SDValue NewBldVec[4] = {
1894 VectorEntry.getOperand(0),
1895 VectorEntry.getOperand(1),
1896 VectorEntry.getOperand(2),
1897 VectorEntry.getOperand(3)
1898 };
1899 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001900 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001901 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001902 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1903 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1904 ->getZExtValue();
1905 if (i == Idx)
1906 isUnmovable[Idx] = true;
1907 }
1908 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001909
1910 for (unsigned i = 0; i < 4; i++) {
1911 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1912 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1913 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001914 if (isUnmovable[Idx])
1915 continue;
1916 // Swap i and Idx
1917 std::swap(NewBldVec[Idx], NewBldVec[i]);
1918 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1919 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001920 }
1921 }
1922
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001923 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1924 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001925}
1926
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001927SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1928 SelectionDAG &DAG,
1929 const SDLoc &DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001930 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1931 // Old -> New swizzle values
1932 DenseMap<unsigned, unsigned> SwizzleRemap;
1933
1934 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1935 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001936 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001937 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001938 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001939 }
1940
1941 SwizzleRemap.clear();
1942 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1943 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001944 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001945 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001946 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001947 }
1948
1949 return BuildVector;
1950}
1951
1952
Tom Stellard75aadc22012-12-11 21:25:42 +00001953//===----------------------------------------------------------------------===//
1954// Custom DAG Optimizations
1955//===----------------------------------------------------------------------===//
1956
1957SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1958 DAGCombinerInfo &DCI) const {
1959 SelectionDAG &DAG = DCI.DAG;
1960
1961 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001962 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001963 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1964 case ISD::FP_ROUND: {
1965 SDValue Arg = N->getOperand(0);
1966 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001967 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001968 Arg.getOperand(0));
1969 }
1970 break;
1971 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001972
1973 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1974 // (i32 select_cc f32, f32, -1, 0 cc)
1975 //
1976 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1977 // this to one of the SET*_DX10 instructions.
1978 case ISD::FP_TO_SINT: {
1979 SDValue FNeg = N->getOperand(0);
1980 if (FNeg.getOpcode() != ISD::FNEG) {
1981 return SDValue();
1982 }
1983 SDValue SelectCC = FNeg.getOperand(0);
1984 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1985 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1986 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1987 !isHWTrueValue(SelectCC.getOperand(2)) ||
1988 !isHWFalseValue(SelectCC.getOperand(3))) {
1989 return SDValue();
1990 }
1991
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001992 SDLoc dl(N);
1993 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001994 SelectCC.getOperand(0), // LHS
1995 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001996 DAG.getConstant(-1, dl, MVT::i32), // True
1997 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001998 SelectCC.getOperand(4)); // CC
1999
2000 break;
2001 }
Quentin Colombete2e05482013-07-30 00:27:16 +00002002
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00002003 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
2004 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00002005 case ISD::INSERT_VECTOR_ELT: {
2006 SDValue InVec = N->getOperand(0);
2007 SDValue InVal = N->getOperand(1);
2008 SDValue EltNo = N->getOperand(2);
2009 SDLoc dl(N);
2010
2011 // If the inserted element is an UNDEF, just use the input vector.
Sanjay Patel57195842016-03-14 17:28:46 +00002012 if (InVal.isUndef())
Quentin Colombete2e05482013-07-30 00:27:16 +00002013 return InVec;
2014
2015 EVT VT = InVec.getValueType();
2016
2017 // If we can't generate a legal BUILD_VECTOR, exit
2018 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
2019 return SDValue();
2020
2021 // Check that we know which element is being inserted
2022 if (!isa<ConstantSDNode>(EltNo))
2023 return SDValue();
2024 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
2025
2026 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
2027 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
2028 // vector elements.
2029 SmallVector<SDValue, 8> Ops;
2030 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
2031 Ops.append(InVec.getNode()->op_begin(),
2032 InVec.getNode()->op_end());
Sanjay Patel57195842016-03-14 17:28:46 +00002033 } else if (InVec.isUndef()) {
Quentin Colombete2e05482013-07-30 00:27:16 +00002034 unsigned NElts = VT.getVectorNumElements();
2035 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
2036 } else {
2037 return SDValue();
2038 }
2039
2040 // Insert the element
2041 if (Elt < Ops.size()) {
2042 // All the operands of BUILD_VECTOR must have the same type;
2043 // we enforce that here.
2044 EVT OpVT = Ops[0].getValueType();
2045 if (InVal.getValueType() != OpVT)
2046 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2047 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2048 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2049 Ops[Elt] = InVal;
2050 }
2051
2052 // Return the new vector
Ahmed Bougacha128f8732016-04-26 21:15:30 +00002053 return DAG.getBuildVector(VT, dl, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002054 }
2055
Tom Stellard365366f2013-01-23 02:09:06 +00002056 // Extract_vec (Build_vector) generated by custom lowering
2057 // also needs to be customly combined
2058 case ISD::EXTRACT_VECTOR_ELT: {
2059 SDValue Arg = N->getOperand(0);
2060 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2061 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2062 unsigned Element = Const->getZExtValue();
2063 return Arg->getOperand(Element);
2064 }
2065 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002066 if (Arg.getOpcode() == ISD::BITCAST &&
2067 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2068 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2069 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002070 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002071 Arg->getOperand(0).getOperand(Element));
2072 }
2073 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002074 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002075 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002076
2077 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002078 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002079 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002080 return Ret;
2081
Tom Stellarde06163a2013-02-07 14:02:35 +00002082 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2083 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002084 //
2085 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2086 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002087 SDValue LHS = N->getOperand(0);
2088 if (LHS.getOpcode() != ISD::SELECT_CC) {
2089 return SDValue();
2090 }
2091
2092 SDValue RHS = N->getOperand(1);
2093 SDValue True = N->getOperand(2);
2094 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002095 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002096
2097 if (LHS.getOperand(2).getNode() != True.getNode() ||
2098 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002099 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002100 return SDValue();
2101 }
2102
Tom Stellard5e524892013-03-08 15:37:11 +00002103 switch (NCC) {
2104 default: return SDValue();
2105 case ISD::SETNE: return LHS;
2106 case ISD::SETEQ: {
2107 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2108 LHSCC = ISD::getSetCCInverse(LHSCC,
2109 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002110 if (DCI.isBeforeLegalizeOps() ||
2111 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2112 return DAG.getSelectCC(SDLoc(N),
2113 LHS.getOperand(0),
2114 LHS.getOperand(1),
2115 LHS.getOperand(2),
2116 LHS.getOperand(3),
2117 LHSCC);
2118 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002119 }
Tom Stellard5e524892013-03-08 15:37:11 +00002120 }
Tom Stellardcd428182013-09-28 02:50:38 +00002121 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002122 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002123
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002124 case AMDGPUISD::EXPORT: {
2125 SDValue Arg = N->getOperand(1);
2126 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2127 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002128
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002129 SDValue NewArgs[8] = {
2130 N->getOperand(0), // Chain
2131 SDValue(),
2132 N->getOperand(2), // ArrayBase
2133 N->getOperand(3), // Type
2134 N->getOperand(4), // SWZ_X
2135 N->getOperand(5), // SWZ_Y
2136 N->getOperand(6), // SWZ_Z
2137 N->getOperand(7) // SWZ_W
2138 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002139 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002140 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002141 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002142 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002143 case AMDGPUISD::TEXTURE_FETCH: {
2144 SDValue Arg = N->getOperand(1);
2145 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2146 break;
2147
2148 SDValue NewArgs[19] = {
2149 N->getOperand(0),
2150 N->getOperand(1),
2151 N->getOperand(2),
2152 N->getOperand(3),
2153 N->getOperand(4),
2154 N->getOperand(5),
2155 N->getOperand(6),
2156 N->getOperand(7),
2157 N->getOperand(8),
2158 N->getOperand(9),
2159 N->getOperand(10),
2160 N->getOperand(11),
2161 N->getOperand(12),
2162 N->getOperand(13),
2163 N->getOperand(14),
2164 N->getOperand(15),
2165 N->getOperand(16),
2166 N->getOperand(17),
2167 N->getOperand(18),
2168 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002169 SDLoc DL(N);
2170 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2171 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002172 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002173 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002174
2175 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002176}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002177
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002178bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2179 SDValue &Src, SDValue &Neg, SDValue &Abs,
2180 SDValue &Sel, SDValue &Imm,
2181 SelectionDAG &DAG) const {
2182 const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002183 if (!Src.isMachineOpcode())
2184 return false;
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002185
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002186 switch (Src.getMachineOpcode()) {
2187 case AMDGPU::FNEG_R600:
2188 if (!Neg.getNode())
2189 return false;
2190 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002191 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002192 return true;
2193 case AMDGPU::FABS_R600:
2194 if (!Abs.getNode())
2195 return false;
2196 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002197 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002198 return true;
2199 case AMDGPU::CONST_COPY: {
2200 unsigned Opcode = ParentNode->getMachineOpcode();
2201 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2202
2203 if (!Sel.getNode())
2204 return false;
2205
2206 SDValue CstOffset = Src.getOperand(0);
2207 if (ParentNode->getValueType(0).isVector())
2208 return false;
2209
2210 // Gather constants values
2211 int SrcIndices[] = {
2212 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2213 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2214 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2215 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2216 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2217 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2218 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2219 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2220 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2221 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2222 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2223 };
2224 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002225 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002226 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2227 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2228 continue;
2229 if (HasDst) {
2230 OtherSrcIdx--;
2231 OtherSelIdx--;
2232 }
2233 if (RegisterSDNode *Reg =
2234 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2235 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002236 ConstantSDNode *Cst
2237 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002238 Consts.push_back(Cst->getZExtValue());
2239 }
2240 }
2241 }
2242
Matt Arsenault37c12d72014-05-12 20:42:57 +00002243 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002244 Consts.push_back(Cst->getZExtValue());
2245 if (!TII->fitsConstReadLimitations(Consts)) {
2246 return false;
2247 }
2248
2249 Sel = CstOffset;
2250 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2251 return true;
2252 }
Jan Vesely16800392016-05-13 20:39:31 +00002253 case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2254 // Check if the Imm slot is used. Taken from below.
2255 if (cast<ConstantSDNode>(Imm)->getZExtValue())
2256 return false;
2257 Imm = Src.getOperand(0);
2258 Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2259 return true;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002260 case AMDGPU::MOV_IMM_I32:
2261 case AMDGPU::MOV_IMM_F32: {
2262 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2263 uint64_t ImmValue = 0;
2264
2265
2266 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2267 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2268 float FloatValue = FPC->getValueAPF().convertToFloat();
2269 if (FloatValue == 0.0) {
2270 ImmReg = AMDGPU::ZERO;
2271 } else if (FloatValue == 0.5) {
2272 ImmReg = AMDGPU::HALF;
2273 } else if (FloatValue == 1.0) {
2274 ImmReg = AMDGPU::ONE;
2275 } else {
2276 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2277 }
2278 } else {
2279 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2280 uint64_t Value = C->getZExtValue();
2281 if (Value == 0) {
2282 ImmReg = AMDGPU::ZERO;
2283 } else if (Value == 1) {
2284 ImmReg = AMDGPU::ONE_INT;
2285 } else {
2286 ImmValue = Value;
2287 }
2288 }
2289
2290 // Check that we aren't already using an immediate.
2291 // XXX: It's possible for an instruction to have more than one
2292 // immediate operand, but this is not supported yet.
2293 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2294 if (!Imm.getNode())
2295 return false;
2296 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2297 assert(C);
2298 if (C->getZExtValue())
2299 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002300 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002301 }
2302 Src = DAG.getRegister(ImmReg, MVT::i32);
2303 return true;
2304 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002305 default:
2306 return false;
2307 }
2308}
2309
/// \brief Fold the instructions after selecting them.
///
/// Walks the operands of an already-selected machine node and, via
/// FoldOperand(), folds source modifiers (neg/abs), constant-buffer reads and
/// inline literals directly into the node's operand slots. Also folds
/// CLAMP_R600 into the clamp modifier of its input instruction. Returns a new
/// machine node when a fold succeeded, otherwise returns \p Node unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
  if (!Node->isMachineOpcode())
    return Node;

  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed to FoldOperand for modifier slots this instruction
  // does not have; FoldOperand treats an empty SDValue as "no such slot".
  SDValue FakeOp;

  // Working copy of the operand list; FoldOperand mutates entries through the
  // aliasing references taken below, and a new node is built from it.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == AMDGPU::DOT_4) {
    // DOT_4 has eight scalar sources (src0/src1 x XYZW), each with its own
    // neg/abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The getOperandIdx() values are MI operand indices, which include the
      // dst; the SDNode operand list does not, hence the "- 1" adjustment
      // (mirrors the HasDst handling for SelIdx just below).
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // Rebuild the node as soon as one operand folds; remaining operands get
      // another chance when PostISelFolding runs on the new node.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index); only the values
    // at odd positions are candidates for folding.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == AMDGPU::CLAMP_R600) {
    // Fold the clamp pseudo into the clamp modifier of the instruction that
    // produces its input, when that instruction supports modifiers.
    SDValue Src = Node->getOperand(0);
    if (!Src.isMachineOpcode() ||
        !TII->hasInstrModifiers(Src.getMachineOpcode()))
      return Node;
    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
        AMDGPU::OpName::clamp);
    if (ClampIdx < 0)
      return Node;
    SDLoc DL(Node);
    // Note: this Ops shadows the outer one on purpose — these are the
    // operands of Src (the producing instruction), not of Node.
    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
                              Node->getVTList(), Ops);
  } else {
    // Generic ALU instruction: up to three sources, each with neg, abs
    // (src2 has no abs slot) and a shared literal operand.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
      if (HasDst) {
        // Shift MI operand indices past the dst (absent from SDNode operands).
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}