blob: f9ceb8e1407bdc114eaeab8aeb1a30ef322a9770 [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000016#include "AMDGPUFrameLowering.h"
Matt Arsenaultc791f392014-06-23 18:00:31 +000017#include "AMDGPUIntrinsicInfo.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000018#include "AMDGPUSubtarget.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
21#include "R600MachineFunctionInfo.h"
Tom Stellard067c8152014-07-21 14:01:14 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellardacfeebf2013-07-23 01:48:05 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf3b2a1e2013-02-06 17:32:29 +000024#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000025#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Argument.h"
29#include "llvm/IR/Function.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000030
31using namespace llvm;
32
/// Construct the R600 lowering object: register the legal register classes
/// and declare, per ISD opcode and value type, whether the operation is
/// Legal, Custom-lowered, Promoted, or Expanded on this target.
R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  // R600 works on 32-bit scalar registers and 64/128-bit vectors of them.
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions: condition codes the hardware cannot encode
  // directly are expanded into supported comparisons.
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  // Trig is custom-lowered (see LowerTrig via LowerOperation).
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // Plain SELECT is expanded into SELECT_CC, which is custom-lowered above.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors. Scalar SIGN_EXTEND_INREG of the small
  // types is only legal when the subtarget has a bitfield-extract (BFE) op.
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  // Expand carry/borrow-propagating adds/subs for both scalar int types.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  setSchedulingPreference(Sched::Source);

  // DAG combines handled in PerformDAGCombine.
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
}
202
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000203const R600Subtarget *R600TargetLowering::getSubtarget() const {
204 return static_cast<const R600Subtarget *>(Subtarget);
205}
206
Tom Stellardc0f0fba2015-10-01 17:51:29 +0000207static inline bool isEOP(MachineBasicBlock::iterator I) {
208 return std::next(I)->getOpcode() == AMDGPU::RETURN;
209}
210
/// Expand pseudo-instructions that were marked usesCustomInserter into real
/// machine instructions after instruction selection. Returns the basic block
/// in which subsequent code should be inserted. In every case that builds a
/// replacement, the original pseudo \p MI is erased at the end; cases that
/// bail out (unused-def check, RETURN, non-final exports) return early and
/// leave \p MI in place.
MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      // Rebuild the instruction as the NORET form, copying every operand
      // except the (dead) destination at index 0.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  // CLAMP/FABS/FNEG pseudos all lower to a plain MOV carrying the
  // corresponding R600 instruction modifier flag.
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    // MASK_WRITE emits no code itself; it marks the defining instruction of
    // its operand with the write-mask flag.
    unsigned maskedRegister = MI.getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    // Materialize an f32 immediate via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                            .getFPImm()
                                                            ->getValueAPF()
                                                            .bitcastToAPInt()
                                                            .getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;
  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    // Build a MOV from ALU_LITERAL_X and patch its literal slot with the
    // global-address operand.
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }
  case AMDGPU::CONST_COPY: {
    // Copy from the constant file: MOV from ALU_CONST with the constant
    // selector encoded in src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }
  case AMDGPU::RAT_STORE_TYPED_eg: {
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addOperand(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    // Texture sample with explicit derivatives: set the H and V gradients
    // (TEX_SET_GRADIENTS_H/V) and then issue the gradient sample
    // (TEX_SAMPLE_G), with T0/T1 as implicit uses carrying the gradients.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI.getOperand(4);
    MachineOperand &SID = MI.getOperand(5);
    unsigned TextureId = MI.getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
            T0)
        .addOperand(MI.getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
            T1)
        .addOperand(MI.getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    // Same expansion as TXD above, but ends with the shadow-comparison
    // gradient sample TEX_SAMPLE_C_G.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI.getOperand(4);
    MachineOperand &SID = MI.getOperand(5);
    unsigned TextureId = MI.getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    // Adjust source swizzle and coordinate-type bits per texture target.
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
            T0)
        .addOperand(MI.getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
            T1)
        .addOperand(MI.getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    // Unconditional branch lowers to a plain JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI.getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    // Conditional branch: PRED_X sets PREDICATE_BIT from a float
    // compare-against-zero, then JUMP_COND consumes (and kills) it.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI.getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    // Same as above with the integer compare-against-zero predicate.
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI.getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
           EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF_INST encoding differs between Evergreen (84) and R600 (40) exports.
    unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .addOperand(MI.getOperand(0))
        .addOperand(MI.getOperand(1))
        .addOperand(MI.getOperand(2))
        .addOperand(MI.getOperand(3))
        .addOperand(MI.getOperand(4))
        .addOperand(MI.getOperand(5))
        .addOperand(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN is handled elsewhere; keep the instruction as-is.
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}
602
603//===----------------------------------------------------------------------===//
604// Custom DAG Lowering Operations
605//===----------------------------------------------------------------------===//
606
Tom Stellard75aadc22012-12-11 21:25:42 +0000607SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000608 MachineFunction &MF = DAG.getMachineFunction();
609 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard75aadc22012-12-11 21:25:42 +0000610 switch (Op.getOpcode()) {
611 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard880a80a2014-06-17 16:53:14 +0000612 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
613 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Jan Vesely25f36272014-06-18 12:27:13 +0000614 case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
Jan Veselyecf51332014-06-18 12:27:17 +0000615 case ISD::SRA_PARTS:
Jan Vesely900ff2e2014-06-18 12:27:15 +0000616 case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
Jan Vesely808fff52015-04-30 17:15:56 +0000617 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
618 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
Vincent Lejeuneb55940c2013-07-09 15:03:11 +0000619 case ISD::FCOS:
620 case ISD::FSIN: return LowerTrig(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000621 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000622 case ISD::STORE: return LowerSTORE(Op, DAG);
Matt Arsenaultd2c9e082014-07-07 18:34:45 +0000623 case ISD::LOAD: {
624 SDValue Result = LowerLOAD(Op, DAG);
625 assert((!Result.getNode() ||
626 Result.getNode()->getNumValues() == 2) &&
627 "Load should return a value and a chain");
628 return Result;
629 }
630
Matt Arsenault1d555c42014-06-23 18:00:55 +0000631 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000632 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
Matt Arsenault81d06012016-03-07 21:10:13 +0000633 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
Tom Stellard75aadc22012-12-11 21:25:42 +0000634 case ISD::INTRINSIC_VOID: {
635 SDValue Chain = Op.getOperand(0);
636 unsigned IntrinsicID =
637 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
638 switch (IntrinsicID) {
Matt Arsenault82e5e1e2016-07-15 21:27:08 +0000639 case AMDGPUIntrinsic::r600_store_swizzle: {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000640 SDLoc DL(Op);
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000641 const SDValue Args[8] = {
642 Chain,
643 Op.getOperand(2), // Export Value
644 Op.getOperand(3), // ArrayBase
645 Op.getOperand(4), // Type
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000646 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
647 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
648 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
649 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
Vincent Lejeuned80bc152013-02-14 16:55:06 +0000650 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000651 return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
Tom Stellard75aadc22012-12-11 21:25:42 +0000652 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000653
Tom Stellard75aadc22012-12-11 21:25:42 +0000654 // default for switch(IntrinsicID)
655 default: break;
656 }
657 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
658 break;
659 }
660 case ISD::INTRINSIC_WO_CHAIN: {
661 unsigned IntrinsicID =
662 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
663 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000664 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +0000665 switch(IntrinsicID) {
666 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Matt Arsenault59bd3012016-01-22 19:00:09 +0000667 case AMDGPUIntrinsic::r600_tex:
Matt Arsenaultf9245b72016-07-22 17:01:25 +0000668 case AMDGPUIntrinsic::r600_texc: {
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000669 unsigned TextureOp;
670 switch (IntrinsicID) {
Matt Arsenault59bd3012016-01-22 19:00:09 +0000671 case AMDGPUIntrinsic::r600_tex:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000672 TextureOp = 0;
673 break;
Matt Arsenault59bd3012016-01-22 19:00:09 +0000674 case AMDGPUIntrinsic::r600_texc:
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000675 TextureOp = 1;
676 break;
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000677 default:
678 llvm_unreachable("Unknow Texture Operation");
679 }
680
681 SDValue TexArgs[19] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000682 DAG.getConstant(TextureOp, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000683 Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000684 DAG.getConstant(0, DL, MVT::i32),
685 DAG.getConstant(1, DL, MVT::i32),
686 DAG.getConstant(2, DL, MVT::i32),
687 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000688 Op.getOperand(2),
689 Op.getOperand(3),
690 Op.getOperand(4),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000691 DAG.getConstant(0, DL, MVT::i32),
692 DAG.getConstant(1, DL, MVT::i32),
693 DAG.getConstant(2, DL, MVT::i32),
694 DAG.getConstant(3, DL, MVT::i32),
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000695 Op.getOperand(5),
696 Op.getOperand(6),
697 Op.getOperand(7),
698 Op.getOperand(8),
699 Op.getOperand(9),
700 Op.getOperand(10)
701 };
Craig Topper48d114b2014-04-26 18:35:24 +0000702 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
Vincent Lejeuned3eed662013-05-17 16:50:20 +0000703 }
Matt Arsenaultca7f5702016-07-14 05:47:17 +0000704 case AMDGPUIntrinsic::r600_dot4: {
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000705 SDValue Args[8] = {
706 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000707 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000708 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000709 DAG.getConstant(0, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000710 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000711 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000712 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000713 DAG.getConstant(1, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000714 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000715 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000716 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000717 DAG.getConstant(2, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000718 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000719 DAG.getConstant(3, DL, MVT::i32)),
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000720 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000721 DAG.getConstant(3, DL, MVT::i32))
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000722 };
Craig Topper48d114b2014-04-26 18:35:24 +0000723 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
Vincent Lejeune519f21e2013-05-17 16:50:32 +0000724 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000725
Jan Vesely2fa28c32016-07-10 21:20:29 +0000726 case Intrinsic::r600_implicitarg_ptr: {
727 MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
728 uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
729 return DAG.getConstant(ByteOffset, DL, PtrVT);
730 }
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000731 case Intrinsic::r600_read_ngroups_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000732 return LowerImplicitParameter(DAG, VT, DL, 0);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000733 case Intrinsic::r600_read_ngroups_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000734 return LowerImplicitParameter(DAG, VT, DL, 1);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000735 case Intrinsic::r600_read_ngroups_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000736 return LowerImplicitParameter(DAG, VT, DL, 2);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000737 case Intrinsic::r600_read_global_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000738 return LowerImplicitParameter(DAG, VT, DL, 3);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000739 case Intrinsic::r600_read_global_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000740 return LowerImplicitParameter(DAG, VT, DL, 4);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000741 case Intrinsic::r600_read_global_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000742 return LowerImplicitParameter(DAG, VT, DL, 5);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000743 case Intrinsic::r600_read_local_size_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000744 return LowerImplicitParameter(DAG, VT, DL, 6);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000745 case Intrinsic::r600_read_local_size_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000746 return LowerImplicitParameter(DAG, VT, DL, 7);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000747 case Intrinsic::r600_read_local_size_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000748 return LowerImplicitParameter(DAG, VT, DL, 8);
749
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000750 case Intrinsic::r600_read_workdim:
751 case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
Tom Stellarddcb9f092015-07-09 21:20:37 +0000752 uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
753 return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
754 }
Jan Veselye5121f32014-10-14 20:05:26 +0000755
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000756 case Intrinsic::r600_read_tgid_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000757 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
758 AMDGPU::T1_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000759 case Intrinsic::r600_read_tgid_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000760 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
761 AMDGPU::T1_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000762 case Intrinsic::r600_read_tgid_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000763 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
764 AMDGPU::T1_Z, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000765 case Intrinsic::r600_read_tidig_x:
Tom Stellard75aadc22012-12-11 21:25:42 +0000766 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
767 AMDGPU::T0_X, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000768 case Intrinsic::r600_read_tidig_y:
Tom Stellard75aadc22012-12-11 21:25:42 +0000769 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
770 AMDGPU::T0_Y, VT);
NAKAMURA Takumi4f328e12013-05-22 06:37:31 +0000771 case Intrinsic::r600_read_tidig_z:
Tom Stellard75aadc22012-12-11 21:25:42 +0000772 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
773 AMDGPU::T0_Z, VT);
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000774
Matt Arsenault09b2c4a2016-07-15 21:26:52 +0000775 case Intrinsic::r600_recipsqrt_ieee:
776 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
Matt Arsenaultbef34e22016-01-22 21:30:34 +0000777
Matt Arsenault09b2c4a2016-07-15 21:26:52 +0000778 case Intrinsic::r600_recipsqrt_clamped:
779 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Tom Stellard75aadc22012-12-11 21:25:42 +0000780 }
Matt Arsenault09b2c4a2016-07-15 21:26:52 +0000781
Tom Stellard75aadc22012-12-11 21:25:42 +0000782 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
783 break;
784 }
785 } // end switch(Op.getOpcode())
786 return SDValue();
787}
788
/// Replace the illegal-typed results of node \p N with legal equivalents,
/// appending them to \p Results. Opcodes not handled here are forwarded to
/// the common AMDGPU implementation.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    // i1 results get a dedicated lowering (a SETCC against 1.0f).
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    // i1 results: SETCC against -1.0f.
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    // LowerSDIVREM produces both quotient and remainder; push the pair.
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    // LowerUDIVREM64 appends its results to the vector itself.
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
829
Tom Stellard880a80a2014-06-17 16:53:14 +0000830SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
831 SDValue Vector) const {
832
833 SDLoc DL(Vector);
834 EVT VecVT = Vector.getValueType();
835 EVT EltVT = VecVT.getVectorElementType();
836 SmallVector<SDValue, 8> Args;
837
838 for (unsigned i = 0, e = VecVT.getVectorNumElements();
839 i != e; ++i) {
Mehdi Amini44ede332015-07-09 02:09:04 +0000840 Args.push_back(DAG.getNode(
841 ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
842 DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
Tom Stellard880a80a2014-06-17 16:53:14 +0000843 }
844
845 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
846}
847
848SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
849 SelectionDAG &DAG) const {
850
851 SDLoc DL(Op);
852 SDValue Vector = Op.getOperand(0);
853 SDValue Index = Op.getOperand(1);
854
855 if (isa<ConstantSDNode>(Index) ||
856 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
857 return Op;
858
859 Vector = vectorToVerticalVector(DAG, Vector);
860 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
861 Vector, Index);
862}
863
864SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
865 SelectionDAG &DAG) const {
866 SDLoc DL(Op);
867 SDValue Vector = Op.getOperand(0);
868 SDValue Value = Op.getOperand(1);
869 SDValue Index = Op.getOperand(2);
870
871 if (isa<ConstantSDNode>(Index) ||
872 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
873 return Op;
874
875 Vector = vectorToVerticalVector(DAG, Vector);
876 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
877 Vector, Value, Index);
878 return vectorToVerticalVector(DAG, Insert);
879}
880
/// Lower a global address. Only globals in the constant address space get the
/// R600-specific CONST_DATA_PTR wrapping; everything else is deferred to the
/// shared AMDGPU lowering.
SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  // Wrap the target global address so the constant-buffer base is resolved.
  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}
896
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  // 0.15915494309 is 1/(2*pi): scale the argument into units of full turns,
  // then bias by 0.5 so FRACT maps it into [0, 1).
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  // Undo the 0.5 bias so the hardware sees a value in [-0.5, 0.5).
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= R600Subtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  // Rescale the normalized result by pi (3.14159265359).
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}
930
/// Lower SHL_PARTS: a 64-bit left shift expressed on two 32-bit halves
/// (\p Lo, \p Hi) with a variable shift amount. Produces the shifted
/// (Lo, Hi) pair via MERGE_VALUES.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift: amount left over once the shift crosses the 32-bit boundary.
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  // CompShift: how far the Lo bits that spill into Hi must move right.
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  // Results for shift amounts < 32 ("small") ...
  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  // ... and for shift amounts >= 32 ("big"): Lo moves entirely into Hi.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  // Select between the two cases based on the actual shift amount.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
966
/// Lower SRL_PARTS / SRA_PARTS: a 64-bit right shift (logical or arithmetic)
/// expressed on two 32-bit halves (\p Lo, \p Hi) with a variable shift
/// amount. Produces the shifted (Lo, Hi) pair via MERGE_VALUES.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  // Arithmetic (sign-propagating) variant?
  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  // BigShift: amount left over once the shift crosses the 32-bit boundary.
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  // CompShift: how far the Hi bits that spill into Lo must move left.
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Results for shift amounts < 32 ("small") ...
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // ... and for shift amounts >= 32 ("big"): Hi moves entirely into Lo;
  // for SRA the new Hi is all sign bits, otherwise zero.
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  // Select between the two cases based on the actual shift amount.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
1004
Jan Vesely808fff52015-04-30 17:15:56 +00001005SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
1006 unsigned mainop, unsigned ovf) const {
1007 SDLoc DL(Op);
1008 EVT VT = Op.getValueType();
1009
1010 SDValue Lo = Op.getOperand(0);
1011 SDValue Hi = Op.getOperand(1);
1012
1013 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
1014 // Extend sign.
1015 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
1016 DAG.getValueType(MVT::i1));
1017
1018 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
1019
1020 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
1021}
1022
Matt Arsenault7fb961f2016-07-22 17:01:21 +00001023SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001024 SDLoc DL(Op);
Tom Stellard75aadc22012-12-11 21:25:42 +00001025 return DAG.getNode(
1026 ISD::SETCC,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001027 DL,
Tom Stellard75aadc22012-12-11 21:25:42 +00001028 MVT::i1,
Matt Arsenault7fb961f2016-07-22 17:01:21 +00001029 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
1030 DAG.getCondCode(ISD::SETEQ));
1031}
1032
1033SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
1034 SDLoc DL(Op);
1035 return DAG.getNode(
1036 ISD::SETCC,
1037 DL,
1038 MVT::i1,
1039 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
1040 DAG.getCondCode(ISD::SETEQ));
Tom Stellard75aadc22012-12-11 21:25:42 +00001041}
1042
/// Load one dword-sized implicit kernel parameter (ngroups, global size,
/// local size, ...) from constant buffer 0 at dword index \p DwordOffset.
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                      AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  // The null pointer in CONSTANT_BUFFER_0 gives the load a distinct memory
  // location for alias analysis; the constant operand is the byte offset.
  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}
1057
Tom Stellard75aadc22012-12-11 21:25:42 +00001058bool R600TargetLowering::isZero(SDValue Op) const {
1059 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
1060 return Cst->isNullValue();
1061 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
1062 return CstFP->isZero();
1063 } else {
1064 return false;
1065 }
1066}
1067
Matt Arsenault6b6a2c32016-03-11 08:00:27 +00001068bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
1069 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1070 return CFP->isExactlyValue(1.0);
1071 }
1072 return isAllOnesConstant(Op);
1073}
1074
1075bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
1076 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
1077 return CFP->getValueAPF().isZero();
1078 }
1079 return isNullConstant(Op);
1080}
1081
/// Lower SELECT_CC into a form the R600 SET*/CND* instructions can match,
/// swapping operands and inverting condition codes as needed; unsupported
/// combinations are decomposed into two supported SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // First try to fold the whole select into a legacy min/max operation.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // The constants are in the wrong slots: invert the condition (or invert
    // and swap the compare operands) so True/False land where SET* expects.
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the conditon and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* has no "not equal" form: rewrite NE-style compares as the
    // inverted condition with True/False exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1223
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // Shift amount: log2(bytes covered per register at this stack width)
  // (width 1 -> 4 bytes, width 2 -> 8 bytes, width 4 -> 16 bytes).
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}
1250
1251void R600TargetLowering::getStackAddress(unsigned StackWidth,
1252 unsigned ElemIdx,
1253 unsigned &Channel,
1254 unsigned &PtrIncr) const {
1255 switch (StackWidth) {
1256 default:
1257 case 1:
1258 Channel = 0;
1259 if (ElemIdx > 0) {
1260 PtrIncr = 1;
1261 } else {
1262 PtrIncr = 0;
1263 }
1264 break;
1265 case 2:
1266 Channel = ElemIdx % 2;
1267 if (ElemIdx == 2) {
1268 PtrIncr = 1;
1269 } else {
1270 PtrIncr = 0;
1271 }
1272 break;
1273 case 4:
1274 Channel = ElemIdx;
1275 PtrIncr = 0;
1276 break;
1277 }
1278}
1279
/// Lower an i8/i16 truncating store to private memory as a read-modify-write
/// of the containing 32-bit register word: load the dword, clear the target
/// byte/halfword lane, OR in the shifted value, and store the dword back.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);

  // Lane mask for the stored element width.
  unsigned Mask = 0;
  if (Store->getMemoryVT() == MVT::i8) {
    Mask = 0xff;
  } else if (Store->getMemoryVT() == MVT::i16) {
    Mask = 0xffff;
  }

  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  EVT MemVT = Store->getMemoryVT();

  // Load the dword that contains the target byte/halfword
  // (dword address = byte address >> 2).
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
                            DAG.getConstant(2, DL, MVT::i32));
  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                            Chain, Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32));

  // Bit offset of the lane inside the dword = (byte address & 3) * 8.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Widen the value to i32 and keep only the low MemVT bits.
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Move the value into its lane.
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Build the inverted lane mask and clear the destination lane.
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                DAG.getConstant(Mask, DL, MVT::i32),
                                ShiftAmt);
  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
                        DAG.getConstant(0xffffffff, DL, MVT::i32));
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Merge the new lane in and write the dword back.
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                     Chain, Value, Ptr,
                     DAG.getTargetConstant(0, DL, MVT::i32));
}
1327
1328SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1329 if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
Tom Stellardfbab8272013-08-16 01:12:11 +00001330 return Result;
Tom Stellardfbab8272013-08-16 01:12:11 +00001331
Matt Arsenault95245662016-02-11 05:32:46 +00001332 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1333 unsigned AS = StoreNode->getAddressSpace();
1334 SDValue Value = StoreNode->getValue();
1335 EVT ValueVT = Value.getValueType();
1336
1337 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1338 ValueVT.isVector()) {
1339 return SplitVectorStore(Op, DAG);
1340 }
1341
1342 SDLoc DL(Op);
1343 SDValue Chain = StoreNode->getChain();
1344 SDValue Ptr = StoreNode->getBasePtr();
1345
1346 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001347 if (StoreNode->isTruncatingStore()) {
1348 EVT VT = Value.getValueType();
Tom Stellardfbab8272013-08-16 01:12:11 +00001349 assert(VT.bitsLE(MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001350 EVT MemVT = StoreNode->getMemoryVT();
1351 SDValue MaskConstant;
1352 if (MemVT == MVT::i8) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001353 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001354 } else {
1355 assert(MemVT == MVT::i16);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001356 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001357 }
1358 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001359 DAG.getConstant(2, DL, MVT::i32));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001360 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001361 DAG.getConstant(0x00000003, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001362 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1363 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001364 DAG.getConstant(3, DL, VT));
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001365 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
1366 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
1367 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1368 // vector instead.
1369 SDValue Src[4] = {
1370 ShiftedValue,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001371 DAG.getConstant(0, DL, MVT::i32),
1372 DAG.getConstant(0, DL, MVT::i32),
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001373 Mask
1374 };
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001375 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001376 SDValue Args[3] = { Chain, Input, DWordAddr };
1377 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00001378 Op->getVTList(), Args, MemVT,
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001379 StoreNode->getMemOperand());
1380 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
Matt Arsenault95245662016-02-11 05:32:46 +00001381 ValueVT.bitsGE(MVT::i32)) {
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001382 // Convert pointer from byte address to dword address.
1383 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
1384 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001385 Ptr, DAG.getConstant(2, DL, MVT::i32)));
Tom Stellard75aadc22012-12-11 21:25:42 +00001386
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001387 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +00001388 llvm_unreachable("Truncated and indexed stores not supported yet");
Tom Stellardd3ee8c12013-08-16 01:12:06 +00001389 } else {
1390 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1391 }
1392 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001393 }
Tom Stellard75aadc22012-12-11 21:25:42 +00001394 }
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001395
Matt Arsenault95245662016-02-11 05:32:46 +00001396 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001397 return SDValue();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001398
Matt Arsenault95245662016-02-11 05:32:46 +00001399 EVT MemVT = StoreNode->getMemoryVT();
1400 if (MemVT.bitsLT(MVT::i32))
1401 return lowerPrivateTruncStore(StoreNode, DAG);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001402
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00001403 // Lowering for indirect addressing
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001404 const MachineFunction &MF = DAG.getMachineFunction();
Matt Arsenault43e92fe2016-06-24 06:30:11 +00001405 const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001406 unsigned StackWidth = TFL->getStackWidth(MF);
1407
1408 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1409
1410 if (ValueVT.isVector()) {
1411 unsigned NumElemVT = ValueVT.getVectorNumElements();
1412 EVT ElemVT = ValueVT.getVectorElementType();
Craig Topper48d114b2014-04-26 18:35:24 +00001413 SmallVector<SDValue, 4> Stores(NumElemVT);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001414
1415 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1416 "vector width in load");
1417
1418 for (unsigned i = 0; i < NumElemVT; ++i) {
1419 unsigned Channel, PtrIncr;
1420 getStackAddress(StackWidth, i, Channel, PtrIncr);
1421 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001422 DAG.getConstant(PtrIncr, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001423 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001424 Value, DAG.getConstant(i, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001425
1426 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1427 Chain, Elem, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001428 DAG.getTargetConstant(Channel, DL, MVT::i32));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001429 }
Craig Topper48d114b2014-04-26 18:35:24 +00001430 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001431 } else {
1432 if (ValueVT == MVT::i8) {
1433 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
1434 }
1435 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001436 DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001437 }
1438
1439 return Chain;
Tom Stellard75aadc22012-12-11 21:25:42 +00001440}
1441
Tom Stellard365366f2013-01-23 02:09:06 +00001442// return (512 + (kc_bank << 12)
1443static int
1444ConstantAddressBlock(unsigned AddressSpace) {
1445 switch (AddressSpace) {
1446 case AMDGPUAS::CONSTANT_BUFFER_0:
1447 return 512;
1448 case AMDGPUAS::CONSTANT_BUFFER_1:
1449 return 512 + 4096;
1450 case AMDGPUAS::CONSTANT_BUFFER_2:
1451 return 512 + 4096 * 2;
1452 case AMDGPUAS::CONSTANT_BUFFER_3:
1453 return 512 + 4096 * 3;
1454 case AMDGPUAS::CONSTANT_BUFFER_4:
1455 return 512 + 4096 * 4;
1456 case AMDGPUAS::CONSTANT_BUFFER_5:
1457 return 512 + 4096 * 5;
1458 case AMDGPUAS::CONSTANT_BUFFER_6:
1459 return 512 + 4096 * 6;
1460 case AMDGPUAS::CONSTANT_BUFFER_7:
1461 return 512 + 4096 * 7;
1462 case AMDGPUAS::CONSTANT_BUFFER_8:
1463 return 512 + 4096 * 8;
1464 case AMDGPUAS::CONSTANT_BUFFER_9:
1465 return 512 + 4096 * 9;
1466 case AMDGPUAS::CONSTANT_BUFFER_10:
1467 return 512 + 4096 * 10;
1468 case AMDGPUAS::CONSTANT_BUFFER_11:
1469 return 512 + 4096 * 11;
1470 case AMDGPUAS::CONSTANT_BUFFER_12:
1471 return 512 + 4096 * 12;
1472 case AMDGPUAS::CONSTANT_BUFFER_13:
1473 return 512 + 4096 * 13;
1474 case AMDGPUAS::CONSTANT_BUFFER_14:
1475 return 512 + 4096 * 14;
1476 case AMDGPUAS::CONSTANT_BUFFER_15:
1477 return 512 + 4096 * 15;
1478 default:
1479 return -1;
1480 }
1481}
1482
Matt Arsenault6dfda962016-02-10 18:21:39 +00001483SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1484 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001485 SDLoc DL(Op);
Matt Arsenault6dfda962016-02-10 18:21:39 +00001486 LoadSDNode *Load = cast<LoadSDNode>(Op);
1487 ISD::LoadExtType ExtType = Load->getExtensionType();
1488 EVT MemVT = Load->getMemoryVT();
Tom Stellard365366f2013-01-23 02:09:06 +00001489
Matt Arsenault6dfda962016-02-10 18:21:39 +00001490 // <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
1491 // register (2-)byte extract.
1492
1493 // Get Register holding the target.
1494 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1495 DAG.getConstant(2, DL, MVT::i32));
1496 // Load the Register.
1497 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1498 Load->getChain(),
1499 Ptr,
1500 DAG.getTargetConstant(0, DL, MVT::i32),
1501 Op.getOperand(2));
1502
1503 // Get offset within the register.
1504 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1505 Load->getBasePtr(),
1506 DAG.getConstant(0x3, DL, MVT::i32));
1507
1508 // Bit offset of target byte (byteIdx * 8).
1509 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1510 DAG.getConstant(3, DL, MVT::i32));
1511
1512 // Shift to the right.
1513 Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1514
1515 // Eliminate the upper bits by setting them to ...
1516 EVT MemEltVT = MemVT.getScalarType();
1517
1518 // ... ones.
1519 if (ExtType == ISD::SEXTLOAD) {
1520 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1521
1522 SDValue Ops[] = {
1523 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1524 Load->getChain()
1525 };
1526
1527 return DAG.getMergeValues(Ops, DL);
1528 }
1529
1530 // ... or zeros.
1531 SDValue Ops[] = {
1532 DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1533 Load->getChain()
1534 };
1535
1536 return DAG.getMergeValues(Ops, DL);
1537}
1538
/// Custom lowering for ISD::LOAD.  Handles, in order:
///  * sub-dword extending loads from private memory (byte/short extract),
///  * vector loads from local memory (scalarized),
///  * loads from the constant buffers (folded to CONST_ADDRESS nodes),
///  * SEXT loads (expanded to EXTLOAD + SIGN_EXTEND_INREG),
///  * remaining private-memory loads (indirect REGISTER_LOAD addressing).
/// Anything else falls through and returns SDValue() for default expansion.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  // Sub-dword extending load from a private register: extract the addressed
  // byte/short out of the containing 32-bit register.
  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  // Vector loads from local memory are split into per-element loads.
  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      scalarizeVectorLoad(LoadNode, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Loads from a constant buffer become CONST_ADDRESS nodes.  Only
  // non-extending and zero-extending loads can be folded this way.
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    // If the address is known at compile time, emit one CONST_ADDRESS per
    // 32-bit channel at an absolute kcache position.
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      // Scalar results are built as a 4 x i32 vector and the first element
      // is extracted below.
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
                      DAG.getConstant(LoadNode->getAddressSpace() -
                                      AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    // For a scalar load only channel 0 of the vector result is wanted.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  SDValue LoweredLoad;

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything below handles only the private address space.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  // Convert the byte address into a register index (see stackPtrToRegIndex).
  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT <= 4);
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    // One REGISTER_LOAD per element; getStackAddress picks the channel and
    // register increment for each element given the stack width.
    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
    LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}
Tom Stellard75aadc22012-12-11 21:25:42 +00001679
Matt Arsenault1d555c42014-06-23 18:00:55 +00001680SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1681 SDValue Chain = Op.getOperand(0);
1682 SDValue Cond = Op.getOperand(1);
1683 SDValue Jump = Op.getOperand(2);
1684
1685 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1686 Chain, Jump, Cond);
1687}
1688
Matt Arsenault81d06012016-03-07 21:10:13 +00001689SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1690 SelectionDAG &DAG) const {
1691 MachineFunction &MF = DAG.getMachineFunction();
Matt Arsenault43e92fe2016-06-24 06:30:11 +00001692 const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
Matt Arsenault81d06012016-03-07 21:10:13 +00001693
1694 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1695
1696 unsigned FrameIndex = FIN->getIndex();
1697 unsigned IgnoredFrameReg;
1698 unsigned Offset =
1699 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1700 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1701 Op.getValueType());
1702}
1703
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Shader calling conventions receive each input in a live-in 128-bit
/// register.  All other (kernel) arguments are lowered to loads from the
/// constant buffer, starting at byte offset 36 (the first 36 bytes hold
/// thread-group and global size information).
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  // Rebuild the argument list from the IR function so arguments are analyzed
  // in their original (pre-legalization) form.
  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;            // Type the argument has in the DAG.
    EVT MemVT = VA.getLocVT(); // Type the argument occupies in memory.
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      // Shader inputs: mark the assigned register live-in and copy from it.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
        MemVT, /* Alignment = */ 4,
        MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    // Record where the argument area ends so later consumers of
    // ABIArgOffset know where user arguments stop.
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}
1780
Mehdi Amini44ede332015-07-09 02:09:04 +00001781EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1782 EVT VT) const {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001783 if (!VT.isVector())
1784 return MVT::i32;
Tom Stellard75aadc22012-12-11 21:25:42 +00001785 return VT.changeVectorElementTypeToInteger();
1786}
1787
Matt Arsenaultfa67bdb2016-02-22 21:04:16 +00001788bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1789 unsigned AddrSpace,
1790 unsigned Align,
1791 bool *IsFast) const {
1792 if (IsFast)
1793 *IsFast = false;
1794
1795 if (!VT.isSimple() || VT == MVT::Other)
1796 return false;
1797
1798 if (VT.bitsLT(MVT::i32))
1799 return false;
1800
1801 // TODO: This is a rough estimate.
1802 if (IsFast)
1803 *IsFast = true;
1804
1805 return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1806}
1807
Matt Arsenault209a7b92014-04-18 07:40:20 +00001808static SDValue CompactSwizzlableVector(
1809 SelectionDAG &DAG, SDValue VectorEntry,
1810 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001811 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1812 assert(RemapSwizzle.empty());
1813 SDValue NewBldVec[4] = {
Matt Arsenault209a7b92014-04-18 07:40:20 +00001814 VectorEntry.getOperand(0),
1815 VectorEntry.getOperand(1),
1816 VectorEntry.getOperand(2),
1817 VectorEntry.getOperand(3)
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001818 };
1819
1820 for (unsigned i = 0; i < 4; i++) {
Sanjay Patel57195842016-03-14 17:28:46 +00001821 if (NewBldVec[i].isUndef())
Vincent Lejeunefa58a5f2013-10-13 17:56:10 +00001822 // We mask write here to teach later passes that the ith element of this
1823 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1824 // break false dependencies and additionnaly make assembly easier to read.
1825 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001826 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1827 if (C->isZero()) {
1828 RemapSwizzle[i] = 4; // SEL_0
1829 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1830 } else if (C->isExactlyValue(1.0)) {
1831 RemapSwizzle[i] = 5; // SEL_1
1832 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1833 }
1834 }
1835
Sanjay Patel57195842016-03-14 17:28:46 +00001836 if (NewBldVec[i].isUndef())
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001837 continue;
1838 for (unsigned j = 0; j < i; j++) {
1839 if (NewBldVec[i] == NewBldVec[j]) {
1840 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1841 RemapSwizzle[i] = j;
1842 break;
1843 }
1844 }
1845 }
1846
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001847 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1848 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001849}
1850
Benjamin Kramer193960c2013-06-11 13:32:25 +00001851static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1852 DenseMap<unsigned, unsigned> &RemapSwizzle) {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001853 assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1854 assert(RemapSwizzle.empty());
1855 SDValue NewBldVec[4] = {
1856 VectorEntry.getOperand(0),
1857 VectorEntry.getOperand(1),
1858 VectorEntry.getOperand(2),
1859 VectorEntry.getOperand(3)
1860 };
1861 bool isUnmovable[4] = { false, false, false, false };
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001862 for (unsigned i = 0; i < 4; i++) {
Vincent Lejeuneb8aac8d2013-07-09 15:03:25 +00001863 RemapSwizzle[i] = i;
Vincent Lejeunecc0ea742013-12-10 14:43:31 +00001864 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1865 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1866 ->getZExtValue();
1867 if (i == Idx)
1868 isUnmovable[Idx] = true;
1869 }
1870 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001871
1872 for (unsigned i = 0; i < 4; i++) {
1873 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1874 unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1875 ->getZExtValue();
Vincent Lejeune301beb82013-10-13 17:56:04 +00001876 if (isUnmovable[Idx])
1877 continue;
1878 // Swap i and Idx
1879 std::swap(NewBldVec[Idx], NewBldVec[i]);
1880 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1881 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001882 }
1883 }
1884
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001885 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1886 NewBldVec);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001887}
1888
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001889SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1890 SelectionDAG &DAG,
1891 const SDLoc &DL) const {
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001892 assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1893 // Old -> New swizzle values
1894 DenseMap<unsigned, unsigned> SwizzleRemap;
1895
1896 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1897 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001898 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001899 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001900 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001901 }
1902
1903 SwizzleRemap.clear();
1904 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1905 for (unsigned i = 0; i < 4; i++) {
Benjamin Kramer619c4e52015-04-10 11:24:51 +00001906 unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001907 if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001908 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00001909 }
1910
1911 return BuildVector;
1912}
1913
1914
Tom Stellard75aadc22012-12-11 21:25:42 +00001915//===----------------------------------------------------------------------===//
1916// Custom DAG Optimizations
1917//===----------------------------------------------------------------------===//
1918
1919SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1920 DAGCombinerInfo &DCI) const {
1921 SelectionDAG &DAG = DCI.DAG;
1922
1923 switch (N->getOpcode()) {
Tom Stellard50122a52014-04-07 19:45:41 +00001924 default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00001925 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1926 case ISD::FP_ROUND: {
1927 SDValue Arg = N->getOperand(0);
1928 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001929 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Tom Stellard75aadc22012-12-11 21:25:42 +00001930 Arg.getOperand(0));
1931 }
1932 break;
1933 }
Tom Stellarde06163a2013-02-07 14:02:35 +00001934
1935 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1936 // (i32 select_cc f32, f32, -1, 0 cc)
1937 //
1938 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1939 // this to one of the SET*_DX10 instructions.
1940 case ISD::FP_TO_SINT: {
1941 SDValue FNeg = N->getOperand(0);
1942 if (FNeg.getOpcode() != ISD::FNEG) {
1943 return SDValue();
1944 }
1945 SDValue SelectCC = FNeg.getOperand(0);
1946 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1947 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1948 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1949 !isHWTrueValue(SelectCC.getOperand(2)) ||
1950 !isHWFalseValue(SelectCC.getOperand(3))) {
1951 return SDValue();
1952 }
1953
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001954 SDLoc dl(N);
1955 return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
Tom Stellarde06163a2013-02-07 14:02:35 +00001956 SelectCC.getOperand(0), // LHS
1957 SelectCC.getOperand(1), // RHS
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001958 DAG.getConstant(-1, dl, MVT::i32), // True
1959 DAG.getConstant(0, dl, MVT::i32), // False
Tom Stellarde06163a2013-02-07 14:02:35 +00001960 SelectCC.getOperand(4)); // CC
1961
1962 break;
1963 }
Quentin Colombete2e05482013-07-30 00:27:16 +00001964
NAKAMURA Takumi8a046432013-10-28 04:07:38 +00001965 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1966 // => build_vector elt0, ... , NewEltIdx, ... , eltN
Quentin Colombete2e05482013-07-30 00:27:16 +00001967 case ISD::INSERT_VECTOR_ELT: {
1968 SDValue InVec = N->getOperand(0);
1969 SDValue InVal = N->getOperand(1);
1970 SDValue EltNo = N->getOperand(2);
1971 SDLoc dl(N);
1972
1973 // If the inserted element is an UNDEF, just use the input vector.
Sanjay Patel57195842016-03-14 17:28:46 +00001974 if (InVal.isUndef())
Quentin Colombete2e05482013-07-30 00:27:16 +00001975 return InVec;
1976
1977 EVT VT = InVec.getValueType();
1978
1979 // If we can't generate a legal BUILD_VECTOR, exit
1980 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1981 return SDValue();
1982
1983 // Check that we know which element is being inserted
1984 if (!isa<ConstantSDNode>(EltNo))
1985 return SDValue();
1986 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1987
1988 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1989 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1990 // vector elements.
1991 SmallVector<SDValue, 8> Ops;
1992 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1993 Ops.append(InVec.getNode()->op_begin(),
1994 InVec.getNode()->op_end());
Sanjay Patel57195842016-03-14 17:28:46 +00001995 } else if (InVec.isUndef()) {
Quentin Colombete2e05482013-07-30 00:27:16 +00001996 unsigned NElts = VT.getVectorNumElements();
1997 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1998 } else {
1999 return SDValue();
2000 }
2001
2002 // Insert the element
2003 if (Elt < Ops.size()) {
2004 // All the operands of BUILD_VECTOR must have the same type;
2005 // we enforce that here.
2006 EVT OpVT = Ops[0].getValueType();
2007 if (InVal.getValueType() != OpVT)
2008 InVal = OpVT.bitsGT(InVal.getValueType()) ?
2009 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
2010 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
2011 Ops[Elt] = InVal;
2012 }
2013
2014 // Return the new vector
Ahmed Bougacha128f8732016-04-26 21:15:30 +00002015 return DAG.getBuildVector(VT, dl, Ops);
Quentin Colombete2e05482013-07-30 00:27:16 +00002016 }
2017
Tom Stellard365366f2013-01-23 02:09:06 +00002018 // Extract_vec (Build_vector) generated by custom lowering
2019 // also needs to be customly combined
2020 case ISD::EXTRACT_VECTOR_ELT: {
2021 SDValue Arg = N->getOperand(0);
2022 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
2023 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2024 unsigned Element = Const->getZExtValue();
2025 return Arg->getOperand(Element);
2026 }
2027 }
Tom Stellarddd04c832013-01-31 22:11:53 +00002028 if (Arg.getOpcode() == ISD::BITCAST &&
2029 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
2030 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2031 unsigned Element = Const->getZExtValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002032 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Tom Stellarddd04c832013-01-31 22:11:53 +00002033 Arg->getOperand(0).getOperand(Element));
2034 }
2035 }
Mehdi Aminie029eae2015-07-16 06:23:12 +00002036 break;
Tom Stellard365366f2013-01-23 02:09:06 +00002037 }
Tom Stellarde06163a2013-02-07 14:02:35 +00002038
2039 case ISD::SELECT_CC: {
Tom Stellardafa8b532014-05-09 16:42:16 +00002040 // Try common optimizations
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00002041 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
Tom Stellardafa8b532014-05-09 16:42:16 +00002042 return Ret;
2043
Tom Stellarde06163a2013-02-07 14:02:35 +00002044 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
2045 // selectcc x, y, a, b, inv(cc)
Tom Stellard5e524892013-03-08 15:37:11 +00002046 //
2047 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
2048 // selectcc x, y, a, b, cc
Tom Stellarde06163a2013-02-07 14:02:35 +00002049 SDValue LHS = N->getOperand(0);
2050 if (LHS.getOpcode() != ISD::SELECT_CC) {
2051 return SDValue();
2052 }
2053
2054 SDValue RHS = N->getOperand(1);
2055 SDValue True = N->getOperand(2);
2056 SDValue False = N->getOperand(3);
Tom Stellard5e524892013-03-08 15:37:11 +00002057 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
Tom Stellarde06163a2013-02-07 14:02:35 +00002058
2059 if (LHS.getOperand(2).getNode() != True.getNode() ||
2060 LHS.getOperand(3).getNode() != False.getNode() ||
Tom Stellard5e524892013-03-08 15:37:11 +00002061 RHS.getNode() != False.getNode()) {
Tom Stellarde06163a2013-02-07 14:02:35 +00002062 return SDValue();
2063 }
2064
Tom Stellard5e524892013-03-08 15:37:11 +00002065 switch (NCC) {
2066 default: return SDValue();
2067 case ISD::SETNE: return LHS;
2068 case ISD::SETEQ: {
2069 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2070 LHSCC = ISD::getSetCCInverse(LHSCC,
2071 LHS.getOperand(0).getValueType().isInteger());
Tom Stellardcd428182013-09-28 02:50:38 +00002072 if (DCI.isBeforeLegalizeOps() ||
2073 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2074 return DAG.getSelectCC(SDLoc(N),
2075 LHS.getOperand(0),
2076 LHS.getOperand(1),
2077 LHS.getOperand(2),
2078 LHS.getOperand(3),
2079 LHSCC);
2080 break;
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002081 }
Tom Stellard5e524892013-03-08 15:37:11 +00002082 }
Tom Stellardcd428182013-09-28 02:50:38 +00002083 return SDValue();
Tom Stellard5e524892013-03-08 15:37:11 +00002084 }
Tom Stellardfbab8272013-08-16 01:12:11 +00002085
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002086 case AMDGPUISD::EXPORT: {
2087 SDValue Arg = N->getOperand(1);
2088 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2089 break;
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002090
Vincent Lejeuned80bc152013-02-14 16:55:06 +00002091 SDValue NewArgs[8] = {
2092 N->getOperand(0), // Chain
2093 SDValue(),
2094 N->getOperand(2), // ArrayBase
2095 N->getOperand(3), // Type
2096 N->getOperand(4), // SWZ_X
2097 N->getOperand(5), // SWZ_Y
2098 N->getOperand(6), // SWZ_Z
2099 N->getOperand(7) // SWZ_W
2100 };
Andrew Trickef9de2a2013-05-25 02:42:55 +00002101 SDLoc DL(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002102 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
Craig Topper48d114b2014-04-26 18:35:24 +00002103 return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
Tom Stellarde06163a2013-02-07 14:02:35 +00002104 }
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002105 case AMDGPUISD::TEXTURE_FETCH: {
2106 SDValue Arg = N->getOperand(1);
2107 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2108 break;
2109
2110 SDValue NewArgs[19] = {
2111 N->getOperand(0),
2112 N->getOperand(1),
2113 N->getOperand(2),
2114 N->getOperand(3),
2115 N->getOperand(4),
2116 N->getOperand(5),
2117 N->getOperand(6),
2118 N->getOperand(7),
2119 N->getOperand(8),
2120 N->getOperand(9),
2121 N->getOperand(10),
2122 N->getOperand(11),
2123 N->getOperand(12),
2124 N->getOperand(13),
2125 N->getOperand(14),
2126 N->getOperand(15),
2127 N->getOperand(16),
2128 N->getOperand(17),
2129 N->getOperand(18),
2130 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002131 SDLoc DL(N);
2132 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2133 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
Vincent Lejeune276ceb82013-06-04 15:04:53 +00002134 }
Tom Stellard75aadc22012-12-11 21:25:42 +00002135 }
Matt Arsenault5565f65e2014-05-22 18:09:07 +00002136
2137 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
Tom Stellard75aadc22012-12-11 21:25:42 +00002138}
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002139
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002140bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2141 SDValue &Src, SDValue &Neg, SDValue &Abs,
2142 SDValue &Sel, SDValue &Imm,
2143 SelectionDAG &DAG) const {
2144 const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002145 if (!Src.isMachineOpcode())
2146 return false;
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002147
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002148 switch (Src.getMachineOpcode()) {
2149 case AMDGPU::FNEG_R600:
2150 if (!Neg.getNode())
2151 return false;
2152 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002153 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002154 return true;
2155 case AMDGPU::FABS_R600:
2156 if (!Abs.getNode())
2157 return false;
2158 Src = Src.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002159 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002160 return true;
2161 case AMDGPU::CONST_COPY: {
2162 unsigned Opcode = ParentNode->getMachineOpcode();
2163 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2164
2165 if (!Sel.getNode())
2166 return false;
2167
2168 SDValue CstOffset = Src.getOperand(0);
2169 if (ParentNode->getValueType(0).isVector())
2170 return false;
2171
2172 // Gather constants values
2173 int SrcIndices[] = {
2174 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2175 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2176 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2177 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2178 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2179 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2180 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2181 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2182 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2183 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2184 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2185 };
2186 std::vector<unsigned> Consts;
Matt Arsenault4d64f962014-05-12 19:23:21 +00002187 for (int OtherSrcIdx : SrcIndices) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002188 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2189 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2190 continue;
2191 if (HasDst) {
2192 OtherSrcIdx--;
2193 OtherSelIdx--;
2194 }
2195 if (RegisterSDNode *Reg =
2196 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2197 if (Reg->getReg() == AMDGPU::ALU_CONST) {
Matt Arsenaultb3ee3882014-05-12 19:26:38 +00002198 ConstantSDNode *Cst
2199 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002200 Consts.push_back(Cst->getZExtValue());
2201 }
2202 }
2203 }
2204
Matt Arsenault37c12d72014-05-12 20:42:57 +00002205 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002206 Consts.push_back(Cst->getZExtValue());
2207 if (!TII->fitsConstReadLimitations(Consts)) {
2208 return false;
2209 }
2210
2211 Sel = CstOffset;
2212 Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2213 return true;
2214 }
Jan Vesely16800392016-05-13 20:39:31 +00002215 case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2216 // Check if the Imm slot is used. Taken from below.
2217 if (cast<ConstantSDNode>(Imm)->getZExtValue())
2218 return false;
2219 Imm = Src.getOperand(0);
2220 Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2221 return true;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002222 case AMDGPU::MOV_IMM_I32:
2223 case AMDGPU::MOV_IMM_F32: {
2224 unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2225 uint64_t ImmValue = 0;
2226
2227
2228 if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2229 ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2230 float FloatValue = FPC->getValueAPF().convertToFloat();
2231 if (FloatValue == 0.0) {
2232 ImmReg = AMDGPU::ZERO;
2233 } else if (FloatValue == 0.5) {
2234 ImmReg = AMDGPU::HALF;
2235 } else if (FloatValue == 1.0) {
2236 ImmReg = AMDGPU::ONE;
2237 } else {
2238 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2239 }
2240 } else {
2241 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2242 uint64_t Value = C->getZExtValue();
2243 if (Value == 0) {
2244 ImmReg = AMDGPU::ZERO;
2245 } else if (Value == 1) {
2246 ImmReg = AMDGPU::ONE_INT;
2247 } else {
2248 ImmValue = Value;
2249 }
2250 }
2251
2252 // Check that we aren't already using an immediate.
2253 // XXX: It's possible for an instruction to have more than one
2254 // immediate operand, but this is not supported yet.
2255 if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2256 if (!Imm.getNode())
2257 return false;
2258 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2259 assert(C);
2260 if (C->getZExtValue())
2261 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002262 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002263 }
2264 Src = DAG.getRegister(ImmReg, MVT::i32);
2265 return true;
2266 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002267 default:
2268 return false;
2269 }
2270}
2271
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002272/// \brief Fold the instructions after selecting them
2273SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2274 SelectionDAG &DAG) const {
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002275 const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002276 if (!Node->isMachineOpcode())
2277 return Node;
Matt Arsenault43e92fe2016-06-24 06:30:11 +00002278
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002279 unsigned Opcode = Node->getMachineOpcode();
2280 SDValue FakeOp;
2281
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002282 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002283
2284 if (Opcode == AMDGPU::DOT_4) {
2285 int OperandIdx[] = {
2286 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2287 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2288 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2289 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2290 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2291 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2292 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2293 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
NAKAMURA Takumi4bb85f92013-10-28 04:07:23 +00002294 };
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002295 int NegIdx[] = {
2296 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2297 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2298 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2299 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2300 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2301 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2302 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2303 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2304 };
2305 int AbsIdx[] = {
2306 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2307 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2308 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2309 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2310 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2311 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2312 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2313 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2314 };
2315 for (unsigned i = 0; i < 8; i++) {
2316 if (OperandIdx[i] < 0)
2317 return Node;
2318 SDValue &Src = Ops[OperandIdx[i] - 1];
2319 SDValue &Neg = Ops[NegIdx[i] - 1];
2320 SDValue &Abs = Ops[AbsIdx[i] - 1];
2321 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2322 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2323 if (HasDst)
2324 SelIdx--;
2325 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002326 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2327 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2328 }
2329 } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2330 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2331 SDValue &Src = Ops[i];
2332 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002333 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2334 }
Vincent Lejeune0167a312013-09-12 23:45:00 +00002335 } else if (Opcode == AMDGPU::CLAMP_R600) {
2336 SDValue Src = Node->getOperand(0);
2337 if (!Src.isMachineOpcode() ||
2338 !TII->hasInstrModifiers(Src.getMachineOpcode()))
2339 return Node;
2340 int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2341 AMDGPU::OpName::clamp);
2342 if (ClampIdx < 0)
2343 return Node;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002344 SDLoc DL(Node);
Benjamin Kramer6cd780f2015-02-17 15:29:18 +00002345 std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002346 Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2347 return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2348 Node->getVTList(), Ops);
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002349 } else {
2350 if (!TII->hasInstrModifiers(Opcode))
2351 return Node;
2352 int OperandIdx[] = {
2353 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2354 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2355 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2356 };
2357 int NegIdx[] = {
2358 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2359 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2360 TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2361 };
2362 int AbsIdx[] = {
2363 TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2364 TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2365 -1
2366 };
2367 for (unsigned i = 0; i < 3; i++) {
2368 if (OperandIdx[i] < 0)
2369 return Node;
2370 SDValue &Src = Ops[OperandIdx[i] - 1];
2371 SDValue &Neg = Ops[NegIdx[i] - 1];
2372 SDValue FakeAbs;
2373 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2374 bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2375 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002376 int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2377 if (HasDst) {
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002378 SelIdx--;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002379 ImmIdx--;
2380 }
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002381 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
Vincent Lejeune9a248e52013-09-12 23:44:53 +00002382 SDValue &Imm = Ops[ImmIdx];
2383 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
Vincent Lejeuneab3baf82013-09-12 23:44:44 +00002384 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2385 }
2386 }
2387
2388 return Node;
2389}