//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

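// Value handler for outgoing values (in this file, used only for return
// values): each value is copied into the physical register chosen by the
// assignment function and recorded as an implicit use on the return
// instruction. Stack-based passing is not implemented yet.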
struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg);
    MIRBuilder.buildCopy(PhysReg, ValVReg);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};

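// Value handler for incoming arguments: values arriving in physical registers
// are copied (and truncated if they were promoted) into the argument's
// virtual registers; values passed on the stack are loaded from fixed frame
// objects created at the incoming stack offset.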
struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
      MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in) and a call instruction
  /// (it's an implicit-def of the call itself).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isArgumentHandler() const override { return true; }
};

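// Incoming-value handler for formal arguments: physical argument registers
// are marked as live-ins of the entry basic block.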
struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

}

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

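// Split OrigArg into the legalized values the calling convention actually
// assigns, creating one ArgInfo per part register. PerformArgSplit is invoked
// for every original value that needed more than one part so the caller can
// stitch the parts back together. For example (a sketch, not exhaustive): with
// the AMDGPU calling conventions an s64 argument is typically described as
// two s32 parts; the exact part type comes from getRegisterTypeForCallingConv.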
void AMDGPUCallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    Type *Ty = VT.getTypeForEVT(Ctx);

    if (NumParts == 1) {
      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
                             OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    LLT LLTy = getLLTForType(*Ty, DL);

    SmallVector<Register, 8> SplitRegs;

    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
    Type *PartTy = PartVT.getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

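// Lower the return of a shader entry point. Void returns become S_ENDPGM;
// non-void shader returns are assigned to registers via OutgoingArgHandler
// and attached to SI_RETURN_TO_EPILOG. Non-shader calling conventions are not
// handled yet, so this returns false and lets GlobalISel fall back.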
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  if (!Val) {
    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
    return true;
  }

  Register VReg = VRegs[0];

  const Function &F = MF.getFunction();
  auto &DL = F.getParent()->getDataLayout();
  if (!AMDGPU::isShader(F.getCallingConv()))
    return false;

  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ArgInfo OrigArg{VReg, Val->getType()};
  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

  SmallVector<ArgInfo, 8> SplitArgs;
  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
  }
  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;
  MIRBuilder.insertInstr(RetInstr);

  return true;
}

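// Compute the address of the kernel argument at byte offset Offset: a G_GEP
// off the preloaded kernarg segment pointer, yielding a constant-address-space
// (p4) pointer. The emitted MIR is roughly (a sketch; register names are
// illustrative):
//   %kernarg:_(p4) = COPY <kernarg segment SGPR pair>
//   %off:_(s64)    = G_CONSTANT i64 <Offset>
//   %ptr:_(p4)     = G_GEP %kernarg, %off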
Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

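// Load one kernel argument of type ParamTy from the kernarg segment at the
// given offset and alignment into DstReg. The load is marked dereferenceable
// and invariant, since kernel arguments never change during execution.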
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

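// Lower formal arguments for an AMDGPU_KERNEL function: every argument is
// loaded from the kernarg segment at an offset derived from its ABI alignment
// and allocation size. As a rough sketch (assuming an amdhsa target where the
// explicit kernarg base offset is 0): for `kernel void f(int a, double b)`,
// `a` would be loaded from offset 0 and `b` from offset 8.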
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
      OrigArgRegs.size() == 1
      ? OrigArgRegs[0]
      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

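// Reassemble an original value from the part registers produced by
// splitToValueTypes. Scalars are rebuilt with G_MERGE_VALUES, vectors with
// G_CONCAT_VECTORS or G_BUILD_VECTOR; the odd v3s16-from-two-v2s16 case is
// concatenated into a rounded-up vector and then extracted.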
static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  if (!LLTy.isVector() && !PartLLT.isVector()) {
    MIRBuilder.buildMerge(OrigRegs[0], Regs);
    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(LLTy.getElementType() == PartLLT.getElementType());

    int DstElts = LLTy.getNumElements();
    int PartElts = PartLLT.getNumElements();
    if (DstElts % PartElts == 0)
      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
    else {
      // Deal with v3s16 split into v2s16
      assert(PartElts == 2 && DstElts % 2 != 0);
      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();
  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.
    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = MIRBuilder.buildMerge(DstEltTy,
                                         Regs.take_front(PartsPerElt));
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
    MIRBuilder.buildTrunc(OrigRegs[0], BV);
  }
}

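// Lower formal arguments for non-kernel calling conventions (shaders and
// callable functions). Kernels take the dedicated path above; unsupported
// cases (AMDGPU_GS/HS, inreg arguments to non-shaders, sret/swift/nest
// arguments) return false so GlobalISel can fall back.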
bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);

  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (CC == CallingConv::AMDGPU_GS || CC == CallingConv::AMDGPU_HS)
    return false;

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // Passing arguments in SGPRs ('inreg') to non-shader functions is not
    // implemented yet.
    if (!IsShader && InReg)
      return false;

    // TODO: Handle sret.
    if (Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          MIRBuilder.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(
      OrigArg, SplitArgs, DL, MRI, CC,
      // FIXME: We should probably be passing multiple registers to
      // handleAssignments to do this
      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
        packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs,
                                LLTy, PartLLT);
      });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  //   - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  //   - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //     enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
    CCInfo.AllocateReg(Info->getFrameOffsetReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}