//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

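// Handler for outgoing values (used here for return values): widens values
// narrower than 32 bits, copies each value into its assigned physical
// register, and adds that register as an implicit use of the instruction
// being built.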
struct OutgoingValueHandler : public CallLowering::ValueHandler {
  OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(B, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg;
    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to
      // extend and do a 32-bit copy to avoid the verifier complaining about it.
      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
    } else
      ExtReg = extendRegister(ValVReg, VA);

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
  }
};

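// Handler for incoming arguments: loads stack-passed values from fixed frame
// objects and copies register-passed values out of their physical registers,
// truncating when the location type is wider than the value type.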
struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(B, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the call).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isIncomingArgumentHandler() const override { return true; }
};

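// Incoming handler for formal arguments; assigned physical registers become
// live-ins of the entry basic block.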
struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : IncomingArgHandler(B, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

}

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
    : CallLowering(&TLI) {
}

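// Split \p OrigArg into one ArgInfo per legal register piece. When a value
// needs more than one part register, \p PerformArgSplit is invoked so the
// caller can emit the code that packs or unpacks the pieces.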
void AMDGPUCallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    Type *Ty = VT.getTypeForEVT(Ctx);

    if (NumParts == 1) {
      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
                             OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    LLT LLTy = getLLTForType(*Ty, DL);

    SmallVector<Register, 8> SplitRegs;

    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
    Type *PartTy = PartVT.getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

// Get the appropriate type to make \p OrigTy \p Factor times bigger.
static LLT getMultipleType(LLT OrigTy, int Factor) {
  if (OrigTy.isVector()) {
    return LLT::vector(OrigTy.getNumElements() * Factor,
                       OrigTy.getElementType());
  }

  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
}

// TODO: Move to generic code
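// Repack \p SrcReg, holding the single value of type \p SrcTy, into the part
// registers \p DstRegs of type \p PartTy, using unmerges, extensions, or
// insert/extract when the part size doesn't evenly divide the source size.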
static void unpackRegsToOrigType(MachineIRBuilder &B,
                                 ArrayRef<Register> DstRegs,
                                 Register SrcReg,
                                 LLT SrcTy,
                                 LLT PartTy) {
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  MachineFunction &MF = B.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned PartSize = PartTy.getSizeInBits();

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(),
                                         SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcSize % PartSize == 0) {
    B.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;

  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
  auto ImpDef = B.buildUndef(BigTy);

  Register BigReg = MRI.createGenericVirtualRegister(BigTy);
  B.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0);

  int64_t Offset = 0;
  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
    B.buildExtract(DstRegs[i], BigReg, Offset);
}

| 257 | /// Lower the return value for the already existing \p Ret. This assumes that |
Austin Kerbow | 06c8cb0 | 2019-09-09 23:06:13 +0000 | [diff] [blame] | 258 | /// \p B's insertion point is correct. |
| 259 | bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B, |
Matt Arsenault | a9ea8a9 | 2019-07-26 02:36:05 +0000 | [diff] [blame] | 260 | const Value *Val, ArrayRef<Register> VRegs, |
| 261 | MachineInstrBuilder &Ret) const { |
| 262 | if (!Val) |
| 263 | return true; |
| 264 | |
Austin Kerbow | 06c8cb0 | 2019-09-09 23:06:13 +0000 | [diff] [blame] | 265 | auto &MF = B.getMF(); |
Matt Arsenault | a9ea8a9 | 2019-07-26 02:36:05 +0000 | [diff] [blame] | 266 | const auto &F = MF.getFunction(); |
| 267 | const DataLayout &DL = MF.getDataLayout(); |
| 268 | |
| 269 | CallingConv::ID CC = F.getCallingConv(); |
| 270 | const SITargetLowering &TLI = *getTLI<SITargetLowering>(); |
| 271 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 272 | |
| 273 | ArgInfo OrigRetInfo(VRegs, Val->getType()); |
| 274 | setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F); |
| 275 | SmallVector<ArgInfo, 4> SplitRetInfos; |
| 276 | |
| 277 | splitToValueTypes( |
| 278 | OrigRetInfo, SplitRetInfos, DL, MRI, CC, |
| 279 | [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) { |
Austin Kerbow | 06c8cb0 | 2019-09-09 23:06:13 +0000 | [diff] [blame] | 280 | unpackRegsToOrigType(B, Regs, VRegs[VTSplitIdx], LLTy, PartLLT); |
Matt Arsenault | a9ea8a9 | 2019-07-26 02:36:05 +0000 | [diff] [blame] | 281 | }); |
| 282 | |
| 283 | CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg()); |
| 284 | |
Austin Kerbow | 06c8cb0 | 2019-09-09 23:06:13 +0000 | [diff] [blame] | 285 | OutgoingValueHandler RetHandler(B, MF.getRegInfo(), Ret, AssignFn); |
| 286 | return handleAssignments(B, SplitRetInfos, RetHandler); |
Matt Arsenault | a9ea8a9 | 2019-07-26 02:36:05 +0000 | [diff] [blame] | 287 | } |
| 288 | |
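// Lower a function return: kernels and shaders returning void end the wave
// with S_ENDPGM; otherwise the return value is lowered and the return is
// emitted as SI_RETURN_TO_EPILOG (shaders) or S_SETPC_B64_return, which also
// carries the return address.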
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {
  MachineFunction &MF = B.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
                         AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  auto const &ST = B.getMF().getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = B.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    B.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}

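// Compute the address of the kernel argument at \p Offset by offsetting the
// preloaded kernarg segment pointer; returns a constant-address-space pointer.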
Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  B.buildConstant(OffsetReg, Offset);

  B.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

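// Load the kernel argument of type \p ParamTy at \p Offset from the kernarg
// segment into \p DstReg as a dereferenceable, invariant load.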
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  B.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &B,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

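// Lower formal arguments for AMDGPU_KERNEL functions: user SGPRs are
// preloaded, and each IR argument is loaded from the kernarg segment at its
// ABI-aligned offset.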
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
        OrigArgRegs.size() == 1
            ? OrigArgRegs[0]
            : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(B, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

// TODO: Move this to generic code
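// Reassemble the original value in \p OrigRegs from the part registers
// \p Regs, choosing between merge, concat_vectors, build_vector, and trunc
// based on how the original type was split.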
static void packSplitRegsToOrigType(MachineIRBuilder &B,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  if (!LLTy.isVector() && !PartLLT.isVector()) {
    B.buildMerge(OrigRegs[0], Regs);
    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(LLTy.getElementType() == PartLLT.getElementType());

    int DstElts = LLTy.getNumElements();
    int PartElts = PartLLT.getNumElements();
    if (DstElts % PartElts == 0)
      B.buildConcatVectors(OrigRegs[0], Regs);
    else {
      // Deal with v3s16 split into v2s16
      assert(PartElts == 2 && DstElts % 2 != 0);
      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
      auto RoundedConcat = B.buildConcatVectors(RoundedDestTy, Regs);
      B.buildExtract(OrigRegs[0], RoundedConcat, 0);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();
  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.
    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = B.buildMerge(DstEltTy,
                                Regs.take_front(PartsPerElt));
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    B.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = B.buildBuildVector(BVType, Regs);
    B.buildTrunc(OrigRegs[0], BV);
  }
}

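// Lower formal arguments for non-kernel functions: handles PS input
// allocation for pixel shaders, splits each argument into legal part
// registers, and allocates the special SGPR/VGPR inputs around the user
// arguments.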
bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(B, F, VRegs);

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = B.getMF();
  MachineBasicBlock &MBB = B.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    B.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsShader && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          B.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(
        OrigArg, SplitArgs, DL, MRI, CC,
        // FIXME: We should probably be passing multiple registers to
        // handleAssignments to do this
        [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
          packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
                                  LLTy, PartLLT);
        });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
            countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    B.setInstr(*MBB.begin());

  FormalArgHandler Handler(B, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
    CCInfo.AllocateReg(Info->getFrameOffsetReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  B.setMBB(MBB);

  return true;
}