//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

struct OutgoingValueHandler : public CallLowering::ValueHandler {
  OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(B, MRI, AssignFn), MIB(MIB) {}

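  // The return instruction being populated; each physical register that
  // receives part of the return value is attached to it as an implicit use.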
  MachineInstrBuilder MIB;

  bool isIncomingArgumentHandler() const override { return false; }

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg;
    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to
      // extend and do a 32-bit copy to avoid the verifier complaining about it.
      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
    } else
      ExtReg = extendRegister(ValVReg, VA);

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
  }
};

struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(B, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
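    // Incoming stack arguments become fixed frame objects, addressed through
    // a 32-bit pointer into the private address space.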
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

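    // If the value was promoted (sign-, zero-, or any-extended) to the
    // location type, copy at the wider type and truncate back down to the
    // original value type; otherwise a plain copy suffices.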
    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
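    // Loads of incoming stack arguments are marked invariant; the slot is
    // assumed not to be rewritten while this function executes.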
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the call).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isIncomingArgumentHandler() const override { return true; }
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : IncomingArgHandler(B, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

}

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

void AMDGPUCallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

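  // For each split value type: if the calling convention assigns it a single
  // register, only the IR type is rewritten; otherwise one virtual register
  // is created per CC part register and PerformArgSplit repacks the parts
  // into the original vreg.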
  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    Type *Ty = VT.getTypeForEVT(Ctx);

    if (NumParts == 1) {
      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
                             OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    LLT LLTy = getLLTForType(*Ty, DL);

    SmallVector<Register, 8> SplitRegs;

    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
    Type *PartTy = PartVT.getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

// Get the appropriate type to make \p OrigTy \p Factor times bigger.
static LLT getMultipleType(LLT OrigTy, int Factor) {
  if (OrigTy.isVector()) {
    return LLT::vector(OrigTy.getNumElements() * Factor,
                       OrigTy.getElementType());
  }

  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
}

// TODO: Move to generic code
static void unpackRegsToOrigType(MachineIRBuilder &B,
                                 ArrayRef<Register> DstRegs,
                                 Register SrcReg,
                                 LLT SrcTy,
                                 LLT PartTy) {
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  MachineFunction &MF = B.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned PartSize = PartTy.getSizeInBits();

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(),
                                         SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcSize % PartSize == 0) {
    B.buildUnmerge(DstRegs, SrcReg);
    return;
  }

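  // The source size is not an even multiple of the part size. Widen the
  // source to the next multiple with an undef value, then extract each part
  // at its offset.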
  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;

  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
  auto ImpDef = B.buildUndef(BigTy);

  Register BigReg = MRI.createGenericVirtualRegister(BigTy);
  B.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0);

  int64_t Offset = 0;
  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
    B.buildExtract(DstRegs[i], BigReg, Offset);
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p B's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = B.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  ArgInfo OrigRetInfo(VRegs, Val->getType());
  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
  SmallVector<ArgInfo, 4> SplitRetInfos;

  splitToValueTypes(
    OrigRetInfo, SplitRetInfos, DL, MRI, CC,
    [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
      unpackRegsToOrigType(B, Regs, VRegs[VTSplitIdx], LLTy, PartLLT);
    });

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());

  OutgoingValueHandler RetHandler(B, MF.getRegInfo(), Ret, AssignFn);
  return handleAssignments(B, SplitRetInfos, RetHandler);
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {
  MachineFunction &MF = B.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
                         AMDGPU::isKernel(CC);
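  // Kernels, and shaders that return void, never return to a caller; they
  // simply end the wave.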
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  auto const &ST = B.getMF().getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = B.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    B.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}

Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

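  // The argument's address is the preloaded kernarg segment pointer plus the
  // argument's byte offset.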
  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  B.buildConstant(OffsetReg, Offset);

  B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  B.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &B,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

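  // Kernel arguments are not passed in registers; each one is loaded from the
  // kernarg segment at its ABI-computed offset.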
  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
      OrigArgRegs.size() == 1
      ? OrigArgRegs[0]
      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(B, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

// TODO: Move this to generic code
static void packSplitRegsToOrigType(MachineIRBuilder &B,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
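  // Both the original value and the parts are scalars: a single
  // G_MERGE_VALUES reassembles the value.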
  if (!LLTy.isVector() && !PartLLT.isVector()) {
    B.buildMerge(OrigRegs[0], Regs);
    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(LLTy.getElementType() == PartLLT.getElementType());

    int DstElts = LLTy.getNumElements();
    int PartElts = PartLLT.getNumElements();
    if (DstElts % PartElts == 0)
      B.buildConcatVectors(OrigRegs[0], Regs);
    else {
      // Deal with v3s16 split into v2s16
      assert(PartElts == 2 && DstElts % 2 != 0);
      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
      auto RoundedConcat = B.buildConcatVectors(RoundedDestTy, Regs);
      B.buildExtract(OrigRegs[0], RoundedConcat, 0);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();
  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.
    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = B.buildMerge(DstEltTy,
                                Regs.take_front(PartsPerElt));
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    B.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = B.buildBuildVector(BVType, Regs);
    B.buildTrunc(OrigRegs[0], BV);
  }
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(B, F, VRegs);

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = B.getMF();
  MachineBasicBlock &MBB = B.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    B.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsShader && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          B.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(
      OrigArg, SplitArgs, DL, MRI, CC,
      // FIXME: We should probably be passing multiple registers to
      // handleAssignments to do this
      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
        packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
                                LLTy, PartLLT);
      });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    B.setInstr(*MBB.begin());

  FormalArgHandler Handler(B, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
    CCInfo.AllocateReg(Info->getFrameOffsetReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  B.setMBB(MBB);

  return true;
}