Correct VectorCall x86 (32 bit) behavior for SSE Register Assignment
In running some internal vectorcall tests in 32 bit mode, we discovered that the
behavior I'd previously implemented for x64 (and applied to x32) regarding the
assignment of SSE registers was incorrect. See spec here:
https://msdn.microsoft.com/en-us/library/dn375768.aspx
My previous implementation applied register argument position from the x64
version to both. This isn't correct for x86, so this removes and refactors that
section. Additionally, it corrects the integer/int-pointer assignments. Unlike
x64, x86 permits integers to be assigned independent of position.
Finally, the code for 32 bit was cleaned up a little to clarify the intent,
as well as given a descriptive comment.
Differential Revision: https://reviews.llvm.org/D34455
llvm-svn: 305928
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index d6a5e75..8d00e05 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -951,8 +951,7 @@
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
- ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State,
- const ABIArgInfo& current) const;
+
/// \brief Updates the number of available free registers, returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
@@ -1536,27 +1535,6 @@
return true;
}
-ABIArgInfo
-X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State,
- const ABIArgInfo ¤t) const {
- // Assumes vectorCall calling convention.
- const Type *Base = nullptr;
- uint64_t NumElts = 0;
-
- if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
- isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.FreeSSERegs >= NumElts) {
- // HVA types get passed directly in registers if there is room.
- State.FreeSSERegs -= NumElts;
- return getDirectX86Hva();
- }
- // If there's no room, the HVA gets passed as normal indirect
- // structure.
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- }
- return current;
-}
-
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
@@ -1575,35 +1553,20 @@
}
}
- // vectorcall adds the concept of a homogenous vector aggregate, similar
- // to other targets, regcall uses some of the HVA rules.
+ // Regcall uses the concept of a homogenous vector aggregate, similar
+ // to other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
- if ((State.CC == llvm::CallingConv::X86_VectorCall ||
- State.CC == llvm::CallingConv::X86_RegCall) &&
+ if (State.CC == llvm::CallingConv::X86_RegCall &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.CC == llvm::CallingConv::X86_RegCall) {
- if (State.FreeSSERegs >= NumElts) {
- State.FreeSSERegs -= NumElts;
- if (Ty->isBuiltinType() || Ty->isVectorType())
- return ABIArgInfo::getDirect();
- return ABIArgInfo::getExpand();
-
- }
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
- if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) {
- // Actual floating-point types get registers first time through if
- // there is registers available
- State.FreeSSERegs -= NumElts;
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ if (Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
- } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
- // HVA Types only get registers after everything else has been
- // set, so it gets set as indirect for now.
- return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty));
- }
+ return ABIArgInfo::getExpand();
}
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (isAggregateTypeForABI(Ty)) {
@@ -1684,31 +1647,53 @@
void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
bool &UsedInAlloca) const {
- // Vectorcall only allows the first 6 parameters to be passed in registers,
- // and homogeneous vector aggregates are only put into registers as a second
- // priority.
- unsigned Count = 0;
- CCState ZeroState = State;
- ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0;
- // HVAs must be done as a second priority for registers, so the deferred
- // items are dealt with by going through the pattern a second time.
+ // Vectorcall x86 works subtly different than in x64, so the format is
+ // a bit different than the x64 version. First, all vector types (not HVAs)
+ // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers.
+ // This differs from the x64 implementation, where the first 6 by INDEX get
+ // registers.
+ // After that, integers AND HVAs are assigned Left to Right in the same pass.
+ // Integers are passed as ECX/EDX if one is available (in order). HVAs will
+ // first take up the remaining YMM/XMM registers. If insufficient registers
+ // remain but an integer register (ECX/EDX) is available, it will be passed
+ // in that, else, on the stack.
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = classifyArgumentType(I.type, State);
- else
- // Parameters after the 6th cannot be passed in registers,
- // so pretend there are no registers left for them.
- I.info = classifyArgumentType(I.type, ZeroState);
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
- ++Count;
+ // First pass do all the vector types.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = ABIArgInfo::getDirect();
+ } else {
+ I.info = classifyArgumentType(Ty, State);
+ }
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
- Count = 0;
- // Go through the arguments a second time to get HVAs registers if there
- // are still some available.
+
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, State, I.info);
- ++Count;
+ // Second pass, do the rest!
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts);
+
+ if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) {
+ // Assign true HVAs (non vector/native FP types).
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = getDirectX86Hva();
+ } else {
+ I.info = getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+ } else if (!IsHva) {
+ // Assign all Non-HVAs, so this will exclude Vector/FP args.
+ I.info = classifyArgumentType(Ty, State);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
}
@@ -3901,6 +3886,8 @@
bool IsRegCall) const {
unsigned Count = 0;
for (auto &I : FI.arguments()) {
+ // Vectorcall in x64 only permits the first 6 arguments to be passed
+ // as XMM/YMM registers.
if (Count < VectorcallMaxParamNumAsReg)
I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
else {
@@ -3913,11 +3900,8 @@
++Count;
}
- Count = 0;
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
- ++Count;
+ I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
}
}