AMDGPU: Partially fix handling of packed amdgpu_ps arguments
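Fixes annoying limitations when writing tests: vector arguments with
16-bit elements (packed types) no longer trip the "vector type argument
should have been split" assertion.
Also remove more leftover code for manually scalarizing arguments
and return values.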
llvm-svn: 338618
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2500786..afc8056 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1349,7 +1349,8 @@
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
- assert(!Arg->VT.isVector() && "vector type argument should have been split");
+ assert((!Arg->VT.isVector() || Arg->VT.getScalarSizeInBits() == 16) &&
+ "vector type argument should have been split");
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
@@ -1951,29 +1952,6 @@
llvm_unreachable("Unknown loc info!");
}
- if (IsShader && Arg.VT.isVector()) {
- // Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- SmallVector<SDValue, 4> Regs;
- Regs.push_back(Val);
- for (unsigned j = 1; j != NumElements; ++j) {
- Reg = ArgLocs[ArgIdx++].getLocReg();
- Reg = MF.addLiveIn(Reg, RC);
-
- SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- Regs.push_back(Copy);
- }
-
- // Fill up the missing vector elements
- NumElements = Arg.VT.getVectorNumElements() - NumElements;
- Regs.append(NumElements, DAG.getUNDEF(VT));
-
- InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
- continue;
- }
-
InVals.push_back(Val);
}
@@ -2037,48 +2015,19 @@
bool IsShader = AMDGPU::isShader(CallConv);
- Info->setIfReturnsVoid(Outs.size() == 0);
+ Info->setIfReturnsVoid(Outs.empty());
bool IsWaveEnd = Info->returnsVoid() && IsShader;
- SmallVector<ISD::OutputArg, 48> Splits;
- SmallVector<SDValue, 48> SplitVals;
-
- // Split vectors into their elements.
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- const ISD::OutputArg &Out = Outs[i];
-
- if (IsShader && Out.VT.isVector()) {
- MVT VT = Out.VT.getVectorElementType();
- ISD::OutputArg NewOut = Out;
- NewOut.Flags.setSplit();
- NewOut.VT = VT;
-
- // We want the original number of vector elements here, e.g.
- // three or five, not four or eight.
- unsigned NumElements = Out.ArgVT.getVectorNumElements();
-
- for (unsigned j = 0; j != NumElements; ++j) {
- SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, OutVals[i],
- DAG.getConstant(j, DL, MVT::i32));
- SplitVals.push_back(Elem);
- Splits.push_back(NewOut);
- NewOut.PartOffset += NewOut.VT.getStoreSize();
- }
- } else {
- SplitVals.push_back(OutVals[i]);
- Splits.push_back(Out);
- }
- }
-
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 48> RVLocs;
+ SmallVector<ISD::OutputArg, 48> Splits;
// CCState - Info about the registers and stack slots.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze outgoing return values.
- CCInfo.AnalyzeReturn(Splits, CCAssignFnForReturn(CallConv, isVarArg));
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 48> RetOps;
@@ -2103,14 +2052,12 @@
}
// Copy the result values into the output registers.
- for (unsigned i = 0, realRVLocIdx = 0;
- i != RVLocs.size();
- ++i, ++realRVLocIdx) {
- CCValAssign &VA = RVLocs[i];
+ for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
+ ++I, ++RealRVLocIdx) {
+ CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
// TODO: Partially return in registers if return values don't fit.
-
- SDValue Arg = SplitVals[realRVLocIdx];
+ SDValue Arg = OutVals[RealRVLocIdx];
// Copied from other backends.
switch (VA.getLocInfo()) {