Revert "[MS] Overhaul how clang passes overaligned args on x86_32"

The original commit broke some Chromium tests, so revert it until the
problem can be fixed; see https://crbug.com/1046362

This reverts commit 2af74e27ed7d0832cbdde9cb969aaca7a42e99f9.
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 5b03f37..3a50e2b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2339,9 +2339,6 @@
       auto FieldIndex = ArgI.getInAllocaFieldIndex();
       Address V =
           Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName());
-      if (ArgI.getInAllocaIndirect())
-        V = Address(Builder.CreateLoad(V),
-                    getContext().getTypeAlignInChars(Ty));
       ArgVals.push_back(ParamValue::forIndirect(V));
       break;
     }
@@ -4041,39 +4038,18 @@
       assert(NumIRArgs == 0);
       assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
       if (I->isAggregate()) {
+        // Replace the placeholder with the appropriate argument slot GEP.
         Address Addr = I->hasLValue()
                            ? I->getKnownLValue().getAddress(*this)
                            : I->getKnownRValue().getAggregateAddress();
         llvm::Instruction *Placeholder =
             cast<llvm::Instruction>(Addr.getPointer());
-
-        if (!ArgInfo.getInAllocaIndirect()) {
-          // Replace the placeholder with the appropriate argument slot GEP.
-          CGBuilderTy::InsertPoint IP = Builder.saveIP();
-          Builder.SetInsertPoint(Placeholder);
-          Addr = Builder.CreateStructGEP(ArgMemory,
-                                         ArgInfo.getInAllocaFieldIndex());
-          Builder.restoreIP(IP);
-        } else {
-          // For indirect things such as overaligned structs, replace the
-          // placeholder with a regular aggregate temporary alloca. Store the
-          // address of this alloca into the struct.
-          Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp");
-          Address ArgSlot = Builder.CreateStructGEP(
-              ArgMemory, ArgInfo.getInAllocaFieldIndex());
-          Builder.CreateStore(Addr.getPointer(), ArgSlot);
-        }
-        deferPlaceholderReplacement(Placeholder, Addr.getPointer());
-      } else if (ArgInfo.getInAllocaIndirect()) {
-        // Make a temporary alloca and store the address of it into the argument
-        // struct.
-        Address Addr = CreateMemTempWithoutCast(
-            I->Ty, getContext().getTypeAlignInChars(I->Ty),
-            "indirect-arg-temp");
-        I->copyInto(*this, Addr);
-        Address ArgSlot =
+        CGBuilderTy::InsertPoint IP = Builder.saveIP();
+        Builder.SetInsertPoint(Placeholder);
+        Addr =
             Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
-        Builder.CreateStore(Addr.getPointer(), ArgSlot);
+        Builder.restoreIP(IP);
+        deferPlaceholderReplacement(Placeholder, Addr.getPointer());
       } else {
         // Store the RValue into the argument struct.
         Address Addr =
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 08ef8ff..c803785 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1676,7 +1676,6 @@
   bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
 
   Ty = useFirstFieldIfTransparentUnion(Ty);
-  TypeInfo TI = getContext().getTypeInfo(Ty);
 
   // Check with the C++ ABI first.
   const RecordType *RT = Ty->getAs<RecordType>();
@@ -1726,7 +1725,7 @@
     bool NeedsPadding = false;
     bool InReg;
     if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
-      unsigned SizeInRegs = (TI.Width + 31) / 32;
+      unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
       SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
       llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
       if (InReg)
@@ -1736,19 +1735,14 @@
     }
     llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
 
-    // Pass over-aligned aggregates on Windows indirectly. This behavior was
-    // added in MSVC 2015.
-    if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32)
-      return getIndirectResult(Ty, /*ByVal=*/false, State);
-
     // Expand small (<= 128-bit) record types when we know that the stack layout
     // of those arguments will match the struct. This is important because the
     // LLVM backend isn't smart enough to remove byval, which inhibits many
     // optimizations.
     // Don't do this for the MCU if there are still free integer registers
     // (see X86_64 ABI for full explanation).
-    if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
-        canExpandIndirectArgument(Ty))
+    if (getContext().getTypeSize(Ty) <= 4 * 32 &&
+        (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
       return ABIArgInfo::getExpandWithPadding(
           IsFastCall || IsVectorCall || IsRegCall, PaddingType);
 
@@ -1756,24 +1750,14 @@
   }
 
   if (const VectorType *VT = Ty->getAs<VectorType>()) {
-    // On Windows, vectors are passed directly if registers are available, or
-    // indirectly if not. This avoids the need to align argument memory. Pass
-    // user-defined vector types larger than 512 bits indirectly for simplicity.
-    if (IsWin32StructABI) {
-      if (TI.Width <= 512 && State.FreeSSERegs > 0) {
-        --State.FreeSSERegs;
-        return ABIArgInfo::getDirectInReg();
-      }
-      return getIndirectResult(Ty, /*ByVal=*/false, State);
-    }
-
     // On Darwin, some vectors are passed in memory, we handle this by passing
     // it as an i8/i16/i32/i64.
     if (IsDarwinVectorABI) {
-      if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
-          (TI.Width == 64 && VT->getNumElements() == 1))
-        return ABIArgInfo::getDirect(
-            llvm::IntegerType::get(getVMContext(), TI.Width));
+      uint64_t Size = getContext().getTypeSize(Ty);
+      if ((Size == 8 || Size == 16 || Size == 32) ||
+          (Size == 64 && VT->getNumElements() == 1))
+        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
+                                                            Size));
     }
 
     if (IsX86_MMXType(CGT.ConvertType(Ty)))
@@ -1803,10 +1787,9 @@
   CCState State(FI);
   if (IsMCUABI)
     State.FreeRegs = 3;
-  else if (State.CC == llvm::CallingConv::X86_FastCall) {
+  else if (State.CC == llvm::CallingConv::X86_FastCall)
     State.FreeRegs = 2;
-    State.FreeSSERegs = 3;
-  } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
+  else if (State.CC == llvm::CallingConv::X86_VectorCall) {
     State.FreeRegs = 2;
     State.FreeSSERegs = 6;
   } else if (FI.getHasRegParm())
@@ -1814,11 +1797,6 @@
   else if (State.CC == llvm::CallingConv::X86_RegCall) {
     State.FreeRegs = 5;
     State.FreeSSERegs = 8;
-  } else if (IsWin32StructABI) {
-    // Since MSVC 2015, the first three SSE vectors have been passed in
-    // registers. The rest are passed indirectly.
-    State.FreeRegs = DefaultNumRegisterParameters;
-    State.FreeSSERegs = 3;
   } else
     State.FreeRegs = DefaultNumRegisterParameters;
 
@@ -1865,25 +1843,16 @@
                                    CharUnits &StackOffset, ABIArgInfo &Info,
                                    QualType Type) const {
   // Arguments are always 4-byte-aligned.
-  CharUnits WordSize = CharUnits::fromQuantity(4);
-  assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
+  CharUnits FieldAlign = CharUnits::fromQuantity(4);
 
-  // sret pointers and indirect things will require an extra pointer
-  // indirection, unless they are byval. Most things are byval, and will not
-  // require this indirection.
-  bool IsIndirect = false;
-  if (Info.isIndirect() && !Info.getIndirectByVal())
-    IsIndirect = true;
-  Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
-  llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
-  if (IsIndirect)
-    LLTy = LLTy->getPointerTo(0);
-  FrameFields.push_back(LLTy);
-  StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
+  assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct");
+  Info = ABIArgInfo::getInAlloca(FrameFields.size());
+  FrameFields.push_back(CGT.ConvertTypeForMem(Type));
+  StackOffset += getContext().getTypeSizeInChars(Type);
 
   // Insert padding bytes to respect alignment.
   CharUnits FieldEnd = StackOffset;
-  StackOffset = FieldEnd.alignTo(WordSize);
+  StackOffset = FieldEnd.alignTo(FieldAlign);
   if (StackOffset != FieldEnd) {
     CharUnits NumBytes = StackOffset - FieldEnd;
     llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
@@ -1897,12 +1866,16 @@
   switch (Info.getKind()) {
   case ABIArgInfo::InAlloca:
     return true;
+  case ABIArgInfo::Indirect:
+    assert(Info.getIndirectByVal());
+    return true;
   case ABIArgInfo::Ignore:
     return false;
-  case ABIArgInfo::Indirect:
   case ABIArgInfo::Direct:
   case ABIArgInfo::Extend:
-    return !Info.getInReg();
+    if (Info.getInReg())
+      return false;
+    return true;
   case ABIArgInfo::Expand:
   case ABIArgInfo::CoerceAndExpand:
     // These are aggregate types which are never passed in registers when
@@ -1936,7 +1909,8 @@
 
   // Put the sret parameter into the inalloca struct if it's in memory.
   if (Ret.isIndirect() && !Ret.getInReg()) {
-    addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
+    CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
+    addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
     // On Windows, the hidden sret parameter is always returned in eax.
     Ret.setInAllocaSRet(IsWin32StructABI);
   }