Correct dwarf unwind information in function epilogue
This patch aims to provide correct dwarf unwind information in function
epilogue for X86.
It consists of two parts. The first part inserts CFI instructions that set
appropriate cfa offset and cfa register in emitEpilogue() in
X86FrameLowering. This part is X86 specific.
The second part is platform independent and ensures that:
* CFI instructions do not affect code generation (they are not counted as
  instructions when tail duplicating or tail merging)
* Unwind information remains correct when a function is modified by
  different passes. This is done in a late pass by analyzing information
  about cfa offset and cfa register in BBs and inserting additional CFI
  directives where necessary.
Added CFIInstrInserter pass:
* analyzes each basic block to determine the cfa offset and register values
  valid at its entry and exit
* verifies that outgoing cfa offset and register of predecessor blocks match
  incoming values of their successors
* inserts additional CFI directives at basic block beginning to correct the
  rule for calculating CFA
Having CFI instructions in function epilogue can cause incorrect CFA
calculation rule for some basic blocks. This can happen if, due to basic
block reordering, or the existence of multiple epilogue blocks, some of the
blocks have wrong cfa offset and register values set by the epilogue block
above them.
CFIInstrInserter is currently run only on X86, but can be used by any target
that implements support for adding CFI instructions in epilogue.
Patch by Violeta Vukobrat.
Differential Revision: https://reviews.llvm.org/D42848
llvm-svn: 330706
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index fac7b8c..0edebac 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -434,6 +434,9 @@
   // This pass expands indirectbr instructions.
   FunctionPass *createIndirectBrExpandPass();
 
+  /// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp
+  FunctionPass *createCFIInstrInserter();
+
 } // End llvm namespace
 
 #endif
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 637b73c..f8effee 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -345,6 +345,14 @@
           return false;
     return true;
   }
+
+  /// Return initial CFA offset value i.e. the one valid at the beginning of the
+  /// function (before any stack operations).
+  virtual int getInitialCFAOffset(const MachineFunction &MF) const;
+
+  /// Return initial CFA register value i.e. the one valid at the beginning of
+  /// the function (before any stack operations).
+  virtual unsigned getInitialCFARegister(const MachineFunction &MF) const;
 };
 
 } // End llvm namespace
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 189c947..e6ceeff 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -91,6 +91,7 @@
 void initializeCFGPrinterLegacyPassPass(PassRegistry&);
 void initializeCFGSimplifyPassPass(PassRegistry&);
 void initializeCFGViewerLegacyPassPass(PassRegistry&);
+void initializeCFIInstrInserterPass(PassRegistry&);
 void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
 void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
 void initializeCallGraphDOTPrinterPass(PassRegistry&);
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 7f358a6..f160efd 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -296,6 +296,11 @@
   return HashMachineInstr(*I);
 }
 
+/// Whether MI should be counted as an instruction when calculating common tail.
+static bool countsAsInstruction(const MachineInstr &MI) {
+  return !(MI.isDebugValue() || MI.isCFIInstruction());
+}
+
 /// ComputeCommonTailLength - Given two machine basic blocks, compute the number
 /// of instructions they actually have in common together at their end.  Return
 /// iterators for the first shared instruction in each block.
@@ -310,26 +315,27 @@
   while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
     --I1; --I2;
     // Skip debugging pseudos; necessary to avoid changing the code.
-    while (I1->isDebugValue()) {
+    while (!countsAsInstruction(*I1)) {
       if (I1==MBB1->begin()) {
-        while (I2->isDebugValue()) {
-          if (I2==MBB2->begin())
+        while (!countsAsInstruction(*I2)) {
+          if (I2==MBB2->begin()) {
             // I1==DBG at begin; I2==DBG at begin
-            return TailLen;
+            goto SkipTopCFIAndReturn;
+          }
           --I2;
         }
         ++I2;
         // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
-        return TailLen;
+        goto SkipTopCFIAndReturn;
       }
       --I1;
     }
     // I1==first (untested) non-DBG preceding known match
-    while (I2->isDebugValue()) {
+    while (!countsAsInstruction(*I2)) {
       if (I2==MBB2->begin()) {
         ++I1;
         // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
-        return TailLen;
+        goto SkipTopCFIAndReturn;
       }
       --I2;
     }
@@ -368,6 +374,37 @@
     }
     ++I1;
   }
+
+SkipTopCFIAndReturn:
+  // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
+  // I1 and I2 are non-identical when compared and then one or both of them ends
+  // up pointing to a CFI instruction after being incremented. For example:
+  /*
+    BB1:
+    ...
+    INSTRUCTION_A
+    ADD32ri8  <- last common instruction
+    ...
+    BB2:
+    ...
+    INSTRUCTION_B
+    CFI_INSTRUCTION
+    ADD32ri8  <- last common instruction
+    ...
+  */
+  // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
+  // incrementing the iterators, I1 will point to ADD, however I2 will point to
+  // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
+  // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
+  // instruction.
+  while (I1 != MBB1->end() && I1->isCFIInstruction()) {
+    ++I1;
+  }
+
+  while (I2 != MBB2->end() && I2->isCFIInstruction()) {
+    ++I2;
+  }
+
   return TailLen;
 }
 
@@ -454,7 +491,7 @@
                                 MachineBasicBlock::iterator E) {
   unsigned Time = 0;
   for (; I != E; ++I) {
-    if (I->isDebugValue())
+    if (!countsAsInstruction(*I))
       continue;
     if (I->isCall())
       Time += 10;
@@ -814,12 +851,12 @@
     assert(MBBI != MBBIE && "Reached BB end within common tail length!");
     (void)MBBIE;
 
-    if (MBBI->isDebugValue()) {
+    if (!countsAsInstruction(*MBBI)) {
       ++MBBI;
       continue;
     }
 
-    while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue())
+    while ((MBBICommon != MBBIECommon) && !countsAsInstruction(*MBBICommon))
       ++MBBICommon;
 
     assert(MBBICommon != MBBIECommon &&
@@ -859,7 +896,7 @@
   }
 
   for (auto &MI : *MBB) {
-    if (MI.isDebugValue())
+    if (!countsAsInstruction(MI))
       continue;
     DebugLoc DL = MI.getDebugLoc();
     for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
@@ -869,7 +906,7 @@
       auto &Pos = NextCommonInsts[i];
       assert(Pos != SameTails[i].getBlock()->end() &&
           "Reached BB end within common tail");
-      while (Pos->isDebugValue()) {
+      while (!countsAsInstruction(*Pos)) {
         ++Pos;
         assert(Pos != SameTails[i].getBlock()->end() &&
             "Reached BB end within common tail");
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
new file mode 100644
index 0000000..f3bc859
--- /dev/null
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -0,0 +1,308 @@
+//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass verifies incoming and outgoing CFA information of basic
+/// blocks. CFA information is information about offset and register set by CFI
+/// directives, valid at the start and end of a basic block. This pass checks
+/// that outgoing information of predecessors matches incoming information of
+/// their successors. Then it checks if blocks have correct CFA calculation rule
+/// set and inserts additional CFI instruction at their beginnings if they
+/// don't. CFI instructions are inserted if basic blocks have incorrect offset
+/// or register set by previous blocks, as a result of a non-linear layout of
+/// blocks in a function.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+namespace {
+class CFIInstrInserter : public MachineFunctionPass {
+ public:
+  static char ID;
+
+  CFIInstrInserter() : MachineFunctionPass(ID) {
+    initializeCFIInstrInserterPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (!MF.getMMI().hasDebugInfo() &&
+        !MF.getFunction().needsUnwindTableEntry())
+      return false;
+
+    MBBVector.resize(MF.getNumBlockIDs());
+    calculateCFAInfo(MF);
+#ifndef NDEBUG
+    if (unsigned ErrorNum = verify(MF))
+      report_fatal_error("Found " + Twine(ErrorNum) +
+                         " in/out CFI information errors.");
+#endif
+    bool insertedCFI = insertCFIInstrs(MF);
+    MBBVector.clear();
+    return insertedCFI;
+  }
+
+ private:
+  struct MBBCFAInfo {
+    MachineBasicBlock *MBB;
+    /// Value of cfa offset valid at basic block entry.
+    int IncomingCFAOffset = -1;
+    /// Value of cfa offset valid at basic block exit.
+    int OutgoingCFAOffset = -1;
+    /// Value of cfa register valid at basic block entry.
+    unsigned IncomingCFARegister = 0;
+    /// Value of cfa register valid at basic block exit.
+    unsigned OutgoingCFARegister = 0;
+    /// If in/out cfa offset and register values for this block have already
+    /// been set or not.
+    bool Processed = false;
+  };
+
+  /// Contains cfa offset and register values valid at entry and exit of basic
+  /// blocks.
+  std::vector<MBBCFAInfo> MBBVector;
+
+  /// Calculate cfa offset and register values valid at entry and exit for all
+  /// basic blocks in a function.
+  void calculateCFAInfo(MachineFunction &MF);
+  /// Calculate cfa offset and register values valid at basic block exit by
+  /// checking the block for CFI instructions. Block's incoming CFA info remains
+  /// the same.
+  void calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo);
+  /// Update in/out cfa offset and register values for successors of the basic
+  /// block.
+  void updateSuccCFAInfo(MBBCFAInfo &MBBInfo);
+
+  /// Check if incoming CFA information of a basic block matches outgoing CFA
+  /// information of the previous block. If it doesn't, insert CFI instruction
+  /// at the beginning of the block that corrects the CFA calculation rule for
+  /// that block.
+  bool insertCFIInstrs(MachineFunction &MF);
+  /// Return the cfa offset value that should be set at the beginning of a MBB
+  /// if needed. The negated value is needed when creating CFI instructions that
+  /// set absolute offset.
+  int getCorrectCFAOffset(MachineBasicBlock *MBB) {
+    return -MBBVector[MBB->getNumber()].IncomingCFAOffset;
+  }
+
+  void report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ);
+  /// Go through each MBB in a function and check that outgoing offset and
+  /// register of its predecessors match incoming offset and register of that
+  /// MBB, as well as that incoming offset and register of its successors match
+  /// outgoing offset and register of the MBB.
+  unsigned verify(MachineFunction &MF);
+};
+}  // namespace
+
+char CFIInstrInserter::ID = 0;
+INITIALIZE_PASS(CFIInstrInserter, "cfi-instr-inserter",
+                "Check CFA info and insert CFI instructions if needed", false,
+                false)
+FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); }
+
+void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
+  // Initial CFA offset value i.e. the one valid at the beginning of the
+  // function.
+  int InitialOffset =
+      MF.getSubtarget().getFrameLowering()->getInitialCFAOffset(MF);
+  // Initial CFA register value i.e. the one valid at the beginning of the
+  // function.
+  unsigned InitialRegister =
+      MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF);
+
+  // Initialize MBBMap.
+  for (MachineBasicBlock &MBB : MF) {
+    MBBCFAInfo MBBInfo;
+    MBBInfo.MBB = &MBB;
+    MBBInfo.IncomingCFAOffset = InitialOffset;
+    MBBInfo.OutgoingCFAOffset = InitialOffset;
+    MBBInfo.IncomingCFARegister = InitialRegister;
+    MBBInfo.OutgoingCFARegister = InitialRegister;
+    MBBVector[MBB.getNumber()] = MBBInfo;
+  }
+
+  // Set in/out cfa info for all blocks in the function. This traversal is based
+  // on the assumption that the first block in the function is the entry block
+  // i.e. that it has initial cfa offset and register values as incoming CFA
+  // information.
+  for (MachineBasicBlock &MBB : MF) {
+    if (MBBVector[MBB.getNumber()].Processed) continue;
+    calculateOutgoingCFAInfo(MBBVector[MBB.getNumber()]);
+    updateSuccCFAInfo(MBBVector[MBB.getNumber()]);
+  }
+}
+
+void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
+  // Outgoing cfa offset set by the block.
+  int SetOffset = MBBInfo.IncomingCFAOffset;
+  // Outgoing cfa register set by the block.
+  unsigned SetRegister = MBBInfo.IncomingCFARegister;
+  const std::vector<MCCFIInstruction> &Instrs =
+      MBBInfo.MBB->getParent()->getFrameInstructions();
+
+  // Determine cfa offset and register set by the block.
+  for (MachineInstr &MI : *MBBInfo.MBB) {
+    if (MI.isCFIInstruction()) {
+      unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
+      const MCCFIInstruction &CFI = Instrs[CFIIndex];
+      switch (CFI.getOperation()) {
+      case MCCFIInstruction::OpDefCfaRegister:
+        SetRegister = CFI.getRegister();
+        break;
+      case MCCFIInstruction::OpDefCfaOffset:
+        SetOffset = CFI.getOffset();
+        break;
+      case MCCFIInstruction::OpAdjustCfaOffset:
+        SetOffset += CFI.getOffset();
+        break;
+      case MCCFIInstruction::OpDefCfa:
+        SetRegister = CFI.getRegister();
+        SetOffset = CFI.getOffset();
+        break;
+      case MCCFIInstruction::OpRememberState:
+        // TODO: Add support for handling cfi_remember_state.
+#ifndef NDEBUG
+        report_fatal_error(
+            "Support for cfi_remember_state not implemented! Value of CFA "
+            "may be incorrect!\n");
+#endif
+        break;
+      case MCCFIInstruction::OpRestoreState:
+        // TODO: Add support for handling cfi_restore_state.
+#ifndef NDEBUG
+        report_fatal_error(
+            "Support for cfi_restore_state not implemented! Value of CFA may "
+            "be incorrect!\n");
+#endif
+        break;
+      // Other CFI directives do not affect CFA value.
+      case MCCFIInstruction::OpSameValue:
+      case MCCFIInstruction::OpOffset:
+      case MCCFIInstruction::OpRelOffset:
+      case MCCFIInstruction::OpEscape:
+      case MCCFIInstruction::OpRestore:
+      case MCCFIInstruction::OpUndefined:
+      case MCCFIInstruction::OpRegister:
+      case MCCFIInstruction::OpWindowSave:
+      case MCCFIInstruction::OpGnuArgsSize:
+        break;
+      }
+    }
+  }
+
+  MBBInfo.Processed = true;
+
+  // Update outgoing CFA info.
+  MBBInfo.OutgoingCFAOffset = SetOffset;
+  MBBInfo.OutgoingCFARegister = SetRegister;
+}
+
+void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) {
+  for (MachineBasicBlock *Succ : MBBInfo.MBB->successors()) {
+    MBBCFAInfo &SuccInfo = MBBVector[Succ->getNumber()];
+    if (SuccInfo.Processed) continue;
+    SuccInfo.IncomingCFAOffset = MBBInfo.OutgoingCFAOffset;
+    SuccInfo.IncomingCFARegister = MBBInfo.OutgoingCFARegister;
+    calculateOutgoingCFAInfo(SuccInfo);
+    updateSuccCFAInfo(SuccInfo);
+  }
+}
+
+bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
+  const MBBCFAInfo *PrevMBBInfo = &MBBVector[MF.front().getNumber()];
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  bool InsertedCFIInstr = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    // Skip the first MBB in a function
+    if (MBB.getNumber() == MF.front().getNumber()) continue;
+
+    const MBBCFAInfo &MBBInfo = MBBVector[MBB.getNumber()];
+    auto MBBI = MBBInfo.MBB->begin();
+    DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI);
+
+    if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) {
+      // If both outgoing offset and register of a previous block don't match
+      // incoming offset and register of this block, add a def_cfa instruction
+      // with the correct offset and register for this block.
+      if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) {
+        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+            nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB)));
+        BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex);
+        // If outgoing offset of a previous block doesn't match incoming offset
+        // of this block, add a def_cfa_offset instruction with the correct
+        // offset for this block.
+      } else {
+        unsigned CFIIndex =
+            MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(
+                nullptr, getCorrectCFAOffset(&MBB)));
+        BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex);
+      }
+      InsertedCFIInstr = true;
+      // If outgoing register of a previous block doesn't match incoming
+      // register of this block, add a def_cfa_register instruction with the
+      // correct register for this block.
+    } else if (PrevMBBInfo->OutgoingCFARegister !=
+               MBBInfo.IncomingCFARegister) {
+      unsigned CFIIndex =
+          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+              nullptr, MBBInfo.IncomingCFARegister));
+      BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+      InsertedCFIInstr = true;
+    }
+    PrevMBBInfo = &MBBInfo;
+  }
+  return InsertedCFIInstr;
+}
+
+void CFIInstrInserter::report(const MBBCFAInfo &Pred,
+                              const MBBCFAInfo &Succ) {
+  errs() << "*** Inconsistent CFA register and/or offset between pred and succ "
+            "***\n";
+  errs() << "Pred: " << Pred.MBB->getName()
+         << " outgoing CFA Reg:" << Pred.OutgoingCFARegister << "\n";
+  errs() << "Pred: " << Pred.MBB->getName()
+         << " outgoing CFA Offset:" << Pred.OutgoingCFAOffset << "\n";
+  errs() << "Succ: " << Succ.MBB->getName()
+         << " incoming CFA Reg:" << Succ.IncomingCFARegister << "\n";
+  errs() << "Succ: " << Succ.MBB->getName()
+         << " incoming CFA Offset:" << Succ.IncomingCFAOffset << "\n";
+}
+
+unsigned CFIInstrInserter::verify(MachineFunction &MF) {
+  unsigned ErrorNum = 0;
+  for (MachineBasicBlock &CurrMBB : MF) {
+    const MBBCFAInfo &CurrMBBInfo = MBBVector[CurrMBB.getNumber()];
+    for (MachineBasicBlock *Succ : CurrMBB.successors()) {
+      const MBBCFAInfo &SuccMBBInfo = MBBVector[Succ->getNumber()];
+      // Check that incoming offset and register values of successors match the
+      // outgoing offset and register values of CurrMBB
+      if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset ||
+          SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) {
+        report(CurrMBBInfo, SuccMBBInfo);
+        ErrorNum++;
+      }
+    }
+  }
+  return ErrorNum;
+}
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index b79b407..3df1a38 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -10,6 +10,7 @@
   BuiltinGCs.cpp
   CalcSpillWeights.cpp
   CallingConvLower.cpp
+  CFIInstrInserter.cpp
   CodeGen.cpp
   CodeGenPrepare.cpp
   CriticalAntiDepBreaker.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 51f7e0c..54f1db8 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@
   initializeAtomicExpandPass(Registry);
   initializeBranchFolderPassPass(Registry);
   initializeBranchRelaxationPass(Registry);
+  initializeCFIInstrInserterPass(Registry);
   initializeCodeGenPreparePass(Registry);
   initializeDeadMachineInstructionElimPass(Registry);
   initializeDetectDeadLanesPass(Registry);
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 4d6885c..f0cfa2f 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -124,3 +124,12 @@
 
   return 0;
 }
+
+int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
+  llvm_unreachable("getInitialCFAOffset() not implemented!");
+}
+
+unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF)
+    const {
+  llvm_unreachable("getInitialCFARegister() not implemented!");
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index d052998..42f9676 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -399,28 +399,30 @@
     return 0;
 
   MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
-  MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
-                                                       : std::next(MBBI);
+
   PI = skipDebugInstructionsBackward(PI, MBB.begin());
-  if (NI != nullptr)
-    NI = skipDebugInstructionsForward(NI, MBB.end());
+  // It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI
+  // instruction, and that there are no DBG_VALUE or other instructions between
+  // ADD/SUB/LEA and its corresponding CFI instruction.
+  /* TODO: Add support for the case where there are multiple CFI instructions
+    below the ADD/SUB/LEA, e.g.:
+    ...
+    add
+    cfi_def_cfa_offset
+    cfi_offset
+    ...
+  */
+  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
+    PI = std::prev(PI);
 
   unsigned Opc = PI->getOpcode();
   int Offset = 0;
 
-  if (!doMergeWithPrevious && NI != MBB.end() &&
-      NI->getOpcode() == TargetOpcode::CFI_INSTRUCTION) {
-    // Don't merge with the next instruction if it has CFI.
-    return Offset;
-  }
-
   if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
        Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
       PI->getOperand(0).getReg() == StackPtr){
     assert(PI->getOperand(1).getReg() == StackPtr);
-    Offset += PI->getOperand(2).getImm();
-    MBB.erase(PI);
-    if (!doMergeWithPrevious) MBBI = NI;
+    Offset = PI->getOperand(2).getImm();
   } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
              PI->getOperand(0).getReg() == StackPtr &&
              PI->getOperand(1).getReg() == StackPtr &&
@@ -428,17 +430,19 @@
              PI->getOperand(3).getReg() == X86::NoRegister &&
              PI->getOperand(5).getReg() == X86::NoRegister) {
     // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
-    Offset += PI->getOperand(4).getImm();
-    MBB.erase(PI);
-    if (!doMergeWithPrevious) MBBI = NI;
+    Offset = PI->getOperand(4).getImm();
   } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
               Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
              PI->getOperand(0).getReg() == StackPtr) {
     assert(PI->getOperand(1).getReg() == StackPtr);
-    Offset -= PI->getOperand(2).getImm();
-    MBB.erase(PI);
-    if (!doMergeWithPrevious) MBBI = NI;
-  }
+    Offset = -PI->getOperand(2).getImm();
+  } else
+    return 0;
+
+  PI = MBB.erase(PI);
+  if (PI != MBB.end() && PI->isCFIInstruction()) PI = MBB.erase(PI);
+  if (!doMergeWithPrevious)
+    MBBI = skipDebugInstructionsForward(PI, MBB.end());
 
   return Offset;
 }
@@ -1573,6 +1577,11 @@
   bool HasFP = hasFP(MF);
   uint64_t NumBytes = 0;
 
+  bool NeedsDwarfCFI =
+      (!MF.getTarget().getTargetTriple().isOSDarwin() &&
+       !MF.getTarget().getTargetTriple().isOSWindows()) &&
+      (MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry());
+
   if (IsFunclet) {
     assert(HasFP && "EH funclets without FP not yet implemented");
     NumBytes = getWinEHFuncletFrameSize(MF);
@@ -1595,6 +1604,13 @@
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
             MachineFramePtr)
         .setMIFlag(MachineInstr::FrameDestroy);
+    if (NeedsDwarfCFI) {
+      unsigned DwarfStackPtr =
+          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
+                                  nullptr, DwarfStackPtr, -SlotSize));
+      --MBBI;
+    }
   }
 
   MachineBasicBlock::iterator FirstCSPop = MBBI;
@@ -1658,6 +1674,11 @@
   } else if (NumBytes) {
     // Adjust stack pointer back: ESP += numbytes.
     emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
+    if (!hasFP(MF) && NeedsDwarfCFI) {
+      // Define the current CFA rule to use the provided offset.
+      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
+                                  nullptr, -CSSize - SlotSize));
+    }
     --MBBI;
   }
 
@@ -1670,6 +1691,23 @@
   if (NeedsWin64CFI && MF.hasWinCFI())
     BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
 
+  if (!hasFP(MF) && NeedsDwarfCFI) {
+    MBBI = FirstCSPop;
+    int64_t Offset = -CSSize - SlotSize;
+    // Mark callee-saved pop instruction.
+    // Define the current CFA rule to use the provided offset.
+    while (MBBI != MBB.end()) {
+      MachineBasicBlock::iterator PI = MBBI;
+      unsigned Opc = PI->getOpcode();
+      ++MBBI;
+      if (Opc == X86::POP32r || Opc == X86::POP64r) {
+        Offset += SlotSize;
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createDefCfaOffset(nullptr, Offset));
+      }
+    }
+  }
+
   if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
     // Add the return addr area delta back since we are not tail calling.
     int Offset = -1 * X86FI->getTCReturnAddrDelta();
@@ -2719,7 +2757,6 @@
 
     // Add Amount to SP to destroy a frame, or subtract to setup.
     int64_t StackAdjustment = isDestroy ? Amount : -Amount;
-    int64_t CfaAdjustment = -StackAdjustment;
 
     if (StackAdjustment) {
       // Merge with any previous or following adjustment instruction. Note: the
@@ -2744,6 +2781,7 @@
       // offset to be correct at each call site, while for debugging we want
       // it to be more precise.
 
+      int64_t CfaAdjustment = -StackAdjustment;
       // TODO: When not using precise CFA, we also need to adjust for the
       // InternalAmt here.
       if (CfaAdjustment) {
@@ -2874,6 +2912,15 @@
   return MBBI;
 }
 
+int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
+  return TRI->getSlotSize();
+}
+
+unsigned X86FrameLowering::getInitialCFARegister(const MachineFunction &MF)
+    const {
+  return TRI->getDwarfRegNum(StackPtr, true);
+}
+
 namespace {
 // Struct used by orderFrameObjects to help sort the stack objects.
 struct X86FrameSortingObject {
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index ec98e3f..3bd805a 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -168,6 +168,10 @@
                               MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL, bool RestoreSP = false) const;
 
+  int getInitialCFAOffset(const MachineFunction &MF) const override;
+
+  unsigned getInitialCFARegister(const MachineFunction &MF) const override;
+
 private:
   uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
 
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 2fbf6c3..21fd071 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -495,4 +495,10 @@
 
 void X86PassConfig::addPreEmitPass2() {
   addPass(createX86RetpolineThunksPass());
+  // Verify basic block incoming and outgoing cfa offset and register values and
+  // correct CFA calculation rule where needed by inserting appropriate CFI
+  // instructions.
+  const Triple &TT = TM->getTargetTriple();
+  if (!TT.isOSDarwin() && !TT.isOSWindows())
+    addPass(createCFIInstrInserter());
 }
diff --git a/llvm/test/CodeGen/AArch64/taildup-cfi.ll b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
index 46fe0ec..5c7cbaa 100644
--- a/llvm/test/CodeGen/AArch64/taildup-cfi.ll
+++ b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=arm64-unknown-linux-gnu -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LINUX
 ; RUN: llc -mtriple=arm64-apple-darwin -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DARWIN
 
-; ModuleID = 'taildup-cfi.c'
-source_filename = "taildup-cfi.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 @g = common local_unnamed_addr global i32 0, align 4
diff --git a/llvm/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/llvm/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
index 6814ed1..109962c 100644
--- a/llvm/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
+++ b/llvm/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
@@ -23,6 +23,7 @@
 }
 
 ; CHECK: lpad
+; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: Ltmp
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
index d11f4ef..2d4d4e9 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -87,6 +87,7 @@
 ; X32-NEXT:    movss %xmm4, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    addl $60, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: full_test:
diff --git a/llvm/test/CodeGen/X86/GlobalISel/brcond.ll b/llvm/test/CodeGen/X86/GlobalISel/brcond.ll
index e925731..f5ff048 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/brcond.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/brcond.ll
@@ -36,6 +36,7 @@
 ; X32-NEXT:    movl %eax, (%esp)
 ; X32-NEXT:    movl (%esp), %eax
 ; X32-NEXT:    popl %ecx
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 entry:
   %retval = alloca i32, align 4
diff --git a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
index 51421c0..ede0df3 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
@@ -117,6 +117,7 @@
 ; X32-NEXT:    movups {{[0-9]+}}(%esp), %xmm1
 ; X32-NEXT:    movaps %xmm2, %xmm0
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_v8i32_args:
@@ -135,6 +136,7 @@
 ; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    calll trivial_callee
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_trivial_call:
@@ -143,6 +145,7 @@
 ; X64-NEXT:    .cfi_def_cfa_offset 16
 ; X64-NEXT:    callq trivial_callee
 ; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   call void @trivial_callee()
   ret void
@@ -160,6 +163,7 @@
 ; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X32-NEXT:    calll simple_arg_callee
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_simple_arg_call:
@@ -171,6 +175,7 @@
 ; X64-NEXT:    movl %eax, %esi
 ; X64-NEXT:    callq simple_arg_callee
 ; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   call void @simple_arg_callee(i32 %in1, i32 %in0)
   ret void
@@ -193,6 +198,7 @@
 ; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X32-NEXT:    calll simple_arg8_callee
 ; X32-NEXT:    addl $44, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_simple_arg8_call:
@@ -208,6 +214,7 @@
 ; X64-NEXT:    movl %edi, %r9d
 ; X64-NEXT:    callq simple_arg8_callee
 ; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   call void @simple_arg8_callee(i32 %in0, i32 %in0, i32 %in0, i32 %in0,i32 %in0, i32 %in0, i32 %in0, i32 %in0)
   ret void
@@ -224,6 +231,7 @@
 ; X32-NEXT:    calll simple_return_callee
 ; X32-NEXT:    addl %eax, %eax
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_simple_return_callee:
@@ -234,6 +242,7 @@
 ; X64-NEXT:    callq simple_return_callee
 ; X64-NEXT:    addl %eax, %eax
 ; X64-NEXT:    popq %rcx
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %call = call i32 @simple_return_callee(i32 5)
   %r = add i32 %call, %call
@@ -254,6 +263,7 @@
 ; X32-NEXT:    paddd (%esp), %xmm0 # 16-byte Folded Reload
 ; X32-NEXT:    paddd {{[0-9]+}}(%esp), %xmm1 # 16-byte Folded Reload
 ; X32-NEXT:    addl $44, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_split_return_callee:
@@ -268,6 +278,7 @@
 ; X64-NEXT:    paddd (%rsp), %xmm0 # 16-byte Folded Reload
 ; X64-NEXT:    paddd {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
 ; X64-NEXT:    addq $40, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %call = call <8 x i32> @split_return_callee(<8 x i32> %arg2)
   %r = add <8 x i32> %arg1, %call
@@ -281,6 +292,7 @@
 ; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    calll *{{[0-9]+}}(%esp)
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_indirect_call:
@@ -289,6 +301,7 @@
 ; X64-NEXT:    .cfi_def_cfa_offset 16
 ; X64-NEXT:    callq *%rdi
 ; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   call void %func()
   ret void
@@ -317,8 +330,11 @@
 ; X32-NEXT:    movl %esi, (%esp)
 ; X32-NEXT:    calll take_char
 ; X32-NEXT:    addl $4, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    popl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_abi_exts_call:
@@ -335,6 +351,7 @@
 ; X64-NEXT:    movl %ebx, %edi
 ; X64-NEXT:    callq take_char
 ; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %val = load i8, i8* %addr
   call void @take_char(i8 %val)
@@ -357,6 +374,7 @@
 ; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; X32-NEXT:    calll variadic_callee
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_variadic_call_1:
@@ -368,6 +386,7 @@
 ; X64-NEXT:    movb $0, %al
 ; X64-NEXT:    callq variadic_callee
 ; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 
   %addr = load i8*, i8** %addr_ptr
@@ -393,6 +412,7 @@
 ; X32-NEXT:    movl %ecx, 4(%eax)
 ; X32-NEXT:    calll variadic_callee
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_variadic_call_2:
@@ -405,6 +425,7 @@
 ; X64-NEXT:    movb $1, %al
 ; X64-NEXT:    callq variadic_callee
 ; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 
   %addr = load i8*, i8** %addr_ptr
diff --git a/llvm/test/CodeGen/X86/GlobalISel/frameIndex.ll b/llvm/test/CodeGen/X86/GlobalISel/frameIndex.ll
index 1faa82b..96bf593 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/frameIndex.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/frameIndex.ll
@@ -18,6 +18,7 @@
 ; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movl %esp, %eax
 ; X32-NEXT:    popl %ecx
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X32ABI-LABEL: allocai32:
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 1106211..569f6a7 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -61,6 +61,7 @@
 ; CHECK-NEXT:       Insert XRay ops
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       X86 Retpoline Thunks
+; CHECK-NEXT:       Check CFA info and insert CFI instructions if needed
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       X86 Assembly Printer
diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll
index 8511474..080dce8 100644
--- a/llvm/test/CodeGen/X86/O3-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -160,6 +160,7 @@
 ; CHECK-NEXT:       Insert XRay ops
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       X86 Retpoline Thunks
+; CHECK-NEXT:       Check CFA info and insert CFI instructions if needed
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       X86 Assembly Printer
diff --git a/llvm/test/CodeGen/X86/TruncAssertZext.ll b/llvm/test/CodeGen/X86/TruncAssertZext.ll
index 80f8e0f..5a70814 100644
--- a/llvm/test/CodeGen/X86/TruncAssertZext.ll
+++ b/llvm/test/CodeGen/X86/TruncAssertZext.ll
@@ -25,6 +25,7 @@
 ; CHECK-NEXT:    subq %rcx, %rax
 ; CHECK-NEXT:    shrq $32, %rax
 ; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %b = call i64 @foo()
   %or = and i64 %b, 18446744069414584575 ; this is 0xffffffff000000ff
diff --git a/llvm/test/CodeGen/X86/avoid-sfb.ll b/llvm/test/CodeGen/X86/avoid-sfb.ll
index 0e2d292..282a8ac 100644
--- a/llvm/test/CodeGen/X86/avoid-sfb.ll
+++ b/llvm/test/CodeGen/X86/avoid-sfb.ll
@@ -854,10 +854,15 @@
 ; CHECK-NEXT:    movups (%rbx), %xmm0
 ; CHECK-NEXT:    movups %xmm0, (%r12)
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
 ; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; DISABLED-LABEL: test_limit_all:
@@ -896,10 +901,15 @@
 ; DISABLED-NEXT:    movups (%rbx), %xmm0
 ; DISABLED-NEXT:    movups %xmm0, (%r12)
 ; DISABLED-NEXT:    popq %rbx
+; DISABLED-NEXT:    .cfi_def_cfa_offset 40
 ; DISABLED-NEXT:    popq %r12
+; DISABLED-NEXT:    .cfi_def_cfa_offset 32
 ; DISABLED-NEXT:    popq %r14
+; DISABLED-NEXT:    .cfi_def_cfa_offset 24
 ; DISABLED-NEXT:    popq %r15
+; DISABLED-NEXT:    .cfi_def_cfa_offset 16
 ; DISABLED-NEXT:    popq %rbp
+; DISABLED-NEXT:    .cfi_def_cfa_offset 8
 ; DISABLED-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: test_limit_all:
@@ -938,10 +948,15 @@
 ; CHECK-AVX2-NEXT:    vmovups (%rbx), %xmm0
 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r12)
 ; CHECK-AVX2-NEXT:    popq %rbx
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
 ; CHECK-AVX2-NEXT:    popq %r12
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-AVX2-NEXT:    popq %r14
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-AVX2-NEXT:    popq %r15
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-AVX2-NEXT:    popq %rbp
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-AVX2-NEXT:    retq
 ;
 ; CHECK-AVX512-LABEL: test_limit_all:
@@ -980,10 +995,15 @@
 ; CHECK-AVX512-NEXT:    vmovups (%rbx), %xmm0
 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r12)
 ; CHECK-AVX512-NEXT:    popq %rbx
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
 ; CHECK-AVX512-NEXT:    popq %r12
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-AVX512-NEXT:    popq %r14
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-AVX512-NEXT:    popq %r15
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-AVX512-NEXT:    popq %rbp
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-AVX512-NEXT:    retq
 entry:
   %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
@@ -1047,10 +1067,15 @@
 ; CHECK-NEXT:    movl 12(%rbx), %eax
 ; CHECK-NEXT:    movl %eax, 12(%r14)
 ; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; DISABLED-LABEL: test_limit_one_pred:
@@ -1086,10 +1111,15 @@
 ; DISABLED-NEXT:    movups (%rbx), %xmm0
 ; DISABLED-NEXT:    movups %xmm0, (%r12)
 ; DISABLED-NEXT:    addq $8, %rsp
+; DISABLED-NEXT:    .cfi_def_cfa_offset 40
 ; DISABLED-NEXT:    popq %rbx
+; DISABLED-NEXT:    .cfi_def_cfa_offset 32
 ; DISABLED-NEXT:    popq %r12
+; DISABLED-NEXT:    .cfi_def_cfa_offset 24
 ; DISABLED-NEXT:    popq %r14
+; DISABLED-NEXT:    .cfi_def_cfa_offset 16
 ; DISABLED-NEXT:    popq %r15
+; DISABLED-NEXT:    .cfi_def_cfa_offset 8
 ; DISABLED-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: test_limit_one_pred:
@@ -1129,10 +1159,15 @@
 ; CHECK-AVX2-NEXT:    movl 12(%rbx), %eax
 ; CHECK-AVX2-NEXT:    movl %eax, 12(%r14)
 ; CHECK-AVX2-NEXT:    addq $8, %rsp
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
 ; CHECK-AVX2-NEXT:    popq %rbx
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-AVX2-NEXT:    popq %r12
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-AVX2-NEXT:    popq %r14
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-AVX2-NEXT:    popq %r15
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-AVX2-NEXT:    retq
 ;
 ; CHECK-AVX512-LABEL: test_limit_one_pred:
@@ -1172,10 +1207,15 @@
 ; CHECK-AVX512-NEXT:    movl 12(%rbx), %eax
 ; CHECK-AVX512-NEXT:    movl %eax, 12(%r14)
 ; CHECK-AVX512-NEXT:    addq $8, %rsp
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
 ; CHECK-AVX512-NEXT:    popq %rbx
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-AVX512-NEXT:    popq %r12
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-AVX512-NEXT:    popq %r14
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-AVX512-NEXT:    popq %r15
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-AVX512-NEXT:    retq
 entry:
   %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index e866122..cbeed64 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -24,6 +24,7 @@
 ; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
@@ -75,6 +76,7 @@
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
@@ -123,6 +125,7 @@
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
diff --git a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
index 199f1cb..6800974 100644
--- a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
+++ b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
@@ -194,11 +194,15 @@
 ; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
 ; LINUXOSX64-NEXT:    callq test_argv64i1
 ; LINUXOSX64-NEXT:    addq $24, %rsp
-; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset -16
+; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset -24
 ; LINUXOSX64-NEXT:    popq %r12
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
 ; LINUXOSX64-NEXT:    popq %r13
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
 ; LINUXOSX64-NEXT:    popq %r14
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    popq %r15
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
 entry:
   %v0 = bitcast i64 4294967298 to <64 x i1>
@@ -271,6 +275,7 @@
 ; LINUXOSX64-NEXT:    kmovq %rax, %k0
 ; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm0
 ; LINUXOSX64-NEXT:    popq %rax
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
 entry:
   %call = call x86_regcallcc <64 x i1> @test_retv64i1()
@@ -381,7 +386,9 @@
 ; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
 ; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
 ; LINUXOSX64-NEXT:    addq $128, %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    vzeroupper
 ; LINUXOSX64-NEXT:    retq
 entry:
@@ -435,6 +442,7 @@
 ; LINUXOSX64-NEXT:    movl $1, %edx
 ; LINUXOSX64-NEXT:    callq test_argv32i1
 ; LINUXOSX64-NEXT:    popq %rcx
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
 entry:
   %v0 = bitcast i32 1 to <32 x i1>
@@ -497,6 +505,7 @@
 ; LINUXOSX64-NEXT:    callq test_retv32i1
 ; LINUXOSX64-NEXT:    incl %eax
 ; LINUXOSX64-NEXT:    popq %rcx
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
 entry:
   %call = call x86_regcallcc <32 x i1> @test_retv32i1()
@@ -610,7 +619,9 @@
 ; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
 ; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
 ; LINUXOSX64-NEXT:    addq $128, %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
   ret i16 %res
@@ -662,6 +673,7 @@
 ; LINUXOSX64-NEXT:    movl $1, %edx
 ; LINUXOSX64-NEXT:    callq test_argv16i1
 ; LINUXOSX64-NEXT:    popq %rcx
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
 entry:
   %v0 = bitcast i16 1 to <16 x i1>
@@ -730,6 +742,7 @@
 ; LINUXOSX64-NEXT:    incl %eax
 ; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; LINUXOSX64-NEXT:    popq %rcx
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
 entry:
   %call = call x86_regcallcc <16 x i1> @test_retv16i1()
@@ -843,7 +856,9 @@
 ; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
 ; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
 ; LINUXOSX64-NEXT:    addq $128, %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
   ret i8 %res
@@ -895,6 +910,7 @@
 ; LINUXOSX64-NEXT:    movl $1, %edx
 ; LINUXOSX64-NEXT:    callq test_argv8i1
 ; LINUXOSX64-NEXT:    popq %rcx
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
 entry:
   %v0 = bitcast i8 1 to <8 x i1>
@@ -968,9 +984,11 @@
 ; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm0
 ; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; LINUXOSX64-NEXT:    popq %rax
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    vzeroupper
 ; LINUXOSX64-NEXT:    retq
 entry:
   %call = call x86_regcallcc <8 x i1> @test_retv8i1()
   ret <8 x i1> %call
 }
+
diff --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
index 5863692..6160eed 100644
--- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
+++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
@@ -63,6 +63,7 @@
 ; LINUXOSX64-NEXT:    callq test_argReti1
 ; LINUXOSX64-NEXT:    incb %al
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i1 %a, 1
   %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
@@ -130,6 +131,7 @@
 ; LINUXOSX64-NEXT:    callq test_argReti8
 ; LINUXOSX64-NEXT:    incb %al
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i8 %a, 1
   %c = call x86_regcallcc i8 @test_argReti8(i8 %b)
@@ -200,6 +202,7 @@
 ; LINUXOSX64-NEXT:    incl %eax
 ; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i16 %a, 1
   %c = call x86_regcallcc i16 @test_argReti16(i16 %b)
@@ -261,6 +264,7 @@
 ; LINUXOSX64-NEXT:    callq test_argReti32
 ; LINUXOSX64-NEXT:    incl %eax
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i32 %a, 1
   %c = call x86_regcallcc i32 @test_argReti32(i32 %b)
@@ -327,6 +331,7 @@
 ; LINUXOSX64-NEXT:    callq test_argReti64
 ; LINUXOSX64-NEXT:    incq %rax
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = add i64 %a, 1
   %c = call x86_regcallcc i64 @test_argReti64(i64 %b)
@@ -406,7 +411,9 @@
 ; LINUXOSX64-NEXT:    vaddss %xmm8, %xmm0, %xmm0
 ; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
 ; LINUXOSX64-NEXT:    addq $16, %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = fadd float 1.0, %a
   %c = call x86_regcallcc float @test_argRetFloat(float %b)
@@ -486,7 +493,9 @@
 ; LINUXOSX64-NEXT:    vaddsd %xmm8, %xmm0, %xmm0
 ; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
 ; LINUXOSX64-NEXT:    addq $16, %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = fadd double 1.0, %a
   %c = call x86_regcallcc double @test_argRetDouble(double %b)
@@ -548,6 +557,7 @@
 ; LINUXOSX64-NEXT:    callq test_argRetf80
 ; LINUXOSX64-NEXT:    fadd %st(0), %st(0)
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = fadd x86_fp80 %a, %a
   %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b)
@@ -611,6 +621,7 @@
 ; LINUXOSX64-NEXT:    callq test_argRetPointer
 ; LINUXOSX64-NEXT:    incl %eax
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = ptrtoint [4 x i32]* %a to i32
   %c = add i32 %b, 1
@@ -694,7 +705,9 @@
 ; LINUXOSX64-NEXT:    vmovdqa32 %xmm8, %xmm0 {%k1}
 ; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
 ; LINUXOSX64-NEXT:    addq $16, %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %a)
   %c = select <4 x i1> undef , <4 x i32> %a, <4 x i32> %b
@@ -768,7 +781,9 @@
 ; LINUXOSX64-NEXT:    vmovdqu (%rsp), %ymm1 # 32-byte Reload
 ; LINUXOSX64-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
 ; LINUXOSX64-NEXT:    addq $48, %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %a)
   %c = select <8 x i1> undef , <8 x i32> %a, <8 x i32> %b
@@ -842,7 +857,9 @@
 ; LINUXOSX64-NEXT:    vmovdqu64 (%rsp), %zmm1 # 64-byte Reload
 ; LINUXOSX64-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
 ; LINUXOSX64-NEXT:    addq $112, %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
 ; LINUXOSX64-NEXT:    popq %rsp
+; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
 ; LINUXOSX64-NEXT:    retq
   %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %a)
   %c = select <16 x i1> undef , <16 x i32> %a, <16 x i32> %b
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 85028df..c39827c 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -8702,6 +8702,7 @@
 ; GENERIC-NEXT:    callq func_f32
 ; GENERIC-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
 ; GENERIC-NEXT:    addq $24, %rsp # sched: [1:0.33]
+; GENERIC-NEXT:    .cfi_def_cfa_offset 8
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: broadcast_ss_spill:
@@ -8713,6 +8714,7 @@
 ; SKX-NEXT:    callq func_f32
 ; SKX-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
 ; SKX-NEXT:    addq $24, %rsp # sched: [1:0.25]
+; SKX-NEXT:    .cfi_def_cfa_offset 8
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a  = fadd float %x, %x
   call void @func_f32(float %a)
@@ -8732,6 +8734,7 @@
 ; GENERIC-NEXT:    callq func_f64
 ; GENERIC-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
 ; GENERIC-NEXT:    addq $24, %rsp # sched: [1:0.33]
+; GENERIC-NEXT:    .cfi_def_cfa_offset 8
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: broadcast_sd_spill:
@@ -8743,6 +8746,7 @@
 ; SKX-NEXT:    callq func_f64
 ; SKX-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
 ; SKX-NEXT:    addq $24, %rsp # sched: [1:0.25]
+; SKX-NEXT:    .cfi_def_cfa_offset 8
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a  = fadd double %x, %x
   call void @func_f64(double %a)
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index 42f2a8f..aadec5a 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -115,6 +115,7 @@
 ; X86-NEXT:    vmovaps 8(%ebp), %zmm1
 ; X86-NEXT:    movl %ebp, %esp
 ; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: select04:
diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
index 00e654d..ddcd0cb 100644
--- a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -405,6 +405,7 @@
 ; ALL-NEXT:    callq func_f32
 ; ALL-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload
 ; ALL-NEXT:    addq $24, %rsp
+; ALL-NEXT:    .cfi_def_cfa_offset 8
 ; ALL-NEXT:    retq
   %a  = fadd float %x, %x
   call void @func_f32(float %a)
@@ -424,6 +425,7 @@
 ; ALL-NEXT:    callq func_f64
 ; ALL-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload
 ; ALL-NEXT:    addq $24, %rsp
+; ALL-NEXT:    .cfi_def_cfa_offset 8
 ; ALL-NEXT:    retq
   %a  = fadd double %x, %x
   call void @func_f64(double %a)
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
index 6844aab..a0af130 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
@@ -25,6 +25,7 @@
 ; X32-NEXT:    kmovd %k0, %edx
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
@@ -73,6 +74,7 @@
 ; X32-NEXT:    kmovd %k0, %eax
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index 01d66a1..2b0f67c 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -1742,7 +1742,9 @@
 ; AVX512F-32-NEXT:    addl $-1, %eax
 ; AVX512F-32-NEXT:    adcl $-1, %edx
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512F-32-NEXT:    popl %edi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 4
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@@ -1855,9 +1857,13 @@
 ; AVX512F-32-NEXT:    addl %esi, %eax
 ; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512F-32-NEXT:    popl %edi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 12
 ; AVX512F-32-NEXT:    popl %ebx
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512F-32-NEXT:    popl %ebp
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 4
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@@ -1948,7 +1954,9 @@
 ; AVX512F-32-NEXT:    addl $-1, %eax
 ; AVX512F-32-NEXT:    adcl $-1, %edx
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512F-32-NEXT:    popl %edi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 4
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@@ -2061,9 +2069,13 @@
 ; AVX512F-32-NEXT:    addl %esi, %eax
 ; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512F-32-NEXT:    popl %edi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 12
 ; AVX512F-32-NEXT:    popl %ebx
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512F-32-NEXT:    popl %ebp
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 4
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@@ -2199,6 +2211,7 @@
 ; AVX512F-32-NEXT:    addl %esi, %eax
 ; AVX512F-32-NEXT:    addl %ecx, %eax
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 4
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
@@ -2334,6 +2347,7 @@
 ; AVX512F-32-NEXT:    addl %esi, %eax
 ; AVX512F-32-NEXT:    addl %ecx, %eax
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 4
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
@@ -2482,6 +2496,7 @@
 ; AVX512F-32-NEXT:    addl %esi, %eax
 ; AVX512F-32-NEXT:    adcl %ecx, %edx
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 4
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
@@ -2545,6 +2560,7 @@
 ; AVX512F-32-NEXT:    addl %esi, %eax
 ; AVX512F-32-NEXT:    adcl %ecx, %edx
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 4
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
diff --git a/llvm/test/CodeGen/X86/avx512vl-vbroadcast.ll b/llvm/test/CodeGen/X86/avx512vl-vbroadcast.ll
index 97fa973..13d6f29 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vbroadcast.ll
@@ -12,6 +12,7 @@
 ; CHECK-NEXT:    callq func_f32
 ; CHECK-NEXT:    vbroadcastss (%rsp), %ymm0 # 16-byte Folded Reload
 ; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %a  = fadd float %x, %x
   call void @func_f32(float %a)
@@ -30,6 +31,7 @@
 ; CHECK-NEXT:    callq func_f32
 ; CHECK-NEXT:    vbroadcastss (%rsp), %xmm0 # 16-byte Folded Reload
 ; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %a  = fadd float %x, %x
   call void @func_f32(float %a)
@@ -49,6 +51,7 @@
 ; CHECK-NEXT:    callq func_f64
 ; CHECK-NEXT:    vbroadcastsd (%rsp), %ymm0 # 16-byte Folded Reload
 ; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %a  = fadd double %x, %x
   call void @func_f64(double %a)
diff --git a/llvm/test/CodeGen/X86/bool-vector.ll b/llvm/test/CodeGen/X86/bool-vector.ll
index 88fe0a7..d7c57c3 100644
--- a/llvm/test/CodeGen/X86/bool-vector.ll
+++ b/llvm/test/CodeGen/X86/bool-vector.ll
@@ -93,6 +93,7 @@
 ; X32-NEXT:    leal (%eax,%edx,4), %eax
 ; X32-NEXT:    leal (%eax,%esi,8), %eax
 ; X32-NEXT:    popl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X32-SSE2-LABEL: PR15215_good:
@@ -115,6 +116,7 @@
 ; X32-SSE2-NEXT:    leal (%eax,%edx,4), %eax
 ; X32-SSE2-NEXT:    leal (%eax,%esi,8), %eax
 ; X32-SSE2-NEXT:    popl %esi
+; X32-SSE2-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-AVX2-LABEL: PR15215_good:
@@ -134,6 +136,7 @@
 ; X32-AVX2-NEXT:    leal (%eax,%edx,4), %eax
 ; X32-AVX2-NEXT:    leal (%eax,%esi,8), %eax
 ; X32-AVX2-NEXT:    popl %esi
+; X32-AVX2-NEXT:    .cfi_def_cfa_offset 4
 ; X32-AVX2-NEXT:    retl
 ;
 ; X64-LABEL: PR15215_good:
diff --git a/llvm/test/CodeGen/X86/cfi-inserter-check-order.ll b/llvm/test/CodeGen/X86/cfi-inserter-check-order.ll
new file mode 100644
index 0000000..d2f47c2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cfi-inserter-check-order.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=x86_64-- -O2 -enable-machine-outliner -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+; Confirm that passes that can add CFI instructions run before CFI instruction inserter.
+
+; CHECK-LABEL: Pass Arguments:
+; CHECK:       Check CFA info and insert CFI instructions if needed
+; CHECK-NOT:   X86 Optimize Call Frame
+; CHECK-NOT:   Prologue/Epilogue Insertion & Frame Finalization
+; CHECK-NOT:   Machine Outliner
+
+define void @f() {
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index 244e651..71cb960 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -247,10 +247,13 @@
 ; CHECK-NEXT:  # %bb.1: # %T
 ; CHECK-NEXT:    movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
 ; CHECK-NEXT:    popq %rcx # encoding: [0x59]
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ; CHECK-NEXT:  .LBB12_2: # %F
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
 ; CHECK-NEXT:    popq %rcx # encoding: [0x59]
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 entry:
   %tmp1 = call zeroext i1 @test12b()
diff --git a/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll b/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll
index 54076ab..d2dbb30 100644
--- a/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll
+++ b/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll
@@ -14,6 +14,7 @@
 ; CHECK-NEXT:    lock cmpxchg16b (%rdi)
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
   %success = extractvalue { i128, i1 } %pair, 1
@@ -35,10 +36,13 @@
 ; CHECK-NEXT:  # %bb.1: # %true
 ; CHECK-NEXT:    callq foo
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB1_2: # %false
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
   %success = extractvalue { i128, i1 } %pair, 1
@@ -70,6 +74,7 @@
 ; CHECK-NEXT:    sbbq %r10, %rdx
 ; CHECK-NEXT:    setge %al
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
   %oldval = extractvalue { i128, i1 } %pair, 0
@@ -93,6 +98,7 @@
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    movq %r10, %rax
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
   %success = extractvalue { i128, i1 } %pair, 1
@@ -128,6 +134,7 @@
 ; CHECK-NEXT:    movq %r10, %rax
 ; CHECK-NEXT:    movq %r9, %rdx
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %init = load atomic i128, i128* %addr seq_cst, align 16
diff --git a/llvm/test/CodeGen/X86/emutls-pie.ll b/llvm/test/CodeGen/X86/emutls-pie.ll
index 4fa191c..904196e 100644
--- a/llvm/test/CodeGen/X86/emutls-pie.ll
+++ b/llvm/test/CodeGen/X86/emutls-pie.ll
@@ -29,13 +29,16 @@
 ; X32-NEXT: calll my_emutls_get_address@PLT
 ; X32-NEXT: movl (%eax), %eax
 ; X32-NEXT: addl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 8
 ; X32-NEXT: popl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 4
 ; X32-NEXT: retl
 ; X64-LABEL: my_get_xyz:
 ; X64:      movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi
 ; X64-NEXT: callq my_emutls_get_address@PLT
 ; X64-NEXT: movl (%rax), %eax
 ; X64-NEXT: popq %rcx
+; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
 
 entry:
@@ -55,13 +58,16 @@
 ; X32-NEXT: calll __emutls_get_address@PLT
 ; X32-NEXT: movl (%eax), %eax
 ; X32-NEXT: addl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 8
 ; X32-NEXT: popl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 4
 ; X32-NEXT: retl
 ; X64-LABEL: f1:
 ; X64:      leaq __emutls_v.i(%rip), %rdi
 ; X64-NEXT: callq __emutls_get_address@PLT
 ; X64-NEXT: movl (%rax), %eax
 ; X64-NEXT: popq %rcx
+; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
 
 entry:
diff --git a/llvm/test/CodeGen/X86/emutls.ll b/llvm/test/CodeGen/X86/emutls.ll
index 5b1c039..75d45c0 100644
--- a/llvm/test/CodeGen/X86/emutls.ll
+++ b/llvm/test/CodeGen/X86/emutls.ll
@@ -23,12 +23,14 @@
 ; X32-NEXT:    calll my_emutls_get_address
 ; X32-NEXT:    movl (%eax), %eax
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ; X64-LABEL: my_get_xyz:
 ; X64:         movl $my_emutls_v_xyz, %edi
 ; X64-NEXT:    callq my_emutls_get_address
 ; X64-NEXT:    movl (%rax), %eax
 ; X64-NEXT:    popq %rcx
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 
 entry:
@@ -52,12 +54,14 @@
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    movl (%eax), %eax
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ; X64-LABEL: f1:
 ; X64:         movl $__emutls_v.i1, %edi
 ; X64-NEXT:    callq __emutls_get_address
 ; X64-NEXT:    movl (%rax), %eax
 ; X64-NEXT:    popq %rcx
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 
 entry:
@@ -70,11 +74,13 @@
 ; X32:         movl $__emutls_v.i1, (%esp)
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ; X64-LABEL: f2:
 ; X64:         movl $__emutls_v.i1, %edi
 ; X64-NEXT:    callq __emutls_get_address
 ; X64-NEXT:    popq %rcx
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 
 entry:
@@ -99,6 +105,7 @@
 ; X32:         movl $__emutls_v.i2, (%esp)
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
@@ -123,6 +130,7 @@
 ; X32:         movl $__emutls_v.i3, (%esp)
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
@@ -135,6 +143,7 @@
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    movl (%eax), %eax
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
@@ -147,6 +156,7 @@
 ; X32:         movl $__emutls_v.i4, (%esp)
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
@@ -159,6 +169,7 @@
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    movl (%eax), %eax
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
@@ -171,6 +182,7 @@
 ; X32:         movl $__emutls_v.i5, (%esp)
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
@@ -183,6 +195,7 @@
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    movzwl (%eax), %eax
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
@@ -196,6 +209,7 @@
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    movswl (%eax), %eax
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
@@ -210,6 +224,7 @@
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    movb (%eax), %al
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
@@ -223,6 +238,7 @@
 ; X32-NEXT:    calll __emutls_get_address
 ; X32-NEXT:    movsbl (%eax), %eax
 ; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 
 entry:
diff --git a/llvm/test/CodeGen/X86/epilogue-cfi-fp.ll b/llvm/test/CodeGen/X86/epilogue-cfi-fp.ll
new file mode 100644
index 0000000..9ccedb9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/epilogue-cfi-fp.ll
@@ -0,0 +1,29 @@
+; RUN: llc -O0 %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i686-pc-linux"
+
+; Function Attrs: noinline nounwind
+define i32 @foo(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m) #0 {
+
+; CHECK-LABEL:   foo:
+; CHECK:         popl %ebp
+; CHECK-NEXT:    .cfi_def_cfa %esp, 4
+; CHECK-NEXT:    retl
+
+entry:
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %l, i32* %l.addr, align 4
+  store i32 %m, i32* %m.addr, align 4
+  ret i32 0
+}
+
+attributes #0 = { "no-frame-pointer-elim"="true" }
+
diff --git a/llvm/test/CodeGen/X86/epilogue-cfi-no-fp.ll b/llvm/test/CodeGen/X86/epilogue-cfi-no-fp.ll
new file mode 100644
index 0000000..6b0e79f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/epilogue-cfi-no-fp.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i686-pc-linux"
+
+; Function Attrs: noinline nounwind
+define i32 @foo(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m) {
+; CHECK-LABEL:   foo:
+; CHECK:         addl	$20, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popl	%esi
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    popl	%edi
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    popl	%ebx
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    retl
+entry:
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %l, i32* %l.addr, align 4
+  store i32 %m, i32* %m.addr, align 4
+  ret i32 0
+}
+
+
+
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
index 4465d34..d37a858 100644
--- a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -31,6 +31,7 @@
 ; SSE2_X86-NEXT:    fldl (%esp)
 ; SSE2_X86-NEXT:    movl %ebp, %esp
 ; SSE2_X86-NEXT:    popl %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa %esp, 4
 ; SSE2_X86-NEXT:    retl
 ;
 ; AVX_X86-LABEL: int_to_double_rr:
@@ -47,6 +48,7 @@
 ; AVX_X86-NEXT:    fldl (%esp)
 ; AVX_X86-NEXT:    movl %ebp, %esp
 ; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
 ; AVX_X86-NEXT:    retl
 entry:
   %0 = sitofp i32 %a to double
@@ -80,6 +82,7 @@
 ; SSE2_X86-NEXT:    fldl (%esp)
 ; SSE2_X86-NEXT:    movl %ebp, %esp
 ; SSE2_X86-NEXT:    popl %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa %esp, 4
 ; SSE2_X86-NEXT:    retl
 ;
 ; AVX_X86-LABEL: int_to_double_rm:
@@ -97,6 +100,7 @@
 ; AVX_X86-NEXT:    fldl (%esp)
 ; AVX_X86-NEXT:    movl %ebp, %esp
 ; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
 ; AVX_X86-NEXT:    retl
 entry:
   %0 = load i32, i32* %a
@@ -130,6 +134,7 @@
 ; SSE2_X86-NEXT:    fldl (%esp)
 ; SSE2_X86-NEXT:    movl %ebp, %esp
 ; SSE2_X86-NEXT:    popl %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa %esp, 4
 ; SSE2_X86-NEXT:    retl
 ;
 ; AVX_X86-LABEL: int_to_double_rm_optsize:
@@ -147,6 +152,7 @@
 ; AVX_X86-NEXT:    fldl (%esp)
 ; AVX_X86-NEXT:    movl %ebp, %esp
 ; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
 ; AVX_X86-NEXT:    retl
 entry:
   %0 = load i32, i32* %a
@@ -174,6 +180,7 @@
 ; SSE2_X86-NEXT:    movss %xmm0, (%esp)
 ; SSE2_X86-NEXT:    flds (%esp)
 ; SSE2_X86-NEXT:    popl %eax
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 4
 ; SSE2_X86-NEXT:    retl
 ;
 ; AVX_X86-LABEL: int_to_float_rr:
@@ -184,6 +191,7 @@
 ; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
 ; AVX_X86-NEXT:    flds (%esp)
 ; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
 ; AVX_X86-NEXT:    retl
 entry:
   %0 = sitofp i32 %a to float
@@ -211,6 +219,7 @@
 ; SSE2_X86-NEXT:    movss %xmm0, (%esp)
 ; SSE2_X86-NEXT:    flds (%esp)
 ; SSE2_X86-NEXT:    popl %eax
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 4
 ; SSE2_X86-NEXT:    retl
 ;
 ; AVX_X86-LABEL: int_to_float_rm:
@@ -222,6 +231,7 @@
 ; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
 ; AVX_X86-NEXT:    flds (%esp)
 ; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
 ; AVX_X86-NEXT:    retl
 entry:
   %0 = load i32, i32* %a
@@ -249,6 +259,7 @@
 ; SSE2_X86-NEXT:    movss %xmm0, (%esp)
 ; SSE2_X86-NEXT:    flds (%esp)
 ; SSE2_X86-NEXT:    popl %eax
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 4
 ; SSE2_X86-NEXT:    retl
 ;
 ; AVX_X86-LABEL: int_to_float_rm_optsize:
@@ -260,6 +271,7 @@
 ; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
 ; AVX_X86-NEXT:    flds (%esp)
 ; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
 ; AVX_X86-NEXT:    retl
 entry:
   %0 = load i32, i32* %a
diff --git a/llvm/test/CodeGen/X86/fast-isel-store.ll b/llvm/test/CodeGen/X86/fast-isel-store.ll
index 49f22ec..bf52b6c 100644
--- a/llvm/test/CodeGen/X86/fast-isel-store.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-store.ll
@@ -351,6 +351,7 @@
 ; SSE64-NEXT:    movupd %xmm0, (%eax)
 ; SSE64-NEXT:    movupd %xmm1, 16(%eax)
 ; SSE64-NEXT:    addl $12, %esp
+; SSE64-NEXT:    .cfi_def_cfa_offset 4
 ; SSE64-NEXT:    retl
 ;
 ; AVX32-LABEL: test_store_4xf64:
@@ -389,6 +390,7 @@
 ; SSE64-NEXT:    movapd %xmm0, (%eax)
 ; SSE64-NEXT:    movapd %xmm1, 16(%eax)
 ; SSE64-NEXT:    addl $12, %esp
+; SSE64-NEXT:    .cfi_def_cfa_offset 4
 ; SSE64-NEXT:    retl
 ;
 ; AVX32-LABEL: test_store_4xf64_aligned:
@@ -428,6 +430,7 @@
 ; SSE64-NEXT:    movups %xmm2, 32(%eax)
 ; SSE64-NEXT:    movups %xmm3, 48(%eax)
 ; SSE64-NEXT:    addl $12, %esp
+; SSE64-NEXT:    .cfi_def_cfa_offset 4
 ; SSE64-NEXT:    retl
 ;
 ; AVXONLY32-LABEL: test_store_16xi32:
@@ -477,6 +480,7 @@
 ; SSE64-NEXT:    movaps %xmm2, 32(%eax)
 ; SSE64-NEXT:    movaps %xmm3, 48(%eax)
 ; SSE64-NEXT:    addl $12, %esp
+; SSE64-NEXT:    .cfi_def_cfa_offset 4
 ; SSE64-NEXT:    retl
 ;
 ; AVXONLY32-LABEL: test_store_16xi32_aligned:
@@ -526,6 +530,7 @@
 ; SSE64-NEXT:    movups %xmm2, 32(%eax)
 ; SSE64-NEXT:    movups %xmm3, 48(%eax)
 ; SSE64-NEXT:    addl $12, %esp
+; SSE64-NEXT:    .cfi_def_cfa_offset 4
 ; SSE64-NEXT:    retl
 ;
 ; AVXONLY32-LABEL: test_store_16xf32:
@@ -575,6 +580,7 @@
 ; SSE64-NEXT:    movaps %xmm2, 32(%eax)
 ; SSE64-NEXT:    movaps %xmm3, 48(%eax)
 ; SSE64-NEXT:    addl $12, %esp
+; SSE64-NEXT:    .cfi_def_cfa_offset 4
 ; SSE64-NEXT:    retl
 ;
 ; AVXONLY32-LABEL: test_store_16xf32_aligned:
@@ -632,6 +638,7 @@
 ; SSE64-NEXT:    movupd %xmm2, 32(%eax)
 ; SSE64-NEXT:    movupd %xmm3, 48(%eax)
 ; SSE64-NEXT:    addl $12, %esp
+; SSE64-NEXT:    .cfi_def_cfa_offset 4
 ; SSE64-NEXT:    retl
 ;
 ; AVXONLY32-LABEL: test_store_8xf64:
@@ -658,6 +665,7 @@
 ; AVXONLY64-NEXT:    vmovupd %ymm1, 32(%eax)
 ; AVXONLY64-NEXT:    movl %ebp, %esp
 ; AVXONLY64-NEXT:    popl %ebp
+; AVXONLY64-NEXT:    .cfi_def_cfa %esp, 4
 ; AVXONLY64-NEXT:    retl
 ;
 ; AVX51232-LABEL: test_store_8xf64:
@@ -705,6 +713,7 @@
 ; SSE64-NEXT:    movapd %xmm2, 32(%eax)
 ; SSE64-NEXT:    movapd %xmm3, 48(%eax)
 ; SSE64-NEXT:    addl $12, %esp
+; SSE64-NEXT:    .cfi_def_cfa_offset 4
 ; SSE64-NEXT:    retl
 ;
 ; AVXONLY32-LABEL: test_store_8xf64_aligned:
@@ -731,6 +740,7 @@
 ; AVXONLY64-NEXT:    vmovapd %ymm1, 32(%eax)
 ; AVXONLY64-NEXT:    movl %ebp, %esp
 ; AVXONLY64-NEXT:    popl %ebp
+; AVXONLY64-NEXT:    .cfi_def_cfa %esp, 4
 ; AVXONLY64-NEXT:    retl
 ;
 ; AVX51232-LABEL: test_store_8xf64_aligned:
diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll
index 9924af1..c6d432a 100644
--- a/llvm/test/CodeGen/X86/fmaxnum.ll
+++ b/llvm/test/CodeGen/X86/fmaxnum.ll
@@ -84,6 +84,7 @@
 ; CHECK-NEXT:    fstpt (%rsp)
 ; CHECK-NEXT:    callq fmaxl
 ; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
   ret x86_fp80 %z
@@ -146,6 +147,7 @@
 ; CHECK-NEXT:    fstpt (%rsp)
 ; CHECK-NEXT:    callq fmaxl
 ; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
   ret x86_fp80 %z
diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll
index 46ea907..0874fb2 100644
--- a/llvm/test/CodeGen/X86/fminnum.ll
+++ b/llvm/test/CodeGen/X86/fminnum.ll
@@ -76,6 +76,7 @@
 ; CHECK-NEXT:    fstpt (%rsp)
 ; CHECK-NEXT:    callq fminl
 ; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %z = call x86_fp80 @fminl(x86_fp80 %x, x86_fp80 %y) readnone
   ret x86_fp80 %z
@@ -138,6 +139,7 @@
 ; CHECK-NEXT:    fstpt (%rsp)
 ; CHECK-NEXT:    callq fminl
 ; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %z = call x86_fp80 @llvm.minnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
   ret x86_fp80 %z
diff --git a/llvm/test/CodeGen/X86/fp-arith.ll b/llvm/test/CodeGen/X86/fp-arith.ll
index e3711e0..73a132d 100644
--- a/llvm/test/CodeGen/X86/fp-arith.ll
+++ b/llvm/test/CodeGen/X86/fp-arith.ll
@@ -16,6 +16,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fiadds {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fiadd_fp80_i16:
@@ -41,6 +42,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fiadds {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fiadd_fp80_i16_ld:
@@ -66,6 +68,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fiaddl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fiadd_fp80_i32:
@@ -90,6 +93,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fiaddl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fiadd_fp80_i32_ld:
@@ -119,6 +123,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fisubs {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fisub_fp80_i16:
@@ -144,6 +149,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fisubs {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fisub_fp80_i16_ld:
@@ -169,6 +175,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fisubl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fisub_fp80_i32:
@@ -193,6 +200,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fisubl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fisub_fp80_i32_ld:
@@ -222,6 +230,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fisubrs {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fisubr_fp80_i16:
@@ -247,6 +256,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fisubrs {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fisubr_fp80_i16_ld:
@@ -272,6 +282,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fisubrl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fisubr_fp80_i32:
@@ -296,6 +307,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fisubrl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fisubr_fp80_i32_ld:
@@ -325,6 +337,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fimuls {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fimul_fp80_i16:
@@ -350,6 +363,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fimuls {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fimul_fp80_i16_ld:
@@ -375,6 +389,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fimull (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fimul_fp80_i32:
@@ -399,6 +414,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fimull (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fimul_fp80_i32_ld:
@@ -428,6 +444,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fidivs {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fidiv_fp80_i16:
@@ -453,6 +470,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fidivs {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fidiv_fp80_i16_ld:
@@ -478,6 +496,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fidivl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fidiv_fp80_i32:
@@ -502,6 +521,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fidivl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fidiv_fp80_i32_ld:
@@ -531,6 +551,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fidivrs {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fidivr_fp80_i16:
@@ -556,6 +577,7 @@
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fidivrs {{[0-9]+}}(%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fidivr_fp80_i16_ld:
@@ -581,6 +603,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fidivrl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fidivr_fp80_i32:
@@ -605,6 +628,7 @@
 ; X86-NEXT:    movl %eax, (%esp)
 ; X86-NEXT:    fidivrl (%esp)
 ; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fidivr_fp80_i32_ld:
diff --git a/llvm/test/CodeGen/X86/frame-lowering-debug-intrinsic-2.ll b/llvm/test/CodeGen/X86/frame-lowering-debug-intrinsic-2.ll
index ba80c83..749686d 100644
--- a/llvm/test/CodeGen/X86/frame-lowering-debug-intrinsic-2.ll
+++ b/llvm/test/CodeGen/X86/frame-lowering-debug-intrinsic-2.ll
@@ -18,11 +18,13 @@
 }
 
 ; CHECK-LABEL: noDebug
-; CHECK:       addq  $24, %rsp
-; CHECK:       popq  %rbx
-; CHECK-NEXT:  popq  %r14
-; CHECK-NEXT:  retq
-
+; CHECK:       addq $24, %rsp
+; CHECK-NEXT:  .cfi_adjust_cfa_offset -24
+; CHECK-NEXT:  popq %rbx
+; CHECK-NEXT:  .cfi_def_cfa_offset 16
+; CHECK-NEXT:  popq %r14
+; CHECK-NEXT:  .cfi_def_cfa_offset 8
+; CHECK-NEXT:  retq
 
 define void @withDebug() !dbg !18 {
 entry:
@@ -43,8 +45,11 @@
 ; CHECK:       callq printf
 ; CHECK:       callq printf
 ; CHECK-NEXT:  addq  $24, %rsp
-; CHECK:       popq  %rbx
+; CHECK-NEXT:  .cfi_adjust_cfa_offset -24
+; CHECK-NEXT:  popq  %rbx
+; CHECK-NEXT:  .cfi_def_cfa_offset 16
 ; CHECK-NEXT:  popq  %r14
+; CHECK-NEXT:  .cfi_def_cfa_offset 8
 ; CHECK-NEXT:  retq
 
 declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)
diff --git a/llvm/test/CodeGen/X86/frame-lowering-debug-intrinsic.ll b/llvm/test/CodeGen/X86/frame-lowering-debug-intrinsic.ll
index f9ecf70..de9d6bf 100644
--- a/llvm/test/CodeGen/X86/frame-lowering-debug-intrinsic.ll
+++ b/llvm/test/CodeGen/X86/frame-lowering-debug-intrinsic.ll
@@ -9,6 +9,7 @@
 
 ; CHECK-LABEL: fn1NoDebug
 ; CHECK: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: ret
 
 define i64 @fn1WithDebug(i64 %a) !dbg !4 {
@@ -19,6 +20,7 @@
 
 ; CHECK-LABEL: fn1WithDebug
 ; CHECK: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: ret
 
 %struct.Buffer = type { i8, [63 x i8] }
@@ -33,6 +35,7 @@
 ; CHECK-NOT: sub
 ; CHECK: mov
 ; CHECK-NEXT: pop
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT: ret
 
 define void @fn2WithDebug(%struct.Buffer* byval align 64 %p1) !dbg !8 {
@@ -46,6 +49,7 @@
 ; CHECK-NOT: sub
 ; CHECK: mov
 ; CHECK-NEXT: pop
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT: ret
 
 declare i64 @fn(i64, i64)
diff --git a/llvm/test/CodeGen/X86/h-registers-1.ll b/llvm/test/CodeGen/X86/h-registers-1.ll
index fe00672..6f5105b 100644
--- a/llvm/test/CodeGen/X86/h-registers-1.ll
+++ b/llvm/test/CodeGen/X86/h-registers-1.ll
@@ -37,7 +37,9 @@
 ; CHECK-NEXT:    addq %rcx, %rax
 ; CHECK-NEXT:    addq %rdx, %rax
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; GNUX32-LABEL: foo:
@@ -69,7 +71,9 @@
 ; GNUX32-NEXT:    addq %rcx, %rax
 ; GNUX32-NEXT:    addq %rdx, %rax
 ; GNUX32-NEXT:    popq %rbx
+; GNUX32-NEXT:    .cfi_def_cfa_offset 16
 ; GNUX32-NEXT:    popq %rbp
+; GNUX32-NEXT:    .cfi_def_cfa_offset 8
 ; GNUX32-NEXT:    retq
   %sa = lshr i64 %a, 8
   %A = and i64 %sa, 255
diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/CodeGen/X86/haddsub-2.ll
index a83a045..42c6a2d 100644
--- a/llvm/test/CodeGen/X86/haddsub-2.ll
+++ b/llvm/test/CodeGen/X86/haddsub-2.ll
@@ -724,11 +724,17 @@
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
 ; SSE3-NEXT:    popq %rbx
+; SSE3-NEXT:    .cfi_def_cfa_offset 48
 ; SSE3-NEXT:    popq %r12
+; SSE3-NEXT:    .cfi_def_cfa_offset 40
 ; SSE3-NEXT:    popq %r13
+; SSE3-NEXT:    .cfi_def_cfa_offset 32
 ; SSE3-NEXT:    popq %r14
+; SSE3-NEXT:    .cfi_def_cfa_offset 24
 ; SSE3-NEXT:    popq %r15
+; SSE3-NEXT:    .cfi_def_cfa_offset 16
 ; SSE3-NEXT:    popq %rbp
+; SSE3-NEXT:    .cfi_def_cfa_offset 8
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: avx2_vphadd_w_test:
@@ -1351,11 +1357,17 @@
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
 ; SSE3-NEXT:    popq %rbx
+; SSE3-NEXT:    .cfi_def_cfa_offset 48
 ; SSE3-NEXT:    popq %r12
+; SSE3-NEXT:    .cfi_def_cfa_offset 40
 ; SSE3-NEXT:    popq %r13
+; SSE3-NEXT:    .cfi_def_cfa_offset 32
 ; SSE3-NEXT:    popq %r14
+; SSE3-NEXT:    .cfi_def_cfa_offset 24
 ; SSE3-NEXT:    popq %r15
+; SSE3-NEXT:    .cfi_def_cfa_offset 16
 ; SSE3-NEXT:    popq %rbp
+; SSE3-NEXT:    .cfi_def_cfa_offset 8
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: avx2_hadd_w:
diff --git a/llvm/test/CodeGen/X86/hipe-cc64.ll b/llvm/test/CodeGen/X86/hipe-cc64.ll
index efe07cf..ce2d0e9 100644
--- a/llvm/test/CodeGen/X86/hipe-cc64.ll
+++ b/llvm/test/CodeGen/X86/hipe-cc64.ll
@@ -87,6 +87,7 @@
   ; CHECK-NEXT: movl	$47, %ecx
   ; CHECK-NEXT: movl	$63, %r8d
   ; CHECK-NEXT: popq	%rax
+  ; CHECK-NEXT: .cfi_def_cfa_offset 16
   ; CHECK-NEXT: jmp	tailcallee
   %ret = tail call cc11 { i64, i64, i64 } @tailcallee(i64 %hp, i64 %p, i64 15,
      i64 31, i64 47, i64 63, i64 79) #1
diff --git a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
index 10986c0..24462d4 100644
--- a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
+++ b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
@@ -81,6 +81,7 @@
 ; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    movw %ax, (%ecx)
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: i24_insert_bit:
diff --git a/llvm/test/CodeGen/X86/imul.ll b/llvm/test/CodeGen/X86/imul.ll
index 7bb10aa..958c611 100644
--- a/llvm/test/CodeGen/X86/imul.ll
+++ b/llvm/test/CodeGen/X86/imul.ll
@@ -307,6 +307,7 @@
 ; X86-NEXT:    subl %ecx, %edx
 ; X86-NEXT:    subl %esi, %edx
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 entry:
 	%tmp3 = mul i64 %a, -31
@@ -362,6 +363,7 @@
 ; X86-NEXT:    subl %ecx, %edx
 ; X86-NEXT:    subl %esi, %edx
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 entry:
 	%tmp3 = mul i64 %a, -33
@@ -390,6 +392,7 @@
 ; X86-NEXT:    addl %esi, %edx
 ; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 entry:
 	%tmp3 = mul i64 %a, 9223372036854775807
diff --git a/llvm/test/CodeGen/X86/lea-opt-cse1.ll b/llvm/test/CodeGen/X86/lea-opt-cse1.ll
index 08241f6..539211c 100644
--- a/llvm/test/CodeGen/X86/lea-opt-cse1.ll
+++ b/llvm/test/CodeGen/X86/lea-opt-cse1.ll
@@ -30,6 +30,7 @@
 ; X86-NEXT:    leal 1(%edx,%ecx), %ecx
 ; X86-NEXT:    movl %ecx, 16(%eax)
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
  entry:
    %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
diff --git a/llvm/test/CodeGen/X86/lea-opt-cse2.ll b/llvm/test/CodeGen/X86/lea-opt-cse2.ll
index 429a7a5..40e35b4 100644
--- a/llvm/test/CodeGen/X86/lea-opt-cse2.ll
+++ b/llvm/test/CodeGen/X86/lea-opt-cse2.ll
@@ -46,7 +46,9 @@
 ; X86-NEXT:    leal 1(%esi,%edx), %ecx
 ; X86-NEXT:    movl %ecx, 16(%eax)
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    popl %edi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
  entry:
    br label %loop
diff --git a/llvm/test/CodeGen/X86/lea-opt-cse3.ll b/llvm/test/CodeGen/X86/lea-opt-cse3.ll
index fb7fe12..93e4fa7 100644
--- a/llvm/test/CodeGen/X86/lea-opt-cse3.ll
+++ b/llvm/test/CodeGen/X86/lea-opt-cse3.ll
@@ -91,6 +91,7 @@
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:  .LBB2_2: # %exit
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 entry:
   %mul = shl i32 %b, 2
@@ -143,6 +144,7 @@
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:  .LBB3_2: # %exit
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 entry:
   %mul = shl i32 %b, 1
diff --git a/llvm/test/CodeGen/X86/lea-opt-cse4.ll b/llvm/test/CodeGen/X86/lea-opt-cse4.ll
index a295ac7..57f0dbd 100644
--- a/llvm/test/CodeGen/X86/lea-opt-cse4.ll
+++ b/llvm/test/CodeGen/X86/lea-opt-cse4.ll
@@ -36,6 +36,7 @@
 ; X86-NEXT:    leal 1(%ecx,%edx), %ecx
 ; X86-NEXT:    movl %ecx, 16(%eax)
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
  entry:
    %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
@@ -110,7 +111,9 @@
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    movl %edx, 16(%eax)
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    popl %edi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
  entry:
    br label %loop
diff --git a/llvm/test/CodeGen/X86/legalize-shift-64.ll b/llvm/test/CodeGen/X86/legalize-shift-64.ll
index 05fad9c..5856de3 100644
--- a/llvm/test/CodeGen/X86/legalize-shift-64.ll
+++ b/llvm/test/CodeGen/X86/legalize-shift-64.ll
@@ -117,9 +117,13 @@
 ; CHECK-NEXT:    movl %esi, 4(%eax)
 ; CHECK-NEXT:    movl %edi, (%eax)
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
 ; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl $4
   %shl = shl <2 x i64> %A, %B
   ret <2 x i64> %shl
@@ -160,6 +164,7 @@
 ; CHECK-NEXT:  .LBB5_4: # %if.then
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    .cfi_def_cfa %esp, 4
 ; CHECK-NEXT:    retl
   %x = alloca i32, align 4
   %t = alloca i64, align 8
diff --git a/llvm/test/CodeGen/X86/legalize-shl-vec.ll b/llvm/test/CodeGen/X86/legalize-shl-vec.ll
index e6cfd9c..fdd73b8 100644
--- a/llvm/test/CodeGen/X86/legalize-shl-vec.ll
+++ b/llvm/test/CodeGen/X86/legalize-shl-vec.ll
@@ -119,10 +119,15 @@
 ; X32-NEXT:    movl $0, 4(%eax)
 ; X32-NEXT:    movl $0, (%eax)
 ; X32-NEXT:    addl $8, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 20
 ; X32-NEXT:    popl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    popl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    popl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl $4
 ;
 ; X64-LABEL: test_srl:
@@ -206,10 +211,15 @@
 ; X32-NEXT:    movl %ecx, 4(%eax)
 ; X32-NEXT:    movl %ecx, (%eax)
 ; X32-NEXT:    addl $8, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 20
 ; X32-NEXT:    popl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    popl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    popl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl $4
 ;
 ; X64-LABEL: test_sra:
diff --git a/llvm/test/CodeGen/X86/live-out-reg-info.ll b/llvm/test/CodeGen/X86/live-out-reg-info.ll
index 882e17e..9b6b35d 100644
--- a/llvm/test/CodeGen/X86/live-out-reg-info.ll
+++ b/llvm/test/CodeGen/X86/live-out-reg-info.ll
@@ -18,6 +18,7 @@
 ; CHECK-NEXT:    callq qux
 ; CHECK-NEXT:  .LBB0_2: # %false
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t0 = lshr i32 %a, 23
   br label %next
diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll
index c943b6d..8c69dba 100644
--- a/llvm/test/CodeGen/X86/load-combine.ll
+++ b/llvm/test/CodeGen/X86/load-combine.ll
@@ -376,6 +376,7 @@
 ; CHECK-NEXT:    orl %ecx, %eax
 ; CHECK-NEXT:    orl %edx, %eax
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
@@ -496,6 +497,7 @@
 ; CHECK-NEXT:    movzbl 3(%ecx), %eax
 ; CHECK-NEXT:    orl %edx, %eax
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index c6fefa6..6bd8f92 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1733,6 +1733,7 @@
 ; KNL_32-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
 ; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; KNL_32-NEXT:    addl $12, %esp
+; KNL_32-NEXT:    .cfi_def_cfa_offset 4
 ; KNL_32-NEXT:    vzeroupper
 ; KNL_32-NEXT:    retl
 ;
@@ -1808,6 +1809,7 @@
 ; SKX_32-NEXT:  .LBB31_6: # %else5
 ; SKX_32-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1}
 ; SKX_32-NEXT:    addl $12, %esp
+; SKX_32-NEXT:    .cfi_def_cfa_offset 4
 ; SKX_32-NEXT:    retl
 
   %sext_ind = sext <3 x i32> %ind to <3 x i64>
@@ -1934,6 +1936,7 @@
 ; KNL_32-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; KNL_32-NEXT:    movl %ebp, %esp
 ; KNL_32-NEXT:    popl %ebp
+; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test_gather_16i64:
@@ -1968,6 +1971,7 @@
 ; SKX_32-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
+; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
 ; SKX_32-NEXT:    retl
   %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
   ret <16 x i64> %res
@@ -2051,6 +2055,7 @@
 ; KNL_32-NEXT:    vmovapd %zmm2, %zmm0
 ; KNL_32-NEXT:    movl %ebp, %esp
 ; KNL_32-NEXT:    popl %ebp
+; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test_gather_16f64:
@@ -2085,6 +2090,7 @@
 ; SKX_32-NEXT:    vmovapd %zmm2, %zmm0
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
+; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
 ; SKX_32-NEXT:    retl
   %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
   ret <16 x double> %res
@@ -2166,6 +2172,7 @@
 ; KNL_32-NEXT:    vpscatterdq %zmm1, (,%ymm0) {%k2}
 ; KNL_32-NEXT:    movl %ebp, %esp
 ; KNL_32-NEXT:    popl %ebp
+; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
 ; KNL_32-NEXT:    vzeroupper
 ; KNL_32-NEXT:    retl
 ;
@@ -2199,6 +2206,7 @@
 ; SKX_32-NEXT:    vpscatterdq %zmm1, (,%ymm0) {%k2}
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
+; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
   call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
@@ -2282,6 +2290,7 @@
 ; KNL_32-NEXT:    vscatterdpd %zmm1, (,%ymm0) {%k2}
 ; KNL_32-NEXT:    movl %ebp, %esp
 ; KNL_32-NEXT:    popl %ebp
+; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
 ; KNL_32-NEXT:    vzeroupper
 ; KNL_32-NEXT:    retl
 ;
@@ -2315,6 +2324,7 @@
 ; SKX_32-NEXT:    vscatterdpd %zmm1, (,%ymm0) {%k2}
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
+; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
   call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
@@ -2354,6 +2364,7 @@
 ; KNL_32-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
 ; KNL_32-NEXT:    movl %ebp, %esp
 ; KNL_32-NEXT:    popl %ebp
+; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test_pr28312:
@@ -2381,6 +2392,7 @@
 ; SKX_32-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
+; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
 ; SKX_32-NEXT:    retl
   %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
   %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
@@ -2795,6 +2807,7 @@
 ; KNL_32-NEXT:    vmovapd %zmm3, %zmm1
 ; KNL_32-NEXT:    movl %ebp, %esp
 ; KNL_32-NEXT:    popl %ebp
+; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test_gather_setcc_split:
@@ -2830,6 +2843,7 @@
 ; SKX_32-NEXT:    vmovapd %zmm3, %zmm1
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
+; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr double, double *%base, <16 x i64> %sext_ind
@@ -2870,6 +2884,7 @@
 ; KNL_32-NEXT:    vscatterdpd %zmm2, (%eax,%ymm0,8) {%k1}
 ; KNL_32-NEXT:    movl %ebp, %esp
 ; KNL_32-NEXT:    popl %ebp
+; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
 ; KNL_32-NEXT:    vzeroupper
 ; KNL_32-NEXT:    retl
 ;
@@ -2903,6 +2918,7 @@
 ; SKX_32-NEXT:    vscatterdpd %zmm2, (%eax,%ymm0,8) {%k1}
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
+; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
diff --git a/llvm/test/CodeGen/X86/memset-nonzero.ll b/llvm/test/CodeGen/X86/memset-nonzero.ll
index cc8f2b5..199bf75 100644
--- a/llvm/test/CodeGen/X86/memset-nonzero.ll
+++ b/llvm/test/CodeGen/X86/memset-nonzero.ll
@@ -148,6 +148,7 @@
 ; SSE-NEXT:    movl $256, %edx # imm = 0x100
 ; SSE-NEXT:    callq memset
 ; SSE-NEXT:    popq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 8
 ; SSE-NEXT:    retq
 ;
 ; SSE2FAST-LABEL: memset_256_nonzero_bytes:
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 8c96b2b..37841b8 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -72,7 +72,9 @@
 ; X32-SSE1-NEXT:    movl %esi, 4(%eax)
 ; X32-SSE1-NEXT:    movl %edx, (%eax)
 ; X32-SSE1-NEXT:    popl %esi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 8
 ; X32-SSE1-NEXT:    popl %edi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE1-NEXT:    retl $4
 ;
 ; X32-SSE41-LABEL: merge_2i64_i64_12:
@@ -384,6 +386,7 @@
 ; X32-SSE1-NEXT:    movl %edx, (%eax)
 ; X32-SSE1-NEXT:    movl %ecx, 12(%eax)
 ; X32-SSE1-NEXT:    popl %esi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE1-NEXT:    retl $4
 ;
 ; X32-SSE41-LABEL: merge_4i32_i32_23u5:
@@ -435,7 +438,9 @@
 ; X32-SSE1-NEXT:    movl %edx, (%eax)
 ; X32-SSE1-NEXT:    movl %ecx, 12(%eax)
 ; X32-SSE1-NEXT:    popl %esi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 8
 ; X32-SSE1-NEXT:    popl %edi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE1-NEXT:    retl $4
 ;
 ; X32-SSE41-LABEL: merge_4i32_i32_23u5_inc2:
@@ -490,7 +495,9 @@
 ; X32-SSE1-NEXT:    movl %edx, (%eax)
 ; X32-SSE1-NEXT:    movl %ecx, 12(%eax)
 ; X32-SSE1-NEXT:    popl %esi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 8
 ; X32-SSE1-NEXT:    popl %edi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE1-NEXT:    retl $4
 ;
 ; X32-SSE41-LABEL: merge_4i32_i32_23u5_inc3:
@@ -649,7 +656,9 @@
 ; X32-SSE1-NEXT:    movl $0, 12(%eax)
 ; X32-SSE1-NEXT:    movl $0, 8(%eax)
 ; X32-SSE1-NEXT:    popl %esi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 8
 ; X32-SSE1-NEXT:    popl %edi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE1-NEXT:    retl $4
 ;
 ; X32-SSE41-LABEL: merge_4i32_i32_45zz_inc4:
@@ -701,7 +710,9 @@
 ; X32-SSE1-NEXT:    movl $0, 12(%eax)
 ; X32-SSE1-NEXT:    movl $0, 8(%eax)
 ; X32-SSE1-NEXT:    popl %esi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 8
 ; X32-SSE1-NEXT:    popl %edi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE1-NEXT:    retl $4
 ;
 ; X32-SSE41-LABEL: merge_4i32_i32_45zz_inc5:
@@ -751,7 +762,9 @@
 ; X32-SSE1-NEXT:    movl %esi, 6(%eax)
 ; X32-SSE1-NEXT:    movl %edx, (%eax)
 ; X32-SSE1-NEXT:    popl %esi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 8
 ; X32-SSE1-NEXT:    popl %edi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE1-NEXT:    retl $4
 ;
 ; X32-SSE41-LABEL: merge_8i16_i16_23u567u9:
@@ -897,9 +910,13 @@
 ; X32-SSE1-NEXT:    movl %esi, 3(%eax)
 ; X32-SSE1-NEXT:    movw %bp, (%eax)
 ; X32-SSE1-NEXT:    popl %esi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 16
 ; X32-SSE1-NEXT:    popl %edi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 12
 ; X32-SSE1-NEXT:    popl %ebx
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 8
 ; X32-SSE1-NEXT:    popl %ebp
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE1-NEXT:    retl $4
 ;
 ; X32-SSE41-LABEL: merge_16i8_i8_01u3456789ABCDuF:
@@ -1128,7 +1145,9 @@
 ; X32-SSE1-NEXT:    movl %esi, 4(%eax)
 ; X32-SSE1-NEXT:    movl %edx, (%eax)
 ; X32-SSE1-NEXT:    popl %esi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 8
 ; X32-SSE1-NEXT:    popl %edi
+; X32-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE1-NEXT:    retl $4
 ;
 ; X32-SSE41-LABEL: merge_2i64_i64_12_volatile:
diff --git a/llvm/test/CodeGen/X86/merge-sp-updates-cfi.ll b/llvm/test/CodeGen/X86/merge-sp-updates-cfi.ll
new file mode 100644
index 0000000..dc758cf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/merge-sp-updates-cfi.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+
+
+; Function Attrs: optsize
+declare void @foo(i32, i32)
+declare x86_stdcallcc void @stdfoo(i32, i32)
+
+; CHECK-LABEL: testNoUnwind:
+; CHECK:       subl $20, %esp
+; CHECK-NOT:   subl $12, %esp
+; CHECK-NOT:   subl $8, %esp
+; CHECK:       calll foo
+; CHECK:       addl $8, %esp
+; CHECK-NOT:   addl $16, %esp
+; CHECK-NOT:   subl $8, %esp
+; CHECK:       calll stdfoo
+; CHECK:       addl $20, %esp
+; CHECK-NOT:   addl $8, %esp
+; CHECK-NOT:   addl $12, %esp
+define void @testNoUnwind() nounwind {
+entry:
+  tail call void @foo(i32 1, i32 2)
+  tail call x86_stdcallcc void @stdfoo(i32 3, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: testWithUnwind:
+; CHECK:       subl $20, %esp
+; CHECK-NEXT: .cfi_adjust_cfa_offset 20
+; CHECK-NOT:   subl $12, %esp
+; CHECK-NOT:   subl $8, %esp
+; CHECK:       calll foo
+; CHECK:       addl $8, %esp
+; CHECK-NEXT: .cfi_adjust_cfa_offset -8
+; CHECK-NOT:   addl $16, %esp
+; CHECK-NOT:   subl $8, %esp
+; CHECK:       calll stdfoo
+; CHECK:       addl $20, %esp
+; CHECK-NEXT: .cfi_adjust_cfa_offset -20
+; CHECK-NOT:   addl $8, %esp
+; CHECK-NOT:   addl $12, %esp
+define void @testWithUnwind() {
+entry:
+  tail call void @foo(i32 1, i32 2)
+  tail call x86_stdcallcc void @stdfoo(i32 3, i32 4)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/mmx-arith.ll b/llvm/test/CodeGen/X86/mmx-arith.ll
index 9da14ef..bfcddf4 100644
--- a/llvm/test/CodeGen/X86/mmx-arith.ll
+++ b/llvm/test/CodeGen/X86/mmx-arith.ll
@@ -80,6 +80,7 @@
 ; X32-NEXT:    emms
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test0:
@@ -415,6 +416,7 @@
 ; X32-NEXT:    emms
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test2:
diff --git a/llvm/test/CodeGen/X86/movtopush.ll b/llvm/test/CodeGen/X86/movtopush.ll
index 051c8a7..7c067be 100644
--- a/llvm/test/CodeGen/X86/movtopush.ll
+++ b/llvm/test/CodeGen/X86/movtopush.ll
@@ -383,7 +383,7 @@
 ; LINUX: .cfi_adjust_cfa_offset 4
 ; LINUX: calll   good
 ; LINUX: addl    $28, %esp
-; LINUX: .cfi_adjust_cfa_offset -16
+; LINUX: .cfi_adjust_cfa_offset -28
 ; LINUX-NOT: add
 ; LINUX: retl
 define void @pr27140() optsize {
diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll
index 4d1b23a..70acb9e 100644
--- a/llvm/test/CodeGen/X86/mul-constant-result.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-result.ll
@@ -34,84 +34,116 @@
 ; X86-NEXT:  .LBB0_6:
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_39:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:  .LBB0_40:
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_7:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_8:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    shll $2, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_9:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_10:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_11:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (,%eax,8), %ecx
 ; X86-NEXT:    jmp .LBB0_12
 ; X86-NEXT:  .LBB0_13:
 ; X86-NEXT:    shll $3, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_14:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,8), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_15:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_16:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
 ; X86-NEXT:    leal (%eax,%ecx,2), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_17:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    shll $2, %eax
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_18:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,2), %ecx
 ; X86-NEXT:    leal (%eax,%ecx,4), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_19:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,2), %ecx
 ; X86-NEXT:    jmp .LBB0_20
 ; X86-NEXT:  .LBB0_21:
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_22:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    shll $4, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_23:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shll $4, %ecx
 ; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_24:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    leal (%eax,%eax,8), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_25:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
 ; X86-NEXT:    shll $2, %ecx
 ; X86-NEXT:    jmp .LBB0_12
@@ -119,20 +151,26 @@
 ; X86-NEXT:    shll $2, %eax
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_27:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
 ; X86-NEXT:    leal (%eax,%ecx,4), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_28:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
 ; X86-NEXT:  .LBB0_20:
 ; X86-NEXT:    leal (%eax,%ecx,4), %ecx
 ; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_29:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,2), %ecx
 ; X86-NEXT:    shll $3, %ecx
 ; X86-NEXT:    jmp .LBB0_12
@@ -140,13 +178,17 @@
 ; X86-NEXT:    shll $3, %eax
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_31:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_32:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
 ; X86-NEXT:    jmp .LBB0_12
@@ -154,21 +196,27 @@
 ; X86-NEXT:    leal (%eax,%eax,8), %eax
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_34:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
 ; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_35:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
 ; X86-NEXT:    addl %eax, %ecx
 ; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_36:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shll $5, %ecx
 ; X86-NEXT:    subl %eax, %ecx
@@ -180,10 +228,13 @@
 ; X86-NEXT:    subl %eax, %ecx
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_38:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    shll $5, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-HSW-LABEL: mult:
@@ -857,8 +908,11 @@
 ; X86-NEXT:    negl %ecx
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 12
 ; X86-NEXT:    popl %edi
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    popl %ebx
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-HSW-LABEL: foo:
@@ -1072,10 +1126,15 @@
 ; X64-HSW-NEXT:    negl %ecx
 ; X64-HSW-NEXT:    movl %ecx, %eax
 ; X64-HSW-NEXT:    addq $8, %rsp
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 40
 ; X64-HSW-NEXT:    popq %rbx
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 32
 ; X64-HSW-NEXT:    popq %r14
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 24
 ; X64-HSW-NEXT:    popq %r15
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 16
 ; X64-HSW-NEXT:    popq %rbp
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 8
 ; X64-HSW-NEXT:    retq
   %1 = tail call i32 @mult(i32 1, i32 0)
   %2 = icmp ne i32 %1, 1
diff --git a/llvm/test/CodeGen/X86/mul-i256.ll b/llvm/test/CodeGen/X86/mul-i256.ll
index 105af64..7be8a8e 100644
--- a/llvm/test/CodeGen/X86/mul-i256.ll
+++ b/llvm/test/CodeGen/X86/mul-i256.ll
@@ -349,10 +349,15 @@
 ; X32-NEXT:    movl %eax, 24(%ecx)
 ; X32-NEXT:    movl %edx, 28(%ecx)
 ; X32-NEXT:    addl $88, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 20
 ; X32-NEXT:    popl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    popl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    popl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test:
@@ -421,8 +426,11 @@
 ; X64-NEXT:    movq %rax, 16(%r9)
 ; X64-NEXT:    movq %rdx, 24(%r9)
 ; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 24
 ; X64-NEXT:    popq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 16
 ; X64-NEXT:    popq %r15
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 entry:
   %av = load i256, i256* %a
diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll
index 8ef1ba2..2f0e6b2 100644
--- a/llvm/test/CodeGen/X86/mul128.ll
+++ b/llvm/test/CodeGen/X86/mul128.ll
@@ -86,10 +86,15 @@
 ; X86-NEXT:    movl %edx, 12(%ecx)
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 20
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    popl %edi
+; X86-NEXT:    .cfi_def_cfa_offset 12
 ; X86-NEXT:    popl %ebx
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl $4
   %k = mul i128 %t, %u
   ret i128 %k
diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll
index 94a27c1..0bbc546 100644
--- a/llvm/test/CodeGen/X86/musttail-varargs.ll
+++ b/llvm/test/CodeGen/X86/musttail-varargs.ll
@@ -94,12 +94,19 @@
 ; LINUX-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
 ; LINUX-NEXT:    movb {{[0-9]+}}(%rsp), %al # 1-byte Reload
 ; LINUX-NEXT:    addq $360, %rsp # imm = 0x168
+; LINUX-NEXT:    .cfi_def_cfa_offset 56
 ; LINUX-NEXT:    popq %rbx
+; LINUX-NEXT:    .cfi_def_cfa_offset 48
 ; LINUX-NEXT:    popq %r12
+; LINUX-NEXT:    .cfi_def_cfa_offset 40
 ; LINUX-NEXT:    popq %r13
+; LINUX-NEXT:    .cfi_def_cfa_offset 32
 ; LINUX-NEXT:    popq %r14
+; LINUX-NEXT:    .cfi_def_cfa_offset 24
 ; LINUX-NEXT:    popq %r15
+; LINUX-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-NEXT:    popq %rbp
+; LINUX-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; LINUX-X32-LABEL: f_thunk:
@@ -181,12 +188,19 @@
 ; LINUX-X32-NEXT:    movaps {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
 ; LINUX-X32-NEXT:    movb {{[0-9]+}}(%esp), %al # 1-byte Reload
 ; LINUX-X32-NEXT:    addl $344, %esp # imm = 0x158
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 56
 ; LINUX-X32-NEXT:    popq %rbx
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 48
 ; LINUX-X32-NEXT:    popq %r12
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 40
 ; LINUX-X32-NEXT:    popq %r13
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 32
 ; LINUX-X32-NEXT:    popq %r14
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 24
 ; LINUX-X32-NEXT:    popq %r15
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-X32-NEXT:    popq %rbp
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; WINDOWS-LABEL: f_thunk:
@@ -295,6 +309,7 @@
 ; LINUX-NEXT:    pushq %rax
 ; LINUX-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-NEXT:    popq %r11
+; LINUX-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-NEXT:    jmpq *%rdi # TAILCALL
 ;
 ; LINUX-X32-LABEL: g_thunk:
@@ -303,6 +318,7 @@
 ; LINUX-X32-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-X32-NEXT:    movl %edi, %r11d
 ; LINUX-X32-NEXT:    addl $8, %esp
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; WINDOWS-LABEL: g_thunk:
@@ -344,11 +360,14 @@
 ; LINUX-NEXT:  # %bb.1: # %then
 ; LINUX-NEXT:    movq 8(%rdi), %r11
 ; LINUX-NEXT:    addq $8, %rsp
+; LINUX-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-NEXT:    jmpq *%r11 # TAILCALL
 ; LINUX-NEXT:  .LBB2_2: # %else
+; LINUX-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-NEXT:    movq 16(%rdi), %r11
 ; LINUX-NEXT:    movl $42, {{.*}}(%rip)
 ; LINUX-NEXT:    addq $8, %rsp
+; LINUX-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; LINUX-X32-LABEL: h_thunk:
@@ -360,11 +379,14 @@
 ; LINUX-X32-NEXT:  # %bb.1: # %then
 ; LINUX-X32-NEXT:    movl 4(%edi), %r11d
 ; LINUX-X32-NEXT:    addl $8, %esp
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ; LINUX-X32-NEXT:  .LBB2_2: # %else
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 16
 ; LINUX-X32-NEXT:    movl 8(%edi), %r11d
 ; LINUX-X32-NEXT:    movl $42, {{.*}}(%rip)
 ; LINUX-X32-NEXT:    addl $8, %esp
+; LINUX-X32-NEXT:    .cfi_def_cfa_offset 8
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; WINDOWS-LABEL: h_thunk:
diff --git a/llvm/test/CodeGen/X86/pr21792.ll b/llvm/test/CodeGen/X86/pr21792.ll
index df35269..721aae9 100644
--- a/llvm/test/CodeGen/X86/pr21792.ll
+++ b/llvm/test/CodeGen/X86/pr21792.ll
@@ -24,6 +24,7 @@
 ; CHECK-NEXT:    leaq stuff+8(%rax), %r9
 ; CHECK-NEXT:    callq toto
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %tmp2 = bitcast <4 x float> %vx to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/pr29061.ll b/llvm/test/CodeGen/X86/pr29061.ll
index d16e453..3b5b4e6 100644
--- a/llvm/test/CodeGen/X86/pr29061.ll
+++ b/llvm/test/CodeGen/X86/pr29061.ll
@@ -15,6 +15,7 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 entry:
   tail call void asm sideeffect "", "{di},~{dirflag},~{fpsr},~{flags}"(i8 %c)
@@ -32,6 +33,7 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 entry:
   tail call void asm sideeffect "", "{si},~{dirflag},~{fpsr},~{flags}"(i8 %c)
diff --git a/llvm/test/CodeGen/X86/pr29112.ll b/llvm/test/CodeGen/X86/pr29112.ll
index 195ff52..832ebd1 100644
--- a/llvm/test/CodeGen/X86/pr29112.ll
+++ b/llvm/test/CodeGen/X86/pr29112.ll
@@ -65,6 +65,7 @@
 ; CHECK-NEXT:    vaddps {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
 ; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT:    addq $88, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %a1 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>
 
diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll
index 30c2e59..4a75d3e 100644
--- a/llvm/test/CodeGen/X86/pr30430.ll
+++ b/llvm/test/CodeGen/X86/pr30430.ll
@@ -108,6 +108,7 @@
 ; CHECK-NEXT:    vmovss %xmm14, (%rsp) # 4-byte Spill
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:    retq
 entry:
   %__A.addr.i = alloca float, align 4
diff --git a/llvm/test/CodeGen/X86/pr32241.ll b/llvm/test/CodeGen/X86/pr32241.ll
index ec56dde..22048a9 100644
--- a/llvm/test/CodeGen/X86/pr32241.ll
+++ b/llvm/test/CodeGen/X86/pr32241.ll
@@ -47,6 +47,7 @@
 ; CHECK-NEXT:    movw %dx, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    addl $16, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 entry:
   %aa = alloca i16, align 2
diff --git a/llvm/test/CodeGen/X86/pr32256.ll b/llvm/test/CodeGen/X86/pr32256.ll
index ab6af88..eae357c 100644
--- a/llvm/test/CodeGen/X86/pr32256.ll
+++ b/llvm/test/CodeGen/X86/pr32256.ll
@@ -27,6 +27,7 @@
 ; CHECK-NEXT:    andb $1, %al
 ; CHECK-NEXT:    movb %al, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    addl $2, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 entry:
   %b = alloca i8, align 1
diff --git a/llvm/test/CodeGen/X86/pr32282.ll b/llvm/test/CodeGen/X86/pr32282.ll
index c1f442d..c2d695c 100644
--- a/llvm/test/CodeGen/X86/pr32282.ll
+++ b/llvm/test/CodeGen/X86/pr32282.ll
@@ -47,7 +47,9 @@
 ; X86-NEXT:    orl %eax, %edx
 ; X86-NEXT:    setne {{[0-9]+}}(%esp)
 ; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: foo:
diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll
index 46e37ab..0f9d936 100644
--- a/llvm/test/CodeGen/X86/pr32284.ll
+++ b/llvm/test/CodeGen/X86/pr32284.ll
@@ -71,6 +71,7 @@
 ; 686-O0-NEXT:    movzbl %al, %ecx
 ; 686-O0-NEXT:    movl %ecx, (%esp)
 ; 686-O0-NEXT:    addl $8, %esp
+; 686-O0-NEXT:    .cfi_def_cfa_offset 4
 ; 686-O0-NEXT:    retl
 ;
 ; 686-LABEL: foo:
@@ -88,6 +89,7 @@
 ; 686-NEXT:    setle %dl
 ; 686-NEXT:    movl %edx, {{[0-9]+}}(%esp)
 ; 686-NEXT:    addl $8, %esp
+; 686-NEXT:    .cfi_def_cfa_offset 4
 ; 686-NEXT:    retl
 entry:
   %a = alloca i8, align 1
@@ -226,10 +228,15 @@
 ; 686-O0-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; 686-O0-NEXT:    movl %esi, (%esp) # 4-byte Spill
 ; 686-O0-NEXT:    addl $24, %esp
+; 686-O0-NEXT:    .cfi_def_cfa_offset 20
 ; 686-O0-NEXT:    popl %esi
+; 686-O0-NEXT:    .cfi_def_cfa_offset 16
 ; 686-O0-NEXT:    popl %edi
+; 686-O0-NEXT:    .cfi_def_cfa_offset 12
 ; 686-O0-NEXT:    popl %ebx
+; 686-O0-NEXT:    .cfi_def_cfa_offset 8
 ; 686-O0-NEXT:    popl %ebp
+; 686-O0-NEXT:    .cfi_def_cfa_offset 4
 ; 686-O0-NEXT:    retl
 ;
 ; 686-LABEL: f1:
@@ -271,8 +278,11 @@
 ; 686-NEXT:    movl %eax, _ZN8struct_210member_2_0E
 ; 686-NEXT:    movl $0, _ZN8struct_210member_2_0E+4
 ; 686-NEXT:    addl $1, %esp
+; 686-NEXT:    .cfi_def_cfa_offset 12
 ; 686-NEXT:    popl %esi
+; 686-NEXT:    .cfi_def_cfa_offset 8
 ; 686-NEXT:    popl %edi
+; 686-NEXT:    .cfi_def_cfa_offset 4
 ; 686-NEXT:    retl
 entry:
   %a = alloca i8, align 1
@@ -383,7 +393,9 @@
 ; 686-O0-NEXT:    # implicit-def: $eax
 ; 686-O0-NEXT:    movw %si, (%eax)
 ; 686-O0-NEXT:    addl $2, %esp
+; 686-O0-NEXT:    .cfi_def_cfa_offset 8
 ; 686-O0-NEXT:    popl %esi
+; 686-O0-NEXT:    .cfi_def_cfa_offset 4
 ; 686-O0-NEXT:    retl
 ;
 ; 686-LABEL: f2:
@@ -404,6 +416,7 @@
 ; 686-NEXT:    sete %dl
 ; 686-NEXT:    movw %dx, (%eax)
 ; 686-NEXT:    addl $2, %esp
+; 686-NEXT:    .cfi_def_cfa_offset 4
 ; 686-NEXT:    retl
 entry:
   %a = alloca i16, align 2
@@ -520,6 +533,7 @@
 ; 686-O0-NEXT:    popl %esi
 ; 686-O0-NEXT:    popl %edi
 ; 686-O0-NEXT:    popl %ebp
+; 686-O0-NEXT:    .cfi_def_cfa %esp, 4
 ; 686-O0-NEXT:    retl
 ;
 ; 686-LABEL: f3:
@@ -545,6 +559,7 @@
 ; 686-NEXT:    movl %ecx, var_46
 ; 686-NEXT:    movl %ebp, %esp
 ; 686-NEXT:    popl %ebp
+; 686-NEXT:    .cfi_def_cfa %esp, 4
 ; 686-NEXT:    retl
 entry:
   %a = alloca i64, align 8
diff --git a/llvm/test/CodeGen/X86/pr32329.ll b/llvm/test/CodeGen/X86/pr32329.ll
index 38d3206..7ccd559 100644
--- a/llvm/test/CodeGen/X86/pr32329.ll
+++ b/llvm/test/CodeGen/X86/pr32329.ll
@@ -57,9 +57,13 @@
 ; X86-NEXT:    imull %eax, %ebx
 ; X86-NEXT:    movb %bl, var_218
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    popl %edi
+; X86-NEXT:    .cfi_def_cfa_offset 12
 ; X86-NEXT:    popl %ebx
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: foo:
diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll
index acb7ce3..441af22 100644
--- a/llvm/test/CodeGen/X86/pr32345.ll
+++ b/llvm/test/CodeGen/X86/pr32345.ll
@@ -93,6 +93,7 @@
 ; 6860-NEXT:    popl %edi
 ; 6860-NEXT:    popl %ebx
 ; 6860-NEXT:    popl %ebp
+; 6860-NEXT:    .cfi_def_cfa %esp, 4
 ; 6860-NEXT:    retl
 ;
 ; X64-LABEL: foo:
@@ -136,6 +137,7 @@
 ; 686-NEXT:    movb %dl, (%eax)
 ; 686-NEXT:    movl %ebp, %esp
 ; 686-NEXT:    popl %ebp
+; 686-NEXT:    .cfi_def_cfa %esp, 4
 ; 686-NEXT:    retl
 bb:
   %tmp = alloca i64, align 8
diff --git a/llvm/test/CodeGen/X86/pr32451.ll b/llvm/test/CodeGen/X86/pr32451.ll
index 86a46fa..4754d8e 100644
--- a/llvm/test/CodeGen/X86/pr32451.ll
+++ b/llvm/test/CodeGen/X86/pr32451.ll
@@ -30,7 +30,9 @@
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
 ; CHECK-NEXT:    movl %eax, (%ecx)
 ; CHECK-NEXT:    addl $16, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 top:
   %3 = alloca i8***
diff --git a/llvm/test/CodeGen/X86/pr34088.ll b/llvm/test/CodeGen/X86/pr34088.ll
index 2fb000f..6950e50 100644
--- a/llvm/test/CodeGen/X86/pr34088.ll
+++ b/llvm/test/CodeGen/X86/pr34088.ll
@@ -27,6 +27,7 @@
 ; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    .cfi_def_cfa %esp, 4
 ; CHECK-NEXT:    retl
 entry:
   %foo = alloca %struct.Foo, align 4
diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll
index 464db01..ddc6f23 100644
--- a/llvm/test/CodeGen/X86/pr34592.ll
+++ b/llvm/test/CodeGen/X86/pr34592.ll
@@ -61,6 +61,7 @@
 ; CHECK-NEXT:    vmovaps %ymm14, (%rsp) # 32-byte Spill
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:    retq
 bb:
   %tmp = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg, <16 x i64> %arg1
diff --git a/llvm/test/CodeGen/X86/pr34653.ll b/llvm/test/CodeGen/X86/pr34653.ll
index 2edad66..db3c95d 100644
--- a/llvm/test/CodeGen/X86/pr34653.ll
+++ b/llvm/test/CodeGen/X86/pr34653.ll
@@ -199,6 +199,7 @@
 ; CHECK-NEXT:    vmovsd %xmm7, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
 entry:
diff --git a/llvm/test/CodeGen/X86/pr9743.ll b/llvm/test/CodeGen/X86/pr9743.ll
index 73b3c7f..ac3d457 100644
--- a/llvm/test/CodeGen/X86/pr9743.ll
+++ b/llvm/test/CodeGen/X86/pr9743.ll
@@ -11,4 +11,5 @@
 ; CHECK-NEXT:  movq    %rsp, %rbp
 ; CHECK-NEXT:  .cfi_def_cfa_register %rbp
 ; CHECK-NEXT:  popq    %rbp
+; CHECK-NEXT:  .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:  ret
diff --git a/llvm/test/CodeGen/X86/push-cfi-debug.ll b/llvm/test/CodeGen/X86/push-cfi-debug.ll
index 7f438e3..06ae2de 100644
--- a/llvm/test/CodeGen/X86/push-cfi-debug.ll
+++ b/llvm/test/CodeGen/X86/push-cfi-debug.ll
@@ -6,17 +6,15 @@
 declare x86_stdcallcc void @stdfoo(i32, i32) #0
 
 ; CHECK-LABEL: test1:
-; CHECK: subl $8, %esp
-; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: subl $20, %esp
+; CHECK: .cfi_adjust_cfa_offset 20
 ; CHECK: pushl $2
 ; CHECK: .cfi_adjust_cfa_offset 4
 ; CHECK: pushl $1
 ; CHECK: .cfi_adjust_cfa_offset 4
 ; CHECK: calll foo
-; CHECK: addl $16, %esp
-; CHECK: .cfi_adjust_cfa_offset -16
-; CHECK: subl $8, %esp
-; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: addl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset -8
 ; CHECK: pushl $4
 ; CHECK: .cfi_adjust_cfa_offset 4
 ; CHECK: pushl $3
@@ -24,7 +22,7 @@
 ; CHECK: calll stdfoo
 ; CHECK: .cfi_adjust_cfa_offset -8
 ; CHECK: addl $20, %esp
-; CHECK: .cfi_adjust_cfa_offset -8
+; CHECK: .cfi_adjust_cfa_offset -20
 define void @test1() #0 !dbg !4 {
 entry:
   tail call void @foo(i32 1, i32 2) #1, !dbg !10
diff --git a/llvm/test/CodeGen/X86/push-cfi-obj.ll b/llvm/test/CodeGen/X86/push-cfi-obj.ll
index 8b3c3ad..1371acb 100644
--- a/llvm/test/CodeGen/X86/push-cfi-obj.ll
+++ b/llvm/test/CodeGen/X86/push-cfi-obj.ll
@@ -12,7 +12,7 @@
 ; LINUX-NEXT:    ]
 ; LINUX-NEXT:    Address: 0x0
 ; LINUX-NEXT:    Offset: 0x5C
-; LINUX-NEXT:    Size: 64
+; LINUX-NEXT:    Size: 72
 ; LINUX-NEXT:    Link: 0
 ; LINUX-NEXT:    Info: 0
 ; LINUX-NEXT:    AddressAlignment: 4
@@ -22,8 +22,9 @@
 ; LINUX-NEXT:    SectionData (
 ; LINUX-NEXT:      0000: 1C000000 00000000 017A504C 5200017C  |.........zPLR..||
 ; LINUX-NEXT:      0010: 08070000 00000000 1B0C0404 88010000  |................|
-; LINUX-NEXT:      0020: 1C000000 24000000 00000000 1D000000  |....$...........|
+; LINUX-NEXT:      0020: 24000000 24000000 00000000 1D000000  |$...$...........|
 ; LINUX-NEXT:      0030: 04000000 00410E08 8502420D 05432E10  |.....A....B..C..|
+; LINUX-NEXT:      0040: 540C0404 410C0508                    |T...A...|
 ; LINUX-NEXT:    )
 
 declare i32 @__gxx_personality_v0(...)
@@ -35,7 +36,7 @@
           to label %continue unwind label %cleanup
 continue:
   ret void
-cleanup:  
+cleanup:
   landingpad { i8*, i32 }
      cleanup
   ret void
diff --git a/llvm/test/CodeGen/X86/push-cfi.ll b/llvm/test/CodeGen/X86/push-cfi.ll
index 91e579a..97f5630 100644
--- a/llvm/test/CodeGen/X86/push-cfi.ll
+++ b/llvm/test/CodeGen/X86/push-cfi.ll
@@ -75,7 +75,7 @@
 ; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: call
 ; LINUX-NEXT: addl $28, %esp
-; LINUX: .cfi_adjust_cfa_offset -16
+; LINUX: .cfi_adjust_cfa_offset -28
 ; DARWIN-NOT: .cfi_escape
 ; DARWIN-NOT: pushl
 define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
diff --git a/llvm/test/CodeGen/X86/rdtsc.ll b/llvm/test/CodeGen/X86/rdtsc.ll
index 96ad1ab..6370c05 100644
--- a/llvm/test/CodeGen/X86/rdtsc.ll
+++ b/llvm/test/CodeGen/X86/rdtsc.ll
@@ -50,6 +50,7 @@
 ; X86-NEXT:    rdtscp
 ; X86-NEXT:    movl %ecx, (%esi)
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_builtin_rdtscp:
diff --git a/llvm/test/CodeGen/X86/return-ext.ll b/llvm/test/CodeGen/X86/return-ext.ll
index ef160f4..c66e518 100644
--- a/llvm/test/CodeGen/X86/return-ext.ll
+++ b/llvm/test/CodeGen/X86/return-ext.ll
@@ -106,6 +106,7 @@
 ; CHECK: call
 ; CHECK-NEXT: movzbl
 ; CHECK-NEXT: {{pop|add}}
+; CHECK-NEXT: .cfi_def_cfa_offset {{4|8}}
 ; CHECK-NEXT: ret
 }
 
@@ -120,6 +121,7 @@
 ; CHECK: call
 ; CHECK-NEXT: movzbl
 ; CHECK-NEXT: {{pop|add}}
+; CHECK-NEXT: .cfi_def_cfa_offset {{4|8}}
 ; CHECK-NEXT: ret
 }
 
@@ -134,5 +136,6 @@
 ; CHECK: call
 ; CHECK-NEXT: movzwl
 ; CHECK-NEXT: {{pop|add}}
+; CHECK-NEXT: .cfi_def_cfa_offset {{4|8}}
 ; CHECK-NEXT: ret
 }
diff --git a/llvm/test/CodeGen/X86/rtm.ll b/llvm/test/CodeGen/X86/rtm.ll
index 771e234..ed96440 100644
--- a/llvm/test/CodeGen/X86/rtm.ll
+++ b/llvm/test/CodeGen/X86/rtm.ll
@@ -75,6 +75,7 @@
 ; X64-NEXT:    xabort $1
 ; X64-NEXT:    callq f1
 ; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 entry:
   %x.addr = alloca i32, align 4
diff --git a/llvm/test/CodeGen/X86/schedule-x86_32.ll b/llvm/test/CodeGen/X86/schedule-x86_32.ll
index 5cf8a64..cbaf36d 100644
--- a/llvm/test/CodeGen/X86/schedule-x86_32.ll
+++ b/llvm/test/CodeGen/X86/schedule-x86_32.ll
@@ -476,6 +476,7 @@
 ; GENERIC-NEXT:    bound %ecx, (%edx)
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    popl %esi
+; GENERIC-NEXT:    .cfi_def_cfa_offset 4
 ; GENERIC-NEXT:    retl
 ;
 ; ATOM-LABEL: test_bound:
@@ -492,6 +493,7 @@
 ; ATOM-NEXT:    bound %ecx, (%edx) # sched: [11:5.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    popl %esi # sched: [1:1.00]
+; ATOM-NEXT:    .cfi_def_cfa_offset 4
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_bound:
@@ -508,6 +510,7 @@
 ; SLM-NEXT:    bound %ecx, (%edx) # sched: [100:1.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    popl %esi # sched: [3:1.00]
+; SLM-NEXT:    .cfi_def_cfa_offset 4
 ; SLM-NEXT:    retl # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_bound:
@@ -524,6 +527,7 @@
 ; SANDY-NEXT:    bound %ecx, (%edx) # sched: [100:0.33]
 ; SANDY-NEXT:    #NO_APP
 ; SANDY-NEXT:    popl %esi # sched: [6:0.50]
+; SANDY-NEXT:    .cfi_def_cfa_offset 4
 ; SANDY-NEXT:    retl # sched: [6:1.00]
 ;
 ; HASWELL-LABEL: test_bound:
@@ -540,6 +544,7 @@
 ; HASWELL-NEXT:    bound %ecx, (%edx) # sched: [1:?]
 ; HASWELL-NEXT:    #NO_APP
 ; HASWELL-NEXT:    popl %esi # sched: [6:0.50]
+; HASWELL-NEXT:    .cfi_def_cfa_offset 4
 ; HASWELL-NEXT:    retl # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_bound:
@@ -556,6 +561,7 @@
 ; BROADWELL-NEXT:    bound %ecx, (%edx) # sched: [100:0.25]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    popl %esi # sched: [6:0.50]
+; BROADWELL-NEXT:    .cfi_def_cfa_offset 4
 ; BROADWELL-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKYLAKE-LABEL: test_bound:
@@ -572,6 +578,7 @@
 ; SKYLAKE-NEXT:    bound %ecx, (%edx) # sched: [100:0.25]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    popl %esi # sched: [6:0.50]
+; SKYLAKE-NEXT:    .cfi_def_cfa_offset 4
 ; SKYLAKE-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKX-LABEL: test_bound:
@@ -588,6 +595,7 @@
 ; SKX-NEXT:    bound %ecx, (%edx) # sched: [100:0.25]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    popl %esi # sched: [6:0.50]
+; SKX-NEXT:    .cfi_def_cfa_offset 4
 ; SKX-NEXT:    retl # sched: [6:0.50]
 ;
 ; BTVER2-LABEL: test_bound:
@@ -604,6 +612,7 @@
 ; BTVER2-NEXT:    bound %ecx, (%edx) # sched: [100:0.50]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    popl %esi # sched: [5:1.00]
+; BTVER2-NEXT:    .cfi_def_cfa_offset 4
 ; BTVER2-NEXT:    retl # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_bound:
@@ -620,6 +629,7 @@
 ; ZNVER1-NEXT:    bound %ecx, (%edx) # sched: [100:?]
 ; ZNVER1-NEXT:    #NO_APP
 ; ZNVER1-NEXT:    popl %esi # sched: [8:0.50]
+; ZNVER1-NEXT:    .cfi_def_cfa_offset 4
 ; ZNVER1-NEXT:    retl # sched: [1:0.50]
   call void asm sideeffect "bound $0, $1 \0A\09 bound $2, $3", "r,*m,r,*m"(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3)
   ret void
diff --git a/llvm/test/CodeGen/X86/select-mmx.ll b/llvm/test/CodeGen/X86/select-mmx.ll
index e907a3d..f24111f 100644
--- a/llvm/test/CodeGen/X86/select-mmx.ll
+++ b/llvm/test/CodeGen/X86/select-mmx.ll
@@ -48,6 +48,7 @@
 ; I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; I32-NEXT:    movl %ebp, %esp
 ; I32-NEXT:    popl %ebp
+; I32-NEXT:    .cfi_def_cfa %esp, 4
 ; I32-NEXT:    retl
   %cond = icmp eq i64 %arg, 0
   %slct = select i1 %cond, x86_mmx bitcast (i64 7 to x86_mmx), x86_mmx bitcast (i64 0 to x86_mmx)
@@ -100,6 +101,7 @@
 ; I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; I32-NEXT:    movl %ebp, %esp
 ; I32-NEXT:    popl %ebp
+; I32-NEXT:    .cfi_def_cfa %esp, 4
 ; I32-NEXT:    retl
   %cond = icmp eq i64 %arg, 0
   %xmmx = bitcast i64 %x to x86_mmx
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index 00fa427..11e453e 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -88,6 +88,7 @@
 ; KNL-32-NEXT:    jne .LBB1_1
 ; KNL-32-NEXT:  # %bb.2: # %for_exit600
 ; KNL-32-NEXT:    popl %esi
+; KNL-32-NEXT:    .cfi_def_cfa_offset 4
 ; KNL-32-NEXT:    retl
 allocas:
   br label %for_test11.preheader
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index ed7d05f..925e63d 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -35,6 +35,7 @@
 ; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X86-SSE-NEXT:    movq %xmm1, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX-LABEL: mul_2xi8:
@@ -52,6 +53,7 @@
 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8:
@@ -121,6 +123,7 @@
 ; X86-SSE-NEXT:    pmaddwd %xmm0, %xmm2
 ; X86-SSE-NEXT:    movdqu %xmm2, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX-LABEL: mul_4xi8:
@@ -137,6 +140,7 @@
 ; X86-AVX-NEXT:    vpmaddwd %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovdqu %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_4xi8:
@@ -206,6 +210,7 @@
 ; X86-SSE-NEXT:    movdqu %xmm1, 16(%esi,%ecx,4)
 ; X86-SSE-NEXT:    movdqu %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: mul_8xi8:
@@ -226,6 +231,7 @@
 ; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
@@ -243,6 +249,7 @@
 ; X86-AVX2-NEXT:    vpmaddwd %ymm0, %ymm1, %ymm0
 ; X86-AVX2-NEXT:    vmovdqu %ymm0, (%esi,%ecx,4)
 ; X86-AVX2-NEXT:    popl %esi
+; X86-AVX2-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
 ;
@@ -340,6 +347,7 @@
 ; X86-SSE-NEXT:    movdqu %xmm4, 16(%esi,%ecx,4)
 ; X86-SSE-NEXT:    movdqu %xmm3, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: mul_16xi8:
@@ -368,6 +376,7 @@
 ; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
 ; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
@@ -389,6 +398,7 @@
 ; X86-AVX2-NEXT:    vmovdqu %ymm0, 32(%esi,%ecx,4)
 ; X86-AVX2-NEXT:    vmovdqu %ymm1, (%esi,%ecx,4)
 ; X86-AVX2-NEXT:    popl %esi
+; X86-AVX2-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
 ;
@@ -494,6 +504,7 @@
 ; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X86-SSE-NEXT:    movq %xmm1, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX-LABEL: mul_2xi16:
@@ -512,6 +523,7 @@
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16:
@@ -577,6 +589,7 @@
 ; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X86-SSE-NEXT:    movdqu %xmm1, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX-LABEL: mul_4xi16:
@@ -593,6 +606,7 @@
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovdqu %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_4xi16:
@@ -659,6 +673,7 @@
 ; X86-SSE-NEXT:    movdqu %xmm1, 16(%esi,%ecx,4)
 ; X86-SSE-NEXT:    movdqu %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: mul_8xi16:
@@ -679,6 +694,7 @@
 ; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
@@ -696,6 +712,7 @@
 ; X86-AVX2-NEXT:    vpmulld %ymm0, %ymm1, %ymm0
 ; X86-AVX2-NEXT:    vmovdqu %ymm0, (%esi,%ecx,4)
 ; X86-AVX2-NEXT:    popl %esi
+; X86-AVX2-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
 ;
@@ -791,6 +808,7 @@
 ; X86-SSE-NEXT:    movdqu %xmm2, 16(%esi,%ecx,4)
 ; X86-SSE-NEXT:    movdqu %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: mul_16xi16:
@@ -819,6 +837,7 @@
 ; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
 ; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
@@ -840,6 +859,7 @@
 ; X86-AVX2-NEXT:    vmovdqu %ymm0, 32(%esi,%ecx,4)
 ; X86-AVX2-NEXT:    vmovdqu %ymm1, (%esi,%ecx,4)
 ; X86-AVX2-NEXT:    popl %esi
+; X86-AVX2-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
 ;
@@ -949,6 +969,7 @@
 ; X86-SSE-NEXT:    psrad $16, %xmm0
 ; X86-SSE-NEXT:    movq %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX-LABEL: mul_2xi8_sext:
@@ -966,6 +987,7 @@
 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_sext:
@@ -1041,6 +1063,7 @@
 ; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; X86-SSE-NEXT:    movq %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX-LABEL: mul_2xi8_sext_zext:
@@ -1058,6 +1081,7 @@
 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_sext_zext:
@@ -1128,6 +1152,7 @@
 ; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X86-SSE-NEXT:    movq %xmm1, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX-LABEL: mul_2xi16_sext:
@@ -1145,6 +1170,7 @@
 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_sext:
@@ -1222,6 +1248,7 @@
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; X86-SSE-NEXT:    movq %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX-LABEL: mul_2xi16_sext_zext:
@@ -1241,6 +1268,7 @@
 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_sext_zext:
@@ -1333,6 +1361,7 @@
 ; X86-SSE-NEXT:    movdqu %xmm2, 16(%esi,%ecx,4)
 ; X86-SSE-NEXT:    movdqu %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: mul_16xi16_sext:
@@ -1361,6 +1390,7 @@
 ; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
 ; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
@@ -1382,6 +1412,7 @@
 ; X86-AVX2-NEXT:    vmovdqu %ymm0, 32(%esi,%ecx,4)
 ; X86-AVX2-NEXT:    vmovdqu %ymm1, (%esi,%ecx,4)
 ; X86-AVX2-NEXT:    popl %esi
+; X86-AVX2-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
 ;
@@ -2306,10 +2337,15 @@
 ; X86-AVX1-NEXT:    vmovd %xmm1, (%eax)
 ; X86-AVX1-NEXT:    vmovaps %ymm0, (%eax)
 ; X86-AVX1-NEXT:    addl $16, %esp
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 20
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 16
 ; X86-AVX1-NEXT:    popl %edi
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 12
 ; X86-AVX1-NEXT:    popl %ebx
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 8
 ; X86-AVX1-NEXT:    popl %ebp
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
@@ -2375,6 +2411,7 @@
 ; X86-AVX2-NEXT:    vmovd %xmm1, (%eax)
 ; X86-AVX2-NEXT:    vmovdqa %ymm0, (%eax)
 ; X86-AVX2-NEXT:    popl %esi
+; X86-AVX2-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
 ;
@@ -2491,7 +2528,9 @@
 ; X64-AVX1-NEXT:    vmovd %xmm1, (%rax)
 ; X64-AVX1-NEXT:    vmovaps %ymm0, (%rax)
 ; X64-AVX1-NEXT:    popq %rbx
+; X64-AVX1-NEXT:    .cfi_def_cfa_offset 16
 ; X64-AVX1-NEXT:    popq %rbp
+; X64-AVX1-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/stack-probe-red-zone.ll b/llvm/test/CodeGen/X86/stack-probe-red-zone.ll
index f069198..fff44a0 100644
--- a/llvm/test/CodeGen/X86/stack-probe-red-zone.ll
+++ b/llvm/test/CodeGen/X86/stack-probe-red-zone.ll
@@ -1,9 +1,9 @@
 ; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
 
 ; Ensure that red zone usage occurs.
-define void @testStackProbesOff() {
+define signext i8 @testStackProbesOff() {
   %array = alloca [40096 x i8], align 16
-  ret void
+  ret i8 0
 
 ; CHECK-LABEL:  testStackProbesOff:
 ; CHECK:        subq $39976, %rsp # imm = 0x9C28
diff --git a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll
index 1ad35f3..2020b8d 100644
--- a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll
+++ b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll
@@ -83,6 +83,7 @@
 ; CHECK: callq return_i1
 ; CHECK-NEXT: .Ltmp5:
 ; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: retq
 entry:
   %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)
diff --git a/llvm/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll b/llvm/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
index b88ca03..90f2002 100644
--- a/llvm/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
+++ b/llvm/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
@@ -69,6 +69,7 @@
 ; CHECK: callq return_i1
 ; CHECK-NEXT: .Ltmp4:
 ; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: retq
 entry:
   %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0, i32 addrspace(1)* %a)
diff --git a/llvm/test/CodeGen/X86/statepoint-invoke.ll b/llvm/test/CodeGen/X86/statepoint-invoke.ll
index 12a1226..d4934e3 100644
--- a/llvm/test/CodeGen/X86/statepoint-invoke.ll
+++ b/llvm/test/CodeGen/X86/statepoint-invoke.ll
@@ -142,6 +142,7 @@
   ; CHECK-LABEL: %normal_return
   ; CHECK: xorl %eax, %eax
   ; CHECK-NEXT: popq
+  ; CHECK-NEXT: .cfi_def_cfa_offset 8
   ; CHECK-NEXT: retq
   %null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 13, i32 13)
   %undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 14, i32 14)
@@ -169,6 +170,7 @@
 normal_return:
   ; CHECK: leaq
   ; CHECK-NEXT: popq
+  ; CHECK-NEXT: .cfi_def_cfa_offset 8
   ; CHECK-NEXT: retq
   %aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %sp, i32 13, i32 13)
   %aa.converted = bitcast i32 addrspace(1)* %aa.rel to i64 addrspace(1)*
@@ -177,6 +179,7 @@
 exceptional_return:
   ; CHECK: movl	$15
   ; CHECK-NEXT: popq
+  ; CHECK-NEXT: .cfi_def_cfa_offset 8
   ; CHECK-NEXT: retq
   %landing_pad = landingpad token
           cleanup
diff --git a/llvm/test/CodeGen/X86/statepoint-vector.ll b/llvm/test/CodeGen/X86/statepoint-vector.ll
index fe67b57..30fb802 100644
--- a/llvm/test/CodeGen/X86/statepoint-vector.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vector.ll
@@ -15,6 +15,7 @@
 ; CHECK-NEXT:  .Ltmp0:
 ; CHECK-NEXT:    movaps (%rsp), %xmm0
 ; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
@@ -37,6 +38,7 @@
 ; CHECK-NEXT:  .Ltmp1:
 ; CHECK-NEXT:    movaps (%rsp), %xmm0
 ; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset
@@ -65,6 +67,7 @@
 ; CHECK-NEXT:  .Ltmp2:
 ; CHECK-NEXT:    movaps (%rsp), %xmm0
 ; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   br i1 %cnd, label %taken, label %untaken
@@ -101,6 +104,7 @@
 ; CHECK-NEXT:  .Ltmp3:
 ; CHECK-NEXT:    movaps (%rsp), %xmm0
 ; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer)
@@ -119,6 +123,7 @@
 ; CHECK-NEXT:    callq do_safepoint
 ; CHECK-NEXT:  .Ltmp4:
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0)
diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll
index f3563d2..cb85585 100644
--- a/llvm/test/CodeGen/X86/swift-return.ll
+++ b/llvm/test/CodeGen/X86/swift-return.ll
@@ -18,6 +18,7 @@
 ; CHECK-NEXT:    addl %ecx, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: test:
@@ -32,6 +33,7 @@
 ; CHECK-O0-NEXT:    addl %ecx, %edi
 ; CHECK-O0-NEXT:    movw %di, %ax
 ; CHECK-O0-NEXT:    popq %rcx
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O0-NEXT:    retq
 entry:
   %key.addr = alloca i32, align 4
@@ -66,6 +68,7 @@
 ; CHECK-NEXT:    addl {{[0-9]+}}(%rsp), %eax
 ; CHECK-NEXT:    addl {{[0-9]+}}(%rsp), %eax
 ; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: test2:
@@ -87,6 +90,7 @@
 ; CHECK-O0-NEXT:    addl %edi, %esi
 ; CHECK-O0-NEXT:    movl %esi, %eax
 ; CHECK-O0-NEXT:    addq $24, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O0-NEXT:    retq
 entry:
   %key.addr = alloca i32, align 4
@@ -148,6 +152,7 @@
 ; CHECK-NEXT:    addl %ecx, %eax
 ; CHECK-NEXT:    addl %r8d, %eax
 ; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: test3:
@@ -161,6 +166,7 @@
 ; CHECK-O0-NEXT:    addl %ecx, %eax
 ; CHECK-O0-NEXT:    addl %r8d, %eax
 ; CHECK-O0-NEXT:    popq %rcx
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O0-NEXT:    retq
 entry:
   %key.addr = alloca i32, align 4
@@ -194,6 +200,7 @@
 ; CHECK-NEXT:    addss %xmm2, %xmm0
 ; CHECK-NEXT:    addss %xmm3, %xmm0
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: test4:
@@ -207,6 +214,7 @@
 ; CHECK-O0-NEXT:    addss %xmm2, %xmm0
 ; CHECK-O0-NEXT:    addss %xmm3, %xmm0
 ; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O0-NEXT:    retq
 entry:
   %key.addr = alloca float, align 4
@@ -246,6 +254,7 @@
 ; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    movl %eax, {{.*}}(%rip)
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: consume_i1_ret:
@@ -266,6 +275,7 @@
 ; CHECK-O0-NEXT:    movzbl %r8b, %esi
 ; CHECK-O0-NEXT:    movl %esi, var
 ; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O0-NEXT:    retq
   %call = call swiftcc { i1, i1, i1, i1 } @produce_i1_ret()
   %v3 = extractvalue { i1, i1, i1, i1 } %call, 0
@@ -309,6 +319,7 @@
 ; CHECK-NEXT:    addsd %xmm2, %xmm0
 ; CHECK-NEXT:    addsd %xmm3, %xmm0
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: test5:
@@ -320,6 +331,7 @@
 ; CHECK-O0-NEXT:    addsd %xmm2, %xmm0
 ; CHECK-O0-NEXT:    addsd %xmm3, %xmm0
 ; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O0-NEXT:    retq
 entry:
   %call = call swiftcc { double, double, double, double } @gen5()
@@ -351,6 +363,7 @@
 ; CHECK-NEXT:    addq %rcx, %rax
 ; CHECK-NEXT:    addq %r8, %rax
 ; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: test6:
@@ -365,6 +378,7 @@
 ; CHECK-O0-NEXT:    addq %rcx, %rax
 ; CHECK-O0-NEXT:    addq %r8, %rax
 ; CHECK-O0-NEXT:    popq %rcx
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O0-NEXT:    retq
 entry:
   %call = call swiftcc { double, double, double, double, i64, i64, i64, i64 } @gen6()
@@ -510,6 +524,7 @@
 ; CHECK-NEXT:    addps %xmm2, %xmm0
 ; CHECK-NEXT:    addps %xmm3, %xmm0
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: test11:
@@ -521,6 +536,7 @@
 ; CHECK-O0-NEXT:    addps %xmm2, %xmm0
 ; CHECK-O0-NEXT:    addps %xmm3, %xmm0
 ; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O0-NEXT:    retq
 entry:
   %call = call swiftcc { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @gen11()
@@ -548,6 +564,7 @@
 ; CHECK-NEXT:    addps %xmm2, %xmm0
 ; CHECK-NEXT:    movaps %xmm3, %xmm1
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: test12:
@@ -559,6 +576,7 @@
 ; CHECK-O0-NEXT:    addps %xmm2, %xmm0
 ; CHECK-O0-NEXT:    movaps %xmm3, %xmm1
 ; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O0-NEXT:    retq
 entry:
   %call = call swiftcc { <4 x float>, <4 x float>, <4 x float>, float } @gen12()
diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll
index e603c61..4fba792 100644
--- a/llvm/test/CodeGen/X86/test-shrink-bug.ll
+++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll
@@ -81,9 +81,12 @@
 ; CHECK-X64-NEXT:  # %bb.3: # %no
 ; CHECK-X64-NEXT:    callq bar
 ; CHECK-X64-NEXT:    popq %rax
+; CHECK-X64-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-X64-NEXT:    retq
 ; CHECK-X64-NEXT:  .LBB1_2: # %yes
+; CHECK-X64-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-X64-NEXT:    popq %rax
+; CHECK-X64-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-X64-NEXT:    retq
   %1 = icmp eq <2 x i8> %b, <i8 40, i8 123>
   %2 = extractelement <2 x i1> %1, i32 1
diff --git a/llvm/test/CodeGen/X86/test-vs-bittest.ll b/llvm/test/CodeGen/X86/test-vs-bittest.ll
index d20a757..fa43ecb 100644
--- a/llvm/test/CodeGen/X86/test-vs-bittest.ll
+++ b/llvm/test/CodeGen/X86/test-vs-bittest.ll
@@ -12,6 +12,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB0_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -35,6 +36,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB1_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -63,6 +65,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB2_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -86,6 +89,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB3_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -109,6 +113,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB4_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -132,6 +137,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB5_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -155,6 +161,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB6_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -178,6 +185,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB7_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -201,6 +209,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB8_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i32 %x, 2048
   %s = icmp eq i32 %t, 0
@@ -224,6 +233,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB9_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i32 %x, 2048
   %s = icmp eq i32 %t, 0
@@ -247,6 +257,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB10_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i32 %x, 2048
   %s = icmp eq i32 %t, 0
@@ -270,6 +281,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB11_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i32 %x, 2048
   %s = icmp eq i32 %t, 0
@@ -293,6 +305,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB12_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0
@@ -316,6 +329,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB13_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0
@@ -339,6 +353,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB14_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0
@@ -362,6 +377,7 @@
 ; CHECK-NEXT:    callq bar
 ; CHECK-NEXT:  .LBB15_2: # %no
 ; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0
diff --git a/llvm/test/CodeGen/X86/throws-cfi-fp.ll b/llvm/test/CodeGen/X86/throws-cfi-fp.ll
new file mode 100644
index 0000000..3ec97d7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/throws-cfi-fp.ll
@@ -0,0 +1,86 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$__clang_call_terminate = comdat any
+
+@_ZL11ShouldThrow = internal unnamed_addr global i1 false, align 1
+@_ZTIi = external constant i8*
+@str = private unnamed_addr constant [20 x i8] c"Threw an exception!\00"
+
+; Function Attrs: uwtable
+define void @_Z6throwsv() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+
+; CHECK-LABEL:   _Z6throwsv:
+; CHECK:         popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .LBB0_1:
+; CHECK-NEXT:    .cfi_def_cfa %rbp, 16
+
+entry:
+  %.b5 = load i1, i1* @_ZL11ShouldThrow, align 1
+  br i1 %.b5, label %if.then, label %try.cont
+
+if.then:                                          ; preds = %entry
+  %exception = tail call i8* @__cxa_allocate_exception(i64 4)
+  %0 = bitcast i8* %exception to i32*
+  store i32 1, i32* %0, align 16
+  invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+          to label %unreachable unwind label %lpad
+
+lpad:                                             ; preds = %if.then
+  %1 = landingpad { i8*, i32 }
+          catch i8* null
+  %2 = extractvalue { i8*, i32 } %1, 0
+  %3 = tail call i8* @__cxa_begin_catch(i8* %2)
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @str, i64 0, i64 0))
+  invoke void @__cxa_rethrow()
+          to label %unreachable unwind label %lpad1
+
+lpad1:                                            ; preds = %lpad
+  %4 = landingpad { i8*, i32 }
+          cleanup
+  invoke void @__cxa_end_catch()
+          to label %eh.resume unwind label %terminate.lpad
+
+try.cont:                                         ; preds = %entry
+  ret void
+
+eh.resume:                                        ; preds = %lpad1
+  resume { i8*, i32 } %4
+
+terminate.lpad:                                   ; preds = %lpad1
+  %5 = landingpad { i8*, i32 }
+          catch i8* null
+  %6 = extractvalue { i8*, i32 } %5, 0
+  tail call void @__clang_call_terminate(i8* %6)
+  unreachable
+
+unreachable:                                      ; preds = %lpad, %if.then
+  unreachable
+}
+
+declare i8* @__cxa_allocate_exception(i64)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_rethrow()
+
+declare void @__cxa_end_catch()
+
+; Function Attrs: noinline noreturn nounwind
+declare void @__clang_call_terminate(i8*)
+
+declare void @_ZSt9terminatev()
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly)
+
+attributes #0 = { "no-frame-pointer-elim"="true" }
+
diff --git a/llvm/test/CodeGen/X86/throws-cfi-no-fp.ll b/llvm/test/CodeGen/X86/throws-cfi-no-fp.ll
new file mode 100644
index 0000000..a40afbb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/throws-cfi-no-fp.ll
@@ -0,0 +1,85 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$__clang_call_terminate = comdat any
+
+@_ZL11ShouldThrow = internal unnamed_addr global i1 false, align 1
+@_ZTIi = external constant i8*
+@str = private unnamed_addr constant [20 x i8] c"Threw an exception!\00"
+
+; Function Attrs: uwtable
+define void @_Z6throwsv() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+
+; CHECK-LABEL:   _Z6throwsv:
+; CHECK:         popq	%rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .LBB0_1:
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+
+entry:
+  %.b5 = load i1, i1* @_ZL11ShouldThrow, align 1
+  br i1 %.b5, label %if.then, label %try.cont
+
+if.then:                                          ; preds = %entry
+  %exception = tail call i8* @__cxa_allocate_exception(i64 4)
+  %0 = bitcast i8* %exception to i32*
+  store i32 1, i32* %0, align 16
+  invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+          to label %unreachable unwind label %lpad
+
+lpad:                                             ; preds = %if.then
+  %1 = landingpad { i8*, i32 }
+          catch i8* null
+  %2 = extractvalue { i8*, i32 } %1, 0
+  %3 = tail call i8* @__cxa_begin_catch(i8* %2)
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @str, i64 0, i64 0))
+  invoke void @__cxa_rethrow()
+          to label %unreachable unwind label %lpad1
+
+lpad1:                                            ; preds = %lpad
+  %4 = landingpad { i8*, i32 }
+          cleanup
+  invoke void @__cxa_end_catch()
+          to label %eh.resume unwind label %terminate.lpad
+
+try.cont:                                         ; preds = %entry
+  ret void
+
+eh.resume:                                        ; preds = %lpad1
+  resume { i8*, i32 } %4
+
+terminate.lpad:                                   ; preds = %lpad1
+  %5 = landingpad { i8*, i32 }
+          catch i8* null
+  %6 = extractvalue { i8*, i32 } %5, 0
+  tail call void @__clang_call_terminate(i8* %6)
+  unreachable
+
+unreachable:                                      ; preds = %lpad, %if.then
+  unreachable
+}
+
+declare i8* @__cxa_allocate_exception(i64)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_rethrow()
+
+declare void @__cxa_end_catch()
+
+; Function Attrs: noinline noreturn nounwind
+declare void @__clang_call_terminate(i8*)
+
+declare void @_ZSt9terminatev()
+
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly)
+
diff --git a/llvm/test/CodeGen/X86/vector-arith-sat.ll b/llvm/test/CodeGen/X86/vector-arith-sat.ll
index 59ce37d..ef34843 100644
--- a/llvm/test/CodeGen/X86/vector-arith-sat.ll
+++ b/llvm/test/CodeGen/X86/vector-arith-sat.ll
@@ -469,6 +469,7 @@
 ; AVX512F-32-NEXT:    vpaddsw 8(%ebp), %zmm1, %zmm1
 ; AVX512F-32-NEXT:    movl %ebp, %esp
 ; AVX512F-32-NEXT:    popl %ebp
+; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
 ; AVX512F-32-NEXT:    retl
   %1 = sext <64 x i16> %a to <64 x i32>
   %2 = sext <64 x i16> %b to <64 x i32>
@@ -653,6 +654,7 @@
 ; AVX512F-32-NEXT:    vpsubsw 8(%ebp), %zmm1, %zmm1
 ; AVX512F-32-NEXT:    movl %ebp, %esp
 ; AVX512F-32-NEXT:    popl %ebp
+; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
 ; AVX512F-32-NEXT:    retl
   %1 = sext <64 x i16> %a to <64 x i32>
   %2 = sext <64 x i16> %b to <64 x i32>
@@ -825,6 +827,7 @@
 ; AVX512F-32-NEXT:    vpaddusw 8(%ebp), %zmm1, %zmm1
 ; AVX512F-32-NEXT:    movl %ebp, %esp
 ; AVX512F-32-NEXT:    popl %ebp
+; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
 ; AVX512F-32-NEXT:    retl
   %1 = zext <64 x i16> %a to <64 x i32>
   %2 = zext <64 x i16> %b to <64 x i32>
@@ -977,6 +980,7 @@
 ; AVX512F-32-NEXT:    vpsubusw 8(%ebp), %zmm1, %zmm1
 ; AVX512F-32-NEXT:    movl %ebp, %esp
 ; AVX512F-32-NEXT:    popl %ebp
+; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
 ; AVX512F-32-NEXT:    retl
   %cmp = icmp ugt <64 x i16> %a, %b
   %sel = select <64 x i1> %cmp, <64 x i16> %a, <64 x i16> %b
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 591c521..484eb6c 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -3321,11 +3321,17 @@
 ; AVX1-NEXT:    vpinsrw $7, %ebp, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    popq %rbx
+; AVX1-NEXT:    .cfi_def_cfa_offset 48
 ; AVX1-NEXT:    popq %r12
+; AVX1-NEXT:    .cfi_def_cfa_offset 40
 ; AVX1-NEXT:    popq %r13
+; AVX1-NEXT:    .cfi_def_cfa_offset 32
 ; AVX1-NEXT:    popq %r14
+; AVX1-NEXT:    .cfi_def_cfa_offset 24
 ; AVX1-NEXT:    popq %r15
+; AVX1-NEXT:    .cfi_def_cfa_offset 16
 ; AVX1-NEXT:    popq %rbp
+; AVX1-NEXT:    .cfi_def_cfa_offset 8
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_16i1_to_16i16:
@@ -3412,11 +3418,17 @@
 ; AVX2-NEXT:    vpinsrw $7, %ebp, %xmm1, %xmm1
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT:    popq %rbx
+; AVX2-NEXT:    .cfi_def_cfa_offset 48
 ; AVX2-NEXT:    popq %r12
+; AVX2-NEXT:    .cfi_def_cfa_offset 40
 ; AVX2-NEXT:    popq %r13
+; AVX2-NEXT:    .cfi_def_cfa_offset 32
 ; AVX2-NEXT:    popq %r14
+; AVX2-NEXT:    .cfi_def_cfa_offset 24
 ; AVX2-NEXT:    popq %r15
+; AVX2-NEXT:    .cfi_def_cfa_offset 16
 ; AVX2-NEXT:    popq %rbp
+; AVX2-NEXT:    .cfi_def_cfa_offset 8
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_sext_16i1_to_16i16:
@@ -4812,6 +4824,7 @@
 ; X32-SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
 ; X32-SSE41-NEXT:    movd %xmm0, %eax
 ; X32-SSE41-NEXT:    popl %ecx
+; X32-SSE41-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE41-NEXT:    retl
 entry:
   %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 73c122a..591e1c5 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -617,6 +617,7 @@
 ; KNL32-NEXT:    vpblendvb %ymm3, 8(%ebp), %ymm1, %ymm1
 ; KNL32-NEXT:    movl %ebp, %esp
 ; KNL32-NEXT:    popl %ebp
+; KNL32-NEXT:    .cfi_def_cfa %esp, 4
 ; KNL32-NEXT:    retl
 entry:
   %0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32>  <i32 64, i32 1, i32 66, i32 3, i32 68, i32 5, i32 70, i32 7, i32 72, i32 9, i32 74, i32 11, i32 76, i32 13, i32 78, i32 15, i32 80, i32 17, i32 82, i32 19, i32 84, i32 21, i32 86, i32 23, i32 88, i32 25, i32 90, i32 27, i32 92, i32 29, i32 94, i32 31, i32 96, i32 33, i32 98, i32 35, i32 100, i32 37, i32 102, i32 39, i32 104, i32 41, i32 106, i32 43, i32 108, i32 45, i32 110, i32 47, i32 112, i32 49, i32 114, i32 51, i32 116, i32 53, i32 118, i32 55, i32 120, i32 57, i32 122, i32 59, i32 124, i32 61, i32 126, i32 63>
@@ -657,6 +658,7 @@
 ; KNL32-NEXT:    vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15]
 ; KNL32-NEXT:    movl %ebp, %esp
 ; KNL32-NEXT:    popl %ebp
+; KNL32-NEXT:    .cfi_def_cfa %esp, 4
 ; KNL32-NEXT:    retl
 entry:
   %0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32>  <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
index e7956c6..a15d633 100644
--- a/llvm/test/CodeGen/X86/wide-integer-cmp.ll
+++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
@@ -105,10 +105,13 @@
 ; CHECK-NEXT:  # %bb.1: # %bb1
 ; CHECK-NEXT:    movl $1, %eax
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:  .LBB4_2: # %bb2
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    movl $2, %eax
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK-NEXT:    retl
 entry:
   %cmp = icmp slt i128 %a, %b
diff --git a/llvm/test/CodeGen/X86/x86-64-psub.ll b/llvm/test/CodeGen/X86/x86-64-psub.ll
index 8cc8050..e1ca868 100644
--- a/llvm/test/CodeGen/X86/x86-64-psub.ll
+++ b/llvm/test/CodeGen/X86/x86-64-psub.ll
@@ -22,6 +22,7 @@
 ; CHECK-NEXT:    psubb %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %call = tail call { i64, double } @getFirstParam()
@@ -55,6 +56,7 @@
 ; CHECK-NEXT:    psubw %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %call = tail call { i64, double } @getFirstParam()
@@ -88,6 +90,7 @@
 ; CHECK-NEXT:    psubd %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %call = tail call { i64, double } @getFirstParam()
@@ -121,6 +124,7 @@
 ; CHECK-NEXT:    psubsb %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %call = tail call { i64, double } @getFirstParam()
@@ -154,6 +158,7 @@
 ; CHECK-NEXT:    psubsw %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %call = tail call { i64, double } @getFirstParam()
@@ -187,6 +192,7 @@
 ; CHECK-NEXT:    psubusb %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %call = tail call { i64, double } @getFirstParam()
@@ -220,6 +226,7 @@
 ; CHECK-NEXT:    psubusw %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %call = tail call { i64, double } @getFirstParam()
diff --git a/llvm/test/CodeGen/X86/x86-framelowering-trap.ll b/llvm/test/CodeGen/X86/x86-framelowering-trap.ll
index f1590ab..89f4528 100644
--- a/llvm/test/CodeGen/X86/x86-framelowering-trap.ll
+++ b/llvm/test/CodeGen/X86/x86-framelowering-trap.ll
@@ -6,6 +6,7 @@
 ; CHECK: pushq
 ; CHECK: ud2
 ; CHECK-NEXT: popq
+; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: retq
 define void @bar() {
 entry:
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index bc48ec0..84d0ae4 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -1801,6 +1801,7 @@
 ; AVX1-NEXT:    vmovaps %ymm6, 32(%rdi)
 ; AVX1-NEXT:    vmovaps %ymm8, (%rdi)
 ; AVX1-NEXT:    addq $24, %rsp
+; AVX1-NEXT:    .cfi_def_cfa_offset 8
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll b/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll
index 9c4cb67..7a5c7bc 100644
--- a/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll
+++ b/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll
@@ -20,6 +20,7 @@
 ; CHECK-NEXT:    movl $4, %eax
 ; CHECK-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
 ; CHECK-NEXT:    popq %rdx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   call void asm sideeffect "", "~{rax},~{rdx},~{xmm1},~{rdi},~{rsi},~{xmm0}"()
   ret i32 4