diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index fcec05a..4c1717e 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -9,6 +9,35 @@
 //
 // This is an extremely simple MachineInstr-level copy propagation pass.
 //
+// This pass forwards the source of COPYs to the users of their destinations
+// when doing so is legal.  For example:
+//
+//   %reg1 = COPY %reg0
+//   ...
+//   ... = OP %reg1
+//
+// If
+//   - %reg0 has not been clobbered by the time of the use of %reg1
+//   - the register class constraints are satisfied
+//   - the COPY def is the only value that reaches OP
+// then this pass replaces the above with:
+//
+//   %reg1 = COPY %reg0
+//   ...
+//   ... = OP %reg0
+//
+// This pass also removes some redundant COPYs.  For example:
+//
+//    %R1 = COPY %R0
+//    ... // No clobber of %R1
+//    %R0 = COPY %R1 <<< Removed
+//
+// or
+//
+//    %R1 = COPY %R0
+//    ... // No clobber of %R0
+//    %R1 = COPY %R0 <<< Removed
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
@@ -23,11 +52,13 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <iterator>
@@ -37,6 +68,9 @@
 #define DEBUG_TYPE "machine-cp"
 
 STATISTIC(NumDeletes, "Number of dead copies deleted");
+STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
+DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
+              "Controls which register COPYs are forwarded");
 
 namespace {
 
@@ -73,6 +107,10 @@
     void ReadRegister(unsigned Reg);
     void CopyPropagateBlock(MachineBasicBlock &MBB);
     bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
+    void forwardUses(MachineInstr &MI);
+    bool isForwardableRegClassCopy(const MachineInstr &Copy,
+                                   const MachineInstr &UseI, unsigned UseIdx);
+    bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
 
     /// Candidates for deletion.
     SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
@@ -208,6 +246,152 @@
   return true;
 }
 
+/// Decide whether we should forward the source of \param Copy to its use in
+/// \param UseI based on the physical register class constraints of the opcode
+/// and avoiding introducing more cross-class COPYs.
+bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
+                                                       const MachineInstr &UseI,
+                                                       unsigned UseIdx) {
+
+  unsigned CopySrcReg = Copy.getOperand(1).getReg();
+
+  // If the new register meets the opcode register constraints, then allow
+  // forwarding.
+  if (const TargetRegisterClass *URC =
+          UseI.getRegClassConstraint(UseIdx, TII, TRI))
+    return URC->contains(CopySrcReg);
+
+  if (!UseI.isCopy())
+    return false;
+
+  /// COPYs don't have register class constraints, so if the user instruction
+  /// is a COPY, we just try to avoid introducing additional cross-class
+  /// COPYs.  For example:
+  ///
+  ///   RegClassA = COPY RegClassB  // Copy parameter
+  ///   ...
+  ///   RegClassB = COPY RegClassA  // UseI parameter
+  ///
+  /// which after forwarding becomes
+  ///
+  ///   RegClassA = COPY RegClassB
+  ///   ...
+  ///   RegClassB = COPY RegClassB
+  ///
+  /// so we have reduced the number of cross-class COPYs and potentially
+  /// introduced a nop COPY that can be removed.
+  const TargetRegisterClass *UseDstRC =
+      TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
+
+  const TargetRegisterClass *SuperRC = UseDstRC;
+  for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
+       SuperRC; SuperRC = *SuperRCI++)
+    if (SuperRC->contains(CopySrcReg))
+      return true;
+
+  return false;
+}
+
+/// Check that \p MI does not have implicit uses that overlap with it's \p Use
+/// operand (the register being replaced), since these can sometimes be
+/// implicitly tied to other operands.  For example, on AMDGPU:
+///
+/// V_MOVRELS_B32_e32 %VGPR2, %M0<imp-use>, %EXEC<imp-use>, %VGPR2_VGPR3_VGPR4_VGPR5<imp-use>
+///
+/// the %VGPR2 is implicitly tied to the larger reg operand, but we have no
+/// way of knowing we need to update the latter when updating the former.
+bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
+                                                const MachineOperand &Use) {
+  for (const MachineOperand &MIUse : MI.uses())
+    if (&MIUse != &Use && MIUse.isReg() && MIUse.isImplicit() &&
+        MIUse.isUse() && TRI->regsOverlap(Use.getReg(), MIUse.getReg()))
+      return true;
+
+  return false;
+}
+
+/// Look for available copies whose destination register is used by \p MI and
+/// replace the use in \p MI with the copy's source register.
+void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
+  if (AvailCopyMap.empty())
+    return;
+
+  // Look for non-tied explicit vreg uses that have an active COPY
+  // instruction that defines the physical register allocated to them.
+  // Replace the vreg with the source of the active COPY.
+  for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx < OpEnd;
+       ++OpIdx) {
+    MachineOperand &MOUse = MI.getOperand(OpIdx);
+    // Don't forward into undef use operands since doing so can cause problems
+    // with the machine verifier, since it doesn't treat undef reads as reads,
+    // so we can end up with a live range that ends on an undef read, leading to
+    // an error that the live range doesn't end on a read of the live range
+    // register.
+    if (!MOUse.isReg() || MOUse.isTied() || MOUse.isUndef() || MOUse.isDef() ||
+        MOUse.isImplicit())
+      continue;
+
+    if (!MOUse.getReg())
+      continue;
+
+    // Check that the register is marked 'renamable' so we know it is safe to
+    // rename it without violating any constraints that aren't expressed in the
+    // IR (e.g. ABI or opcode requirements).
+    if (!MOUse.isRenamable())
+      continue;
+
+    auto CI = AvailCopyMap.find(MOUse.getReg());
+    if (CI == AvailCopyMap.end())
+      continue;
+
+    MachineInstr &Copy = *CI->second;
+    unsigned CopyDstReg = Copy.getOperand(0).getReg();
+    const MachineOperand &CopySrc = Copy.getOperand(1);
+    unsigned CopySrcReg = CopySrc.getReg();
+
+    // FIXME: Don't handle partial uses of wider COPYs yet.
+    if (MOUse.getReg() != CopyDstReg) {
+      DEBUG(dbgs() << "MCP: FIXME! Not forwarding COPY to sub-register use:\n  "
+                   << MI);
+      continue;
+    }
+
+    // Don't forward COPYs of reserved regs unless they are constant.
+    if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg))
+      continue;
+
+    if (!isForwardableRegClassCopy(Copy, MI, OpIdx))
+      continue;
+
+    if (hasImplicitOverlap(MI, MOUse))
+      continue;
+
+    if (!DebugCounter::shouldExecute(FwdCounter)) {
+      DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n  "
+                   << MI);
+      continue;
+    }
+
+    DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
+                 << "\n     with " << printReg(CopySrcReg, TRI) << "\n     in "
+                 << MI << "     from " << Copy);
+
+    MOUse.setReg(CopySrcReg);
+    if (!CopySrc.isRenamable())
+      MOUse.setIsRenamable(false);
+
+    DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
+
+    // Clear kill markers that may have been invalidated.
+    for (MachineInstr &KMI :
+         make_range(Copy.getIterator(), std::next(MI.getIterator())))
+      KMI.clearRegisterKills(CopySrcReg, TRI);
+
+    ++NumCopyForwards;
+    Changed = true;
+  }
+}
+
 void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
   DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
 
@@ -241,6 +425,11 @@
       if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
         continue;
 
+      forwardUses(*MI);
+
+      // Src may have been changed by forwardUses()
+      Src = MI->getOperand(1).getReg();
+
       // If Src is defined by a previous copy, the previous copy cannot be
       // eliminated.
       ReadRegister(Src);
@@ -292,6 +481,20 @@
       continue;
     }
 
+    // Clobber any earlyclobber regs first.
+    for (const MachineOperand &MO : MI->operands())
+      if (MO.isReg() && MO.isEarlyClobber()) {
+        unsigned Reg = MO.getReg();
+        // If we have a tied earlyclobber, that means it is also read by this
+        // instruction, so we need to make sure we don't remove it as dead
+        // later.
+        if (MO.isTied())
+          ReadRegister(Reg);
+        ClobberRegister(Reg);
+      }
+
+    forwardUses(*MI);
+
     // Not a copy.
     SmallVector<unsigned, 2> Defs;
     const MachineOperand *RegMask = nullptr;
@@ -307,7 +510,7 @@
       assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&
              "MachineCopyPropagation should be run after register allocation!");
 
-      if (MO.isDef()) {
+      if (MO.isDef() && !MO.isEarlyClobber()) {
         Defs.push_back(Reg);
         continue;
       } else if (MO.readsReg())
@@ -364,6 +567,8 @@
   // since we don't want to trust live-in lists.
   if (MBB.succ_empty()) {
     for (MachineInstr *MaybeDead : MaybeDeadCopies) {
+      DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
+            MaybeDead->dump());
       assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
       MaybeDead->eraseFromParent();
       Changed = true;
