bpf: New post-RA peephole optimization pass to eliminate bad RA codegen

This new pass eliminates identical moves:

  MOV rA, rA

This is particularly likely to happen when sub-register support is
enabled. The special type cast insn MOV_32_64 involves different
register classes on src (i32) and dst (i64), so RA could generate a
useless instruction due to this.

This pass could also serve as the base for further post-RA optimizations.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
llvm-svn: 327370
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 8b94ba1..76d3e1c 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -18,8 +18,10 @@
 
 FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
 FunctionPass *createBPFMIPeepholePass();
+FunctionPass *createBPFMIPreEmitPeepholePass();
 
 void initializeBPFMIPeepholePass(PassRegistry&);
+void initializeBPFMIPreEmitPeepholePass(PassRegistry&);
 }
 
 #endif
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index cf26f24..ac13490 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -10,12 +10,15 @@
 // This pass performs peephole optimizations to cleanup ugly code sequences at
 // MachineInstruction layer.
 //
-// Currently, the only optimization in this pass is to eliminate type promotion
-// sequences, those zero extend 32-bit subregisters to 64-bit registers, if the
-// compiler could prove the subregisters is defined by 32-bit operations in
-// which case the upper half of the underlying 64-bit registers were zeroed
-// implicitly.
+// Currently, there are two optimizations implemented:
+//  - One pre-RA MachineSSA pass to eliminate type promotion sequences, i.e.
+//    those that zero extend 32-bit subregisters to 64-bit registers, if the
+//    compiler can prove the subregisters are defined by 32-bit operations,
+//    in which case the upper half of the underlying 64-bit registers has
+//    already been zeroed implicitly.
 //
+//  - One post-RA PreEmit pass to do final cleanup on some redundant
+//    instructions generated due to bad RA on subregister.
 //===----------------------------------------------------------------------===//
 
 #include "BPF.h"
@@ -69,7 +72,7 @@
   MF = &MFParm;
   MRI = &MF->getRegInfo();
   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
-  DEBUG(dbgs() << "*** BPF MI peephole pass ***\n\n");
+  DEBUG(dbgs() << "*** BPF MachineSSA peephole pass ***\n\n");
 }
 
 bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
@@ -166,8 +169,97 @@
 
 } // end default namespace
 
-INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE, "BPF MI Peephole Optimization",
-                false, false)
+INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
+                "BPF MachineSSA Peephole Optimization", false, false)
 
 char BPFMIPeephole::ID = 0;
 FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
+
+STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
+
+namespace {
+
+// Post-RA PreEmit peephole pass: final cleanup of redundant instructions that
+// register allocation may leave behind when sub-registers are in use.
+struct BPFMIPreEmitPeephole : public MachineFunctionPass {
+  static char ID;
+  MachineFunction *MF;            // Function currently being processed.
+  const TargetRegisterInfo *TRI;  // Register info of the BPF subtarget.
+
+  BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
+    initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
+  }
+
+  // Main entry point for this pass. Returns true if anything was eliminated.
+  bool runOnMachineFunction(MachineFunction &Fn) override {
+    if (skipFunction(Fn.getFunction()))
+      return false;
+
+    initialize(Fn);
+
+    return eliminateRedundantMov();
+  }
+
+private:
+  // Initialize class variables for the given function.
+  void initialize(MachineFunction &MFParm);
+
+  // Remove MOV_32_64 whose source is the 32-bit sub-register of its dest.
+  bool eliminateRedundantMov();
+};
+
+// Cache the per-function state (MF, TRI) used by the optimizations below.
+void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
+  MF = &MFParm;
+  TRI = MFParm.getSubtarget<BPFSubtarget>().getRegisterInfo();
+  DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
+}
+
+// Erase every MOV_32_64 whose source is the 32-bit sub-register of its own
+// destination. Returns true if at least one instruction was removed.
+bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
+  bool Eliminated = false;
+
+  for (MachineBasicBlock &MBB : *MF) {
+    // Step the iterator past MI before inspecting it, so that MI can be
+    // erased on the spot. Deferring the erase to the following iteration
+    // would leave behind a redundant move that is the last instruction
+    // visited, because no later iteration would run to remove it.
+    for (auto It = MBB.begin(), End = MBB.end(); It != End;) {
+      MachineInstr &MI = *It++;
+
+      // Eliminate identical move:
+      //
+      //   MOV rA, rA
+      //
+      // This is particularly likely to happen when sub-register support is
+      // enabled. The special type cast insn MOV_32_64 involves different
+      // register classes on src (i32) and dst (i64), so RA could generate a
+      // useless instruction due to this.
+      if (MI.getOpcode() != BPF::MOV_32_64)
+        continue;
+
+      unsigned Dst = MI.getOperand(0).getReg();
+      unsigned Src = MI.getOperand(1).getReg();
+      if (TRI->getSubReg(Dst, BPF::sub_32) != Src)
+        continue;
+
+      MI.eraseFromParent();
+      RedundantMovElemNum++;
+      Eliminated = true;
+    }
+  }
+
+  return Eliminated;
+}
+
+} // end default namespace
+
+INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
+                "BPF PreEmit Peephole Optimization", false, false)
+
+char BPFMIPreEmitPeephole::ID = 0;
+// Factory for the PreEmit peephole pass; returns a newly allocated instance.
+FunctionPass *llvm::createBPFMIPreEmitPeepholePass() {
+  return new BPFMIPreEmitPeephole();
+}
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 91ff64b..84d89bf 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -86,6 +86,7 @@
 
   bool addInstSelector() override;
   void addMachineSSAOptimization() override;
+  void addPreEmitPass() override;
 };
 }
 
@@ -110,3 +111,11 @@
   if (Subtarget->getHasAlu32() && !DisableMIPeephole)
     addPass(createBPFMIPeepholePass());
 }
+
+// Add the PreEmit peephole only when optimizing with ALU32 and MI peephole on.
+void BPFPassConfig::addPreEmitPass() {
+  const BPFSubtarget *ST = getBPFTargetMachine().getSubtargetImpl();
+  const bool Optimizing = getOptLevel() != CodeGenOpt::None;
+  if (Optimizing && ST->getHasAlu32() && !DisableMIPeephole)
+    addPass(createBPFMIPreEmitPeepholePass());
+}