Subzero: Decorate the text asm output with register availability info.

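The -asm-verbose flag adds comments to the text asm output about register availability.  Specifically, it prints the registers in use at the beginning and end of each block (as "# LiveIn=" and "# LiveOut=" comment lines), and it prints which registers' live ranges end at each instruction (as a trailing "# END=" comment).  When the flag is set, liveness is recomputed immediately before emission, and the pass that contracts empty nodes is disabled so that the reported liveness information is not corrupted.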

This is extremely helpful when studying the output to find opportunities to improve the code quality.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/682983004
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 84aff16..e8167f4 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -365,20 +365,15 @@
   return Valid;
 }
 
-// Deletes redundant assignments like "var=var".  This includes
-// architecturally redundant moves like "var1:eax=var2:eax".  As such,
-// this needs to be done very late in the translation to avoid
-// liveness inconsistencies.
-void Cfg::deleteRedundantAssignments() {
-  for (CfgNode *Node : Nodes) {
-    // Ignore Phi instructions.
-    for (Inst *I : Node->getInsts())
-      if (I->isRedundantAssign())
-        I->setDeleted();
-  }
-}
-
 void Cfg::contractEmptyNodes() {
+  // If we're decorating the asm output with register liveness info,
+  // this information may become corrupted or incorrect after
+  // contracting nodes that contain only redundant assignments.  As
+  // such, we disable this pass when DecorateAsm is specified.  This
+  // may make the resulting code look more branchy, but it should have
+  // no effect on the register assignments.
+  if (Ctx->getFlags().DecorateAsm)
+    return;
   for (CfgNode *Node : Nodes) {
     Node->contractIfEmpty();
   }
@@ -397,6 +392,12 @@
 
 void Cfg::emit() {
   TimerMarker T(TimerStack::TT_emit, this);
+  if (Ctx->getFlags().DecorateAsm) {
+    renumberInstructions();
+    getVMetadata()->init(VMK_Uses);
+    liveness(Liveness_Basic);
+    dump("After recomputing liveness for -decorate-asm");
+  }
   Ostream &Str = Ctx->getStrEmit();
   if (!Ctx->testAndSetHasEmittedFirstMethod()) {
     // Print a helpful command for assembling the output.
diff --git a/src/IceCfg.h b/src/IceCfg.h
index ad02831..fb8ebd9 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -121,7 +121,6 @@
   void livenessLightweight();
   void liveness(LivenessMode Mode);
   bool validateLiveness() const;
-  void deleteRedundantAssignments();
   void contractEmptyNodes();
   void doBranchOpt();
 
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 661dd73..b753078 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -756,9 +756,12 @@
     return;
   Inst *Branch = NULL;
   for (Inst *I : Insts) {
-    if (!I->isDeleted() && !I->isUnconditionalBranch())
+    if (I->isDeleted())
+      continue;
+    if (I->isUnconditionalBranch())
+      Branch = I;
+    else if (!I->isRedundantAssign())
       return;
-    Branch = I;
   }
   Branch->setDeleted();
   assert(OutEdges.size() == 1);
@@ -798,9 +801,73 @@
 
 // ======================== Dump routines ======================== //
 
+namespace {
+
+// Helper functions for emit().
+
+// Emits one asm comment line describing register liveness at a boundary of
+// Node: "# LiveIn=" when IsLiveIn is true, "# LiveOut=" otherwise, followed
+// by a comma-separated list of the live variables that have a register.
+// In the live-in case, LiveRegCount is also incremented once per listed
+// variable, indexed by that variable's register number.
+void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
+                       bool IsLiveIn, std::vector<SizeT> &LiveRegCount) {
+  Liveness *Info = Func->getLiveness();
+  const LivenessBV &Bits =
+      IsLiveIn ? Info->getLiveIn(Node) : Info->getLiveOut(Node);
+  Str << (IsLiveIn ? "\t\t\t\t# LiveIn=" : "\t\t\t\t# LiveOut=");
+  // Empty until the first variable is printed, then a comma.
+  const char *Separator = "";
+  for (SizeT Index = 0; Index < Bits.size(); ++Index) {
+    if (!Bits[Index])
+      continue;
+    Variable *Var = Info->getVariable(Index, Node);
+    // Variables without a register are not reported.
+    if (!Var->hasReg())
+      continue;
+    if (IsLiveIn)
+      ++LiveRegCount[Var->getRegNum()];
+    Str << Separator;
+    Separator = ",";
+    Var->emit(Func);
+  }
+  // Terminate the comment line.
+  Str << "\n";
+}
+
+// Appends a " \t# END=" comment naming each register-allocated source
+// variable of Instr whose last use is here and whose register's entry in
+// LiveRegCount reaches zero.  A register-allocated Dest is counted first.
+void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
+                         std::vector<SizeT> &LiveRegCount) {
+  if (Variable *Dest = Instr->getDest())
+    if (Dest->hasReg())
+      ++LiveRegCount[Dest->getRegNum()];
+  // Header text for the first variable printed; a comma for the rest.
+  const char *Prefix = " \t# END=";
+  for (SizeT SrcIdx = 0; SrcIdx < Instr->getSrcSize(); ++SrcIdx) {
+    Operand *Src = Instr->getSrc(SrcIdx);
+    const SizeT VarCount = Src->getNumVars();
+    for (SizeT VarIdx = 0; VarIdx < VarCount; ++VarIdx) {
+      const Variable *Var = Src->getVar(VarIdx);
+      // The count is decremented only on a register variable's last use.
+      if (!Var->hasReg() || !Instr->isLastUse(Var) ||
+          --LiveRegCount[Var->getRegNum()] != 0)
+        continue;
+      Str << Prefix;
+      Prefix = ",";
+      Var->emit(Func);
+    }
+  }
+}
+
+} // end of anonymous namespace
+
 void CfgNode::emit(Cfg *Func) const {
   Func->setCurrentNode(this);
   Ostream &Str = Func->getContext()->getStrEmit();
+  Liveness *Liveness = Func->getLiveness();
+  bool DecorateAsm = Liveness && Func->getContext()->getFlags().DecorateAsm;
   if (Func->getEntryNode() == this) {
     Str << Func->getContext()->mangleName(Func->getFunctionName()) << ":\n";
   }
@@ -809,6 +876,10 @@
     Assembler *Asm = Func->getAssembler<Assembler>();
     Asm->BindCfgNodeLabel(getIndex());
   }
+  std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
+  if (DecorateAsm)
+    emitRegisterUsage(Str, Func, this, true, LiveRegCount);
+
   for (InstPhi *Phi : Phis) {
     if (Phi->isDeleted())
       continue;
@@ -819,10 +890,18 @@
   for (Inst *I : Insts) {
     if (I->isDeleted())
       continue;
+    if (I->isRedundantAssign()) {
+      Variable *Dest = I->getDest();
+      if (DecorateAsm && Dest->hasReg() && !I->isLastUse(I->getSrc(0)))
+        ++LiveRegCount[Dest->getRegNum()];
+      continue;
+    }
     if (Func->useIntegratedAssembler()) {
       I->emitIAS(Func);
     } else {
       I->emit(Func);
+      if (DecorateAsm)
+        emitLiveRangesEnded(Str, Func, I, LiveRegCount);
       Str << "\n";
     }
     // Update emitted instruction count, plus fill/spill count for
@@ -841,6 +920,8 @@
       }
     }
   }
+  if (DecorateAsm)
+    emitRegisterUsage(Str, Func, this, false, LiveRegCount);
 }
 
 void CfgNode::dump(Cfg *Func) const {
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index eebca45..52b8f34 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -18,13 +18,13 @@
 
 namespace Ice {
 
-// TODO(stichnot) Move more command line flags into ClFlags.
 class ClFlags {
 public:
   ClFlags()
       : DisableInternal(false), SubzeroTimingEnabled(false),
         DisableTranslation(false), FunctionSections(false), DataSections(false),
-        UseIntegratedAssembler(false), UseSandboxing(false), DumpStats(false),
+        UseIntegratedAssembler(false), UseSandboxing(false),
+        PhiEdgeSplit(false), DecorateAsm(false), DumpStats(false),
         AllowUninitializedGlobals(false), TimeEachFunction(false),
         DefaultGlobalPrefix(""), DefaultFunctionPrefix(""), TimingFocusOn(""),
         VerboseFocusOn(""), TranslateOnly("") {}
@@ -36,6 +36,7 @@
   bool UseIntegratedAssembler;
   bool UseSandboxing;
   bool PhiEdgeSplit;
+  bool DecorateAsm;
   bool DumpStats;
   bool AllowUninitializedGlobals;
   bool TimeEachFunction;
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 7aa2483..4bfe4b4 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -153,6 +153,7 @@
     return false;
   }
 
+  virtual SizeT getNumRegisters() const = 0;
   // Returns a variable pre-colored to the specified physical
   // register.  This is generally used to get very direct access to
   // the register such as in the prolog or epilog or for marking
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index df7d04f..43d7261 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -392,7 +392,6 @@
     return;
   Func->dump("After stack frame mapping");
 
-  Func->deleteRedundantAssignments();
   Func->contractEmptyNodes();
   Func->reorderNodes();
 
@@ -435,8 +434,6 @@
     return;
   Func->dump("After stack frame mapping");
 
-  Func->deleteRedundantAssignments();
-
   // Nop insertion
   if (shouldDoNopInsertion()) {
     Func->doNopInsertion();
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 2ba6a15..85d6a67 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -35,6 +35,7 @@
   void translateO2() override;
   bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;
 
+  SizeT getNumRegisters() const override { return RegX8632::Reg_NUM; }
   Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override;
   IceString getRegName(SizeT RegNum, Type Ty) const override;
   llvm::SmallBitVector getRegisterSet(RegSetMask Include,
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index 1d7d193..ffbf196 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -120,6 +120,10 @@
                    cl::desc("Enable edge splitting for Phi lowering"),
                    cl::init(true));
 
+static cl::opt<bool> DecorateAsm(
+    "asm-verbose",
+    cl::desc("Decorate textual asm output with register liveness info"));
+
 static cl::opt<bool>
 DumpStats("stats",
           cl::desc("Print statistics after translating each function"));
@@ -256,6 +260,7 @@
   Flags.UseIntegratedAssembler = UseIntegratedAssembler;
   Flags.UseSandboxing = UseSandboxing;
   Flags.PhiEdgeSplit = EnablePhiEdgeSplit;
+  Flags.DecorateAsm = DecorateAsm;
   Flags.DumpStats = DumpStats;
   Flags.AllowUninitializedGlobals = AllowUninitializedGlobals;
   Flags.TimeEachFunction = TimeEachFunction;