Subzero: Add branch optimization.
1. Unconditional branch to the next basic block is removed.
2. For a conditional branch with a "false" edge to the next basic block, remove the unconditional branch to the fallthrough block.
3. For a conditional branch with a "true" edge to the next basic block, invert the condition and do like #2.
This is enabled only for O2, particularly because inverting the branch condition is a marginally risky operation.
This decreases the instruction count by about 5-6%.
Also, --stats prints a final tally to make it easier to post-process the output.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/580903005
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index d699ef5..4439323 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -290,6 +290,14 @@
return Valid;
}
+void Cfg::doBranchOpt() {
+ for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ NodeList::iterator NextNode = I;
+ ++NextNode;
+ (*I)->doBranchOpt(*NextNode);
+ }
+}
+
// ======================== Dump routines ======================== //
void Cfg::emit() {
diff --git a/src/IceCfg.h b/src/IceCfg.h
index e12bc9d..a2582eb 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -94,6 +94,7 @@
void livenessLightweight();
void liveness(LivenessMode Mode);
bool validateLiveness() const;
+ void doBranchOpt();
// Manage the CurrentNode field, which is used for validating the
// Variable::DefNode field during dumping/emitting.
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 4de2f57..57aba6b 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -457,6 +457,19 @@
}
}
+void CfgNode::doBranchOpt(const CfgNode *NextNode) {
+ TargetLowering *Target = Func->getTarget();
+ // Check every instruction for a branch optimization opportunity.
+ // It may be more efficient to iterate in reverse and stop after the
+ // first opportunity, unless there is some target lowering where we
+ // have the possibility of multiple such optimizations per block
+ // (currently not the case for x86 lowering).
+ for (InstList::const_iterator I = Insts.begin(), E = Insts.end(); I != E;
+ ++I) {
+ Target->doBranchOpt(*I, NextNode);
+ }
+}
+
// ======================== Dump routines ======================== //
void CfgNode::emit(Cfg *Func) const {
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index 954977f..eba98c0 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -65,6 +65,7 @@
void livenessLightweight();
bool liveness(Liveness *Liveness);
void livenessPostprocess(LivenessMode Mode, Liveness *Liveness);
+ void doBranchOpt(const CfgNode *NextNode);
void emit(Cfg *Func) const;
void dump(Cfg *Func) const;
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 47c7aa8..319e18a 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -384,10 +384,14 @@
llvm_unreachable("Unknown type");
}
-void GlobalContext::dumpStats(const IceString &Name) {
+void GlobalContext::dumpStats(const IceString &Name, bool Final) {
if (Flags.DumpStats) {
- StatsFunction.dump(Name, getStrDump());
- StatsCumulative.dump("_TOTAL_", getStrDump());
+ if (Final) {
+ StatsCumulative.dump(Name, getStrDump());
+ } else {
+ StatsFunction.dump(Name, getStrDump());
+ StatsCumulative.dump("_TOTAL_", getStrDump());
+ }
}
}
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index da4b6a1..f997b65 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -132,7 +132,7 @@
// Reset stats at the beginning of a function.
void resetStats() { StatsFunction.reset(); }
- void dumpStats(const IceString &Name);
+ void dumpStats(const IceString &Name, bool Final = false);
void statsUpdateEmitted(uint32_t InstCount) {
StatsFunction.updateEmitted(InstCount);
StatsCumulative.updateEmitted(InstCount);
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index a61805e..df28696 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -24,11 +24,12 @@
namespace {
const struct InstX8632BrAttributes_ {
+ InstX8632::BrCond Opposite;
const char *DisplayString;
const char *EmitString;
} InstX8632BrAttributes[] = {
-#define X(tag, dump, emit) \
- { dump, emit } \
+#define X(tag, opp, dump, emit) \
+ { InstX8632::opp, dump, emit } \
,
ICEINSTX8632BR_TABLE
#undef X
@@ -128,11 +129,52 @@
return ".L" + Func->getFunctionName() + "$local$__" + buf;
}
-InstX8632Br::InstX8632Br(Cfg *Func, CfgNode *TargetTrue, CfgNode *TargetFalse,
- InstX8632Label *Label, InstX8632::BrCond Condition)
+InstX8632Br::InstX8632Br(Cfg *Func, const CfgNode *TargetTrue,
+ const CfgNode *TargetFalse,
+ const InstX8632Label *Label,
+ InstX8632::BrCond Condition)
: InstX8632(Func, InstX8632::Br, 0, NULL), Condition(Condition),
TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
+bool InstX8632Br::optimizeBranch(const CfgNode *NextNode) {
+ // If there is no next block, then there can be no fallthrough to
+ // optimize.
+ if (NextNode == NULL)
+ return false;
+ // Intra-block conditional branches can't be optimized.
+ if (Label)
+ return false;
+ // If there is no fallthrough node, such as a non-default case label
+ // for a switch instruction, then there is no opportunity to
+ // optimize.
+ if (getTargetFalse() == NULL)
+ return false;
+
+ // Unconditional branch to the next node can be removed.
+ if (Condition == Br_None && getTargetFalse() == NextNode) {
+ assert(getTargetTrue() == NULL);
+ setDeleted();
+ return true;
+ }
+ // If the fallthrough is to the next node, set fallthrough to NULL
+ // to indicate.
+ if (getTargetFalse() == NextNode) {
+ TargetFalse = NULL;
+ return true;
+ }
+ // If TargetTrue is the next node, and TargetFalse is non-NULL
+ // (which was already tested above), then invert the branch
+ // condition, swap the targets, and set new fallthrough to NULL.
+ if (getTargetTrue() == NextNode) {
+ assert(Condition != Br_None);
+ Condition = InstX8632BrAttributes[Condition].Opposite;
+ TargetTrue = getTargetFalse();
+ TargetFalse = NULL;
+ return true;
+ }
+ return false;
+}
+
InstX8632Call::InstX8632Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
: InstX8632(Func, InstX8632::Call, 1, Dest) {
HasSideEffects = true;
diff --git a/src/IceInstX8632.def b/src/IceInstX8632.def
index 932500c..16c6fb8 100644
--- a/src/IceInstX8632.def
+++ b/src/IceInstX8632.def
@@ -51,20 +51,20 @@
//#define X(val, name)
#define ICEINSTX8632BR_TABLE \
- /* enum value, dump, emit */ \
- X(Br_a, "a", "ja") \
- X(Br_ae, "ae", "jae") \
- X(Br_b, "b", "jb") \
- X(Br_be, "be", "jbe") \
- X(Br_e, "e", "je") \
- X(Br_g, "g", "jg") \
- X(Br_ge, "ge", "jge") \
- X(Br_l, "l", "jl") \
- X(Br_le, "le", "jle") \
- X(Br_ne, "ne", "jne") \
- X(Br_np, "np", "jnp") \
- X(Br_p, "p", "jp") \
-//#define X(tag, dump, emit)
+ /* enum value, opposite, dump, emit */ \
+ X(Br_a, Br_be, "a", "ja") \
+ X(Br_ae, Br_b, "ae", "jae") \
+ X(Br_b, Br_ae, "b", "jb") \
+ X(Br_be, Br_a, "be", "jbe") \
+ X(Br_e, Br_ne, "e", "je") \
+ X(Br_g, Br_le, "g", "jg") \
+ X(Br_ge, Br_l, "ge", "jge") \
+ X(Br_l, Br_ge, "l", "jl") \
+ X(Br_le, Br_g, "le", "jle") \
+ X(Br_ne, Br_e, "ne", "jne") \
+ X(Br_np, Br_p, "np", "jnp") \
+ X(Br_p, Br_np, "p", "jp") \
+//#define X(tag, opp, dump, emit)
#define ICEINSTX8632CMPPS_TABLE \
/* enum value, emit */ \
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index f0558db..09615ce 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -225,7 +225,7 @@
};
enum BrCond {
-#define X(tag, dump, emit) tag,
+#define X(tag, opp, dump, emit) tag,
ICEINSTX8632BR_TABLE
#undef X
Br_None
@@ -309,53 +309,62 @@
// Create a conditional branch to a node.
static InstX8632Br *create(Cfg *Func, CfgNode *TargetTrue,
CfgNode *TargetFalse, BrCond Condition) {
+ const InstX8632Label *NoLabel = NULL;
return new (Func->allocate<InstX8632Br>())
- InstX8632Br(Func, TargetTrue, TargetFalse, NULL, Condition);
+ InstX8632Br(Func, TargetTrue, TargetFalse, NoLabel, Condition);
}
// Create an unconditional branch to a node.
static InstX8632Br *create(Cfg *Func, CfgNode *Target) {
+ const CfgNode *NoCondTarget = NULL;
+ const InstX8632Label *NoLabel = NULL;
return new (Func->allocate<InstX8632Br>())
- InstX8632Br(Func, NULL, Target, NULL, Br_None);
+ InstX8632Br(Func, NoCondTarget, Target, NoLabel, Br_None);
}
// Create a non-terminator conditional branch to a node, with a
// fallthrough to the next instruction in the current node. This is
// used for switch lowering.
static InstX8632Br *create(Cfg *Func, CfgNode *Target, BrCond Condition) {
+ const CfgNode *NoUncondTarget = NULL;
+ const InstX8632Label *NoLabel = NULL;
return new (Func->allocate<InstX8632Br>())
- InstX8632Br(Func, Target, NULL, NULL, Condition);
+ InstX8632Br(Func, Target, NoUncondTarget, NoLabel, Condition);
}
// Create a conditional intra-block branch (or unconditional, if
// Condition==Br_None) to a label in the current block.
static InstX8632Br *create(Cfg *Func, InstX8632Label *Label,
BrCond Condition) {
+ const CfgNode *NoCondTarget = NULL;
+ const CfgNode *NoUncondTarget = NULL;
return new (Func->allocate<InstX8632Br>())
- InstX8632Br(Func, NULL, NULL, Label, Condition);
+ InstX8632Br(Func, NoCondTarget, NoUncondTarget, Label, Condition);
}
- CfgNode *getTargetTrue() const { return TargetTrue; }
- CfgNode *getTargetFalse() const { return TargetFalse; }
+ const CfgNode *getTargetTrue() const { return TargetTrue; }
+ const CfgNode *getTargetFalse() const { return TargetFalse; }
+ bool optimizeBranch(const CfgNode *NextNode);
virtual uint32_t getEmitInstCount() const {
+ uint32_t Sum = 0;
if (Label)
- return 1;
- if (Condition == Br_None)
- return 1;
+ ++Sum;
+ if (getTargetTrue())
+ ++Sum;
if (getTargetFalse())
- return 2;
- return 1;
+ ++Sum;
+ return Sum;
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
private:
- InstX8632Br(Cfg *Func, CfgNode *TargetTrue, CfgNode *TargetFalse,
- InstX8632Label *Label, BrCond Condition);
+ InstX8632Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
+ const InstX8632Label *Label, BrCond Condition);
InstX8632Br(const InstX8632Br &) LLVM_DELETED_FUNCTION;
InstX8632Br &operator=(const InstX8632Br &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Br() {}
BrCond Condition;
- CfgNode *TargetTrue;
- CfgNode *TargetFalse;
- InstX8632Label *Label; // Intra-block branch target
+ const CfgNode *TargetTrue;
+ const CfgNode *TargetFalse;
+ const InstX8632Label *Label; // Intra-block branch target
};
// AdjustStack instruction - subtracts esp by the given amount and
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index b15c4f1..7383fff 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -125,6 +125,11 @@
void doNopInsertion();
// Lowers a single instruction.
void lower();
+ // Tries to do branch optimization on a single instruction. Returns
+ // true if some optimization was done.
+ virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
+ return false;
+ }
// Returns a variable pre-colored to the specified physical
// register. This is generally used to get very direct access to
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 464a2e8..31c11d4 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -395,6 +395,14 @@
T_genFrame.printElapsedUs(Context, "genFrame()");
Func->dump("After stack frame mapping");
+ // Branch optimization. This needs to be done just before code
+ // emission. In particular, no transformations that insert or
+ // reorder CfgNodes should be done after branch optimization. We go
+ // ahead and do it before nop insertion to reduce the amount of work
+ // needed for searching for opportunities.
+ Func->doBranchOpt();
+ Func->dump("After branch optimization");
+
// Nop insertion
if (shouldDoNopInsertion()) {
Func->doNopInsertion();
@@ -444,6 +452,13 @@
}
}
+bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
+ if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
+ return Br->optimizeBranch(NextNode);
+ }
+ return false;
+}
+
IceString TargetX8632::RegNames[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP) \
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 99235de..a104dac 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -28,6 +28,7 @@
virtual void translateOm1();
virtual void translateO2();
+ virtual bool doBranchOpt(Inst *I, const CfgNode *NextNode);
virtual Variable *getPhysicalRegister(SizeT RegNum);
virtual IceString getRegName(SizeT RegNum, Type Ty) const;
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index 609804c..1353b07 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -2247,7 +2247,6 @@
<< "\n";
ErrorStatus = true;
}
- return;
}
} // end of namespace Ice
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index 3cb13b3..f147877 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -166,10 +166,11 @@
Ice::GlobalContext Ctx(Ls, Os, VMask, TargetArch, OptLevel, TestPrefix,
Flags);
+ int ErrorStatus = 0;
if (BuildOnRead) {
Ice::PNaClTranslator Translator(&Ctx, Flags);
Translator.translate(IRFilename);
- return Translator.getErrorStatus();
+ ErrorStatus = Translator.getErrorStatus();
} else {
// Parse the input LLVM IR file into a module.
SMDiagnostic Err;
@@ -189,6 +190,9 @@
Ice::Converter Converter(Mod, &Ctx, Flags);
Converter.convertToIce();
- return Converter.getErrorStatus();
+ ErrorStatus = Converter.getErrorStatus();
}
+ const bool FinalStats = true;
+ Ctx.dumpStats("_FINAL_", FinalStats);
+ return ErrorStatus;
}