Subzero: Use register availability during lowering to improve the code.
The problem is that given code like this:
a = b + c
d = a + e
...
... (use of a) ...
Lowering may produce code like this, at least on x86:
T1 = b
T1 += c
a = T1
T2 = a
T2 += e
d = T2
...
... (use of a) ...
If "a" has a long live range, it may not get a register, resulting in clumsy code in the middle of the sequence like "a=reg; reg=a". Normally one might expect store forwarding to make the clumsy code fast, but it does presumably add an extra instruction-retirement cycle to the critical path in a pointer-chasing loop, and makes a big difference on some benchmarks.
The simple fix here is, at the end of lowering "a=b+c", keep track of the final "a=T1" assignment. Then, when lowering "d=a+e" and we look up "a", we can substitute "T1". This slightly increases the live range of T1, but it does a great job of avoiding the redundant reload of the register from the stack location.
A more general fix (in the future) might be to do live range splitting and let the register allocator handle it.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095
R=kschimpf@google.com
Review URL: https://codereview.chromium.org/1385433002 .
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 9339b79..f78c107 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -570,6 +570,7 @@
// Ensure target lowering actually moved the cursor.
assert(Context.getCur() != Orig);
}
+ Context.availabilityReset();
// Do preliminary lowering of the Phi instructions.
Target->prelowerPhis();
}
@@ -683,7 +684,7 @@
// Validate the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false. On success, it returns true.
-bool CfgNode::livenessValidateIntervals(Liveness *Liveness) {
+bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
if (!BuildDefs::asserts())
return true;
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index 6aae2b3..14eafa9 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -110,7 +110,7 @@
private:
CfgNode(Cfg *Func, SizeT LabelIndex);
- bool livenessValidateIntervals(Liveness *Liveness);
+ bool livenessValidateIntervals(Liveness *Liveness) const;
Cfg *const Func;
SizeT Number; /// invariant: Func->Nodes[Number]==this
const SizeT LabelNumber; /// persistent number for label generation
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 480b907..4922848 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -137,7 +137,7 @@
const DefUseErrorList &DefsWithoutUses,
const DefUseErrorList &UsesBeforeDefs,
const CfgVector<InstNumberT> &LRBegin,
- const CfgVector<InstNumberT> &LREnd) {
+ const CfgVector<InstNumberT> &LREnd) const {
if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
return true;
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
index 6508aa5..d0aa111 100644
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
@@ -62,7 +62,7 @@
bool livenessValidateIntervals(const DefUseErrorList &DefsWithoutUses,
const DefUseErrorList &UsesBeforeDefs,
const CfgVector<InstNumberT> &LRBegin,
- const CfgVector<InstNumberT> &LREnd);
+ const CfgVector<InstNumberT> &LREnd) const;
void initForGlobal();
void initForInfOnly();
/// Move an item from the From set to the To set. From[Index] is pushed onto
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index eb4a54d..9a8a02a 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -46,6 +46,7 @@
Cur = Begin;
skipDeleted(Cur);
Next = Cur;
+ availabilityReset();
}
void LoweringContext::insert(Inst *Inst) {
@@ -70,6 +71,31 @@
return LastInserted;
}
+void LoweringContext::availabilityReset() {
+ LastDest = nullptr;
+ LastSrc = nullptr;
+}
+
+void LoweringContext::availabilityUpdate() {
+ availabilityReset();
+ Inst *Instr = LastInserted;
+ if (Instr == nullptr)
+ return;
+ if (!Instr->isSimpleAssign())
+ return;
+ if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr->getSrc(0))) {
+ LastDest = Instr->getDest();
+ LastSrc = SrcVar;
+ }
+}
+
+Variable *LoweringContext::availabilityGet(Operand *Src) const {
+ assert(Src);
+ if (Src == LastDest)
+ return LastSrc;
+ return nullptr;
+}
+
TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
#define SUBZERO_TARGET(X) \
if (Target == Target_##X) \
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 0095a39..f35983f 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -65,6 +65,9 @@
void setNext(InstList::iterator N) { Next = N; }
void rewind();
void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
+ void availabilityReset();
+ void availabilityUpdate();
+ Variable *availabilityGet(Operand *Src) const;
private:
/// Node is the argument to Inst::updateVars().
@@ -85,6 +88,11 @@
InstList::iterator Begin;
/// End is a copy of Insts.end(), used if Next needs to be advanced.
InstList::iterator End;
+ /// LastDest and LastSrc capture the parameters of the last "Dest=Src" simple
+ /// assignment inserted (provided Src is a variable). This is used for simple
+ /// availability analysis.
+ Variable *LastDest = nullptr;
+ Variable *LastSrc = nullptr;
void skipDeleted(InstList::iterator &I) const;
void advanceForward(InstList::iterator &I) const;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 7f8b79b..0bcb5f7 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -5016,6 +5016,24 @@
// the shl shift amount to be either an immediate or in ecx.)
assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
+ // Substitute with an available infinite-weight variable if possible. Only do
+ // this when we are not asking for a specific register, and when the
+ // substitution is not locked to a specific register, and when the types
+ // match, in order to capture the vast majority of opportunities and avoid
+ // corner cases in the lowering.
+ if (RegNum == Variable::NoRegister) {
+ if (Variable *Subst = getContext().availabilityGet(From)) {
+ // At this point we know there is a potential substitution available.
+ if (Subst->mustHaveReg() && !Subst->hasReg()) {
+ // At this point we know the substitution will have a register.
+ if (From->getType() == Subst->getType()) {
+ // At this point we know the substitution's register is compatible.
+ return Subst;
+ }
+ }
+ }
+ }
+
if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
// Before doing anything with a Mem operand, we need to ensure that the
// Base and Index components are in physical registers.
@@ -5239,6 +5257,7 @@
if (Ctx->getFlags().getOptLevel() == Opt_m1)
return;
markRedefinitions();
+ Context.availabilityUpdate();
}
template <class Machine>
diff --git a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
index 9eca74d..3f3dbaf 100644
--- a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
@@ -77,10 +77,12 @@
ret void
}
; CHECK-LABEL: CallIndirectGlobal
+; Allow the first call to be to a different register because of simple
+; availability optimization.
+; CHECK: call
; CHECK: call [[REGISTER:[a-z]+]]
; CHECK: call [[REGISTER]]
; CHECK: call [[REGISTER]]
-; CHECK: call [[REGISTER]]
;
; OPTM1-LABEL: CallIndirectGlobal
; OPTM1: call [[TARGET:.+]]
diff --git a/tests_lit/llvm2ice_tests/phi.ll b/tests_lit/llvm2ice_tests/phi.ll
index 86da00d..d5ca2b1 100644
--- a/tests_lit/llvm2ice_tests/phi.ll
+++ b/tests_lit/llvm2ice_tests/phi.ll
@@ -92,12 +92,15 @@
; CHECK-LABEL: testPhi3
; CHECK: push [[EBX:.*]]
-; CHECK: mov {{.*}},DWORD PTR [esp
-; CHECK: mov
-; CHECK: mov {{.*}},DWORD PTR [[ADDR:.*0x3e8]]
+; CHECK: mov [[EAX:.*]],DWORD PTR [esp
+; CHECK: mov [[ECX:.*]],[[EAX]]
+;;; start of loop body
+; CHECK: mov [[EDX:.*]],[[ECX]]
+; CHECK: mov {{.*}},DWORD PTR [{{.*}}+0x3e8]
; CHECK: cmp {{.*}},0x0
; CHECK: jne
-; CHECK: mov DWORD PTR [[ADDR]]
+;;; start of epilog
+; CHECK: mov DWORD PTR {{.}}[[EDX]]+0x3e8],
; CHECK: pop [[EBX]]
; Test of "advanced phi lowering" with undef phi arg (integer vector).