This patch is a follow-up to r2244 which fixed bugzilla #287260 on
some platforms but not on all that we test.
The issue was that cprop_BB did not see that in Add32(t2,t3) the
driving expressions for t2 and t3 were the same. Therefore, the
Add was not replaced with a shift (which is necessary for proper
memcheck operation).
So in this patch:
(1) In cprop_BB, when setting up the "env", record *any* assignment
to a temporary (and not just those that are subject to copy
propagation).
(2) Pass this env down to fold_Expr and then sameIRExprs.
(3) Replace sameIRTemps with sameIRExprs and enhance it. Upon
encountering an RdTmp, check "env" and recurse into the
expression assigned to the temporary.
As a side, the functions sameIcoU32s and sameIRTempsOrIcoU32s
and replaced with sameIRExprs.
(4) Add some machinery to monitor frequency and effectiveness of
sameIRExprs (can be enabled by setting STATS_IROPT).
Hopefully, fixes #287260 for good.
git-svn-id: svn://svn.valgrind.org/vex/trunk@2246 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/ir_opt.c b/priv/ir_opt.c
index 0698cf7..78ac6ff 100644
--- a/priv/ir_opt.c
+++ b/priv/ir_opt.c
@@ -46,6 +46,9 @@
/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0
+/* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
+#define STATS_IROPT 0
+
/* What iropt does, 29 Dec 04.
@@ -883,41 +886,139 @@
/*--- Constant propagation and folding ---*/
/*---------------------------------------------------------------*/
+#if STATS_IROPT
+/* How often sameIRExprs was invoked */
+static UInt invocation_count;
+/* How often sameIRExprs recursed through IRTemp assignments */
+static UInt recursion_count;
+/* How often sameIRExprs found identical IRExprs */
+static UInt success_count;
+/* How often recursing through assignments to IRTemps helped
+ establishing equality. */
+static UInt recursion_success_count;
+/* Whether or not recursing through an IRTemp assignment helped
+ establishing IRExpr equality for a given sameIRExprs invocation. */
+static Bool recursion_helped;
+/* Whether or not a given sameIRExprs invocation recursed through an
+ IRTemp assignment */
+static Bool recursed;
+/* Maximum number of nodes ever visited when comparing two IRExprs. */
+static UInt max_nodes_visited;
+#endif /* STATS_IROPT */
+
+/* Count the number of nodes visited for a given sameIRExprs invocation. */
+static UInt num_nodes_visited;
+
+/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
+ This is to guard against performance degradation by visiting large
+ trees without success. */
+#define NODE_LIMIT 30
+
+
/* The env in this section is a map from IRTemp to IRExpr*,
that is, an array indexed by IRTemp. */
-/* Are both expressions simply the same IRTemp ? */
-static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
+/* Do both expressions compute the same value? The answer is generally
+ conservative, i.e. it will report that the expressions do not compute
+ the same value when in fact they do. The reason is that we do not
+ keep track of changes in the guest state and memory. Thusly, two
+ Get's, GetI's or Load's, even when accessing the same location, will be
+ assumed to compute different values. After all the accesses may happen
+ at different times and the guest state / memory can have changed in
+ the meantime. */
+static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
- return toBool( e1->tag == Iex_RdTmp
- && e2->tag == Iex_RdTmp
- && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
-}
+ if (e1->tag != e2->tag) return False;
-static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
-{
- return toBool( e1->tag == Iex_Const
- && e2->tag == Iex_Const
- && e1->Iex.Const.con->tag == Ico_U32
- && e2->Iex.Const.con->tag == Ico_U32
- && e1->Iex.Const.con->Ico.U32
- == e2->Iex.Const.con->Ico.U32 );
-}
+ if (num_nodes_visited++ > NODE_LIMIT) return False;
-/* Are both expressions either the same IRTemp or IRConst-U32s ? If
- in doubt, say No. */
-static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
-{
switch (e1->tag) {
case Iex_RdTmp:
- return sameIRTemps(e1, e2);
- case Iex_Const:
- return sameIcoU32s(e1, e2);
- default:
+ if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;
+
+ if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
+ Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
+ env[e2->Iex.RdTmp.tmp]);
+#if STATS_IROPT
+ recursed = True;
+ if (same) recursion_helped = True;
+#endif
+ return same;
+ }
return False;
+
+ case Iex_Get:
+ case Iex_GetI:
+ case Iex_Load:
+ /* Guest state / memory could have changed in the meantime. */
+ return False;
+
+ case Iex_Binop:
+ return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
+ && sameIRExprs_aux( env, e1->Iex.Binop.arg1, e2->Iex.Binop.arg1 )
+ && sameIRExprs_aux( env, e1->Iex.Binop.arg2, e2->Iex.Binop.arg2 ));
+
+ case Iex_Unop:
+ return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
+ && sameIRExprs_aux( env, e1->Iex.Unop.arg, e2->Iex.Unop.arg ));
+
+ case Iex_Const: {
+ IRConst *c1 = e1->Iex.Const.con;
+ IRConst *c2 = e2->Iex.Const.con;
+ vassert(c1->tag == c2->tag);
+ switch (c1->tag) {
+ case Ico_U1: return toBool( c1->Ico.U1 == c2->Ico.U1 );
+ case Ico_U8: return toBool( c1->Ico.U8 == c2->Ico.U8 );
+ case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
+ case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
+ case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
+ default: break;
+ }
+ return False;
+ }
+
+ case Iex_Triop:
+ return toBool( e1->Iex.Triop.op == e2->Iex.Triop.op
+ && sameIRExprs_aux( env, e1->Iex.Triop.arg1, e2->Iex.Triop.arg1 )
+ && sameIRExprs_aux( env, e1->Iex.Triop.arg2, e2->Iex.Triop.arg2 )
+ && sameIRExprs_aux( env, e1->Iex.Triop.arg3, e2->Iex.Triop.arg3 ));
+
+ case Iex_Mux0X:
+ return toBool( sameIRExprs_aux( env, e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
+ && sameIRExprs_aux( env, e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
+ && sameIRExprs_aux( env, e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX ));
+
+ default:
+ /* Not very likely to be "same". */
+ break;
}
+
+ return False;
}
+static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
+{
+ Bool same;
+
+ num_nodes_visited = 0;
+ same = sameIRExprs_aux(env, e1, e2);
+
+#if STATS_IROPT
+ ++invocation_count;
+ if (recursed) ++recursion_count;
+ success_count += same;
+ if (same && recursion_helped)
+ ++recursion_success_count;
+ if (num_nodes_visited > max_nodes_visited)
+ max_nodes_visited = num_nodes_visited;
+ recursed = False; /* reset */
+ recursion_helped = False; /* reset */
+#endif /* STATS_IROPT */
+
+ return same;
+}
+
+
/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
@@ -1014,7 +1115,7 @@
}
-static IRExpr* fold_Expr ( IRExpr* e )
+static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
Int shift;
IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
@@ -1638,7 +1739,7 @@
break;
}
/* Or8/Or16/Or32/Max32U(t,t) ==> t, for some IRTemp t */
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
@@ -1650,7 +1751,7 @@
x+x produces a defined least significant bit, and it seems
simplest just to get rid of the problem by rewriting it
out, since the opportunity to do so exists. */
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
IRExpr_Const(IRConst_U8(1)));
break;
@@ -1672,7 +1773,7 @@
break;
}
/* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = IRExpr_Binop(e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
break;
@@ -1686,7 +1787,7 @@
break;
}
/* Sub64(t,t) ==> 0, for some IRTemp t */
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
break;
}
@@ -1710,7 +1811,7 @@
break;
}
/* And32(t,t) ==> t, for some IRTemp t */
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
@@ -1720,7 +1821,7 @@
case Iop_And16:
case Iop_And64:
/* And8/And16/And64(t,t) ==> t, for some IRTemp t */
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
@@ -1728,7 +1829,7 @@
case Iop_OrV128:
/* V128(t,t) ==> t, for some IRTemp t */
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
@@ -1740,7 +1841,7 @@
case Iop_Xor64:
case Iop_XorV128:
/* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
break;
}
@@ -1748,7 +1849,7 @@
case Iop_Sub32:
/* Sub32(t,t) ==> 0, for some IRTemp t */
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
break;
}
@@ -1757,7 +1858,7 @@
case Iop_CmpEQ64:
case Iop_CmpEQ8x8:
case Iop_CmpEQ8x16:
- if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
break;
}
@@ -1782,15 +1883,15 @@
}
else
/* are the arms identical? (pretty weedy test) */
- if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
- e->Iex.Mux0X.exprX)) {
+ if (sameIRExprs(env, e->Iex.Mux0X.expr0,
+ e->Iex.Mux0X.exprX)) {
e2 = e->Iex.Mux0X.expr0;
}
}
/* Show cases where we've found but not folded 'op(t,t)'. */
if (0 && e == e2 && e->tag == Iex_Binop
- && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
vex_printf("IDENT: ");
ppIRExpr(e); vex_printf("\n");
}
@@ -1828,11 +1929,15 @@
switch (ex->tag) {
case Iex_RdTmp:
if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
- return env[(Int)ex->Iex.RdTmp.tmp];
- } else {
- /* not bound in env */
- return ex;
+ IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
+ if (rhs->tag == Iex_RdTmp)
+ return rhs;
+ if (rhs->tag == Iex_Const
+ && rhs->Iex.Const.con->tag != Ico_F64i)
+ return rhs;
}
+ /* not bound in env */
+ return ex;
case Iex_Const:
case Iex_Get:
@@ -1943,15 +2048,15 @@
vassert(isIRAtom(st->Ist.AbiHint.base));
vassert(isIRAtom(st->Ist.AbiHint.nia));
return IRStmt_AbiHint(
- fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
+ fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
st->Ist.AbiHint.len,
- fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
+ fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
);
case Ist_Put:
vassert(isIRAtom(st->Ist.Put.data));
return IRStmt_Put(
st->Ist.Put.offset,
- fold_Expr(subst_Expr(env, st->Ist.Put.data))
+ fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
);
case Ist_PutI:
@@ -1959,9 +2064,9 @@
vassert(isIRAtom(st->Ist.PutI.data));
return IRStmt_PutI(
st->Ist.PutI.descr,
- fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
+ fold_Expr(env, subst_Expr(env, st->Ist.PutI.ix)),
st->Ist.PutI.bias,
- fold_Expr(subst_Expr(env, st->Ist.PutI.data))
+ fold_Expr(env, subst_Expr(env, st->Ist.PutI.data))
);
case Ist_WrTmp:
@@ -1969,7 +2074,7 @@
allowed to be more than just a constant or a tmp. */
return IRStmt_WrTmp(
st->Ist.WrTmp.tmp,
- fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
+ fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
);
case Ist_Store:
@@ -1977,8 +2082,8 @@
vassert(isIRAtom(st->Ist.Store.data));
return IRStmt_Store(
st->Ist.Store.end,
- fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
- fold_Expr(subst_Expr(env, st->Ist.Store.data))
+ fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
+ fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
);
case Ist_CAS: {
@@ -1991,11 +2096,11 @@
vassert(isIRAtom(cas->dataLo));
cas2 = mkIRCAS(
cas->oldHi, cas->oldLo, cas->end,
- fold_Expr(subst_Expr(env, cas->addr)),
- cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
- fold_Expr(subst_Expr(env, cas->expdLo)),
- cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
- fold_Expr(subst_Expr(env, cas->dataLo))
+ fold_Expr(env, subst_Expr(env, cas->addr)),
+ cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi)) : NULL,
+ fold_Expr(env, subst_Expr(env, cas->expdLo)),
+ cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi)) : NULL,
+ fold_Expr(env, subst_Expr(env, cas->dataLo))
);
return IRStmt_CAS(cas2);
}
@@ -2007,9 +2112,9 @@
return IRStmt_LLSC(
st->Ist.LLSC.end,
st->Ist.LLSC.result,
- fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
+ fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
st->Ist.LLSC.storedata
- ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
+ ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
: NULL
);
@@ -2022,13 +2127,13 @@
d2->args = shallowCopyIRExprVec(d2->args);
if (d2->mFx != Ifx_None) {
vassert(isIRAtom(d2->mAddr));
- d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
+ d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
}
vassert(isIRAtom(d2->guard));
- d2->guard = fold_Expr(subst_Expr(env, d2->guard));
+ d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
for (i = 0; d2->args[i]; i++) {
vassert(isIRAtom(d2->args[i]));
- d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
+ d2->args[i] = fold_Expr(env, subst_Expr(env, d2->args[i]));
}
return IRStmt_Dirty(d2);
}
@@ -2047,7 +2152,7 @@
case Ist_Exit: {
IRExpr* fcond;
vassert(isIRAtom(st->Ist.Exit.guard));
- fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
+ fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
if (fcond->tag == Iex_Const) {
/* Interesting. The condition on this exit has folded down to
a constant. */
@@ -2091,10 +2196,9 @@
out->tyenv = deepCopyIRTypeEnv( in->tyenv );
/* Set up the env with which travels forward. This holds a
- substitution, mapping IRTemps to atoms, that is, IRExprs which
- are either IRTemps or IRConsts. Thus, copy and constant
- propagation is done. The environment is to be applied as we
- move along. Keys are IRTemps. Values are IRExpr*s.
+ substitution, mapping IRTemps to IRExprs. The environment
+ is to be applied as we move along. Keys are IRTemps.
+ Values are IRExpr*s.
*/
for (i = 0; i < n_tmps; i++)
env[i] = NULL;
@@ -2117,32 +2221,34 @@
/* If the statement has been folded into a no-op, forget it. */
if (st2->tag == Ist_NoOp) continue;
- /* Now consider what the stmt looks like. If it's of the form
- 't = const' or 't1 = t2', add it to the running environment
- and not to the output BB. Otherwise, add it to the output
- BB. Note, we choose not to propagate const when const is an
- F64i, so that F64i literals can be CSE'd later. This helps
- x86 floating point code generation. */
+ /* If the statement assigns to an IRTemp add it to the running
+ environment. This is for the benefit of copy propagation
+ and to allow sameIRExpr look through IRTemps. */
+ if (st2->tag == Ist_WrTmp) {
+ vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
+ env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
- if (st2->tag == Ist_WrTmp
- && st2->Ist.WrTmp.data->tag == Iex_Const
- && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
- /* 't = const' -- add to env.
- The pair (IRTemp, IRExpr*) is added. */
- env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
+ /* 't1 = t2' -- don't add to BB; will be optimized out */
+ if (st2->Ist.WrTmp.data->tag == Iex_RdTmp) continue;
+
+ /* 't = const' && 'const != F64i' -- don't add to BB
+ Note, we choose not to propagate const when const is an
+ F64i, so that F64i literals can be CSE'd later. This helps
+ x86 floating point code generation. */
+ if (st2->Ist.WrTmp.data->tag == Iex_Const
+ && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) continue;
}
- else
- if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
- /* 't1 = t2' -- add to env.
- The pair (IRTemp, IRExpr*) is added. */
- env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
- }
- else {
- /* Not interesting, copy st2 into the output block. */
- addStmtToIRSB( out, st2 );
- }
+
+ /* Not interesting, copy st2 into the output block. */
+ addStmtToIRSB( out, st2 );
}
+#if STATS_IROPT
+ vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n",
+ invocation_count, recursion_count, success_count,
+ recursion_success_count, max_nodes_visited);
+#endif
+
out->next = subst_Expr( env, in->next );
out->jumpkind = in->jumpkind;
return out;