Cachegrind: use memory block numbers as tags.
This saves instructions in the hot path, resulting in an
average improvement of 3% on the perf benchmarks.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13025 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/cg_sim.c b/cachegrind/cg_sim.c
index a9e133e..152e5dd 100644
--- a/cachegrind/cg_sim.c
+++ b/cachegrind/cg_sim.c
@@ -123,27 +123,38 @@
__attribute__((always_inline))
static Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
{
- UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
- UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
- UWord tag = a >> c->tag_shift;
+ /* A memory block has the size of a cache line */
+ UWord block1 = a >> c->line_size_bits;
+ UWord block2 = (a+size-1) >> c->line_size_bits;
+ UInt set1 = block1 & c->sets_min_1;
+
+ /* Tags used in real caches are minimal to save space.
+ * As the last bits of the block number of addresses mapping
+ * into one cache set are the same, real caches use as tag
+ * tag = block >> log2(#sets)
+ * But using the memory block as more specific tag is fine,
+ * and saves instructions.
+ */
+ UWord tag1 = block1;
/* Access entirely within line. */
- if (set1 == set2)
- return cachesim_setref_is_miss(c, set1, tag);
+ if (block1 == block2)
+ return cachesim_setref_is_miss(c, set1, tag1);
/* Access straddles two lines. */
- /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
- else if (((set1 + 1) & (c->sets_min_1)) == set2) {
- UWord tag2 = (a+size-1) >> c->tag_shift;
+ else if (block1 + 1 == block2) {
+ UInt set2 = block2 & c->sets_min_1;
+ UWord tag2 = block2;
/* always do both, as state is updated as side effect */
- if (cachesim_setref_is_miss(c, set1, tag)) {
+ if (cachesim_setref_is_miss(c, set1, tag1)) {
cachesim_setref_is_miss(c, set2, tag2);
return True;
}
return cachesim_setref_is_miss(c, set2, tag2);
}
- VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);
+ VG_(printf)("addr: %lx size: %u blocks: %ld %ld",
+ a, size, block1, block2);
VG_(tool_panic)("item straddles more than two cache sets");
/* not reached */
return True;