-----------------------------------------------------------------------------
overview
-----------------------------------------------------------------------------
This commit introduces an optimisation that speeds up Memcheck by roughly
-3 -- 28%, and Addrcheck by 1 -- 36%, at least for the SPEC2000 benchmarks on
my 1400MHz Athlon.
Basic idea: that handling of A/V bit updates on %esp-adjustments was quite
sub-optimal -- for each "PUT ESP", a function was called that computed the
delta from the old and new ESPs, and then called a looping function to deal
with it.
Improvements:
1. most of the time, the delta can be seen from the code. So there's no need
to compute it.
2. when the delta is known, we can directly call a skin function to handle it.
3. we can specialise for certain common cases (eg. +/- 4, 8, 12, 16, 32),
including having unrolled loops for these.
This slightly bloats UCode because of setting up args for the call, and for
updating ESP in code (previously was done in the called C function). Eg. for
`date' the code expansion ratio goes from 14.2 --> 14.6. But it's much faster.
Note that skins don't have to use the specialised cases, they can just
define the ordinary case if they want; the specialised cases are only used
if present.
-----------------------------------------------------------------------------
details
-----------------------------------------------------------------------------
Removed addrcheck/ac_common.c, put its (minimal) contents in ac_main.c.
Updated the major interface version, because this change isn't binary
compatible with the old core/skin interface.
Removed the hooks {new,die}_mem_stack_aligned, replaced with the better
{new,die}_mem_stack_{4,8,12,16,32}. Still have the generic {die,new}_mem_stack
hooks. These are called directly from UCode, thanks to a new pass that occurs
between instrumentation and register allocation (but only if the skin uses
these stack-adjustment hooks). VG_(unknown_esp_update)() is called from UCode
for the generic case; it determines if it's a stack switch, and calls the
generic {new,die}_mem_stack hooks accordingly. This meant
synth_handle_esp_assignment() could be removed.
The new %esp-delta computation phase is in vg_translate.c.
In Memcheck and Addrcheck, added functions for updating the A and V bits of a
single aligned word and a single aligned doubleword. These are called from the
specialised functions new_mem_stack_4, etc. This allowed removal of the ones
for the old hooks new_mem_stack_aligned and die_mem_stack_aligned.
In mc_common.h, added a big macro containing the definitions of new_mem_stack_4
et al. It's ``instantiated'' separately by Memcheck and Addrcheck. The macro
is a bit klugey, but I did it that way because speed is vital for these
functions, so eg. a function pointer would have slowed things down.
Updated the built-in profiling events appropriately for the changes (removed
one old event, added a new one; finding their names is left as an exercise for
the reader).
Fixed memory event profiling in {Addr,Mem}check, which had rotted.
A few other minor things.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1510 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/addrcheck/ac_main.c b/addrcheck/ac_main.c
index eb8a31d..5b3cc6b 100644
--- a/addrcheck/ac_main.c
+++ b/addrcheck/ac_main.c
@@ -34,6 +34,10 @@
#include "memcheck.h"
//#include "vg_profile.c"
+#include "mc_common.c"
+
+
+
VG_DETERMINE_INTERFACE_VERSION
/*------------------------------------------------------------*/
@@ -228,17 +232,6 @@
# undef STREQ
-/*------------------------------------------------------------*/
-/*--- Profiling events ---*/
-/*------------------------------------------------------------*/
-
-typedef
- enum {
- VgpCheckMem = VgpFini+1,
- VgpSetMem
- }
- VgpSkinCC;
-
#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
/*------------------------------------------------------------*/
@@ -536,6 +529,82 @@
set_address_range_perms ( a, len, VGM_BIT_VALID );
}
+static __inline__
+void make_aligned_word_noaccess(Addr a)
+{
+ AcSecMap* sm;
+ UInt sm_off;
+ UChar mask;
+
+ VGP_PUSHCC(VgpESPAdj);
+ ENSURE_MAPPABLE(a, "make_aligned_word_noaccess");
+ sm = primary_map[a >> 16];
+ sm_off = a & 0xFFFF;
+ mask = 0x0F;
+ mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */
+ /* mask now contains 1s where we wish to make address bits invalid (1s). */
+ sm->abits[sm_off >> 3] |= mask;
+ VGP_POPCC(VgpESPAdj);
+}
+
+static __inline__
+void make_aligned_word_accessible(Addr a)
+{
+ AcSecMap* sm;
+ UInt sm_off;
+ UChar mask;
+
+ VGP_PUSHCC(VgpESPAdj);
+ ENSURE_MAPPABLE(a, "make_aligned_word_accessible");
+ sm = primary_map[a >> 16];
+ sm_off = a & 0xFFFF;
+ mask = 0x0F;
+ mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */
+   /* mask now contains 1s where we wish to make address bits
+      valid (0s). */
+ sm->abits[sm_off >> 3] &= ~mask;
+ VGP_POPCC(VgpESPAdj);
+}
+
+/* Nb: by "aligned" here we mean 8-byte aligned */
+static __inline__
+void make_aligned_doubleword_accessible(Addr a)
+{
+ AcSecMap* sm;
+ UInt sm_off;
+
+ VGP_PUSHCC(VgpESPAdj);
+ ENSURE_MAPPABLE(a, "make_aligned_doubleword_accessible");
+ sm = primary_map[a >> 16];
+ sm_off = a & 0xFFFF;
+ sm->abits[sm_off >> 3] = VGM_BYTE_VALID;
+ VGP_POPCC(VgpESPAdj);
+}
+
+static __inline__
+void make_aligned_doubleword_noaccess(Addr a)
+{
+ AcSecMap* sm;
+ UInt sm_off;
+
+ VGP_PUSHCC(VgpESPAdj);
+ ENSURE_MAPPABLE(a, "make_aligned_doubleword_noaccess");
+ sm = primary_map[a >> 16];
+ sm_off = a & 0xFFFF;
+ sm->abits[sm_off >> 3] = VGM_BYTE_INVALID;
+ VGP_POPCC(VgpESPAdj);
+}
+
+/* The %esp update handling functions */
+ESP_UPDATE_HANDLERS ( make_aligned_word_accessible,
+ make_aligned_word_noaccess,
+ make_aligned_doubleword_accessible,
+ make_aligned_doubleword_noaccess,
+ ac_make_accessible,
+ ac_make_noaccess
+ );
+
+
/* Block-copy permissions (needed for implementing realloc()). */
static void ac_copy_address_range_state ( Addr src, Addr dst, UInt len )
@@ -603,66 +672,6 @@
/*--- Memory event handlers ---*/
/*------------------------------------------------------------*/
-/* Setting permissions for aligned words. This supports fast stack
- operations. */
-
-static void ac_make_noaccess_aligned ( Addr a, UInt len )
-{
- AcSecMap* sm;
- UInt sm_off;
- UChar mask;
- Addr a_past_end = a + len;
-
- VGP_PUSHCC(VgpSetMem);
-
- PROF_EVENT(50);
-# ifdef VG_DEBUG_MEMORY
- sk_assert(IS_ALIGNED4_ADDR(a));
- sk_assert(IS_ALIGNED4_ADDR(len));
-# endif
-
- for ( ; a < a_past_end; a += 4) {
- ENSURE_MAPPABLE(a, "ac_make_noaccess_aligned");
- sm = primary_map[a >> 16];
- sm_off = a & 0xFFFF;
- mask = 0x0F;
- mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */
- /* mask now contains 1s where we wish to make address bits
- invalid (1s). */
- sm->abits[sm_off >> 3] |= mask;
- }
- VGP_POPCC(VgpSetMem);
-}
-
-static void ac_make_writable_aligned ( Addr a, UInt len )
-{
- AcSecMap* sm;
- UInt sm_off;
- UChar mask;
- Addr a_past_end = a + len;
-
- VGP_PUSHCC(VgpSetMem);
-
- PROF_EVENT(51);
-# ifdef VG_DEBUG_MEMORY
- sk_assert(IS_ALIGNED4_ADDR(a));
- sk_assert(IS_ALIGNED4_ADDR(len));
-# endif
-
- for ( ; a < a_past_end; a += 4) {
- ENSURE_MAPPABLE(a, "ac_make_writable_aligned");
- sm = primary_map[a >> 16];
- sm_off = a & 0xFFFF;
- mask = 0x0F;
- mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */
- /* mask now contains 1s where we wish to make address bits
- invalid (0s). */
- sm->abits[sm_off >> 3] &= ~mask;
- }
- VGP_POPCC(VgpSetMem);
-}
-
-
static __inline__
void ac_check_is_accessible ( CorePart part, ThreadState* tst,
Char* s, Addr base, UInt size, Bool isWrite )
@@ -1061,7 +1070,7 @@
break;
/* For memory-ref instrs, copy the data_addr into a temporary to be
- * passed to the cachesim_* helper at the end of the instruction.
+ * passed to the helper at the end of the instruction.
*/
case LOAD:
t_addr = u_in->val1;
@@ -1320,12 +1329,17 @@
VG_(track_new_mem_startup) ( & ac_new_mem_startup );
VG_(track_new_mem_heap) ( & ac_new_mem_heap );
- VG_(track_new_mem_stack) ( & ac_make_accessible );
- VG_(track_new_mem_stack_aligned)( & ac_make_writable_aligned );
VG_(track_new_mem_stack_signal) ( & ac_make_accessible );
VG_(track_new_mem_brk) ( & ac_make_accessible );
VG_(track_new_mem_mmap) ( & ac_set_perms );
+ VG_(track_new_mem_stack_4) ( & MC_(new_mem_stack_4) );
+ VG_(track_new_mem_stack_8) ( & MC_(new_mem_stack_8) );
+ VG_(track_new_mem_stack_12) ( & MC_(new_mem_stack_12) );
+ VG_(track_new_mem_stack_16) ( & MC_(new_mem_stack_16) );
+ VG_(track_new_mem_stack_32) ( & MC_(new_mem_stack_32) );
+ VG_(track_new_mem_stack) ( & MC_(new_mem_stack) );
+
VG_(track_copy_mem_heap) ( & ac_copy_address_range_state );
VG_(track_copy_mem_remap) ( & ac_copy_address_range_state );
VG_(track_change_mem_mprotect) ( & ac_set_perms );
@@ -1334,12 +1348,17 @@
VG_(track_ban_mem_stack) ( & ac_make_noaccess );
VG_(track_die_mem_heap) ( & ac_make_noaccess );
- VG_(track_die_mem_stack) ( & ac_make_noaccess );
- VG_(track_die_mem_stack_aligned)( & ac_make_noaccess_aligned );
VG_(track_die_mem_stack_signal) ( & ac_make_noaccess );
VG_(track_die_mem_brk) ( & ac_make_noaccess );
VG_(track_die_mem_munmap) ( & ac_make_noaccess );
+ VG_(track_die_mem_stack_4) ( & MC_(die_mem_stack_4) );
+ VG_(track_die_mem_stack_8) ( & MC_(die_mem_stack_8) );
+ VG_(track_die_mem_stack_12) ( & MC_(die_mem_stack_12) );
+ VG_(track_die_mem_stack_16) ( & MC_(die_mem_stack_16) );
+ VG_(track_die_mem_stack_32) ( & MC_(die_mem_stack_32) );
+ VG_(track_die_mem_stack) ( & MC_(die_mem_stack) );
+
VG_(track_bad_free) ( & MC_(record_free_error) );
VG_(track_mismatched_free) ( & MC_(record_freemismatch_error) );
@@ -1355,6 +1374,7 @@
VGP_(register_profile_event) ( VgpSetMem, "set-mem-perms" );
VGP_(register_profile_event) ( VgpCheckMem, "check-mem-perms" );
+ VGP_(register_profile_event) ( VgpESPAdj, "adjust-ESP" );
init_shadow_memory();
MC_(init_prof_mem)();