* Crank up the memcheck event-counting system, and enhance it to
name the events, rather than just number them, which makes it a
lot easier to use
* Based on that, fill in some fast-path cases
{LOAD,STORE}V{4,2,1}. The assembly code looks about the same
length as it did before, on x86. Fast-path cases for the
stack have yet to be done.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@3538 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/memcheck/mac_needs.c b/memcheck/mac_needs.c
index c2cf3c3..697eef4 100644
--- a/memcheck/mac_needs.c
+++ b/memcheck/mac_needs.c
@@ -797,25 +797,35 @@
#ifdef MAC_PROFILE_MEMORY
-UInt MAC_(event_ctr)[N_PROF_EVENTS];
+UInt MAC_(event_ctr)[N_PROF_EVENTS];
+HChar* MAC_(event_ctr_name)[N_PROF_EVENTS];
static void init_prof_mem ( void )
{
Int i;
- for (i = 0; i < N_PROF_EVENTS; i++)
+ for (i = 0; i < N_PROF_EVENTS; i++) {
MAC_(event_ctr)[i] = 0;
+ MAC_(event_ctr_name)[i] = NULL;
+ }
}
static void done_prof_mem ( void )
{
- Int i;
+ Int i;
+ Bool spaced = False;
for (i = 0; i < N_PROF_EVENTS; i++) {
- if ((i % 10) == 0)
+ if (!spaced && (i % 10) == 0) {
VG_(printf)("\n");
- if (MAC_(event_ctr)[i] > 0)
- VG_(printf)( "prof mem event %2d: %d\n", i, MAC_(event_ctr)[i] );
+ spaced = True;
+ }
+ if (MAC_(event_ctr)[i] > 0) {
+ spaced = False;
+ VG_(printf)( "prof mem event %3d: %9d %s\n",
+ i, MAC_(event_ctr)[i],
+ MAC_(event_ctr_name)[i]
+ ? MAC_(event_ctr_name)[i] : "unnamed");
+ }
}
- VG_(printf)("\n");
}
#else
diff --git a/memcheck/mac_shared.h b/memcheck/mac_shared.h
index 44d9d05..815cfff 100644
--- a/memcheck/mac_shared.h
+++ b/memcheck/mac_shared.h
@@ -171,21 +171,27 @@
VgpToolCC;
/* Define to collect detailed performance info. */
-/* #define MAC_PROFILE_MEMORY */
+#define MAC_PROFILE_MEMORY
#ifdef MAC_PROFILE_MEMORY
-# define N_PROF_EVENTS 150
+# define N_PROF_EVENTS 500
-extern UInt MAC_(event_ctr)[N_PROF_EVENTS];
+extern UInt MAC_(event_ctr)[N_PROF_EVENTS];
+extern HChar* MAC_(event_ctr_name)[N_PROF_EVENTS];
-# define PROF_EVENT(ev) \
- do { tl_assert((ev) >= 0 && (ev) < N_PROF_EVENTS); \
- MAC_(event_ctr)[ev]++; \
+# define PROF_EVENT(ev, name) \
+ do { tl_assert((ev) >= 0 && (ev) < N_PROF_EVENTS); \
+ /* crude and inaccurate check to ensure the same */ \
+ /* event isn't being used with > 1 name */ \
+ if (MAC_(event_ctr_name)[ev]) \
+ tl_assert(name == MAC_(event_ctr_name)[ev]); \
+ MAC_(event_ctr)[ev]++; \
+ MAC_(event_ctr_name)[ev] = (name); \
} while (False);
#else
-# define PROF_EVENT(ev) /* */
+# define PROF_EVENT(ev, name) /* */
#endif /* MAC_PROFILE_MEMORY */
@@ -437,7 +443,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_4)(Addr new_SP) \
{ \
- PROF_EVENT(110); \
+ PROF_EVENT(110, "new_mem_stack_4"); \
if (VG_IS_4_ALIGNED(new_SP)) { \
ALIGNED4_NEW ( new_SP ); \
} else { \
@@ -447,7 +453,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_4)(Addr new_SP) \
{ \
- PROF_EVENT(120); \
+ PROF_EVENT(120, "die_mem_stack_4"); \
if (VG_IS_4_ALIGNED(new_SP)) { \
ALIGNED4_DIE ( new_SP-4 ); \
} else { \
@@ -457,7 +463,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_8)(Addr new_SP) \
{ \
- PROF_EVENT(111); \
+ PROF_EVENT(111, "new_mem_stack_8"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_NEW ( new_SP ); \
} else if (VG_IS_4_ALIGNED(new_SP)) { \
@@ -470,7 +476,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_8)(Addr new_SP) \
{ \
- PROF_EVENT(121); \
+ PROF_EVENT(121, "die_mem_stack_8"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_DIE ( new_SP-8 ); \
} else if (VG_IS_4_ALIGNED(new_SP)) { \
@@ -483,7 +489,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_12)(Addr new_SP) \
{ \
- PROF_EVENT(112); \
+ PROF_EVENT(112, "new_mem_stack_12"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_NEW ( new_SP ); \
ALIGNED4_NEW ( new_SP+8 ); \
@@ -497,7 +503,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_12)(Addr new_SP) \
{ \
- PROF_EVENT(122); \
+ PROF_EVENT(122, "die_mem_stack_12"); \
/* Note the -12 in the test */ \
if (VG_IS_8_ALIGNED(new_SP-12)) { \
ALIGNED8_DIE ( new_SP-12 ); \
@@ -512,7 +518,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_16)(Addr new_SP) \
{ \
- PROF_EVENT(113); \
+ PROF_EVENT(113, "new_mem_stack_16"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_NEW ( new_SP ); \
ALIGNED8_NEW ( new_SP+8 ); \
@@ -527,7 +533,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_16)(Addr new_SP) \
{ \
- PROF_EVENT(123); \
+ PROF_EVENT(123, "die_mem_stack_16"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_DIE ( new_SP-16 ); \
ALIGNED8_DIE ( new_SP-8 ); \
@@ -542,7 +548,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_32)(Addr new_SP) \
{ \
- PROF_EVENT(114); \
+ PROF_EVENT(114, "new_mem_stack_32"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_NEW ( new_SP ); \
ALIGNED8_NEW ( new_SP+8 ); \
@@ -561,7 +567,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_32)(Addr new_SP) \
{ \
- PROF_EVENT(124); \
+ PROF_EVENT(124, "die_mem_stack_32"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_DIE ( new_SP-32 ); \
ALIGNED8_DIE ( new_SP-24 ); \
@@ -580,13 +586,13 @@
\
void MAC_(new_mem_stack) ( Addr a, SizeT len ) \
{ \
- PROF_EVENT(115); \
+ PROF_EVENT(115, "new_mem_stack"); \
UNALIGNED_NEW ( a, len ); \
} \
\
void MAC_(die_mem_stack) ( Addr a, SizeT len ) \
{ \
- PROF_EVENT(125); \
+ PROF_EVENT(125, "die_mem_stack"); \
UNALIGNED_DIE ( a, len ); \
}
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 949c319..eae7a3d 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -52,6 +52,17 @@
//#include "vg_profile.c"
+#define EXPECTED_TAKEN(cond) __builtin_expect((cond),1)
+#define EXPECTED_NOT_TAKEN(cond) __builtin_expect((cond),0)
+
+/* Define to debug the mem audit system. Set to:
+ 0 no debugging, fast cases are used
+ 1 some sanity checking, fast cases are used
+ 2 max sanity checking, only slow cases are used
+*/
+#define VG_DEBUG_MEMORY 1
+
+
typedef enum {
MC_Ok = 5, MC_AddrErr = 6, MC_ValueErr = 7
} MC_ReadResult;
@@ -69,10 +80,14 @@
we hardwire the assumption that each secondary map covers precisely
64k of address space. */
+/* Only change this. N_PRIMARY_MAPS *must* be a power of 2. */
#define N_PRIMARY_BITS 16
-#define N_PRIMARY_MAPS ((1 << N_PRIMARY_BITS)-1)
-#define MAX_PRIMARY_ADDRESS (Addr)(((Addr)65536) * N_PRIMARY_MAPS)
+/* Do not change this. */
+#define N_PRIMARY_MAPS (1 << N_PRIMARY_BITS)
+
+/* Do not change this. */
+#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAPS)-1)
/* --------------- Secondary maps --------------- */
@@ -304,10 +319,11 @@
Bool aok;
UWord abit, vbyte;
- PROF_EVENT(70);
+ PROF_EVENT(30, "mc_LOADVn_slow");
tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
while (True) {
+ PROF_EVENT(31, "mc_LOADVn_slow(loop)");
ai = a+byte_offset_w(szB,bigendian,i);
get_abit_and_vbyte(&abit, &vbyte, ai);
aok = abit == VGM_BIT_VALID;
@@ -337,13 +353,14 @@
Bool aok;
Addr ai;
- PROF_EVENT(71);
+ PROF_EVENT(35, "mc_STOREVn_slow");
tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
/* Dump vbytes in memory, iterating from least to most significant
byte. At the same time establish addressibility of the
location. */
for (i = 0; i < szB; i++) {
+ PROF_EVENT(36, "mc_STOREVn_slow(loop)");
ai = a+byte_offset_w(szB,bigendian,i);
abit = get_abit(ai);
aok = abit == VGM_BIT_VALID;
@@ -376,8 +393,6 @@
//zz #if 0 /* this is the old implementation */
//zz
-//zz /* Define to debug the mem audit system. */
-//zz /* #define VG_DEBUG_MEMORY */
//zz
//zz
//zz /*------------------------------------------------------------*/
@@ -523,8 +538,6 @@
/* auxmap_size = auxmap_used = 0;
no ... these are statically initialised */
-
- tl_assert( TL_(expensive_sanity_check)() );
}
@@ -828,21 +841,21 @@
static void mc_make_noaccess ( Addr a, SizeT len )
{
- PROF_EVENT(35);
+ PROF_EVENT(40, "mc_make_noaccess");
DEBUG("mc_make_noaccess(%p, %llu)\n", a, (ULong)len);
set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID );
}
static void mc_make_writable ( Addr a, SizeT len )
{
- PROF_EVENT(36);
+ PROF_EVENT(41, "mc_make_writable");
DEBUG("mc_make_writable(%p, %llu)\n", a, (ULong)len);
set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID );
}
static void mc_make_readable ( Addr a, SizeT len )
{
- PROF_EVENT(37);
+ PROF_EVENT(42, "mc_make_readable");
DEBUG("mc_make_readable(%p, %llu)\n", a, (ULong)len);
set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
}
@@ -850,6 +863,7 @@
static __inline__
void make_aligned_word32_writable(Addr a)
{
+ PROF_EVENT(43, "make_aligned_word32_writable");
mc_make_writable(a, 4);
//zz SecMap* sm;
//zz UInt sm_off;
@@ -870,6 +884,7 @@
static __inline__
void make_aligned_word32_noaccess(Addr a)
{
+ PROF_EVENT(44, "make_aligned_word32_noaccess");
mc_make_noaccess(a, 4);
//zz SecMap* sm;
//zz UInt sm_off;
@@ -891,6 +906,7 @@
static __inline__
void make_aligned_word64_writable(Addr a)
{
+ PROF_EVENT(45, "make_aligned_word64_writable");
mc_make_writable(a, 8);
//zz SecMap* sm;
//zz UInt sm_off;
@@ -908,6 +924,7 @@
static __inline__
void make_aligned_word64_noaccess(Addr a)
{
+ PROF_EVENT(46, "make_aligned_word64_noaccess");
mc_make_noaccess(a, 8);
//zz SecMap* sm;
//zz UInt sm_off;
@@ -939,9 +956,9 @@
DEBUG("mc_copy_address_range_state\n");
- PROF_EVENT(40);
+ PROF_EVENT(50, "mc_copy_address_range_state");
for (i = 0; i < len; i++) {
- PROF_EVENT(41);
+ PROF_EVENT(51, "mc_copy_address_range_state(loop)");
get_abit_and_vbyte( &abit, &vbyte, src+i );
set_abit_and_vbyte( dst+i, abit, vbyte );
}
@@ -964,9 +981,9 @@
{
SizeT i;
UWord abit;
- PROF_EVENT(42);
+ PROF_EVENT(60, "mc_check_noaccess");
for (i = 0; i < len; i++) {
- PROF_EVENT(43);
+ PROF_EVENT(61, "mc_check_noaccess(loop)");
abit = get_abit(a);
if (abit == VGM_BIT_VALID) {
if (bad_addr != NULL)
@@ -982,9 +999,9 @@
{
SizeT i;
UWord abit;
- PROF_EVENT(42);
+ PROF_EVENT(62, "mc_check_writable");
for (i = 0; i < len; i++) {
- PROF_EVENT(43);
+ PROF_EVENT(63, "mc_check_writable(loop)");
abit = get_abit(a);
if (abit == VGM_BIT_INVALID) {
if (bad_addr != NULL) *bad_addr = a;
@@ -1001,10 +1018,10 @@
UWord abit;
UWord vbyte;
- PROF_EVENT(44);
+ PROF_EVENT(64, "mc_check_readable");
DEBUG("mc_check_readable\n");
for (i = 0; i < len; i++) {
- PROF_EVENT(45);
+ PROF_EVENT(65, "mc_check_readable(loop)");
get_abit_and_vbyte(&abit, &vbyte, a);
// Report addressability errors in preference to definedness errors
// by checking the A bits first.
@@ -1032,10 +1049,10 @@
{
UWord abit;
UWord vbyte;
- PROF_EVENT(46);
+ PROF_EVENT(66, "mc_check_readable_asciiz");
DEBUG("mc_check_readable_asciiz\n");
while (True) {
- PROF_EVENT(47);
+ PROF_EVENT(67, "mc_check_readable_asciiz(loop)");
get_abit_and_vbyte(&abit, &vbyte, a);
// As in mc_check_readable(), check A bits first
if (abit != VGM_BIT_VALID) {
@@ -1276,6 +1293,7 @@
VGA_REGPARM(1)
ULong MC_(helperc_LOADV8) ( Addr a )
{
+ PROF_EVENT(70, "helperc_LOADV8");
return mc_LOADVn_slow( a, 8, False/*littleendian*/ );
//zz # ifdef VG_DEBUG_MEMORY
//zz return mc_rd_V8_SLOWLY(a);
@@ -1311,6 +1329,7 @@
VGA_REGPARM(1)
void MC_(helperc_STOREV8) ( Addr a, ULong vbytes )
{
+ PROF_EVENT(71, "helperc_STOREV8");
mc_STOREVn_slow( a, 8, vbytes, False/*littleendian*/ );
//zz # ifdef VG_DEBUG_MEMORY
//zz mc_wr_V8_SLOWLY(a, vbytes);
@@ -1349,86 +1368,152 @@
/* ------------------------ Size = 4 ------------------------ */
VGA_REGPARM(1)
-UWord MC_(helperc_LOADV4) ( Addr a )
+UWord MC_(helperc_LOADV4) ( Addr aA )
{
- return (UWord)mc_LOADVn_slow( a, 4, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz return mc_rd_V4_SLOWLY(a);
-//zz # else
-//zz UInt sec_no = rotateRight16(a) & 0x3FFFF;
-//zz SecMap* sm = primary_map[sec_no];
-//zz UInt a_off = (SM_OFF(a)) >> 3;
-//zz UChar abits = sm->abits[a_off];
-//zz abits >>= (a & 4);
-//zz abits &= 15;
-//zz PROF_EVENT(60);
-//zz if (abits == VGM_NIBBLE_VALID) {
-//zz /* Handle common case quickly: a is suitably aligned, is mapped,
-//zz and is addressible. */
-//zz UInt v_off = SM_OFF(a);
-//zz return ((UInt*)(sm->vbyte))[ v_off >> 2 ];
-//zz } else {
-//zz /* Slow but general case. */
-//zz return mc_rd_V4_SLOWLY(a);
-//zz }
-//zz # endif
+ PROF_EVENT(220, "helperc_LOADV4");
+
+# if VG_DEBUG_MEMORY >= 2
+ return (UWord)mc_LOADVn_slow( aA, 4, False/*littleendian*/ );
+# else
+
+ const UWord mask = ~((0x10000-4) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a = (UWord)aA;
+
+ /* If any part of 'a' indicated by the mask is 1, either 'a' is not
+ naturally aligned, or 'a' exceeds the range covered by the
+ primary map. Either way we defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(221, "helperc_LOADV4-slow1");
+ return (UWord)mc_LOADVn_slow( aA, 4, False/*littleendian*/ );
+ }
+
+ UWord sec_no = (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >= 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm = primary_map[sec_no];
+ UWord v_off = a & 0xFFFF;
+ UWord a_off = v_off >> 3;
+ UWord abits = (UWord)(sm->abits[a_off]);
+ abits >>= (a & 4);
+ abits &= 15;
+ if (EXPECTED_TAKEN(abits == VGM_NIBBLE_VALID)) {
+ /* Handle common case quickly: a is suitably aligned, is mapped,
+ and is addressible. */
+ return (UWord)(
+ 0xFFFFFFFFULL
+ & ((UInt*)(sm->vbyte))[ v_off >> 2 ]
+ );
+ } else {
+ /* Slow but general case. */
+ PROF_EVENT(222, "helperc_LOADV4-slow2");
+ return (UWord)mc_LOADVn_slow( aA, 4, False/*littleendian*/ );
+ }
+
+# endif
}
+
VGA_REGPARM(2)
-void MC_(helperc_STOREV4) ( Addr a, UWord vbytes )
+void MC_(helperc_STOREV4) ( Addr aA, UWord vbytes )
{
+ PROF_EVENT(230, "helperc_STOREV4");
+
+# if VG_DEBUG_MEMORY >= 2
- mc_STOREVn_slow( a, 4, (ULong)vbytes, False/*littleendian*/ );
+ mc_STOREVn_slow( aA, 4, (ULong)vbytes, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz mc_wr_V4_SLOWLY(a, vbytes);
-//zz # else
-//zz UInt sec_no = rotateRight16(a) & 0x3FFFF;
-//zz SecMap* sm = primary_map[sec_no];
-//zz UInt a_off = (SM_OFF(a)) >> 3;
-//zz UChar abits = sm->abits[a_off];
-//zz abits >>= (a & 4);
-//zz abits &= 15;
-//zz PROF_EVENT(61);
-//zz if (!IS_DISTINGUISHED_SM(sm) && abits == VGM_NIBBLE_VALID) {
-//zz /* Handle common case quickly: a is suitably aligned, is mapped,
-//zz and is addressible. */
-//zz UInt v_off = SM_OFF(a);
-//zz ((UInt*)(sm->vbyte))[ v_off >> 2 ] = vbytes;
-//zz } else {
-//zz /* Slow but general case. */
-//zz mc_wr_V4_SLOWLY(a, vbytes);
-//zz }
-//zz # endif
+# else
+
+ const UWord mask = ~((0x10000-4) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a = (UWord)aA;
+
+ /* If any part of 'a' indicated by the mask is 1, either 'a' is not
+ naturally aligned, or 'a' exceeds the range covered by the
+ primary map. Either way we defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(231, "helperc_STOREV4-slow1");
+ mc_STOREVn_slow( aA, 4, (ULong)vbytes, False/*littleendian*/ );
+ return;
+ }
+
+ UWord sec_no = (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >= 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm = primary_map[sec_no];
+ UWord v_off = a & 0xFFFF;
+ UWord a_off = v_off >> 3;
+ UWord abits = (UWord)(sm->abits[a_off]);
+ abits >>= (a & 4);
+ abits &= 15;
+ if (EXPECTED_TAKEN(!is_distinguished_sm(sm)
+ && abits == VGM_NIBBLE_VALID)) {
+ /* Handle common case quickly: a is suitably aligned, is mapped,
+ and is addressible. */
+ ((UInt*)(sm->vbyte))[ v_off >> 2 ] = (UInt)vbytes;
+ } else {
+ /* Slow but general case. */
+ PROF_EVENT(232, "helperc_STOREV4-slow2");
+ mc_STOREVn_slow( aA, 4, (ULong)vbytes, False/*littleendian*/ );
+ }
+# endif
}
/* ------------------------ Size = 2 ------------------------ */
VGA_REGPARM(1)
-UWord MC_(helperc_LOADV2) ( Addr a )
+UWord MC_(helperc_LOADV2) ( Addr aA )
{
- return (UWord)mc_LOADVn_slow( a, 2, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz return mc_rd_V2_SLOWLY(a);
-//zz # else
-//zz UInt sec_no = rotateRight16(a) & 0x1FFFF;
-//zz SecMap* sm = primary_map[sec_no];
-//zz UInt a_off = (SM_OFF(a)) >> 3;
-//zz PROF_EVENT(62);
-//zz if (sm->abits[a_off] == VGM_BYTE_VALID) {
-//zz /* Handle common case quickly. */
-//zz UInt v_off = SM_OFF(a);
-//zz return 0xFFFF0000
-//zz |
-//zz (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
-//zz } else {
-//zz /* Slow but general case. */
-//zz return mc_rd_V2_SLOWLY(a);
-//zz }
-//zz # endif
+ PROF_EVENT(240, "helperc_LOADV2");
+
+# if VG_DEBUG_MEMORY >= 2
+ return (UWord)mc_LOADVn_slow( aA, 2, False/*littleendian*/ );
+# else
+
+ const UWord mask = ~((0x10000-2) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a = (UWord)aA;
+
+ /* If any part of 'a' indicated by the mask is 1, either 'a' is not
+ naturally aligned, or 'a' exceeds the range covered by the
+ primary map. Either way we defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(241, "helperc_LOADV2-slow1");
+ return (UWord)mc_LOADVn_slow( aA, 2, False/*littleendian*/ );
+ }
+
+ UWord sec_no = (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >= 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm = primary_map[sec_no];
+ UWord v_off = a & 0xFFFF;
+ UWord a_off = v_off >> 3;
+ UWord abits = (UWord)(sm->abits[a_off]);
+
+ if (EXPECTED_TAKEN(abits == VGM_BYTE_VALID)) {
+ /* Handle common case quickly: a is mapped, and the entire
+ word32 it lives in is addressible. */
+ return (~(UWord)0xFFFF)
+ |
+ (UWord)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
+ } else {
+ /* Slow but general case. */
+ PROF_EVENT(242, "helperc_LOADV2-slow2");
+ return (UWord)mc_LOADVn_slow( aA, 2, False/*littleendian*/ );
+ }
+
+# endif
}
VGA_REGPARM(2)
void MC_(helperc_STOREV2) ( Addr a, UWord vbytes )
{
+ PROF_EVENT(250, "helperc_STOREV2");
mc_STOREVn_slow( a, 2, (ULong)vbytes, False/*littleendian*/ );
//zz # ifdef VG_DEBUG_MEMORY
//zz mc_wr_V2_SLOWLY(a, vbytes);
@@ -1451,49 +1536,91 @@
/* ------------------------ Size = 1 ------------------------ */
VGA_REGPARM(1)
-UWord MC_(helperc_LOADV1) ( Addr a )
+UWord MC_(helperc_LOADV1) ( Addr aA )
{
+ PROF_EVENT(260, "helperc_LOADV1");
+
+# if VG_DEBUG_MEMORY >= 2
- return (UWord)mc_LOADVn_slow( a, 1, False/*littleendian*/ );
+ return (UWord)mc_LOADVn_slow( aA, 1, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz return mc_rd_V1_SLOWLY(a);
-//zz # else
-//zz UInt sec_no = shiftRight16(a);
-//zz SecMap* sm = primary_map[sec_no];
-//zz UInt a_off = (SM_OFF(a)) >> 3;
-//zz PROF_EVENT(64);
-//zz if (sm->abits[a_off] == VGM_BYTE_VALID) {
-//zz /* Handle common case quickly. */
-//zz UInt v_off = SM_OFF(a);
-//zz return 0xFFFFFF00
-//zz |
-//zz (UInt)( ((UChar*)(sm->vbyte))[ v_off ] );
-//zz } else {
-//zz /* Slow but general case. */
-//zz return mc_rd_V1_SLOWLY(a);
-//zz }
-//zz # endif
+# else
+
+ const UWord mask = ~((0x10000-1) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a = (UWord)aA;
+
+ /* If any part of 'a' indicated by the mask is 1, it means 'a'
+ exceeds the range covered by the primary map. In which case we
+ defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(261, "helperc_LOADV1-slow1");
+ return (UWord)mc_LOADVn_slow( aA, 1, False/*littleendian*/ );
+ }
+
+ UWord sec_no = (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >= 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm = primary_map[sec_no];
+ UWord v_off = a & 0xFFFF;
+ UWord a_off = v_off >> 3;
+ UWord abits = 0xFF & (UWord)(sm->abits[a_off]);
+ if (EXPECTED_TAKEN(abits == VGM_BYTE_VALID)) {
+ /* Handle common case quickly: a is mapped, and the entire
+ word32 it lives in is addressible. */
+ return (~(UWord)0xFF)
+ |
+ (UWord)( ((UChar*)(sm->vbyte))[ v_off ] );
+ } else {
+ /* Slow but general case. */
+ PROF_EVENT(262, "helperc_LOADV1-slow2");
+ return (UWord)mc_LOADVn_slow( aA, 1, False/*littleendian*/ );
+ }
+# endif
}
+
VGA_REGPARM(2)
-void MC_(helperc_STOREV1) ( Addr a, UWord vbytes )
+void MC_(helperc_STOREV1) ( Addr aA, UWord vbyte )
{
- mc_STOREVn_slow( a, 1, (ULong)vbytes, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz mc_wr_V1_SLOWLY(a, vbytes);
-//zz # else
-//zz UInt sec_no = shiftRight16(a);
-//zz SecMap* sm = primary_map[sec_no];
-//zz UInt a_off = (SM_OFF(a)) >> 3;
-//zz PROF_EVENT(65);
-//zz if (!IS_DISTINGUISHED_SM(sm) && sm->abits[a_off] == VGM_BYTE_VALID) {
-//zz /* Handle common case quickly. */
-//zz UInt v_off = SM_OFF(a);
-//zz ((UChar*)(sm->vbyte))[ v_off ] = vbytes & 0x000000FF;
-//zz } else {
-//zz /* Slow but general case. */
-//zz mc_wr_V1_SLOWLY(a, vbytes);
-//zz }
-//zz # endif
+ PROF_EVENT(270, "helperc_STOREV1");
+
+# if VG_DEBUG_MEMORY >= 2
+ mc_STOREVn_slow( aA, 1, (ULong)vbyte, False/*littleendian*/ );
+# else
+
+ const UWord mask = ~((0x10000-1) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a = (UWord)aA;
+ /* If any part of 'a' indicated by the mask is 1, it means 'a'
+ exceeds the range covered by the primary map. In which case we
+ defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(271, "helperc_STOREV1-slow1");
+ mc_STOREVn_slow( aA, 1, (ULong)vbyte, False/*littleendian*/ );
+ return;
+ }
+
+ UWord sec_no = (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >= 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm = primary_map[sec_no];
+ UWord v_off = a & 0xFFFF;
+ UWord a_off = v_off >> 3;
+ UWord abits = 0xFF & (UWord)(sm->abits[a_off]);
+ if (EXPECTED_TAKEN(!is_distinguished_sm(sm)
+ && abits == VGM_BYTE_VALID)) {
+ /* Handle common case quickly: a is mapped, the entire word32 it
+ lives in is addressible. */
+ ((UChar*)(sm->vbyte))[ v_off ] = (UChar)vbyte;
+ } else {
+ PROF_EVENT(272, "helperc_STOREV1-slow2");
+ mc_STOREVn_slow( aA, 1, (ULong)vbyte, False/*littleendian*/ );
+ }
+
+# endif
}
@@ -1958,6 +2085,7 @@
Bool TL_(cheap_sanity_check) ( void )
{
/* nothing useful we can rapidly check */
+ PROF_EVENT(490, "cheap_sanity_check");
return True;
}
@@ -1966,6 +2094,8 @@
Int i;
SecMap* sm;
+ PROF_EVENT(491, "expensive_sanity_check");
+
/* Check the 3 distinguished SMs. */
/* Check A invalid, V invalid. */
@@ -2395,6 +2525,8 @@
init_shadow_memory();
MAC_(common_pre_clo_init)();
+
+ tl_assert( TL_(expensive_sanity_check)() );
}
void TL_(post_clo_init) ( void )