-----------------------------------------------------------------------------
overview
-----------------------------------------------------------------------------
This commit introduces an optimisation that speeds up Memcheck by roughly
-3 -- 28%, and Addrcheck by 1 -- 36%, at least for the SPEC2000 benchmarks on
my 1400MHz Athlon.

Basic idea: that handling of A/V bit updates on %esp-adjustments was quite
sub-optimal -- for each "PUT ESP", a function was called that computed the
delta from the old and new ESPs, and then called a looping function to deal
with it.

Improvements:

  1. most of the time, the delta can be seen from the code.  So there's no need
     to compute it.

  2. when the delta is known, we can directly call a skin function to handle it.

  3. we can specialise for certain common cases (eg. +/- 4, 8, 12, 16, 32),
     including having unrolled loops for these.

This slightly bloats UCode because of setting up args for the call, and for
updating ESP in code (previously was done in the called C function).  Eg. for
`date' the code expansion ratio goes from 14.2 --> 14.6.  But it's much faster.

Note that skins don't have to use the specialised cases, they can just
define the ordinary case if they want;  the specialised cases are only used
if present.

-----------------------------------------------------------------------------
details
-----------------------------------------------------------------------------
Removed addrcheck/ac_common.c, put its (minimal) contents in ac_main.c.

Updated the major interface version, because this change isn't binary
compatible with the old core/skin interface.

Removed the hooks {new,die}_mem_stack_aligned, replaced with the better
{new,die}_mem_stack_{4,8,12,16,32}.  Still have the generic {die,new}_mem_stack
hooks.  These are called directly from UCode, thanks to a new pass that occurs
between instrumentation and register allocation (but only if the skin uses
these stack-adjustment hooks).  VG_(unknown_esp_update)() is called from UCode
for the generic case;  it determines if it's a stack switch, and calls the
generic {new,die}_mem_stack hooks accordingly.  This meant
synth_handle_esp_assignment() could be removed.

The new %esp-delta computation phase is in vg_translate.c.

In Memcheck and Addrcheck, added functions for updating the A and V bits of a
single aligned word and a single aligned doubleword.  These are called from the
specialised functions new_mem_stack_4, etc.  This allowed the ones for the old
hooks new_mem_stack_aligned and die_mem_stack_aligned to be removed.

In mc_common.h, added a big macro containing the definitions of new_mem_stack_4
et al.  It's ``instantiated'' separately by Memcheck and Addrcheck.  The macro
is a bit klugey, but I did it that way because speed is vital for these
functions, so eg. a function pointer would have slowed things down.

Updated the built-in profiling events appropriately for the changes (removed
one old event, added a new one;  finding their names is left as an exercise for
the reader).

Fixed memory event profiling in {Addr,Mem}check, which had rotted.

A few other minor things.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1510 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/memcheck/mc_common.h b/memcheck/mc_common.h
index da0695c..7a8d14b 100644
--- a/memcheck/mc_common.h
+++ b/memcheck/mc_common.h
@@ -36,6 +36,10 @@
 #include "vg_skin.h"
 #include "mc_constants.h"
 
+/*------------------------------------------------------------*/
+/*--- Errors and suppressions                              ---*/
+/*------------------------------------------------------------*/
+
 /* The classification of a faulting address. */
 typedef 
    enum { Undescribed,  /* as-yet unclassified */
@@ -110,20 +114,41 @@
    }
    MemCheckError;
 
+/*------------------------------------------------------------*/
+/*--- Profiling of skins and memory events                 ---*/
+/*------------------------------------------------------------*/
+
+typedef 
+   enum { 
+      VgpCheckMem = VgpFini+1,
+      VgpSetMem,
+      VgpESPAdj
+   } 
+   VgpSkinCC;
+
+/* Define to collect detailed performance info. */
+/* #define VG_PROFILE_MEMORY */
 
 #ifdef VG_PROFILE_MEMORY
+#  define N_PROF_EVENTS 150
 
-#define PROF_EVENT(ev)                                  \
+extern UInt MC_(event_ctr)[N_PROF_EVENTS];
+
+#  define PROF_EVENT(ev)                                  \
    do { sk_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
         MC_(event_ctr)[ev]++;                           \
    } while (False);
 
 #else
 
-#define PROF_EVENT(ev) /* */
+#  define PROF_EVENT(ev) /* */
 
 #endif   /* VG_PROFILE_MEMORY */
 
+/*------------------------------------------------------------*/
+/*--- V and A bits                                         ---*/
+/*------------------------------------------------------------*/
+
 #define IS_DISTINGUISHED_SM(smap) \
    ((smap) == &distinguished_secondary_map)
 
@@ -233,6 +258,191 @@
 extern ShadowChunk* MC_(any_matching_freed_ShadowChunks) 
                            ( Bool (*p)(ShadowChunk*) );
 
+extern __attribute__((regparm(1))) void MC_(new_mem_stack_4)  ( Addr old_ESP );
+extern __attribute__((regparm(1))) void MC_(die_mem_stack_4)  ( Addr old_ESP );
+extern __attribute__((regparm(1))) void MC_(new_mem_stack_8)  ( Addr old_ESP );
+extern __attribute__((regparm(1))) void MC_(die_mem_stack_8)  ( Addr old_ESP );
+extern __attribute__((regparm(1))) void MC_(new_mem_stack_12) ( Addr old_ESP );
+extern __attribute__((regparm(1))) void MC_(die_mem_stack_12) ( Addr old_ESP );
+extern __attribute__((regparm(1))) void MC_(new_mem_stack_16) ( Addr old_ESP );
+extern __attribute__((regparm(1))) void MC_(die_mem_stack_16) ( Addr old_ESP );
+extern __attribute__((regparm(1))) void MC_(new_mem_stack_32) ( Addr old_ESP );
+extern __attribute__((regparm(1))) void MC_(die_mem_stack_32) ( Addr old_ESP );
+extern                             void MC_(die_mem_stack) ( Addr a, UInt len );
+extern                             void MC_(new_mem_stack) ( Addr a, UInt len );
+
+
+/*------------------------------------------------------------*/
+/*--- Stack pointer adjustment                             ---*/
+/*------------------------------------------------------------*/
+
+/* Some noble preprocessor abuse, to enable Memcheck and Addrcheck to
+   share this code, but not call the same functions.
+
+   Note that this code is executed very frequently and must be highly
+   optimised, which is why I resort to the preprocessor to achieve the
+   factoring, rather than eg. using function pointers.
+*/
+
+#define ESP_UPDATE_HANDLERS(ALIGNED4_NEW,  ALIGNED4_DIE,                      \
+                            ALIGNED8_NEW,  ALIGNED8_DIE,                      \
+                            UNALIGNED_NEW, UNALIGNED_DIE)                     \
+                                                                              \
+void __attribute__((regparm(1))) MC_(new_mem_stack_4)(Addr new_ESP)           \
+{                                                                             \
+   PROF_EVENT(110);                                                           \
+   if (IS_ALIGNED4_ADDR(new_ESP)) {                                           \
+      ALIGNED4_NEW  ( new_ESP );                                              \
+   } else {                                                                   \
+      UNALIGNED_NEW ( new_ESP, 4 );                                           \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void __attribute__((regparm(1))) MC_(die_mem_stack_4)(Addr new_ESP)           \
+{                                                                             \
+   PROF_EVENT(120);                                                           \
+   if (IS_ALIGNED4_ADDR(new_ESP)) {                                           \
+      ALIGNED4_DIE  ( new_ESP-4 );                                            \
+   } else {                                                                   \
+      UNALIGNED_DIE ( new_ESP-4, 4 );                                         \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void __attribute__((regparm(1))) MC_(new_mem_stack_8)(Addr new_ESP)           \
+{                                                                             \
+   PROF_EVENT(111);                                                           \
+   if (IS_ALIGNED8_ADDR(new_ESP)) {                                           \
+      ALIGNED8_NEW  ( new_ESP );                                              \
+   } else if (IS_ALIGNED4_ADDR(new_ESP)) {                                    \
+      ALIGNED4_NEW  ( new_ESP   );                                            \
+      ALIGNED4_NEW  ( new_ESP+4 );                                            \
+   } else {                                                                   \
+      UNALIGNED_NEW ( new_ESP, 8 );                                           \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void __attribute__((regparm(1))) MC_(die_mem_stack_8)(Addr new_ESP)           \
+{                                                                             \
+   PROF_EVENT(121);                                                           \
+   if (IS_ALIGNED8_ADDR(new_ESP)) {                                           \
+      ALIGNED8_DIE  ( new_ESP-8 );                                            \
+   } else if (IS_ALIGNED4_ADDR(new_ESP)) {                                    \
+      ALIGNED4_DIE  ( new_ESP-8 );                                            \
+      ALIGNED4_DIE  ( new_ESP-4 );                                            \
+   } else {                                                                   \
+      UNALIGNED_DIE ( new_ESP-8, 8 );                                         \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void __attribute__((regparm(1))) MC_(new_mem_stack_12)(Addr new_ESP)          \
+{                                                                             \
+   PROF_EVENT(112);                                                           \
+   if (IS_ALIGNED8_ADDR(new_ESP)) {                                           \
+      ALIGNED8_NEW  ( new_ESP   );                                            \
+      ALIGNED4_NEW  ( new_ESP+8 );                                            \
+   } else if (IS_ALIGNED4_ADDR(new_ESP)) {                                    \
+      ALIGNED4_NEW  ( new_ESP   );                                            \
+      ALIGNED8_NEW  ( new_ESP+4 );                                            \
+   } else {                                                                   \
+      UNALIGNED_NEW ( new_ESP, 12 );                                          \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void __attribute__((regparm(1))) MC_(die_mem_stack_12)(Addr new_ESP)          \
+{                                                                             \
+   PROF_EVENT(122);                                                           \
+   /* Note the -12 in the test */                                             \
+   if (IS_ALIGNED8_ADDR(new_ESP-12)) {                                        \
+      ALIGNED8_DIE  ( new_ESP-12 );                                           \
+      ALIGNED4_DIE  ( new_ESP-4  );                                           \
+   } else if (IS_ALIGNED4_ADDR(new_ESP)) {                                    \
+      ALIGNED4_DIE  ( new_ESP-12 );                                           \
+      ALIGNED8_DIE  ( new_ESP-8  );                                           \
+   } else {                                                                   \
+      UNALIGNED_DIE ( new_ESP-12, 12 );                                       \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void __attribute__((regparm(1))) MC_(new_mem_stack_16)(Addr new_ESP)          \
+{                                                                             \
+   PROF_EVENT(113);                                                           \
+   if (IS_ALIGNED8_ADDR(new_ESP)) {                                           \
+      ALIGNED8_NEW  ( new_ESP   );                                            \
+      ALIGNED8_NEW  ( new_ESP+8 );                                            \
+   } else if (IS_ALIGNED4_ADDR(new_ESP)) {                                    \
+      ALIGNED4_NEW  ( new_ESP    );                                           \
+      ALIGNED8_NEW  ( new_ESP+4  );                                           \
+      ALIGNED4_NEW  ( new_ESP+12 );                                           \
+   } else {                                                                   \
+      UNALIGNED_NEW ( new_ESP, 16 );                                          \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void __attribute__((regparm(1))) MC_(die_mem_stack_16)(Addr new_ESP)          \
+{                                                                             \
+   PROF_EVENT(123);                                                           \
+   if (IS_ALIGNED8_ADDR(new_ESP)) {                                           \
+      ALIGNED8_DIE  ( new_ESP-16 );                                           \
+      ALIGNED8_DIE  ( new_ESP-8  );                                           \
+   } else if (IS_ALIGNED4_ADDR(new_ESP)) {                                    \
+      ALIGNED4_DIE  ( new_ESP-16 );                                           \
+      ALIGNED8_DIE  ( new_ESP-12 );                                           \
+      ALIGNED4_DIE  ( new_ESP-4  );                                           \
+   } else {                                                                   \
+      UNALIGNED_DIE ( new_ESP-16, 16 );                                       \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void __attribute__((regparm(1))) MC_(new_mem_stack_32)(Addr new_ESP)          \
+{                                                                             \
+   PROF_EVENT(114);                                                           \
+   if (IS_ALIGNED8_ADDR(new_ESP)) {                                           \
+      ALIGNED8_NEW  ( new_ESP    );                                           \
+      ALIGNED8_NEW  ( new_ESP+8  );                                           \
+      ALIGNED8_NEW  ( new_ESP+16 );                                           \
+      ALIGNED8_NEW  ( new_ESP+24 );                                           \
+   } else if (IS_ALIGNED4_ADDR(new_ESP)) {                                    \
+      ALIGNED4_NEW  ( new_ESP    );                                           \
+      ALIGNED8_NEW  ( new_ESP+4  );                                           \
+      ALIGNED8_NEW  ( new_ESP+12 );                                           \
+      ALIGNED8_NEW  ( new_ESP+20 );                                           \
+      ALIGNED4_NEW  ( new_ESP+28 );                                           \
+   } else {                                                                   \
+      UNALIGNED_NEW ( new_ESP, 32 );                                          \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void __attribute__((regparm(1))) MC_(die_mem_stack_32)(Addr new_ESP)          \
+{                                                                             \
+   PROF_EVENT(124);                                                           \
+   if (IS_ALIGNED8_ADDR(new_ESP)) {                                           \
+      ALIGNED8_DIE  ( new_ESP-32 );                                           \
+      ALIGNED8_DIE  ( new_ESP-24 );                                           \
+      ALIGNED8_DIE  ( new_ESP-16 );                                           \
+      ALIGNED8_DIE  ( new_ESP- 8 );                                           \
+   } else if (IS_ALIGNED4_ADDR(new_ESP)) {                                    \
+      ALIGNED4_DIE  ( new_ESP-32 );                                           \
+      ALIGNED8_DIE  ( new_ESP-28 );                                           \
+      ALIGNED8_DIE  ( new_ESP-20 );                                           \
+      ALIGNED8_DIE  ( new_ESP-12 );                                           \
+      ALIGNED4_DIE  ( new_ESP-4  );                                           \
+   } else {                                                                   \
+      UNALIGNED_DIE ( new_ESP-32, 32 );                                       \
+   }                                                                          \
+}                                                                             \
+                                                                              \
+void MC_(new_mem_stack) ( Addr a, UInt len )                                  \
+{                                                                             \
+   PROF_EVENT(115);                                                           \
+   UNALIGNED_NEW ( a, len );                                                  \
+}                                                                             \
+                                                                              \
+void MC_(die_mem_stack) ( Addr a, UInt len )                                  \
+{                                                                             \
+   PROF_EVENT(125);                                                           \
+   UNALIGNED_DIE ( a, len );                                                  \
+}
+
 #endif   /* __MC_COMMON_H */
 
 /*--------------------------------------------------------------------*/