Implement --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none

The option --keep-stacktraces controls which stack trace(s) to keep for
malloc'd and/or free'd blocks. This can be used to obtain more information
for 'use after free' errors or to decrease Valgrind memory and/or cpu usage
by recording less information for heap blocks.

This fixes 312913 Dangling pointers error should also report the alloc
stack trace.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13223 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/NEWS b/NEWS
index 6c41d1f..af88aa1 100644
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,11 @@
     This is a.o. useful to avoid definite leaks being "catched"
     by a suppression entry aimed at suppressing possibly lost blocks.
 
+  - The option --keep-stacktraces controls which stack trace(s) to keep for
+    malloc'd and/or free'd blocks. This can be used to obtain more information
+    for 'use after free' errors or to decrease Valgrind memory and/or cpu usage
+    by recording less information for heap blocks.
+
 * ==================== OTHER CHANGES ====================
 
   - Addition of GDB server monitor command 'v.info open_fds' that gives the
@@ -62,6 +67,7 @@
 308886    [390] Missing support for PTRACE_SET/GETREGSET 
 310424    [390] --read-var-info does not properly describe static variables 
 310931    [390] s390x: Message-security assist (MSA) instruction extension not implemented
+312913    [390] Dangling pointers error should also report the alloc stack trace
 n-i-bz    [390] report error for vgdb snapshot requested before execution
 n-i-bz    [390] Some wrong command line options could be ignored
 n-i-bz    [390] same as 303624 (fixed in 3.8.0), but for x86 android
diff --git a/coregrind/m_execontext.c b/coregrind/m_execontext.c
index b81dbff..824a4b4 100644
--- a/coregrind/m_execontext.c
+++ b/coregrind/m_execontext.c
@@ -98,6 +98,7 @@
 /* ECU serial number */
 static UInt ec_next_ecu = 4; /* We must never issue zero */
 
+static ExeContext* null_ExeContext;
 
 /* Stats only: the number of times the system was searched to locate a
    context. */
@@ -119,6 +120,7 @@
 /*--- Exported functions.                                  ---*/
 /*------------------------------------------------------------*/
 
+static ExeContext* record_ExeContext_wrk2 ( Addr* ips, UInt n_ips ); /*fwds*/
 
 /* Initialise this subsystem. */
 static void init_ExeContext_storage ( void )
@@ -141,6 +143,13 @@
    for (i = 0; i < ec_htab_size; i++)
       ec_htab[i] = NULL;
 
+   {
+      Addr ips[1];
+      ips[0] = 0;
+      null_ExeContext = record_ExeContext_wrk2(ips, 1);
+      vg_assert(null_ExeContext->ecu == 4); // null execontext must be the first one.
+   }
+
    init_done = True;
 }
 
@@ -310,7 +319,6 @@
 /* Do the first part of getting a stack trace: actually unwind the
    stack, and hand the results off to the duplicate-trace-finder
    (_wrk2). */
-static ExeContext* record_ExeContext_wrk2 ( Addr* ips, UInt n_ips ); /*fwds*/
 static ExeContext* record_ExeContext_wrk ( ThreadId tid, Word first_ip_delta,
                                            Bool first_ip_only )
 {
@@ -487,9 +495,16 @@
 
 ExeContext* VG_(make_ExeContext_from_StackTrace)( Addr* ips, UInt n_ips )
 {
+   init_ExeContext_storage();
    return record_ExeContext_wrk2(ips, n_ips);
 }
 
+ExeContext* VG_(null_ExeContext) (void)
+{
+   init_ExeContext_storage();
+   return null_ExeContext;
+}
+
 /*--------------------------------------------------------------------*/
 /*--- end                                           m_execontext.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/include/pub_tool_execontext.h b/include/pub_tool_execontext.h
index ec890e4..66d90a4 100644
--- a/include/pub_tool_execontext.h
+++ b/include/pub_tool_execontext.h
@@ -110,6 +110,11 @@
 // Make an ExeContext containing exactly the specified stack frames.
 ExeContext* VG_(make_ExeContext_from_StackTrace)( Addr* ips, UInt n_ips );
 
+// Returns the "null" exe context. The null execontext is an artificial
+// exe context, with a stack trace made of one Addr (the NULL address).
+extern 
+ExeContext* VG_(null_ExeContext) (void);
+
 #endif   // __PUB_TOOL_EXECONTEXT_H
 
 /*--------------------------------------------------------------------*/
diff --git a/memcheck/docs/mc-manual.xml b/memcheck/docs/mc-manual.xml
index 98f5139..7a22401 100644
--- a/memcheck/docs/mc-manual.xml
+++ b/memcheck/docs/mc-manual.xml
@@ -827,6 +827,61 @@
     </listitem>
   </varlistentry>
 
+  <varlistentry id="opt.keep-stacktraces" xreflabel="--keep-stacktraces">
+    <term>
+      <option><![CDATA[--keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none [default: alloc-then-free] ]]></option>
+    </term>
+    <listitem>
+      <para>Controls which stack trace(s) to keep for malloc'd and/or
+      free'd blocks.
+      </para>
+
+      <para>With <varname>alloc-then-free</varname>, the malloc stack
+      trace is recorded at allocation time. The block contains a
+      reference to this allocation stack trace. When the block is
+      freed, the block will then reference the free stack trace.  So,
+      a 'use after free' error will only report the free stack trace.
+      </para>
+
+      <para>With <varname>alloc-and-free</varname>, both the malloc
+      and the free stack trace (for freed block) are recorded and
+      referenced by the block. A 'use after free' error will report
+      the free stack trace, followed by the stack trace where this
+      block was allocated.  Compared
+      to <varname>alloc-then-free</varname>, this value very slightly
+      increases Valgrind memory use as the block contains two references
+      instead of one.
+      </para>
+
+      <para>With <varname>alloc</varname>, only the malloc stack trace
+      is recorded (and reported). With <varname>free</varname>, only
+      the free stack trace is recorded (and reported). These values
+      somewhat decrease Valgrind memory and cpu usage. They can be
+      useful depending on the error types you are searching for and
+      the level of details you need to analyse them. For example, if
+      you are only interested in memory leak errors, it is sufficient
+      to record the allocation stack traces.
+      </para>
+
+      <para>With <varname>none</varname>, no stack traces are recorded
+      for malloc and free operations. If your program allocates a lot
+      of blocks and/or from many different stack traces, this can
+      significantly decrease cpu and/or memory usage. Of course, very
+      few details will be reported for errors related to heap blocks.
+      </para>
+
+      <para> Note that once a stack trace is recorded, Valgrind keeps
+      the stack trace in memory even if not referenced anymore by
+      any block. Some programs (for example, recursive algorithms)
+      can generate a huge number of stack traces. If Valgrind uses too
+      much memory in such circumstances, you can reduce the memory
+      usage with the options <varname>--keep-stacktraces</varname>
+      and/or by using a smaller value for the
+      option <varname>--num-callers</varname>.
+      </para>
+    </listitem>
+  </varlistentry>
+
   <varlistentry id="opt.freelist-vol" xreflabel="--freelist-vol">
     <term>
       <option><![CDATA[--freelist-vol=<number> [default: 20000000] ]]></option>
diff --git a/memcheck/mc_errors.c b/memcheck/mc_errors.c
index 91a6045..9d8d598 100644
--- a/memcheck/mc_errors.c
+++ b/memcheck/mc_errors.c
@@ -60,7 +60,6 @@
 typedef enum {
    Block_Mallocd = 111,
    Block_Freed,
-   Block_Mempool,
    Block_MempoolChunk,
    Block_UserG
 } BlockKind;
@@ -102,7 +101,8 @@
          const HChar* block_desc;    // "block", "mempool" or user-defined
          SizeT       block_szB;
          PtrdiffT    rwoffset;
-         ExeContext* lastchange;
+         ExeContext* allocated_at;  // might be null_ExeContext.
+         ExeContext* freed_at;      // might be null_ExeContext.
       } Block;
 
       // In a global .data symbol.  This holds the first 127 chars of
@@ -344,7 +344,29 @@
                                                      : "client-defined",
             xpost
          );
-         VG_(pp_ExeContext)(ai->Addr.Block.lastchange);
+         if (ai->Addr.Block.block_kind==Block_Mallocd) {
+            VG_(pp_ExeContext)(ai->Addr.Block.allocated_at);
+            tl_assert (ai->Addr.Block.freed_at == VG_(null_ExeContext)());
+         }
+         else if (ai->Addr.Block.block_kind==Block_Freed) {
+            VG_(pp_ExeContext)(ai->Addr.Block.freed_at);
+            if (ai->Addr.Block.allocated_at != VG_(null_ExeContext)()) {
+               emit(
+                  "%s block was alloc'd at%s\n",
+                  xpre,
+                  xpost
+               );
+               VG_(pp_ExeContext)(ai->Addr.Block.allocated_at);
+            }
+         }
+         else {
+            // client-defined
+            VG_(pp_ExeContext)(ai->Addr.Block.allocated_at);
+            tl_assert (ai->Addr.Block.freed_at == VG_(null_ExeContext)());
+            /* Nb: cannot have a freed_at, as a freed client-defined block
+               has a Block_Freed block_kind. */
+         }
+         
          break;
       }
 
@@ -1009,7 +1031,8 @@
    ai->Addr.Block.block_desc = "block";
    ai->Addr.Block.block_szB  = mc->szB;
    ai->Addr.Block.rwoffset   = 0;
-   ai->Addr.Block.lastchange = mc->where;
+   ai->Addr.Block.allocated_at = MC_(allocated_at) (mc);
+   ai->Addr.Block.freed_at = MC_(freed_at) (mc);
    VG_(maybe_record_error)( tid, Err_FreeMismatch, mc->data, /*s*/NULL,
                             &extra );
 }
@@ -1194,7 +1217,8 @@
             ai->Addr.Block.block_desc = "block";
          ai->Addr.Block.block_szB  = mc->szB;
          ai->Addr.Block.rwoffset   = (Word)a - (Word)mc->data;
-         ai->Addr.Block.lastchange = mc->where;
+         ai->Addr.Block.allocated_at = MC_(allocated_at)(mc);
+         ai->Addr.Block.freed_at = MC_(freed_at)(mc);
          return;
       }
    }
@@ -1206,7 +1230,8 @@
       ai->Addr.Block.block_desc = "block";
       ai->Addr.Block.block_szB  = mc->szB;
       ai->Addr.Block.rwoffset   = (Word)a - (Word)mc->data;
-      ai->Addr.Block.lastchange = mc->where;
+      ai->Addr.Block.allocated_at = MC_(allocated_at)(mc);
+      ai->Addr.Block.freed_at = MC_(freed_at)(mc);
       return;
    }
    /* -- Perhaps the variable type/location data describes it? -- */
@@ -1407,7 +1432,8 @@
          ai->Addr.Block.block_desc = cgbs[i].desc;
          ai->Addr.Block.block_szB  = cgbs[i].size;
          ai->Addr.Block.rwoffset   = (Word)(a) - (Word)(cgbs[i].start);
-         ai->Addr.Block.lastchange = cgbs[i].where;
+         ai->Addr.Block.allocated_at = cgbs[i].where;
+         ai->Addr.Block.freed_at = VG_(null_ExeContext)();
          return True;
       }
    }
@@ -1433,7 +1459,8 @@
                ai->Addr.Block.block_desc = "block";
                ai->Addr.Block.block_szB  = mc->szB;
                ai->Addr.Block.rwoffset   = (Word)a - (Word)mc->data;
-               ai->Addr.Block.lastchange = mc->where;
+               ai->Addr.Block.allocated_at = MC_(allocated_at)(mc);
+               ai->Addr.Block.freed_at = MC_(freed_at)(mc);
                return True;
             }
          }
diff --git a/memcheck/mc_include.h b/memcheck/mc_include.h
index f9f07bf..29a0968 100644
--- a/memcheck/mc_include.h
+++ b/memcheck/mc_include.h
@@ -67,10 +67,25 @@
       Addr         data;            // Address of the actual block.
       SizeT        szB : (sizeof(SizeT)*8)-2; // Size requested; 30 or 62 bits.
       MC_AllocKind allockind : 2;   // Which operation did the allocation.
-      ExeContext*  where;           // Where it was allocated.
+      ExeContext*  where[0];
+      /* Variable-length array. The size depends on MC_(clo_keep_stacktraces).
+         This array optionally stores the alloc and/or free stack trace. */
    }
    MC_Chunk;
 
+/* Returns the execontext where the MC_Chunk was allocated/freed.
+   Returns VG_(null_ExeContext)() if the execontext has not been recorded (due
+   to MC_(clo_keep_stacktraces) and/or because block not yet freed). */
+ExeContext* MC_(allocated_at) (MC_Chunk*);
+ExeContext* MC_(freed_at) (MC_Chunk*);
+
+/* Records and sets execontext according to MC_(clo_keep_stacktraces) */
+void  MC_(set_allocated_at) (ThreadId, MC_Chunk*);
+void  MC_(set_freed_at) (ThreadId, MC_Chunk*);
+
+/* number of pointers needed according to MC_(clo_keep_stacktraces). */
+UInt MC_(n_where_pointers) (void);
+
 /* Memory pool.  Nb: first two fields must match core's VgHashNode. */
 typedef
    struct _MC_Mempool {
@@ -492,6 +507,20 @@
 extern Int MC_(clo_malloc_fill);
 extern Int MC_(clo_free_fill);
 
+/* Which stack trace(s) to keep for malloc'd/free'd client blocks?
+   For each client block, the stack traces where it was allocated
+   and/or freed are optionally kept depending on MC_(clo_keep_stacktraces). */
+typedef
+   enum {                 // keep alloc stack trace ?  keep free stack trace ?
+      KS_none,            // never                     never
+      KS_alloc,           // always                    never
+      KS_free,            // never                     always
+      KS_alloc_then_free, // when still malloc'd       when free'd
+      KS_alloc_and_free,  // always                    always
+   }
+   KeepStacktraces;
+extern KeepStacktraces MC_(clo_keep_stacktraces);
+
 /* Indicates the level of instrumentation/checking done by Memcheck.
 
    1 = No undefined value checking, Addrcheck-style behaviour only:
diff --git a/memcheck/mc_leakcheck.c b/memcheck/mc_leakcheck.c
index 1ebd682..98addd7 100644
--- a/memcheck/mc_leakcheck.c
+++ b/memcheck/mc_leakcheck.c
@@ -1001,7 +1001,7 @@
       LossRecord*   old_lr;
       LossRecordKey lrkey;
       lrkey.state        = ex->state;
-      lrkey.allocated_at = ch->where;
+      lrkey.allocated_at = MC_(allocated_at)(ch);
 
       old_lr = VG_(OSetGen_Lookup)(lr_table, &lrkey);
       if (old_lr) {
@@ -1175,7 +1175,7 @@
          LossRecordKey ind_lrkey;
          Int lr_i;
          ind_lrkey.state = ind_ex->state;
-         ind_lrkey.allocated_at = ind_ch->where;
+         ind_lrkey.allocated_at = MC_(allocated_at)(ind_ch);
          ind_lr = VG_(OSetGen_Lookup)(lr_table, &ind_lrkey);
          for (lr_i = 0; lr_i < n_lossrecords; lr_i++)
             if (ind_lr == lr_array[lr_i])
@@ -1227,7 +1227,7 @@
       LossRecord*   old_lr;
       LossRecordKey lrkey;
       lrkey.state        = ex->state;
-      lrkey.allocated_at = ch->where;
+      lrkey.allocated_at = MC_(allocated_at)(ch);
 
       old_lr = VG_(OSetGen_Lookup)(lr_table, &lrkey);
       if (old_lr) {
@@ -1420,9 +1420,9 @@
          VG_(umsg)("Block 0x%lx..0x%lx overlaps with block 0x%lx..0x%lx\n",
                    start1, end1, start2, end2);
          VG_(umsg)("Blocks allocation contexts:\n"),
-         VG_(pp_ExeContext)( ch1->where);
+         VG_(pp_ExeContext)( MC_(allocated_at)(ch1));
          VG_(umsg)("\n"),
-         VG_(pp_ExeContext)( ch2->where);
+         VG_(pp_ExeContext)(  MC_(allocated_at)(ch2));
          VG_(umsg)("This is usually caused by using VALGRIND_MALLOCLIKE_BLOCK");
          VG_(umsg)("in an inappropriate way.\n");
          tl_assert (0);
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 17123a2..21c36f3 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -4836,6 +4836,7 @@
 Bool          MC_(clo_workaround_gcc296_bugs) = False;
 Int           MC_(clo_malloc_fill)            = -1;
 Int           MC_(clo_free_fill)              = -1;
+KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_then_free;
 Int           MC_(clo_mc_level)               = 2;
 
 static Bool mc_process_cmd_line_options(const HChar* arg)
@@ -4961,6 +4962,17 @@
    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
 
+   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
+                       MC_(clo_keep_stacktraces), KS_alloc) {}
+   else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
+                       MC_(clo_keep_stacktraces), KS_free) {}
+   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
+                       MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
+   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
+                       MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
+   else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
+                       MC_(clo_keep_stacktraces), KS_none) {}
+
    else
       return VG_(replacement_malloc_process_cmd_line_option)(arg);
 
@@ -4996,6 +5008,8 @@
 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
+"    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
+"        stack trace(s) to keep for malloc'd/free'd areas       [alloc-then-free]\n"
    );
 }
 
@@ -6168,6 +6182,13 @@
       tl_assert(ocacheL2 == NULL);
    }
 
+   MC_(chunk_poolalloc) = VG_(newPA)
+      (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
+       1000,
+       VG_(malloc),
+       "mc.cMC.1 (MC_Chunk pools)",
+       VG_(free));
+
    /* Do not check definedness of guest state if --undef-value-errors=no */
    if (MC_(clo_mc_level) >= 2)
       VG_(track_pre_reg_read) ( mc_pre_reg_read );
@@ -6478,11 +6499,8 @@
    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
 
    init_shadow_memory();
-   MC_(chunk_poolalloc) = VG_(newPA) (sizeof(MC_Chunk),
-                                      1000,
-                                      VG_(malloc),
-                                      "mc.cMC.1 (MC_Chunk pools)",
-                                      VG_(free));
+   // MC_(chunk_poolalloc) must be allocated in post_clo_init
+   tl_assert(MC_(chunk_poolalloc) == NULL);
    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
    init_prof_mem();
diff --git a/memcheck/mc_malloc_wrappers.c b/memcheck/mc_malloc_wrappers.c
index 336693c..144559f 100644
--- a/memcheck/mc_malloc_wrappers.c
+++ b/memcheck/mc_malloc_wrappers.c
@@ -75,7 +75,7 @@
 /* Pool allocator for MC_Chunk. */   
 PoolAlloc *MC_(chunk_poolalloc) = NULL;
 static
-MC_Chunk* create_MC_Chunk ( ExeContext* ec, Addr p, SizeT szB,
+MC_Chunk* create_MC_Chunk ( ThreadId tid, Addr p, SizeT szB,
                             MC_AllocKind kind);
 static inline
 void delete_MC_Chunk (MC_Chunk* mc);
@@ -187,14 +187,20 @@
 /* Allocate a shadow chunk, put it on the appropriate list.
    If needed, release oldest blocks from freed list. */
 static
-MC_Chunk* create_MC_Chunk ( ExeContext* ec, Addr p, SizeT szB,
+MC_Chunk* create_MC_Chunk ( ThreadId tid, Addr p, SizeT szB,
                             MC_AllocKind kind)
 {
    MC_Chunk* mc  = VG_(allocEltPA)(MC_(chunk_poolalloc));
    mc->data      = p;
    mc->szB       = szB;
    mc->allockind = kind;
-   mc->where     = ec;
+   switch ( MC_(n_where_pointers)() ) {
+      case 2: mc->where[1] = 0; // fall through to case 1
+      case 1: mc->where[0] = 0; // fall through to case 0
+      case 0: break;
+      default: tl_assert(0);
+   }
+   MC_(set_allocated_at) (tid, mc);
 
    /* Each time a new MC_Chunk is created, release oldest blocks
       if the free list volume is exceeded. */
@@ -217,6 +223,114 @@
    VG_(freeEltPA) (MC_(chunk_poolalloc), mc);
 }
 
+// True if mc is in the given block list.
+static Bool in_block_list (VgHashTable block_list, MC_Chunk* mc)
+{
+   MC_Chunk* found_mc = VG_(HT_lookup) ( block_list, (UWord)mc->data );
+   if (found_mc) {
+      tl_assert (found_mc->data == mc->data);
+      /* If a user builds a pool from a malloc-ed superblock
+         and uses VALGRIND_MALLOCLIKE_BLOCK to "mark"
+         an address at the beginning of this superblock, then
+         this address will be twice in the block_list.
+         We handle this case by checking size and allockind.
+         Note: I suspect that having the same block
+         twice in MC_(malloc_list) is a recipe for bugs.
+         We might maybe better create a "standard" mempool to
+         handle all this more cleanly. */
+      if (found_mc->szB != mc->szB
+          || found_mc->allockind != mc->allockind)
+         return False;
+      tl_assert (found_mc == mc);
+      return True;
+   } else
+      return False;
+}
+
+// True if mc is a live block (not yet freed).
+static Bool live_block (MC_Chunk* mc)
+{
+   if (mc->allockind == MC_AllocCustom) {
+      MC_Mempool* mp;
+      VG_(HT_ResetIter)(MC_(mempool_list));
+      while ( (mp = VG_(HT_Next)(MC_(mempool_list))) ) {
+         if ( in_block_list (mp->chunks, mc) )
+            return True;
+      }
+   }
+   /* Note: we fallback here for a not found MC_AllocCustom
+      as such a block can be inserted in MC_(malloc_list)
+      by VALGRIND_MALLOCLIKE_BLOCK. */
+   return in_block_list ( MC_(malloc_list), mc );
+}
+
+ExeContext* MC_(allocated_at) (MC_Chunk* mc)
+{
+   switch (MC_(clo_keep_stacktraces)) {
+      case KS_none:            return VG_(null_ExeContext) ();
+      case KS_alloc:           return mc->where[0];
+      case KS_free:            return VG_(null_ExeContext) ();
+      case KS_alloc_then_free: return (live_block(mc) ?
+                                       mc->where[0] : VG_(null_ExeContext) ());
+      case KS_alloc_and_free:  return mc->where[0];
+      default: tl_assert (0);
+   }
+}
+
+ExeContext* MC_(freed_at) (MC_Chunk* mc)
+{
+   switch (MC_(clo_keep_stacktraces)) {
+      case KS_none:            return VG_(null_ExeContext) ();
+      case KS_alloc:           return VG_(null_ExeContext) ();
+      case KS_free:            return (mc->where[0] ?
+                                       mc->where[0] : VG_(null_ExeContext) ());
+      case KS_alloc_then_free: return (live_block(mc) ?
+                                       VG_(null_ExeContext) () : mc->where[0]);
+      case KS_alloc_and_free:  return (mc->where[1] ?
+                                       mc->where[1] : VG_(null_ExeContext) ());
+      default: tl_assert (0);
+   }
+}
+
+void  MC_(set_allocated_at) (ThreadId tid, MC_Chunk* mc)
+{
+   switch (MC_(clo_keep_stacktraces)) {
+      case KS_none:            return;
+      case KS_alloc:           break;
+      case KS_free:            return;
+      case KS_alloc_then_free: break;
+      case KS_alloc_and_free:  break;
+      default: tl_assert (0);
+   }
+   mc->where[0] = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
+}
+
+void  MC_(set_freed_at) (ThreadId tid, MC_Chunk* mc)
+{
+   UInt pos;
+   switch (MC_(clo_keep_stacktraces)) {
+      case KS_none:            return;
+      case KS_alloc:           return;
+      case KS_free:            pos = 0; break;
+      case KS_alloc_then_free: pos = 0; break;
+      case KS_alloc_and_free:  pos = 1; break;
+      default: tl_assert (0);
+   }
+   mc->where[pos] = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
+}
+
+UInt MC_(n_where_pointers) (void)
+{
+   switch (MC_(clo_keep_stacktraces)) {
+      case KS_none:            return 0;
+      case KS_alloc:
+      case KS_free:
+      case KS_alloc_then_free: return 1;
+      case KS_alloc_and_free:  return 2;
+      default: tl_assert (0);
+   }
+}
+
 /*------------------------------------------------------------*/
 /*--- client_malloc(), etc                                 ---*/
 /*------------------------------------------------------------*/
@@ -253,7 +367,7 @@
                        Addr p, SizeT szB, SizeT alignB,
                        Bool is_zeroed, MC_AllocKind kind, VgHashTable table)
 {
-   ExeContext* ec;
+   MC_Chunk* mc;
 
    // Allocate and zero if necessary
    if (p) {
@@ -276,16 +390,13 @@
    // Only update stats if allocation succeeded.
    cmalloc_n_mallocs ++;
    cmalloc_bs_mallocd += (ULong)szB;
-
-   ec = VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
-   tl_assert(ec);
-
-   VG_(HT_add_node)( table, create_MC_Chunk(ec, p, szB, kind) );
+   mc = create_MC_Chunk (tid, p, szB, kind);
+   VG_(HT_add_node)( table, mc );
 
    if (is_zeroed)
       MC_(make_mem_defined)( p, szB );
    else {
-      UInt ecu = VG_(get_ECU_from_ExeContext)(ec);
+      UInt ecu = VG_(get_ECU_from_ExeContext)(MC_(allocated_at)(mc));
       tl_assert(VG_(is_plausible_ECU)(ecu));
       MC_(make_mem_undefined_w_otag)( p, szB, ecu | MC_OKIND_HEAP );
    }
@@ -358,7 +469,7 @@
    MC_(make_mem_noaccess)( mc->data-rzB, mc->szB + 2*rzB );
 
    /* Record where freed */
-   mc->where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
+   MC_(set_freed_at) (tid, mc);
    /* Put it out of harm's way for a while */
    add_to_freed_queue ( mc );
    /* If the free list volume is bigger than MC_(clo_freelist_vol),
@@ -367,6 +478,22 @@
       even for big blocks being freed by the client. */
 }
 
+
+static
+void record_freemismatch_error (ThreadId tid, MC_Chunk* mc)
+{
+   /* MC_(record_freemismatch_error) reports errors for still
+      allocated blocks but we are in the middle of freeing it.  To
+      report the error correctly, we re-insert the chunk (making it
+      again a "clean allocated block", report the error, and then
+      re-remove the chunk.  This avoids to do a VG_(HT_lookup)
+      followed by a VG_(HT_remove) in all "non-erroneous cases". */
+   VG_(HT_add_node)( MC_(malloc_list), mc );
+   MC_(record_freemismatch_error) ( tid, mc );
+   if ((mc != VG_(HT_remove) ( MC_(malloc_list), (UWord)mc->data )))
+      tl_assert(0);
+}
+
 void MC_(handle_free) ( ThreadId tid, Addr p, UInt rzB, MC_AllocKind kind )
 {
    MC_Chunk* mc;
@@ -380,7 +507,7 @@
       /* check if it is a matching free() / delete / delete [] */
       if (kind != mc->allockind) {
          tl_assert(p == mc->data);
-         MC_(record_freemismatch_error) ( tid, mc );
+         record_freemismatch_error ( tid, mc );
       }
       die_and_free_mem ( tid, mc, rzB );
    }
@@ -406,8 +533,9 @@
 
 void* MC_(realloc) ( ThreadId tid, void* p_old, SizeT new_szB )
 {
-   MC_Chunk* mc;
-   void*     p_new;
+   MC_Chunk* old_mc;
+   MC_Chunk* new_mc;
+   Addr      a_new; 
    SizeT     old_szB;
 
    if (complain_about_silly_args(new_szB, "realloc")) 
@@ -418,93 +546,70 @@
    cmalloc_bs_mallocd += (ULong)new_szB;
 
    /* Remove the old block */
-   mc = VG_(HT_remove) ( MC_(malloc_list), (UWord)p_old );
-   if (mc == NULL) {
+   old_mc = VG_(HT_remove) ( MC_(malloc_list), (UWord)p_old );
+   if (old_mc == NULL) {
       MC_(record_free_error) ( tid, (Addr)p_old );
       /* We return to the program regardless. */
       return NULL;
    }
 
    /* check if its a matching free() / delete / delete [] */
-   if (MC_AllocMalloc != mc->allockind) {
+   if (MC_AllocMalloc != old_mc->allockind) {
       /* can not realloc a range that was allocated with new or new [] */
-      tl_assert((Addr)p_old == mc->data);
-      MC_(record_freemismatch_error) ( tid, mc );
+      tl_assert((Addr)p_old == old_mc->data);
+      record_freemismatch_error ( tid, old_mc );
       /* but keep going anyway */
    }
 
-   old_szB = mc->szB;
+   old_szB = old_mc->szB;
 
-   /* In all cases, even when the new size is smaller or unchanged, we
-      reallocate and copy the contents, and make the old block
-      inaccessible.  This is so as to guarantee to catch all cases of
-      accesses via the old address after reallocation, regardless of
-      the change in size.  (Of course the ability to detect accesses
-      to the old block also depends on the size of the freed blocks
-      queue). */
+   /* Get new memory */
+   a_new = (Addr)VG_(cli_malloc)(VG_(clo_alignment), new_szB);
 
-   if (new_szB <= old_szB) {
-      /* new size is smaller or the same */
-      Addr a_new; 
-      /* Get new memory */
-      a_new = (Addr)VG_(cli_malloc)(VG_(clo_alignment), new_szB);
+   if (a_new) {
+      /* In all cases, even when the new size is smaller or unchanged, we
+         reallocate and copy the contents, and make the old block
+         inaccessible.  This is so as to guarantee to catch all cases of
+         accesses via the old address after reallocation, regardless of
+         the change in size.  (Of course the ability to detect accesses
+         to the old block also depends on the size of the freed blocks
+         queue). */
 
-      if (a_new) {
-         ExeContext* ec;
+      // Allocate a new chunk.
+      new_mc = create_MC_Chunk( tid, a_new, new_szB, MC_AllocMalloc );
 
-         ec = VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
-         tl_assert(ec);
+      // Now insert the new mc (with a new 'data' field) into malloc_list.
+      VG_(HT_add_node)( MC_(malloc_list), new_mc );
 
-         /* Retained part is copied, red zones set as normal */
-         MC_(make_mem_noaccess)( a_new-MC_(Malloc_Redzone_SzB), 
-                                 MC_(Malloc_Redzone_SzB) );
+      /* Retained part is copied, red zones set as normal */
+
+      /* Redzone at the front */
+      MC_(make_mem_noaccess)( a_new-MC_(Malloc_Redzone_SzB), 
+                              MC_(Malloc_Redzone_SzB) );
+
+      /* payload */
+      if (old_szB >= new_szB) {
+         /* new size is smaller or the same */
+
+         /* Copy address range state and value from old to new */
          MC_(copy_address_range_state) ( (Addr)p_old, a_new, new_szB );
-         MC_(make_mem_noaccess)        ( a_new+new_szB, MC_(Malloc_Redzone_SzB));
-
-         /* Copy from old to new */
          VG_(memcpy)((void*)a_new, p_old, new_szB);
-
-         /* Possibly fill freed area with specified junk. */
-         if (MC_(clo_free_fill) != -1) {
-            tl_assert(MC_(clo_free_fill) >= 0x00 && MC_(clo_free_fill) <= 0xFF);
-            VG_(memset)((void*)p_old, MC_(clo_free_fill), old_szB);
-         }
-
-         /* Free old memory */
-         /* Nb: we have to allocate a new MC_Chunk for the new memory rather
-            than recycling the old one, so that any erroneous accesses to the
-            old memory are reported. */
-         die_and_free_mem ( tid, mc, MC_(Malloc_Redzone_SzB) );
-
-         // Allocate a new chunk.
-         mc = create_MC_Chunk( ec, a_new, new_szB, MC_AllocMalloc );
-      }
-
-      p_new = (void*)a_new;
-
-   } else {
-      /* new size is bigger */
-      Addr a_new; 
-      tl_assert(old_szB < new_szB);
-      /* Get new memory */
-      a_new = (Addr)VG_(cli_malloc)(VG_(clo_alignment), new_szB);
-
-      if (a_new) {
+      } else {
+         /* new size is bigger */
          UInt        ecu;
-         ExeContext* ec;
 
-         ec = VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
-         tl_assert(ec);
-         ecu = VG_(get_ECU_from_ExeContext)(ec);
+         /* Copy address range state and value from old to new */
+         MC_(copy_address_range_state) ( (Addr)p_old, a_new, old_szB );
+         VG_(memcpy)((void*)a_new, p_old, old_szB);
+
+         // If the block has grown, we mark the grown area as undefined.
+         // We have to do that after VG_(HT_add_node) to ensure the ecu
+         // execontext is for a fully allocated block.
+         ecu = VG_(get_ECU_from_ExeContext)(MC_(allocated_at)(new_mc));
          tl_assert(VG_(is_plausible_ECU)(ecu));
-
-         /* First half kept and copied, second half new, red zones as normal */
-         MC_(make_mem_noaccess)( a_new-MC_(Malloc_Redzone_SzB), 
-                                 MC_(Malloc_Redzone_SzB) );
-         MC_(copy_address_range_state) ( (Addr)p_old, a_new, mc->szB );
-         MC_(make_mem_undefined_w_otag)( a_new+mc->szB, new_szB-mc->szB,
-                                                        ecu | MC_OKIND_HEAP );
-         MC_(make_mem_noaccess)        ( a_new+new_szB, MC_(Malloc_Redzone_SzB) );
+         MC_(make_mem_undefined_w_otag)( a_new+old_szB,
+                                         new_szB-old_szB,
+                                         ecu | MC_OKIND_HEAP );
 
          /* Possibly fill new area with specified junk */
          if (MC_(clo_malloc_fill) != -1) {
@@ -513,37 +618,31 @@
             VG_(memset)((void*)(a_new+old_szB), MC_(clo_malloc_fill), 
                                                 new_szB-old_szB);
          }
-
-         /* Copy from old to new */
-         VG_(memcpy)((void*)a_new, p_old, mc->szB);
-
-         /* Possibly fill freed area with specified junk. */
-         if (MC_(clo_free_fill) != -1) {
-            tl_assert(MC_(clo_free_fill) >= 0x00 && MC_(clo_free_fill) <= 0xFF);
-            VG_(memset)((void*)p_old, MC_(clo_free_fill), old_szB);
-         }
-
-         /* Free old memory */
-         /* Nb: we have to allocate a new MC_Chunk for the new memory rather
-            than recycling the old one, so that any erroneous accesses to the
-            old memory are reported. */
-         die_and_free_mem ( tid, mc, MC_(Malloc_Redzone_SzB) );
-
-         // Allocate a new chunk.
-         mc = create_MC_Chunk( ec, a_new, new_szB, MC_AllocMalloc );
       }
 
-      p_new = (void*)a_new;
-   }  
+      /* Redzone at the back. */
+      MC_(make_mem_noaccess)        ( a_new+new_szB, MC_(Malloc_Redzone_SzB));
 
-   // Now insert the new mc (with a possibly new 'data' field) into
-   // malloc_list.  If this realloc() did not increase the memory size, we
-   // will have removed and then re-added mc unnecessarily.  But that's ok
-   // because shrinking a block with realloc() is (presumably) much rarer
-   // than growing it, and this way simplifies the growing case.
-   VG_(HT_add_node)( MC_(malloc_list), mc );
+      /* Possibly fill freed area with specified junk. */
+      if (MC_(clo_free_fill) != -1) {
+         tl_assert(MC_(clo_free_fill) >= 0x00 && MC_(clo_free_fill) <= 0xFF);
+         VG_(memset)((void*)p_old, MC_(clo_free_fill), old_szB);
+      }
 
-   return p_new;
+      /* Free old memory */
+      /* Nb: we have to allocate a new MC_Chunk for the new memory rather
+         than recycling the old one, so that any erroneous accesses to the
+         old memory are reported. */
+      die_and_free_mem ( tid, old_mc, MC_(Malloc_Redzone_SzB) );
+
+   } else {
+      /* Could not allocate new client memory.
+         Re-insert the old_mc (with the old ptr) in the HT, as old_mc was
+         unconditionally removed at the beginning of the function. */
+      VG_(HT_add_node)( MC_(malloc_list), old_mc );
+   }
+
+   return (void*)a_new;
 }
 
 SizeT MC_(malloc_usable_size) ( ThreadId tid, void* p )
@@ -746,7 +845,7 @@
                          chunks[i]->data, 
                          chunks[i]->data + chunks[i]->szB);
 
-            VG_(pp_ExeContext)(chunks[i]->where);
+            VG_(pp_ExeContext)(MC_(allocated_at)(chunks[i]));
          }
    }
    VG_(free)(chunks);
diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
index 2c75284..23f634c 100644
--- a/memcheck/tests/Makefile.am
+++ b/memcheck/tests/Makefile.am
@@ -125,6 +125,10 @@
 	malloc_free_fill.stderr.exp \
 	malloc_usable.stderr.exp malloc_usable.vgtest \
 	malloc1.stderr.exp malloc1.vgtest \
+	malloc1_ks_none.stderr.exp malloc1_ks_none.vgtest \
+	malloc1_ks_alloc.stderr.exp malloc1_ks_alloc.vgtest \
+	malloc1_ks_free.stderr.exp malloc1_ks_free.vgtest \
+	malloc1_ks_alloc_and_free.stderr.exp malloc1_ks_alloc_and_free.vgtest \
 	malloc2.stderr.exp malloc2.vgtest \
 	malloc3.stderr.exp malloc3.stdout.exp malloc3.vgtest \
 	manuel1.stderr.exp manuel1.stdout.exp manuel1.vgtest \
diff --git a/memcheck/tests/malloc1_ks_alloc.stderr.exp b/memcheck/tests/malloc1_ks_alloc.stderr.exp
new file mode 100644
index 0000000..b55c684
--- /dev/null
+++ b/memcheck/tests/malloc1_ks_alloc.stderr.exp
@@ -0,0 +1,14 @@
+Invalid write of size 1
+   ...
+ Address 0x........ is 1 bytes inside a block of size 10 free'd
+   ...
+  block was alloc'd at
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   ...
+
+Invalid write of size 1
+   ...
+ Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   ...
+
diff --git a/memcheck/tests/malloc1_ks_alloc.vgtest b/memcheck/tests/malloc1_ks_alloc.vgtest
new file mode 100644
index 0000000..d5d223c
--- /dev/null
+++ b/memcheck/tests/malloc1_ks_alloc.vgtest
@@ -0,0 +1,2 @@
+prog: malloc1
+vgopts: -q --keep-stacktraces=alloc
diff --git a/memcheck/tests/malloc1_ks_alloc_and_free.stderr.exp b/memcheck/tests/malloc1_ks_alloc_and_free.stderr.exp
new file mode 100644
index 0000000..e65308e
--- /dev/null
+++ b/memcheck/tests/malloc1_ks_alloc_and_free.stderr.exp
@@ -0,0 +1,15 @@
+Invalid write of size 1
+   ...
+ Address 0x........ is 1 bytes inside a block of size 10 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   ...
+  block was alloc'd at
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   ...
+
+Invalid write of size 1
+   ...
+ Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   ...
+
diff --git a/memcheck/tests/malloc1_ks_alloc_and_free.vgtest b/memcheck/tests/malloc1_ks_alloc_and_free.vgtest
new file mode 100644
index 0000000..5000561
--- /dev/null
+++ b/memcheck/tests/malloc1_ks_alloc_and_free.vgtest
@@ -0,0 +1,2 @@
+prog: malloc1
+vgopts: -q --keep-stacktraces=alloc-and-free
diff --git a/memcheck/tests/malloc1_ks_free.stderr.exp b/memcheck/tests/malloc1_ks_free.stderr.exp
new file mode 100644
index 0000000..d41f7cd
--- /dev/null
+++ b/memcheck/tests/malloc1_ks_free.stderr.exp
@@ -0,0 +1,11 @@
+Invalid write of size 1
+   ...
+ Address 0x........ is 1 bytes inside a block of size 10 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   ...
+
+Invalid write of size 1
+   ...
+ Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   ...
+
diff --git a/memcheck/tests/malloc1_ks_free.vgtest b/memcheck/tests/malloc1_ks_free.vgtest
new file mode 100644
index 0000000..9e4c7a0f
--- /dev/null
+++ b/memcheck/tests/malloc1_ks_free.vgtest
@@ -0,0 +1,2 @@
+prog: malloc1
+vgopts: -q --keep-stacktraces=free
diff --git a/memcheck/tests/malloc1_ks_none.stderr.exp b/memcheck/tests/malloc1_ks_none.stderr.exp
new file mode 100644
index 0000000..f90f32f
--- /dev/null
+++ b/memcheck/tests/malloc1_ks_none.stderr.exp
@@ -0,0 +1,10 @@
+Invalid write of size 1
+   ...
+ Address 0x........ is 1 bytes inside a block of size 10 free'd
+   ...
+
+Invalid write of size 1
+   ...
+ Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   ...
+
diff --git a/memcheck/tests/malloc1_ks_none.vgtest b/memcheck/tests/malloc1_ks_none.vgtest
new file mode 100644
index 0000000..3885074
--- /dev/null
+++ b/memcheck/tests/malloc1_ks_none.vgtest
@@ -0,0 +1,2 @@
+prog: malloc1
+vgopts: -q --keep-stacktraces=none