Remove existing non-working support for self-modifying code, and instead
add a simple compromise, in which the client can notify valgrind
that certain code address ranges are invalid and should be retranslated.
This is done using the VALGRIND_DISCARD_TRANSLATIONS macro in valgrind.h.

At the same time take the opportunity to close the potentially fatal
loophole that translations for executable segments were not being
discarded when those segments were munmapped.  They are now.

Documentation updated.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@274 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 8081e0a..b794e61 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -1,3 +1,4 @@
+
 /*--------------------------------------------------------------------*/
 /*--- The cache simulation framework: instrumentation, recording   ---*/
 /*--- and results printing.                                        ---*/
@@ -10,7 +11,6 @@
 
    Copyright (C) 2000-2002 Julian Seward 
       jseward@acm.org
-      Julian_Seward@muraroa.demon.co.uk
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -30,8 +30,6 @@
    The GNU General Public License is contained in the file LICENSE.
 */
 
-#include <string.h>
-
 #include "vg_include.h"
 
 #include "vg_cachesim_L2.c"
@@ -311,7 +309,7 @@
    filename_hash = hash(filename, N_FILE_ENTRIES);
    curr_file_node = BBCC_table[filename_hash];
    while (NULL != curr_file_node && 
-          strcmp(filename, curr_file_node->filename) != 0) {
+          VG_(strcmp)(filename, curr_file_node->filename) != 0) {
       curr_file_node = curr_file_node->next;
    }
    if (NULL == curr_file_node) {
@@ -323,7 +321,7 @@
    fnname_hash = hash(fn_name, N_FN_ENTRIES);
    curr_fn_node = curr_file_node->fns[fnname_hash];
    while (NULL != curr_fn_node && 
-          strcmp(fn_name, curr_fn_node->fn_name) != 0) {
+          VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
       curr_fn_node = curr_fn_node->next;
    }
    if (NULL == curr_fn_node) {
@@ -790,7 +788,7 @@
 
       /* Allow for filename switching in the middle of a BB;  if this happens,
        * must print the new filename with the function name. */
-      if (0 != strcmp(fl_buf, curr_file)) {
+      if (0 != VG_(strcmp)(fl_buf, curr_file)) {
          VG_(strcpy)(curr_file, fl_buf);
          VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
          VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
@@ -798,7 +796,7 @@
 
       /* If the function name for this instruction doesn't match that of the
        * first instruction in the BB, print warning. */
-      if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
+      if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
          VG_(printf)("Mismatched function names\n");
          VG_(printf)("  filenames: BB:%s, instr:%s;"
                      "  fn_names:  BB:%s, instr:%s;"
@@ -1071,3 +1069,13 @@
    VGP_POPCC;
 }
 
+
+void VG_(cachesim_notify_discard) ( TTEntry* tte )
+{
+  VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
+               tte->orig_addr, (Int)tte->orig_size);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_cachesim.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/docs/manual.html b/cachegrind/docs/manual.html
index dc66721..20fbb36 100644
--- a/cachegrind/docs/manual.html
+++ b/cachegrind/docs/manual.html
@@ -24,8 +24,9 @@
 <body bgcolor="#ffffff">
 
 <a name="title">&nbsp;</a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
 <center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
 <p>
 
 <center>
@@ -102,7 +103,9 @@
   <li>Reading/writing memory after it has been free'd</li>
   <li>Reading/writing off the end of malloc'd blocks</li>
   <li>Reading/writing inappropriate areas on the stack</li>
-  <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+  <li>Memory leaks -- where pointers to malloc'd blocks are lost
+  forever</li>
+  <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
 </ul>
 
 Problems like these can be difficult to find by other means, often
@@ -677,25 +680,6 @@
       all fairly dodgy and doesn't work at all if threads are
       involved.</li><br>
       <p>
-
-  <li><code>--smc-check=none</code><br>
-      <code>--smc-check=some</code> [default]<br>
-      <code>--smc-check=all</code>
-      <p>How carefully should Valgrind check for self-modifying code
-      writes, so that translations can be discarded?&nbsp; When
-      "none", no writes are checked.  When "some", only writes
-      resulting from moves from integer registers to memory are
-      checked.  When "all", all memory writes are checked, even those
-      with which are no sane program would generate code -- for
-      example, floating-point writes.
-      <p>
-      NOTE that this is all a bit bogus.  This mechanism has never
-      been enabled in any snapshot of Valgrind which was made
-      available to the general public, because the extra checks reduce
-      performance, increase complexity, and I have yet to come across
-      any programs which actually use self-modifying code.  I think
-      the flag is ignored.
-      </li>
 </ul>
 
 
@@ -1185,6 +1169,24 @@
     right now.  Returns no value.  I guess this could be used to
     incrementally check for leaks between arbitrary places in the
     program's execution.  Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+    of code in the specified address range.  Useful if you are
+    debugging a JITter or some other dynamic code generation system.
+    After this call, attempts to execute code in the invalidated
+    address range will cause valgrind to make new translations of that
+    code, which is probably the semantics you want.  Note that this is
+    implemented naively, and involves checking all 200191 entries in
+    the translation table to see if any of them overlap the specified
+    address range.  So try not to call it often, or performance will
+    nosedive.  Note that you can be clever about this: you only need
+    to call it when an area which previously contained code is
+    overwritten with new code.  You can choose to write code into
+    fresh memory, and just call this occasionally to discard large
+    chunks of old code all at once.
+    <p>
+    Warning: minimally tested.  Also, doesn't interact well with the
+    cache simulator.
 </ul>
 <p>
 
@@ -1255,7 +1257,7 @@
 <code>malloc</code> is 8-aligned.  Valgrind's allocator only
 guarantees 4-alignment, so without the patch Mozilla makes an illegal
 memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
 
 
 <a name="install"></a>
@@ -1730,10 +1732,8 @@
       running under Valgrind.  This is due to the large amount of
       adminstrative information maintained behind the scenes.  Another
       cause is that Valgrind dynamically translates the original
-      executable and never throws any translation away, except in
-      those rare cases where self-modifying code is detected.
-      Translated, instrumented code is 12-14 times larger than the
-      original (!) so you can easily end up with 15+ MB of
+      executable.  Translated, instrumented code is 14-16 times larger
+      than the original (!) so you can easily end up with 30+ MB of
       translations when running (eg) a web browser.
       </li>
 </ul>
@@ -1809,14 +1809,14 @@
 translations.  Subsequent jumps to that address will use this
 translation.
 
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated.  Such a write invalidates translations of code
-bracketing the written address.  Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there.  In this way,
-self modifying code is supported.  In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code.  Such checking is expensive, and in practice (fortunately)
+almost no applications need it.  However, to help people who are
+debugging dynamic code generation systems, there is a Client Request 
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range.  So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
 
 <p>The JITter translates basic blocks -- blocks of straight-line-code
 -- as single entities.  To minimise the considerable difficulties of
diff --git a/coregrind/docs/manual.html b/coregrind/docs/manual.html
index dc66721..20fbb36 100644
--- a/coregrind/docs/manual.html
+++ b/coregrind/docs/manual.html
@@ -24,8 +24,9 @@
 <body bgcolor="#ffffff">
 
 <a name="title">&nbsp;</a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
 <center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
 <p>
 
 <center>
@@ -102,7 +103,9 @@
   <li>Reading/writing memory after it has been free'd</li>
   <li>Reading/writing off the end of malloc'd blocks</li>
   <li>Reading/writing inappropriate areas on the stack</li>
-  <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+  <li>Memory leaks -- where pointers to malloc'd blocks are lost
+  forever</li>
+  <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
 </ul>
 
 Problems like these can be difficult to find by other means, often
@@ -677,25 +680,6 @@
       all fairly dodgy and doesn't work at all if threads are
       involved.</li><br>
       <p>
-
-  <li><code>--smc-check=none</code><br>
-      <code>--smc-check=some</code> [default]<br>
-      <code>--smc-check=all</code>
-      <p>How carefully should Valgrind check for self-modifying code
-      writes, so that translations can be discarded?&nbsp; When
-      "none", no writes are checked.  When "some", only writes
-      resulting from moves from integer registers to memory are
-      checked.  When "all", all memory writes are checked, even those
-      with which are no sane program would generate code -- for
-      example, floating-point writes.
-      <p>
-      NOTE that this is all a bit bogus.  This mechanism has never
-      been enabled in any snapshot of Valgrind which was made
-      available to the general public, because the extra checks reduce
-      performance, increase complexity, and I have yet to come across
-      any programs which actually use self-modifying code.  I think
-      the flag is ignored.
-      </li>
 </ul>
 
 
@@ -1185,6 +1169,24 @@
     right now.  Returns no value.  I guess this could be used to
     incrementally check for leaks between arbitrary places in the
     program's execution.  Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+    of code in the specified address range.  Useful if you are
+    debugging a JITter or some other dynamic code generation system.
+    After this call, attempts to execute code in the invalidated
+    address range will cause valgrind to make new translations of that
+    code, which is probably the semantics you want.  Note that this is
+    implemented naively, and involves checking all 200191 entries in
+    the translation table to see if any of them overlap the specified
+    address range.  So try not to call it often, or performance will
+    nosedive.  Note that you can be clever about this: you only need
+    to call it when an area which previously contained code is
+    overwritten with new code.  You can choose to write code into
+    fresh memory, and just call this occasionally to discard large
+    chunks of old code all at once.
+    <p>
+    Warning: minimally tested.  Also, doesn't interact well with the
+    cache simulator.
 </ul>
 <p>
 
@@ -1255,7 +1257,7 @@
 <code>malloc</code> is 8-aligned.  Valgrind's allocator only
 guarantees 4-alignment, so without the patch Mozilla makes an illegal
 memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
 
 
 <a name="install"></a>
@@ -1730,10 +1732,8 @@
       running under Valgrind.  This is due to the large amount of
       adminstrative information maintained behind the scenes.  Another
       cause is that Valgrind dynamically translates the original
-      executable and never throws any translation away, except in
-      those rare cases where self-modifying code is detected.
-      Translated, instrumented code is 12-14 times larger than the
-      original (!) so you can easily end up with 15+ MB of
+      executable.  Translated, instrumented code is 14-16 times larger
+      than the original (!) so you can easily end up with 30+ MB of
       translations when running (eg) a web browser.
       </li>
 </ul>
@@ -1809,14 +1809,14 @@
 translations.  Subsequent jumps to that address will use this
 translation.
 
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated.  Such a write invalidates translations of code
-bracketing the written address.  Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there.  In this way,
-self modifying code is supported.  In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code.  Such checking is expensive, and in practice (fortunately)
+almost no applications need it.  However, to help people who are
+debugging dynamic code generation systems, there is a Client Request 
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range.  So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
 
 <p>The JITter translates basic blocks -- blocks of straight-line-code
 -- as single entities.  To minimise the considerable difficulties of
diff --git a/coregrind/vg_constants.h b/coregrind/vg_constants.h
index 710b12c..252353c 100644
--- a/coregrind/vg_constants.h
+++ b/coregrind/vg_constants.h
@@ -90,16 +90,6 @@
 /* Constants for the fast original-code-write check cache. */
 
 
-/* Usually you want this to be zero. */
-#define VG_SMC_FASTCHECK_IN_C 0
-
-#define VG_SMC_CACHE_BITS  19
-#define VG_SMC_CACHE_SIZE  (1 << VG_SMC_CACHE_BITS)
-#define VG_SMC_CACHE_MASK  ((VG_SMC_CACHE_SIZE) - 1)
-
-#define VG_SMC_CACHE_SHIFT 6
-
-
 /* Assembly code stubs make these requests ... */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
 #define VG_USERREQ__PTHREAD_RETURNS         0x4002
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 214d2ca..573ee93 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1524,56 +1524,6 @@
 }
 
 
-/* A word in memory containing a pointer to vg_helper_smc_check4.
-   Never changes. 
-*/
-static const Addr vg_helper_smc_check4_ADDR
-   = (Addr)&VG_(helper_smc_check4);
-
-static void synth_orig_code_write_check ( Int sz, Int reg )
-{
-   UInt offset;
-
-   /*
-     In this example, reg is %eax and sz == 8:
-
-     -- check the first four bytes
-     0087 89C5                  movl    %eax, %ebp
-     0089 FF1544332211          call    * 0x11223344
-                  
-     -- check the second four
-     008f 89C5                  movl    %eax, %ebp
-     0091 83C504                addl    $4, %ebp
-     0094 FF1544332211          call    * 0x11223344
-
-     Because we can't call an absolute address (alas), the
-     address called is stored in memory at 0x11223344 in this
-     example, and it just contains the address of 
-     vg_helper_smc_check4 -- which is where we really want
-     to get to.
-   */
-   vg_assert(0);
-
-   if (sz < 4) sz = 4;
-
-   for (offset = 0; offset < sz; offset += 4) {
-
-      emit_movl_reg_reg ( reg, R_EBP );
-
-      if (offset > 0) {
-         newEmit();
-         emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset );
-         if (dis) VG_(printf)("\n");
-      }
-
-      newEmit();
-      emitB ( 0xFF ); emitB ( 0x15 ); 
-      emitL ( (Addr)&vg_helper_smc_check4_ADDR );
-      if (dis) VG_(printf)("\n");
-   }
-}
-
-
 /* Synthesise a minimal test (and which discards result) of reg32
    against lit.  It's always safe do simply
       emit_testv_lit_reg ( 4, lit, reg32 )
@@ -2264,8 +2214,10 @@
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == RealReg);
          synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
+	 /* No longer possible, but retained for illustrative purposes.
          if (u->smc_check) 
             synth_orig_code_write_check ( u->size, u->val2 );
+	 */
          break;
       }
 
@@ -2598,8 +2550,10 @@
          synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
                             u->val1 & 0xFF,
                             u->val2 );
+         /* No longer possible, but retained for illustrative purposes.
          if (u->opcode == FPU_W && u->smc_check) 
             synth_orig_code_write_check ( u->size, u->val2 );
+         */
          break;
 
       case FPU:
diff --git a/coregrind/vg_helpers.S b/coregrind/vg_helpers.S
index 62db9ec..2968922 100644
--- a/coregrind/vg_helpers.S
+++ b/coregrind/vg_helpers.S
@@ -146,51 +146,6 @@
 	ret
 
 
-/* Do a original-code-write check for the address in %ebp. */
-.global VG_(helper_smc_check4)
-VG_(helper_smc_check4):
-#if VG_SMC_FASTCHECK_IN_C
-
-	# save the live regs
-	pushl	%eax
-	pushl	%ebx
-	pushl	%ecx
-	pushl	%edx
-	pushl	%esi
-	pushl	%edi
-	
-	pushl	%ebp
-	call	VG_(smc_check4)
-	addl	$4, %esp
-
-	popl	%edi
-	popl	%esi
-	popl	%edx
-	popl	%ecx
-	popl	%ebx
-	popl	%eax
-	
-	ret
-#else	
-	incl	VG_(smc_total_check4s)
-	pushl	%ebp
-	shrl	$VG_SMC_CACHE_SHIFT, %ebp
-	andl	$VG_SMC_CACHE_MASK, %ebp
-	cmpb	$0, VG_(smc_cache)(%ebp)
-	jnz	vg_smc_cache_failure
-	addl	$4, %esp
-	ret
-      vg_smc_cache_failure:
-	popl	%ebp
-	pushal
-	pushl	%ebp
-	call	VG_(smc_check4)
-	addl	$4, %esp
-	popal
-	ret
-#endif
-
-	
 /* Fetch the time-stamp-ctr reg.
    On entry:
 	dummy, replaced by %EAX value
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 22e4f48..7f44dde 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -1301,7 +1301,7 @@
 extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
                                      Char* fn_name, Int n_fn_name);
 
-extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
 
 
 /* ---------------------------------------------------------------------
@@ -1459,21 +1459,6 @@
 /* total of register ranks over all translations */
 extern UInt VG_(total_reg_rank);
 
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-//extern UInt VG_(smc_total_check4s);
-
-/* Number of writes which the fast smc check couldn't show were
-   harmless. */
-extern UInt VG_(smc_cache_passed);
-
-/* Numnber of writes which really did write on original code. */
-extern UInt VG_(smc_fancy_passed);
-
-/* Number of translations discarded as a result. */
-//extern UInt VG_(smc_discard_count);
-
 /* Counts pertaining to internal sanity checking. */
 extern UInt VG_(sanity_fast_count);
 extern UInt VG_(sanity_slow_count);
@@ -1590,11 +1575,9 @@
 extern void VG_(flush_transtab) ( void );
 extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size );
 extern void VG_(add_to_trans_tab) ( TTEntry* tte );
+extern void VG_(invalidate_translations) ( Addr start, UInt range );
 
-extern void VG_(smc_mark_original) ( Addr original_addr, 
-                                     Int original_len );
-
-extern void VG_(init_transtab_and_SMC) ( void );
+extern void VG_(init_tt_tc) ( void );
 
 extern void VG_(sanity_check_tc_tt) ( void );
 extern Addr VG_(search_transtab) ( Addr original_addr );
@@ -1667,9 +1650,6 @@
    Exports of vg_helpers.S
    ------------------------------------------------------------------ */
 
-/* SMC fast checks. */
-extern void VG_(helper_smc_check4);
-
 /* Mul, div, etc, -- we don't codegen these directly. */
 extern void VG_(helper_idiv_64_32);
 extern void VG_(helper_div_64_32);
@@ -1729,6 +1709,9 @@
 extern void VG_(cachesim_log_non_mem_instr)(  iCC* cc );
 extern void VG_(cachesim_log_mem_instr)    ( idCC* cc, Addr data_addr );
 
+extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+
+
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
    ------------------------------------------------------------------ */
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index a7e41b2..94e175c 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -381,22 +381,6 @@
 UInt VG_(total_reg_rank) = 0;
 
 
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-UInt VG_(smc_total_check4s) = 0;
-
-/* Number of writes which the fast smc check couldn't show were
-   harmless. */
-UInt VG_(smc_cache_passed) = 0;
-
-/* Numnber of writes which really did write on original code. */
-UInt VG_(smc_fancy_passed) = 0;
-
-/* Number of translations discarded as a result. */
-UInt VG_(smc_discard_count) = 0;
-
-
 /* Counts pertaining to internal sanity checking. */
 UInt VG_(sanity_fast_count) = 0;
 UInt VG_(sanity_slow_count) = 0;
@@ -955,13 +939,6 @@
                 VG_(uinstrs_spill),
                 VG_(total_reg_rank) );
    VG_(message)(Vg_DebugMsg, 
-                "smc-check: %d checks, %d fast pass, "
-                "%d slow pass, %d discards.",
-		VG_(smc_total_check4s),
-		VG_(smc_cache_passed),
-		VG_(smc_fancy_passed),
-		VG_(smc_discard_count) );
-   VG_(message)(Vg_DebugMsg, 
                 "   sanity: %d cheap, %d expensive checks.",
                 VG_(sanity_fast_count), 
                 VG_(sanity_slow_count) );
@@ -1020,11 +997,12 @@
       VGP_PUSHCC(VgpInitAudit);
       VGM_(init_memory_audit)();
       VGP_POPCC;
-      VGP_PUSHCC(VgpReadSyms);
-      VG_(read_symbols)();
-      VGP_POPCC;
    }
 
+   VGP_PUSHCC(VgpReadSyms);
+   VG_(read_symbols)();
+   VGP_POPCC;
+
    /* End calibration of our RDTSC-based clock, leaving it as long as
       we can. */
    VG_(end_rdtsc_calibration)();
@@ -1033,7 +1011,7 @@
       carefully sets up the permissions maps to cover the anonymous
       mmaps for the translation table and translation cache, which
       wastes > 20M of virtual address space. */
-   VG_(init_transtab_and_SMC)();
+   VG_(init_tt_tc)();
 
    if (VG_(clo_verbosity) == 1) {
       VG_(message)(Vg_UserMsg, 
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index d1d792a..57d687d 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -330,8 +330,6 @@
    VG_(overall_in_count) ++;
    VG_(overall_in_osize) += orig_size;
    VG_(overall_in_tsize) += trans_size;
-   /* Record translated area for SMC detection. */
-   VG_(smc_mark_original) ( orig_addr, orig_size );
 }
 
 
@@ -2684,6 +2682,7 @@
       case VG_USERREQ__MAKE_NOACCESS_STACK:
       case VG_USERREQ__RUNNING_ON_VALGRIND:
       case VG_USERREQ__DO_LEAK_CHECK:
+      case VG_USERREQ__DISCARD_TRANSLATIONS:
          SET_EDX(
             tid, 
             VG_(handle_client_request) ( &VG_(threads)[tid], arg )
diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c
index c781751..eb3b394 100644
--- a/coregrind/vg_symtab2.c
+++ b/coregrind/vg_symtab2.c
@@ -36,13 +36,16 @@
 
 /* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
    dlopen()ed libraries, which is something that KDE3 does a lot.
-   Still kludgey, though less than before:
 
-   * we don't check whether we should throw away some symbol tables 
-     when munmap() happens
+   Stabs reader greatly improved by Nick Nethercote, Apr 02.
 
-   * symbol table reading code for ELF binaries is a shambles.  
-     Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
+   16 May 02: when notified about munmap, return a Bool indicating
+   whether or not the area being munmapped had executable permissions.
+   This is then used to determine whether or not
+   VG_(invalidate_translations) should be called for that area.  In order
+   that this work even if --instrument=no, in this case we still keep
+   track of the mapped executable segments, but do not load any debug
+   info or symbols.
 */
 
 /*------------------------------------------------------------*/
@@ -1181,9 +1184,11 @@
       = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
 
    /* And actually fill it up. */
-   vg_read_lib_symbols ( si );
-   canonicaliseSymtab ( si );
-   canonicaliseLoctab ( si );
+   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
+      vg_read_lib_symbols ( si );
+      canonicaliseSymtab ( si );
+      canonicaliseLoctab ( si );
+   }
 }
 
 
@@ -1197,9 +1202,6 @@
    which happen to correspond to the munmap()d area.  */
 void VG_(read_symbols) ( void )
 {
-   if (! VG_(clo_instrument) && ! VG_(clo_cachesim)) 
-      return;
-
    VG_(read_procselfmaps) ( read_symtab_callback );
 
    /* Do a sanity check on the symbol tables: ensure that the address
@@ -1222,7 +1224,6 @@
            /* the main assertion */
            overlap = (lo <= lo2 && lo2 <= hi)
                       || (lo <= hi2 && hi2 <= hi);
-           //vg_assert(!overlap);
 	   if (overlap) {
               VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
               ppSegInfo ( si );
@@ -1240,15 +1241,16 @@
    to a segment for a .so, and if so discard the relevant SegInfo.
    This might not be a very clever idea from the point of view of
    accuracy of error messages, but we need to do it in order to
-   maintain the no-overlapping invariant.  
+   maintain the no-overlapping invariant.
+
+   16 May 02: Returns a Bool indicating whether or not the discarded
+   range falls inside a known executable segment.  See comment at top
+   of file for why.
 */
-void VG_(symtab_notify_munmap) ( Addr start, UInt length )
+Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
 {
    SegInfo *prev, *curr;
 
-   if (! VG_(clo_instrument)) 
-     return;
-
    prev = NULL;
    curr = segInfo;
    while (True) {
@@ -1257,7 +1259,8 @@
       prev = curr;
       curr = curr->next;
    }
-   if (curr == NULL) return;
+   if (curr == NULL) 
+      return False;
 
    VG_(message)(Vg_UserMsg, 
                 "discard syms in %s due to munmap()", 
@@ -1272,6 +1275,7 @@
    }
 
    freeSegInfo(curr);
+   return True;
 }
 
 
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index 1e4bff2..0a80694 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -297,7 +297,7 @@
 
    Important!  If you change the set of allocatable registers from
    %eax, %ebx, %ecx, %edx, %esi you must change the
-   save/restore sequences in vg_helper_smc_check4 to match!  
+   save/restore sequences in various places to match!  
 */
 __inline__ Int VG_(rankToRealRegNo) ( Int rank )
 {
diff --git a/coregrind/vg_transtab.c b/coregrind/vg_transtab.c
index d0f0eb1..a364df0 100644
--- a/coregrind/vg_transtab.c
+++ b/coregrind/vg_transtab.c
@@ -32,6 +32,8 @@
 #include "vg_include.h"
 #include "vg_constants.h"
 
+/* #define DEBUG_TRANSTAB */
+
 
 /*------------------------------------------------------------*/
 /*--- Management of the LRU-based translation table+cache. ---*/
@@ -42,7 +44,7 @@
    of code retranslation.  */
 
 /* Size of the translation cache, in bytes. */
-#define VG_TC_SIZE /*16000000*/ 32000000 /*40000000*/
+#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/
 
 /* Do a LRU pass when the translation cache becomes this full. */
 #define VG_TC_LIMIT_PERCENT 98
@@ -52,7 +54,7 @@
 
 /* Number of entries in the translation table.  This must be a prime
    number in order to make the hashing work properly. */
-#define VG_TT_SIZE /*100129*/ 200191 /*250829*/
+#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/
 
 /* Do an LRU pass when the translation table becomes this full. */
 #define VG_TT_LIMIT_PERCENT /*67*/ 80
@@ -64,9 +66,12 @@
    N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago.  */
 #define VG_N_EPOCHS /*2000*/ /*4000*/ 20000
 
-/* This TT entry is empty. */
+/* This TT entry is empty.  There is no associated TC storage. */
 #define VG_TTE_EMPTY   ((Addr)1)
-/* This TT entry has been deleted. */
+/* This TT entry has been deleted, in the sense that it does not
+   contribute to the orig->trans mapping.  However, the ex-translation
+   it points at still occupies space in TC.  This slot cannot be
+   re-used without doing an LRU pass. */
 #define VG_TTE_DELETED ((Addr)3)
 
 /* The TC.  This used to be statically allocated, but that forces many
@@ -77,7 +82,8 @@
 */
 static UChar* vg_tc = NULL;
 
-/* Count of bytes used in the TC. */
+/* Count of bytes used in the TC.  This includes those pointed to from
+   VG_TTE_DELETED entries. */
 static Int vg_tc_used = 0;
 
 /* The TT.  Like TC, for the same reason, is dynamically allocated at
@@ -86,7 +92,7 @@
 */
 static TTEntry* vg_tt = NULL;
 
-/* Count of non-empty, non-deleted TT entries. */
+/* Count of non-empty TT entries.  This includes deleted ones. */
 static Int vg_tt_used = 0;
 
 /* Fast helper for the TT.  A direct-mapped cache which holds a
@@ -135,6 +141,10 @@
    if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
       return;
 
+#  ifdef DEBUG_TRANSTAB
+   VG_(sanity_check_tc_tt)();
+#  endif
+
    VGP_PUSHCC(VgpDoLRU);
    /*   
    VG_(printf)(
@@ -157,8 +167,9 @@
       vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
 
    for (i = 0; i < VG_TT_SIZE; i++) {
-      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
-          vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY 
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) 
+            continue;
       j = vg_tt[i].mru_epoch;
       vg_assert(j <= VG_(current_epoch));
       j = VG_(current_epoch) - j;
@@ -200,11 +211,11 @@
       recently used at most thresh epochs ago.  Traverse the TT and
       mark such entries as deleted. */
    for (i = 0; i < VG_TT_SIZE; i++) {
-      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
-         vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY 
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) 
+         continue;
       if (vg_tt[i].mru_epoch <= thresh) {
          vg_tt[i].orig_addr = VG_TTE_DELETED;
-         vg_tt_used--;
 	 VG_(this_epoch_out_count) ++;
 	 VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
 	 VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
@@ -214,9 +225,6 @@
       }
    }
 
-   vg_assert(vg_tt_used >= 0);
-   vg_assert(vg_tt_used <= tt_target);
-
    /* Now compact the TC, sliding live entries downwards to fill spaces
       left by deleted entries.  In this loop, r is the offset in TC of
       the current translation under consideration, and w is the next
@@ -241,6 +249,9 @@
             vg_tc[w+i] = vg_tc[r+i];
          tte->trans_addr = (Addr)&vg_tc[w+4];
          w += 4+tte->trans_size;
+      } else {
+         tte->orig_addr = VG_TTE_EMPTY;
+         vg_tt_used--;
       }
       r += 4+tte->trans_size;
    }
@@ -252,6 +263,9 @@
    vg_assert(w <= tc_target);
    vg_tc_used = w;
 
+   vg_assert(vg_tt_used >= 0);
+   vg_assert(vg_tt_used <= tt_target);
+
    /* Invalidate the fast cache, since it is now out of date.  It will get
       reconstructed incrementally when the client resumes. */
    VG_(invalidate_tt_fast)();
@@ -274,6 +288,11 @@
       );
 
    /* Reconstruct the SMC detection structures. */
+#  ifdef DEBUG_TRANSTAB
+   for (i = 0; i < VG_TT_SIZE; i++)
+      vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
+#  endif
+   VG_(sanity_check_tc_tt)();
 
    VGP_POPCC;
 }
@@ -290,7 +309,6 @@
    for (i = 0; i < VG_TT_SIZE; i++) {
       tte = &vg_tt[i];
       if (tte->orig_addr == VG_TTE_EMPTY) continue;
-      if (tte->orig_addr == VG_TTE_DELETED) continue;
       vg_assert(tte->mru_epoch >= 0);
       vg_assert(tte->mru_epoch <= VG_(current_epoch));
       counted_entries++;
@@ -323,8 +341,7 @@
    while (True) {
       if (vg_tt[i].orig_addr == tte->orig_addr)
          VG_(panic)("add_to_trans_tab: duplicate");
-      if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
-          vg_tt[i].orig_addr == VG_TTE_EMPTY) {
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY) {
          /* Put it here, and set the back pointer. */
          vg_tt[i] = *tte;
          VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
@@ -377,8 +394,8 @@
 */
 static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
 {
-  //static Int queries = 0;
-  //static Int probes = 0;
+   //static Int queries = 0;
+   //static Int probes = 0;
    Int i;
    /* Hash to get initial probe point. */
    //   if (queries == 10000) {
@@ -388,7 +405,7 @@
    //queries++;
    i = ((UInt)orig_addr) % VG_TT_SIZE;
    while (True) {
-     //probes++;
+      //probes++;
       if (vg_tt[i].orig_addr == orig_addr)
          return &vg_tt[i];
       if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
@@ -426,228 +443,58 @@
 }
 
 
-/*------------------------------------------------------------*/
-/*--- Detecting and handling self-modifying code.          ---*/
-/*------------------------------------------------------------*/
-
-/* This mechanism uses two data structures:
-
-   vg_oldmap -- array[64k] of Bool, which approximately records
-   parts of the address space corresponding to code for which
-   a translation exists in the translation table.  vg_oldmap is
-   consulted at each write, to determine whether that write might
-   be writing a code address; if so, the program is stopped at 
-   the next jump, and the corresponding translations are invalidated.
-
-   Precise semantics: vg_oldmap[(a >> 8) & 0xFFFF] is true for all
-   addresses a containing a code byte which has been translated.  So
-   it acts kind-of like a direct-mapped cache with 64k entries.
-
-   The second structure is vg_CAW, a small array of addresses at which
-   vg_oldmap indicates a code write may have happened.  This is
-   (effectively) checked at each control transfer (jump), so that
-   translations can be discarded before going on.  An array is
-   somewhat overkill, since it strikes me as very unlikely that a
-   single basic block will do more than one code write.  Nevertheless
-   ...  
-
-   ToDo: make this comment up-to-date.
+/* Invalidate translations of original code [start .. start + range - 1].
+   This is slow, so you *really* don't want to call it very often. 
 */
-
-
-/* Definitions for the self-modifying-code detection cache, intended
-   as a fast check which clears the vast majority of writes.  */
-
-#define VG_SMC_CACHE_HASH(aaa) \
-   ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK)
-
-Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE];
-
-
-/* Definitions for the fallback mechanism, which, more slowly,
-   provides a precise record of which words in the address space
-   belong to original code. */
-
-typedef struct { UChar chars[2048]; } VgSmcSecondary;
-
-static VgSmcSecondary* vg_smc_primary[65536];
-
-static VgSmcSecondary* vg_smc_new_secondary ( void )
+void VG_(invalidate_translations) ( Addr start, UInt range )
 {
-   Int i;
-   VgSmcSecondary* sec 
-      = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) );
-   for (i = 0; i < 2048; i++)
-      sec->chars[i] = 0;
-   return sec;
-}
+   Addr  i_start, i_end, o_start, o_end;
+   UInt  out_count, out_osize, out_tsize;
+   Int   i;
 
-#define GET_BIT_ARRAY(arr,indx)                      \
-   (1 & (  ((UChar*)arr)[((UInt)indx) / 8]           \
-           >> ( ((UInt)indx) % 8) ) )
-
-#define SET_BIT_ARRAY(arr,indx)                      \
-   ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8)
-
-
-/* Finally, a place to record the original-code-write addresses
-   detected in a basic block. */
-
-#define VG_ORIGWRITES_SIZE 10
-
-static Addr vg_origwrites[VG_ORIGWRITES_SIZE];
-static Int  vg_origwrites_used;
-
-
-/* Call here to check a written address. */
-
-void VG_(smc_check4) ( Addr a )
-{
-   UInt bit_index;
-   VgSmcSecondary* smc_secondary;
-
-#  if VG_SMC_FASTCHECK_IN_C
-   VG_(smc_total_check4s)++;
-
-   /* Try the fast check first. */
-   if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return;
+#  ifdef DEBUG_TRANSTAB
+   VG_(sanity_check_tc_tt)();
 #  endif
+   i_start = start;
+   i_end   = start + range - 1;
+   out_count = out_osize = out_tsize = 0;
 
-   VG_(smc_cache_passed)++;
-
-   /* Need to do a slow check. */
-   smc_secondary = vg_smc_primary[a >> 16];
-   if (smc_secondary == NULL) return;
-
-   bit_index = (a & 0xFFFF) >> 2;
-   if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return;
-
-   VG_(smc_fancy_passed)++;
-
-   /* Detected a Real Live write to code which has been translated.
-      Note it. */
-   if (vg_origwrites_used == VG_ORIGWRITES_SIZE)
-      VG_(panic)("VG_ORIGWRITES_SIZE is too small; "
-                 "increase and recompile.");
-   vg_origwrites[vg_origwrites_used] = a;
-   vg_origwrites_used++;
-
-   VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a);
-
-   /* Force an exit before the next basic block, so the translation
-      cache can be flushed appropriately. */
-   //   VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr);
-   //VG_(dispatch_ctr)       = 1;
-   //VG_(interrupt_reason)   = VG_Y_SMC;
-}
-
-
-/* Mark an address range as containing an original translation,
-   updating both the fast-check cache and the slow-but-correct data
-   structure.  
-*/
-void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size )
-{
-   Addr a;
-   VgSmcSecondary* smc_secondary;
-   UInt bit_index;
-
-   for (a = orig_addr; a < orig_addr+orig_size; a++) {
-
-      VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True;
-
-      smc_secondary = vg_smc_primary[a >> 16];
-      if (smc_secondary == NULL)
-         smc_secondary = 
-         vg_smc_primary[a >> 16] = vg_smc_new_secondary();
-
-      bit_index = (a & 0xFFFF) >> 2;
-      SET_BIT_ARRAY(smc_secondary->chars, bit_index);      
+   for (i = 0; i < VG_TT_SIZE; i++) {
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      o_start = vg_tt[i].orig_addr;
+      o_end = o_start + vg_tt[i].orig_size - 1;
+      if (o_end < i_start || o_start > i_end)
+         continue;
+      if (VG_(clo_cachesim))
+         VG_(cachesim_notify_discard)( & vg_tt[i] );
+      vg_tt[i].orig_addr = VG_TTE_DELETED;
+      VG_(this_epoch_out_count) ++;
+      VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
+      VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
+      VG_(overall_out_count) ++;
+      VG_(overall_out_osize) += vg_tt[i].orig_size;
+      VG_(overall_out_tsize) += vg_tt[i].trans_size;
+      out_count ++;
+      out_osize += vg_tt[i].orig_size;
+      out_tsize += vg_tt[i].trans_size;
    }
-}
 
-
-/* Discard any translations whose original code overlaps with the
-   range w_addr .. w_addr+3 inclusive. 
-*/
-__attribute__ ((unused))
-static void discard_translations_bracketing ( Addr w_addr )
-{
-#  if 0
-   Int      i, rd, wr;
-   Addr     o_start, o_end;
-   TTEntry* tt;
-
-   for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) {
-      tt = vg_transtab[i];
-      wr = 0;
-      for (rd = 0; rd < vg_transtab_used[i]; rd++) {
-         o_start = tt[rd].orig_addr;
-         o_end   = o_start + tt[rd].orig_size;
-         if (w_addr > o_end || (w_addr+3) < o_start) {
-            /* No collision possible; keep this translation */
-            VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size );
-            if (wr < rd) vg_transtab[wr] = vg_transtab[rd];
-            wr++;
-	 } else {
-            /* Possible collision; discard. */
-            vg_smc_discards++;
-            VG_(message) (Vg_DebugMsg, 
-                             "discarding translation of %p .. %p",
-                             tt[rd].orig_addr, 
-                             tt[rd].orig_addr + tt[rd].orig_size - 1);
-            VG_(free)((void*)tt[rd].trans_addr);
-         }         
+   if (out_count > 0) {
+      VG_(invalidate_tt_fast)();
+      VG_(sanity_check_tc_tt)();
+#     ifdef DEBUG_TRANSTAB
+      { Addr aa;
+        for (aa = i_start; aa <= i_end; aa++)
+           vg_assert(search_trans_table ( aa ) == NULL);
       }
-      vg_transtab_used[i] = wr;
-   }
-#  endif   
-}
-
-
-/* Top-level function in charge of discarding out-of-date translations
-   following the discovery of a (potential) original-code-write. 
-*/
-void VG_(flush_transtab) ( void )
-{
-#  if 0
-   Addr w_addr;
-   Int  i, j;
-
-   /* We shouldn't be here unless a code write was detected. */
-   vg_assert(vg_origwrites_used > 0);
-
-   /* Instead of incrementally fixing up the translation table cache,
-      just invalidate the whole darn thing.  Pray this doesn't happen
-      very often :) */
-   for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++)
-      VG_(transtab_cache_orig)[i] = 
-      VG_(transtab_cache_trans)[i] = (Addr)0;
-
-   /* Clear out the fast cache; discard_translations_bracketing
-      reconstructs it. */
-   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
-      VG_(smc_cache)[i] = False;
-
-   /* And also clear the slow-but-correct table. */
-   for (i = 0; i < 65536; i++) {
-      VgSmcSecondary* sec = vg_smc_primary[i];
-      if (sec)
-         for (j = 0; j < 2048; j++)
-            sec->chars[j] = 0;         
+#     endif
    }
 
-   /* This doesn't need to be particularly fast, since we (presumably)
-      don't have to handle particularly frequent writes to code
-      addresses. */
-   while (vg_origwrites_used > 0) {
-      vg_origwrites_used--;
-      w_addr = vg_origwrites[vg_origwrites_used];
-      discard_translations_bracketing ( w_addr );
-   }
-
-   vg_assert(vg_origwrites_used == 0);
-#  endif
+   if (VG_(clo_verbosity) > 1)
+      VG_(message)(Vg_UserMsg,   
+         "discard %d (%d -> %d) translations in range %p .. %p",
+         out_count, out_osize, out_tsize, i_start, i_end );
 }
 
 
@@ -655,7 +502,7 @@
 /*--- Initialisation.                                      ---*/
 /*------------------------------------------------------------*/
 
-void VG_(init_transtab_and_SMC) ( void )
+void VG_(init_tt_tc) ( void )
 {
    Int i;
 
@@ -678,17 +525,6 @@
       at the first TT entry, which is, of course, empty. */
    for (i = 0; i < VG_TT_FAST_SIZE; i++)
       VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
-
-   /* No part of the address space has any translations. */
-   for (i = 0; i < 65536; i++)
-      vg_smc_primary[i] = NULL;
-
-   /* ... and the associated fast-check cache reflects this. */
-   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
-      VG_(smc_cache)[i] = False;
-
-   /* Finally, no original-code-writes have been recorded. */
-   vg_origwrites_used = 0;
 }
 
 /*--------------------------------------------------------------------*/
diff --git a/docs/manual.html b/docs/manual.html
index dc66721..20fbb36 100644
--- a/docs/manual.html
+++ b/docs/manual.html
@@ -24,8 +24,9 @@
 <body bgcolor="#ffffff">
 
 <a name="title">&nbsp;</a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
 <center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
 <p>
 
 <center>
@@ -102,7 +103,9 @@
   <li>Reading/writing memory after it has been free'd</li>
   <li>Reading/writing off the end of malloc'd blocks</li>
   <li>Reading/writing inappropriate areas on the stack</li>
-  <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+  <li>Memory leaks -- where pointers to malloc'd blocks are lost
+  forever</li>
+  <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
 </ul>
 
 Problems like these can be difficult to find by other means, often
@@ -677,25 +680,6 @@
       all fairly dodgy and doesn't work at all if threads are
       involved.</li><br>
       <p>
-
-  <li><code>--smc-check=none</code><br>
-      <code>--smc-check=some</code> [default]<br>
-      <code>--smc-check=all</code>
-      <p>How carefully should Valgrind check for self-modifying code
-      writes, so that translations can be discarded?&nbsp; When
-      "none", no writes are checked.  When "some", only writes
-      resulting from moves from integer registers to memory are
-      checked.  When "all", all memory writes are checked, even those
-      with which are no sane program would generate code -- for
-      example, floating-point writes.
-      <p>
-      NOTE that this is all a bit bogus.  This mechanism has never
-      been enabled in any snapshot of Valgrind which was made
-      available to the general public, because the extra checks reduce
-      performance, increase complexity, and I have yet to come across
-      any programs which actually use self-modifying code.  I think
-      the flag is ignored.
-      </li>
 </ul>
 
 
@@ -1185,6 +1169,24 @@
     right now.  Returns no value.  I guess this could be used to
     incrementally check for leaks between arbitrary places in the
     program's execution.  Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+    of code in the specified address range.  Useful if you are
+    debugging a JITter or some other dynamic code generation system.
+    After this call, attempts to execute code in the invalidated
+    address range will cause valgrind to make new translations of that
+    code, which is probably the semantics you want.  Note that this is
+    implemented naively, and involves checking all 200191 entries in
+    the translation table to see if any of them overlap the specified
+    address range.  So try not to call it often, or performance will
+    nosedive.  Note that you can be clever about this: you only need
+    to call it when an area which previously contained code is
+    overwritten with new code.  You can choose to write code into
+    fresh memory, and just call this occasionally to discard large
+    chunks of old code all at once.
+    <p>
+    Warning: minimally tested.  Also, doesn't interact well with the
+    cache simulator.
 </ul>
 <p>
 
@@ -1255,7 +1257,7 @@
 <code>malloc</code> is 8-aligned.  Valgrind's allocator only
 guarantees 4-alignment, so without the patch Mozilla makes an illegal
 memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
 
 
 <a name="install"></a>
@@ -1730,10 +1732,8 @@
       running under Valgrind.  This is due to the large amount of
       adminstrative information maintained behind the scenes.  Another
       cause is that Valgrind dynamically translates the original
-      executable and never throws any translation away, except in
-      those rare cases where self-modifying code is detected.
-      Translated, instrumented code is 12-14 times larger than the
-      original (!) so you can easily end up with 15+ MB of
+      executable.  Translated, instrumented code is 14-16 times larger
+      than the original (!) so you can easily end up with 30+ MB of
       translations when running (eg) a web browser.
       </li>
 </ul>
@@ -1809,14 +1809,14 @@
 translations.  Subsequent jumps to that address will use this
 translation.
 
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated.  Such a write invalidates translations of code
-bracketing the written address.  Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there.  In this way,
-self modifying code is supported.  In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code.  Such checking is expensive, and in practice (fortunately)
+almost no applications need it.  However, to help people who are
+debugging dynamic code generation systems, there is a Client Request 
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range.  So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
 
 <p>The JITter translates basic blocks -- blocks of straight-line-code
 -- as single entities.  To minimise the considerable difficulties of
diff --git a/include/valgrind.h b/include/valgrind.h
index 43efffb..478426d 100644
--- a/include/valgrind.h
+++ b/include/valgrind.h
@@ -64,11 +64,11 @@
         _zzq_arg4     /* request fourth param */ )                      \
                                                                         \
   { volatile unsigned int _zzq_args[5];                                 \
-    _zzq_args[0] = (volatile unsigned int)_zzq_request;                 \
-    _zzq_args[1] = (volatile unsigned int)_zzq_arg1;                    \
-    _zzq_args[2] = (volatile unsigned int)_zzq_arg2;                    \
-    _zzq_args[3] = (volatile unsigned int)_zzq_arg3;                    \
-    _zzq_args[4] = (volatile unsigned int)_zzq_arg4;                    \
+    _zzq_args[0] = (volatile unsigned int)(_zzq_request);               \
+    _zzq_args[1] = (volatile unsigned int)(_zzq_arg1);                  \
+    _zzq_args[2] = (volatile unsigned int)(_zzq_arg2);                  \
+    _zzq_args[3] = (volatile unsigned int)(_zzq_arg3);                  \
+    _zzq_args[4] = (volatile unsigned int)(_zzq_arg4);                  \
     asm volatile("movl %1, %%eax\n\t"                                   \
                  "movl %2, %%edx\n\t"                                   \
                  "roll $29, %%eax ; roll $3, %%eax\n\t"                 \
@@ -95,8 +95,8 @@
 #define VG_USERREQ__CHECK_READABLE       0x1006
 #define VG_USERREQ__MAKE_NOACCESS_STACK  0x1007
 #define VG_USERREQ__RUNNING_ON_VALGRIND  0x1008
-#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* unimplemented */
-
+#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* untested */
+#define VG_USERREQ__DISCARD_TRANSLATIONS 0x100A
 
 
 /* Client-code macros to manipulate the state of memory. */
@@ -227,4 +227,17 @@
                             0, 0, 0, 0);                           \
    }
 
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
+   since it provides a way to make sure valgrind will retranslate the
+   invalidated area.  Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)          \
+   {unsigned int _qzz_res;                                         \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__DISCARD_TRANSLATIONS,      \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+   }
+
+
 #endif
diff --git a/memcheck/docs/manual.html b/memcheck/docs/manual.html
index dc66721..20fbb36 100644
--- a/memcheck/docs/manual.html
+++ b/memcheck/docs/manual.html
@@ -24,8 +24,9 @@
 <body bgcolor="#ffffff">
 
 <a name="title">&nbsp;</a>
-<h1 align=center>Valgrind, snapshot 20020501</h1>
+<h1 align=center>Valgrind, snapshot 20020516</h1>
 <center>This manual was majorly updated on 20020501</center>
+<center>This manual was minorly updated on 20020516</center>
 <p>
 
 <center>
@@ -102,7 +103,9 @@
   <li>Reading/writing memory after it has been free'd</li>
   <li>Reading/writing off the end of malloc'd blocks</li>
   <li>Reading/writing inappropriate areas on the stack</li>
-  <li>Memory leaks -- where pointers to malloc'd blocks are lost forever</li>
+  <li>Memory leaks -- where pointers to malloc'd blocks are lost
+  forever</li>
+  <li>Mismatched use of malloc/new/new [] vs free/delete/delete []</li>
 </ul>
 
 Problems like these can be difficult to find by other means, often
@@ -677,25 +680,6 @@
       all fairly dodgy and doesn't work at all if threads are
       involved.</li><br>
       <p>
-
-  <li><code>--smc-check=none</code><br>
-      <code>--smc-check=some</code> [default]<br>
-      <code>--smc-check=all</code>
-      <p>How carefully should Valgrind check for self-modifying code
-      writes, so that translations can be discarded?&nbsp; When
-      "none", no writes are checked.  When "some", only writes
-      resulting from moves from integer registers to memory are
-      checked.  When "all", all memory writes are checked, even those
-      with which are no sane program would generate code -- for
-      example, floating-point writes.
-      <p>
-      NOTE that this is all a bit bogus.  This mechanism has never
-      been enabled in any snapshot of Valgrind which was made
-      available to the general public, because the extra checks reduce
-      performance, increase complexity, and I have yet to come across
-      any programs which actually use self-modifying code.  I think
-      the flag is ignored.
-      </li>
 </ul>
 
 
@@ -1185,6 +1169,24 @@
     right now.  Returns no value.  I guess this could be used to
     incrementally check for leaks between arbitrary places in the
     program's execution.  Warning: not properly tested!
+<p>
+<li><code>VALGRIND_DISCARD_TRANSLATIONS</code>: discard translations
+    of code in the specified address range.  Useful if you are
+    debugging a JITter or some other dynamic code generation system.
+    After this call, attempts to execute code in the invalidated
+    address range will cause valgrind to make new translations of that
+    code, which is probably the semantics you want.  Note that this is
+    implemented naively, and involves checking all 200191 entries in
+    the translation table to see if any of them overlap the specified
+    address range.  So try not to call it often, or performance will
+    nosedive.  Note that you can be clever about this: you only need
+    to call it when an area which previously contained code is
+    overwritten with new code.  You can choose to write code into
+    fresh memory, and just call this occasionally to discard large
+    chunks of old code all at once.
+    <p>
+    Warning: minimally tested.  Also, doesn't interact well with the
+    cache simulator.
 </ul>
 <p>
 
@@ -1255,7 +1257,7 @@
 <code>malloc</code> is 8-aligned.  Valgrind's allocator only
 guarantees 4-alignment, so without the patch Mozilla makes an illegal
 memory access, which Valgrind of course spots, and then bombs.
-
+Mozilla 1.0RC2 works fine out-of-the-box.
 
 
 <a name="install"></a>
@@ -1730,10 +1732,8 @@
       running under Valgrind.  This is due to the large amount of
       adminstrative information maintained behind the scenes.  Another
       cause is that Valgrind dynamically translates the original
-      executable and never throws any translation away, except in
-      those rare cases where self-modifying code is detected.
-      Translated, instrumented code is 12-14 times larger than the
-      original (!) so you can easily end up with 15+ MB of
+      executable.  Translated, instrumented code is 14-16 times larger
+      than the original (!) so you can easily end up with 30+ MB of
       translations when running (eg) a web browser.
       </li>
 </ul>
@@ -1809,14 +1809,14 @@
 translations.  Subsequent jumps to that address will use this
 translation.
 
-<p>Valgrind can optionally check writes made by the application, to
-see if they are writing an address contained within code which has
-been translated.  Such a write invalidates translations of code
-bracketing the written address.  Valgrind will discard the relevant
-translations, which causes them to be re-made, if they are needed
-again, reflecting the new updated data stored there.  In this way,
-self modifying code is supported.  In practice I have not found any
-Linux applications which use self-modifying-code.
+<p>Valgrind no longer directly supports detection of self-modifying
+code.  Such checking is expensive, and in practice (fortunately)
+almost no applications need it.  However, to help people who are
+debugging dynamic code generation systems, there is a Client Request 
+(basically a macro you can put in your program) which directs Valgrind
+to discard translations in a given address range.  So Valgrind can
+still work in this situation provided the client tells it when
+code has become out-of-date and needs to be retranslated.
 
 <p>The JITter translates basic blocks -- blocks of straight-line-code
 -- as single entities.  To minimise the considerable difficulties of
diff --git a/tests/discard.c b/tests/discard.c
new file mode 100644
index 0000000..0c14e9f
--- /dev/null
+++ b/tests/discard.c
@@ -0,0 +1,27 @@
+
+#include <stdio.h>
+#include <valgrind.h>
+
+int fooble ( void )
+{
+  int x, y;
+  y = 0;
+  for (x = 0; x < 100; x++) {
+    if ((x % 3) == 0) y += x; else y++;
+  }
+  return y;
+}
+
+void someother ( void )
+{
+}
+
+int main ( void )
+{
+  printf("fooble-1() = %d\n", fooble() );
+  VALGRIND_DISCARD_TRANSLATIONS( (char*)(&fooble), 
+          ((char*)(&someother)) - ((char*)(&fooble)) );
+  printf("fooble-2() = %d\n", fooble() );
+  return 0;
+}
+
diff --git a/valgrind.h b/valgrind.h
index 43efffb..478426d 100644
--- a/valgrind.h
+++ b/valgrind.h
@@ -64,11 +64,11 @@
         _zzq_arg4     /* request fourth param */ )                      \
                                                                         \
   { volatile unsigned int _zzq_args[5];                                 \
-    _zzq_args[0] = (volatile unsigned int)_zzq_request;                 \
-    _zzq_args[1] = (volatile unsigned int)_zzq_arg1;                    \
-    _zzq_args[2] = (volatile unsigned int)_zzq_arg2;                    \
-    _zzq_args[3] = (volatile unsigned int)_zzq_arg3;                    \
-    _zzq_args[4] = (volatile unsigned int)_zzq_arg4;                    \
+    _zzq_args[0] = (volatile unsigned int)(_zzq_request);               \
+    _zzq_args[1] = (volatile unsigned int)(_zzq_arg1);                  \
+    _zzq_args[2] = (volatile unsigned int)(_zzq_arg2);                  \
+    _zzq_args[3] = (volatile unsigned int)(_zzq_arg3);                  \
+    _zzq_args[4] = (volatile unsigned int)(_zzq_arg4);                  \
     asm volatile("movl %1, %%eax\n\t"                                   \
                  "movl %2, %%edx\n\t"                                   \
                  "roll $29, %%eax ; roll $3, %%eax\n\t"                 \
@@ -95,8 +95,8 @@
 #define VG_USERREQ__CHECK_READABLE       0x1006
 #define VG_USERREQ__MAKE_NOACCESS_STACK  0x1007
 #define VG_USERREQ__RUNNING_ON_VALGRIND  0x1008
-#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* unimplemented */
-
+#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* untested */
+#define VG_USERREQ__DISCARD_TRANSLATIONS 0x100A
 
 
 /* Client-code macros to manipulate the state of memory. */
@@ -227,4 +227,17 @@
                             0, 0, 0, 0);                           \
    }
 
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
+   since it provides a way to make sure valgrind will retranslate the
+   invalidated area.  Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)          \
+   {unsigned int _qzz_res;                                         \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__DISCARD_TRANSLATIONS,      \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+   }
+
+
 #endif
diff --git a/vg_cachesim.c b/vg_cachesim.c
index 8081e0a..b794e61 100644
--- a/vg_cachesim.c
+++ b/vg_cachesim.c
@@ -1,3 +1,4 @@
+
 /*--------------------------------------------------------------------*/
 /*--- The cache simulation framework: instrumentation, recording   ---*/
 /*--- and results printing.                                        ---*/
@@ -10,7 +11,6 @@
 
    Copyright (C) 2000-2002 Julian Seward 
       jseward@acm.org
-      Julian_Seward@muraroa.demon.co.uk
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -30,8 +30,6 @@
    The GNU General Public License is contained in the file LICENSE.
 */
 
-#include <string.h>
-
 #include "vg_include.h"
 
 #include "vg_cachesim_L2.c"
@@ -311,7 +309,7 @@
    filename_hash = hash(filename, N_FILE_ENTRIES);
    curr_file_node = BBCC_table[filename_hash];
    while (NULL != curr_file_node && 
-          strcmp(filename, curr_file_node->filename) != 0) {
+          VG_(strcmp)(filename, curr_file_node->filename) != 0) {
       curr_file_node = curr_file_node->next;
    }
    if (NULL == curr_file_node) {
@@ -323,7 +321,7 @@
    fnname_hash = hash(fn_name, N_FN_ENTRIES);
    curr_fn_node = curr_file_node->fns[fnname_hash];
    while (NULL != curr_fn_node && 
-          strcmp(fn_name, curr_fn_node->fn_name) != 0) {
+          VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
       curr_fn_node = curr_fn_node->next;
    }
    if (NULL == curr_fn_node) {
@@ -790,7 +788,7 @@
 
       /* Allow for filename switching in the middle of a BB;  if this happens,
        * must print the new filename with the function name. */
-      if (0 != strcmp(fl_buf, curr_file)) {
+      if (0 != VG_(strcmp)(fl_buf, curr_file)) {
          VG_(strcpy)(curr_file, fl_buf);
          VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
          VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
@@ -798,7 +796,7 @@
 
       /* If the function name for this instruction doesn't match that of the
        * first instruction in the BB, print warning. */
-      if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
+      if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
          VG_(printf)("Mismatched function names\n");
          VG_(printf)("  filenames: BB:%s, instr:%s;"
                      "  fn_names:  BB:%s, instr:%s;"
@@ -1071,3 +1069,13 @@
    VGP_POPCC;
 }
 
+
+void VG_(cachesim_notify_discard) ( TTEntry* tte )
+{
+  VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
+               tte->orig_addr, (Int)tte->orig_size);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_cachesim.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_clientperms.c b/vg_clientperms.c
index 02d0b7b..e9ecbc4 100644
--- a/vg_clientperms.c
+++ b/vg_clientperms.c
@@ -385,6 +385,10 @@
          VG_(detect_memory_leaks)();
          return 0; /* return value is meaningless */
 
+      case VG_USERREQ__DISCARD_TRANSLATIONS:
+         VG_(invalidate_translations)( arg[1], arg[2] );
+         return 0;  /* return value is meaningless */
+
       default:
          VG_(message)(Vg_UserMsg, 
                       "Warning: unknown client request code %d", arg[0]);
diff --git a/vg_constants.h b/vg_constants.h
index 710b12c..252353c 100644
--- a/vg_constants.h
+++ b/vg_constants.h
@@ -90,16 +90,6 @@
 /* Constants for the fast original-code-write check cache. */
 
 
-/* Usually you want this to be zero. */
-#define VG_SMC_FASTCHECK_IN_C 0
-
-#define VG_SMC_CACHE_BITS  19
-#define VG_SMC_CACHE_SIZE  (1 << VG_SMC_CACHE_BITS)
-#define VG_SMC_CACHE_MASK  ((VG_SMC_CACHE_SIZE) - 1)
-
-#define VG_SMC_CACHE_SHIFT 6
-
-
 /* Assembly code stubs make these requests ... */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
 #define VG_USERREQ__PTHREAD_RETURNS         0x4002
diff --git a/vg_from_ucode.c b/vg_from_ucode.c
index 214d2ca..573ee93 100644
--- a/vg_from_ucode.c
+++ b/vg_from_ucode.c
@@ -1524,56 +1524,6 @@
 }
 
 
-/* A word in memory containing a pointer to vg_helper_smc_check4.
-   Never changes. 
-*/
-static const Addr vg_helper_smc_check4_ADDR
-   = (Addr)&VG_(helper_smc_check4);
-
-static void synth_orig_code_write_check ( Int sz, Int reg )
-{
-   UInt offset;
-
-   /*
-     In this example, reg is %eax and sz == 8:
-
-     -- check the first four bytes
-     0087 89C5                  movl    %eax, %ebp
-     0089 FF1544332211          call    * 0x11223344
-                  
-     -- check the second four
-     008f 89C5                  movl    %eax, %ebp
-     0091 83C504                addl    $4, %ebp
-     0094 FF1544332211          call    * 0x11223344
-
-     Because we can't call an absolute address (alas), the
-     address called is stored in memory at 0x11223344 in this
-     example, and it just contains the address of 
-     vg_helper_smc_check4 -- which is where we really want
-     to get to.
-   */
-   vg_assert(0);
-
-   if (sz < 4) sz = 4;
-
-   for (offset = 0; offset < sz; offset += 4) {
-
-      emit_movl_reg_reg ( reg, R_EBP );
-
-      if (offset > 0) {
-         newEmit();
-         emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset );
-         if (dis) VG_(printf)("\n");
-      }
-
-      newEmit();
-      emitB ( 0xFF ); emitB ( 0x15 ); 
-      emitL ( (Addr)&vg_helper_smc_check4_ADDR );
-      if (dis) VG_(printf)("\n");
-   }
-}
-
-
 /* Synthesise a minimal test (and which discards result) of reg32
    against lit.  It's always safe do simply
       emit_testv_lit_reg ( 4, lit, reg32 )
@@ -2264,8 +2214,10 @@
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == RealReg);
          synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
+	 /* No longer possible, but retained for illustrative purposes.
          if (u->smc_check) 
             synth_orig_code_write_check ( u->size, u->val2 );
+	 */
          break;
       }
 
@@ -2598,8 +2550,10 @@
          synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
                             u->val1 & 0xFF,
                             u->val2 );
+         /* No longer possible, but retained for illustrative purposes.
          if (u->opcode == FPU_W && u->smc_check) 
             synth_orig_code_write_check ( u->size, u->val2 );
+         */
          break;
 
       case FPU:
diff --git a/vg_helpers.S b/vg_helpers.S
index 62db9ec..2968922 100644
--- a/vg_helpers.S
+++ b/vg_helpers.S
@@ -146,51 +146,6 @@
 	ret
 
 
-/* Do a original-code-write check for the address in %ebp. */
-.global VG_(helper_smc_check4)
-VG_(helper_smc_check4):
-#if VG_SMC_FASTCHECK_IN_C
-
-	# save the live regs
-	pushl	%eax
-	pushl	%ebx
-	pushl	%ecx
-	pushl	%edx
-	pushl	%esi
-	pushl	%edi
-	
-	pushl	%ebp
-	call	VG_(smc_check4)
-	addl	$4, %esp
-
-	popl	%edi
-	popl	%esi
-	popl	%edx
-	popl	%ecx
-	popl	%ebx
-	popl	%eax
-	
-	ret
-#else	
-	incl	VG_(smc_total_check4s)
-	pushl	%ebp
-	shrl	$VG_SMC_CACHE_SHIFT, %ebp
-	andl	$VG_SMC_CACHE_MASK, %ebp
-	cmpb	$0, VG_(smc_cache)(%ebp)
-	jnz	vg_smc_cache_failure
-	addl	$4, %esp
-	ret
-      vg_smc_cache_failure:
-	popl	%ebp
-	pushal
-	pushl	%ebp
-	call	VG_(smc_check4)
-	addl	$4, %esp
-	popal
-	ret
-#endif
-
-	
 /* Fetch the time-stamp-ctr reg.
    On entry:
 	dummy, replaced by %EAX value
diff --git a/vg_include.h b/vg_include.h
index 22e4f48..7f44dde 100644
--- a/vg_include.h
+++ b/vg_include.h
@@ -1301,7 +1301,7 @@
 extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
                                      Char* fn_name, Int n_fn_name);
 
-extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
 
 
 /* ---------------------------------------------------------------------
@@ -1459,21 +1459,6 @@
 /* total of register ranks over all translations */
 extern UInt VG_(total_reg_rank);
 
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-//extern UInt VG_(smc_total_check4s);
-
-/* Number of writes which the fast smc check couldn't show were
-   harmless. */
-extern UInt VG_(smc_cache_passed);
-
-/* Numnber of writes which really did write on original code. */
-extern UInt VG_(smc_fancy_passed);
-
-/* Number of translations discarded as a result. */
-//extern UInt VG_(smc_discard_count);
-
 /* Counts pertaining to internal sanity checking. */
 extern UInt VG_(sanity_fast_count);
 extern UInt VG_(sanity_slow_count);
@@ -1590,11 +1575,9 @@
 extern void VG_(flush_transtab) ( void );
 extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size );
 extern void VG_(add_to_trans_tab) ( TTEntry* tte );
+extern void VG_(invalidate_translations) ( Addr start, UInt range );
 
-extern void VG_(smc_mark_original) ( Addr original_addr, 
-                                     Int original_len );
-
-extern void VG_(init_transtab_and_SMC) ( void );
+extern void VG_(init_tt_tc) ( void );
 
 extern void VG_(sanity_check_tc_tt) ( void );
 extern Addr VG_(search_transtab) ( Addr original_addr );
@@ -1667,9 +1650,6 @@
    Exports of vg_helpers.S
    ------------------------------------------------------------------ */
 
-/* SMC fast checks. */
-extern void VG_(helper_smc_check4);
-
 /* Mul, div, etc, -- we don't codegen these directly. */
 extern void VG_(helper_idiv_64_32);
 extern void VG_(helper_div_64_32);
@@ -1729,6 +1709,9 @@
 extern void VG_(cachesim_log_non_mem_instr)(  iCC* cc );
 extern void VG_(cachesim_log_mem_instr)    ( idCC* cc, Addr data_addr );
 
+extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+
+
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
    ------------------------------------------------------------------ */
diff --git a/vg_main.c b/vg_main.c
index a7e41b2..94e175c 100644
--- a/vg_main.c
+++ b/vg_main.c
@@ -381,22 +381,6 @@
 UInt VG_(total_reg_rank) = 0;
 
 
-/* Counts pertaining to the self-modifying-code detection machinery. */
-
-/* Total number of writes checked. */
-UInt VG_(smc_total_check4s) = 0;
-
-/* Number of writes which the fast smc check couldn't show were
-   harmless. */
-UInt VG_(smc_cache_passed) = 0;
-
-/* Numnber of writes which really did write on original code. */
-UInt VG_(smc_fancy_passed) = 0;
-
-/* Number of translations discarded as a result. */
-UInt VG_(smc_discard_count) = 0;
-
-
 /* Counts pertaining to internal sanity checking. */
 UInt VG_(sanity_fast_count) = 0;
 UInt VG_(sanity_slow_count) = 0;
@@ -955,13 +939,6 @@
                 VG_(uinstrs_spill),
                 VG_(total_reg_rank) );
    VG_(message)(Vg_DebugMsg, 
-                "smc-check: %d checks, %d fast pass, "
-                "%d slow pass, %d discards.",
-		VG_(smc_total_check4s),
-		VG_(smc_cache_passed),
-		VG_(smc_fancy_passed),
-		VG_(smc_discard_count) );
-   VG_(message)(Vg_DebugMsg, 
                 "   sanity: %d cheap, %d expensive checks.",
                 VG_(sanity_fast_count), 
                 VG_(sanity_slow_count) );
@@ -1020,11 +997,12 @@
       VGP_PUSHCC(VgpInitAudit);
       VGM_(init_memory_audit)();
       VGP_POPCC;
-      VGP_PUSHCC(VgpReadSyms);
-      VG_(read_symbols)();
-      VGP_POPCC;
    }
 
+   VGP_PUSHCC(VgpReadSyms);
+   VG_(read_symbols)();
+   VGP_POPCC;
+
    /* End calibration of our RDTSC-based clock, leaving it as long as
       we can. */
    VG_(end_rdtsc_calibration)();
@@ -1033,7 +1011,7 @@
       carefully sets up the permissions maps to cover the anonymous
       mmaps for the translation table and translation cache, which
       wastes > 20M of virtual address space. */
-   VG_(init_transtab_and_SMC)();
+   VG_(init_tt_tc)();
 
    if (VG_(clo_verbosity) == 1) {
       VG_(message)(Vg_UserMsg, 
diff --git a/vg_scheduler.c b/vg_scheduler.c
index d1d792a..57d687d 100644
--- a/vg_scheduler.c
+++ b/vg_scheduler.c
@@ -330,8 +330,6 @@
    VG_(overall_in_count) ++;
    VG_(overall_in_osize) += orig_size;
    VG_(overall_in_tsize) += trans_size;
-   /* Record translated area for SMC detection. */
-   VG_(smc_mark_original) ( orig_addr, orig_size );
 }
 
 
@@ -2684,6 +2682,7 @@
       case VG_USERREQ__MAKE_NOACCESS_STACK:
       case VG_USERREQ__RUNNING_ON_VALGRIND:
       case VG_USERREQ__DO_LEAK_CHECK:
+      case VG_USERREQ__DISCARD_TRANSLATIONS:
          SET_EDX(
             tid, 
             VG_(handle_client_request) ( &VG_(threads)[tid], arg )
diff --git a/vg_symtab2.c b/vg_symtab2.c
index c781751..eb3b394 100644
--- a/vg_symtab2.c
+++ b/vg_symtab2.c
@@ -36,13 +36,16 @@
 
 /* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
    dlopen()ed libraries, which is something that KDE3 does a lot.
-   Still kludgey, though less than before:
 
-   * we don't check whether we should throw away some symbol tables 
-     when munmap() happens
+   Stabs reader greatly improved by Nick Nethercote, Apr 02.
 
-   * symbol table reading code for ELF binaries is a shambles.  
-     Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
+   16 May 02: when notified about munmap, return a Bool indicating
+   whether or not the area being munmapped had executable permissions.
+   This is then used to determine whether or not
+   VG_(invalidate_translations) should be called for that area.  In order
+   that this work even if --instrument=no, in this case we still keep
+   track of the mapped executable segments, but do not load any debug
+   info or symbols.
 */
 
 /*------------------------------------------------------------*/
@@ -1181,9 +1184,11 @@
       = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
 
    /* And actually fill it up. */
-   vg_read_lib_symbols ( si );
-   canonicaliseSymtab ( si );
-   canonicaliseLoctab ( si );
+   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
+      vg_read_lib_symbols ( si );
+      canonicaliseSymtab ( si );
+      canonicaliseLoctab ( si );
+   }
 }
 
 
@@ -1197,9 +1202,6 @@
    which happen to correspond to the munmap()d area.  */
 void VG_(read_symbols) ( void )
 {
-   if (! VG_(clo_instrument) && ! VG_(clo_cachesim)) 
-      return;
-
    VG_(read_procselfmaps) ( read_symtab_callback );
 
    /* Do a sanity check on the symbol tables: ensure that the address
@@ -1222,7 +1224,6 @@
            /* the main assertion */
            overlap = (lo <= lo2 && lo2 <= hi)
                       || (lo <= hi2 && hi2 <= hi);
-           //vg_assert(!overlap);
 	   if (overlap) {
               VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
               ppSegInfo ( si );
@@ -1240,15 +1241,16 @@
    to a segment for a .so, and if so discard the relevant SegInfo.
    This might not be a very clever idea from the point of view of
    accuracy of error messages, but we need to do it in order to
-   maintain the no-overlapping invariant.  
+   maintain the no-overlapping invariant.
+
+   16 May 02: Returns a Bool indicating whether or not the discarded
+   range falls inside a known executable segment.  See comment at top
+   of file for why.
 */
-void VG_(symtab_notify_munmap) ( Addr start, UInt length )
+Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
 {
    SegInfo *prev, *curr;
 
-   if (! VG_(clo_instrument)) 
-     return;
-
    prev = NULL;
    curr = segInfo;
    while (True) {
@@ -1257,7 +1259,8 @@
       prev = curr;
       curr = curr->next;
    }
-   if (curr == NULL) return;
+   if (curr == NULL) 
+      return False;
 
    VG_(message)(Vg_UserMsg, 
                 "discard syms in %s due to munmap()", 
@@ -1272,6 +1275,7 @@
    }
 
    freeSegInfo(curr);
+   return True;
 }
 
 
diff --git a/vg_syscall_mem.c b/vg_syscall_mem.c
index ac63267..6d4e497 100644
--- a/vg_syscall_mem.c
+++ b/vg_syscall_mem.c
@@ -487,12 +487,15 @@
          KERNEL_DO_SYSCALL(tid,res);
          if (!VG_(is_kerror)(res)) {
             /* Copied from munmap() wrapper. */
+            Bool munmap_exe;
             Addr start  = arg1;
             Addr length = arg2;
             while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
             while (((start+length) % VKI_BYTES_PER_PAGE) > 0) { length++; }
             make_noaccess( start, length );
-            VG_(symtab_notify_munmap) ( start, length );
+            munmap_exe = VG_(symtab_notify_munmap) ( start, length );
+            if (munmap_exe)
+               VG_(invalidate_translations) ( start, length );
             approximate_mmap_permissions( (Addr)res, arg3, arg4 );
          }
          break;         
@@ -2070,6 +2073,7 @@
                pages.  If we don't do that, our idea of addressible
                memory diverges from that of the kernel's, which causes
                the leak detector to crash. */
+            Bool munmap_exe;
             Addr start = arg1;
             Addr length = arg2;
             while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
@@ -2083,7 +2087,9 @@
             /* Tell our symbol table machinery about this, so that if
                this happens to be a .so being unloaded, the relevant
                symbols are removed too. */
-            VG_(symtab_notify_munmap) ( start, length );
+            munmap_exe = VG_(symtab_notify_munmap) ( start, length );
+            if (munmap_exe)
+               VG_(invalidate_translations) ( start, length );
          }
          break;
 
diff --git a/vg_translate.c b/vg_translate.c
index 1e4bff2..0a80694 100644
--- a/vg_translate.c
+++ b/vg_translate.c
@@ -297,7 +297,7 @@
 
    Important!  If you change the set of allocatable registers from
    %eax, %ebx, %ecx, %edx, %esi you must change the
-   save/restore sequences in vg_helper_smc_check4 to match!  
+   save/restore sequences in various places to match!  
 */
 __inline__ Int VG_(rankToRealRegNo) ( Int rank )
 {
diff --git a/vg_transtab.c b/vg_transtab.c
index d0f0eb1..a364df0 100644
--- a/vg_transtab.c
+++ b/vg_transtab.c
@@ -32,6 +32,8 @@
 #include "vg_include.h"
 #include "vg_constants.h"
 
+/* #define DEBUG_TRANSTAB */
+
 
 /*------------------------------------------------------------*/
 /*--- Management of the LRU-based translation table+cache. ---*/
@@ -42,7 +44,7 @@
    of code retranslation.  */
 
 /* Size of the translation cache, in bytes. */
-#define VG_TC_SIZE /*16000000*/ 32000000 /*40000000*/
+#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/
 
 /* Do a LRU pass when the translation cache becomes this full. */
 #define VG_TC_LIMIT_PERCENT 98
@@ -52,7 +54,7 @@
 
 /* Number of entries in the translation table.  This must be a prime
    number in order to make the hashing work properly. */
-#define VG_TT_SIZE /*100129*/ 200191 /*250829*/
+#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/
 
 /* Do an LRU pass when the translation table becomes this full. */
 #define VG_TT_LIMIT_PERCENT /*67*/ 80
@@ -64,9 +66,12 @@
    N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago.  */
 #define VG_N_EPOCHS /*2000*/ /*4000*/ 20000
 
-/* This TT entry is empty. */
+/* This TT entry is empty.  There is no associated TC storage. */
 #define VG_TTE_EMPTY   ((Addr)1)
-/* This TT entry has been deleted. */
+/* This TT entry has been deleted, in the sense that it does not
+   contribute to the orig->trans mapping.  However, the ex-translation
+   it points at still occupies space in TC.  This slot cannot be
+   re-used without doing an LRU pass. */
 #define VG_TTE_DELETED ((Addr)3)
 
 /* The TC.  This used to be statically allocated, but that forces many
@@ -77,7 +82,8 @@
 */
 static UChar* vg_tc = NULL;
 
-/* Count of bytes used in the TC. */
+/* Count of bytes used in the TC.  This includes those pointed to from
+   VG_TTE_DELETED entries. */
 static Int vg_tc_used = 0;
 
 /* The TT.  Like TC, for the same reason, is dynamically allocated at
@@ -86,7 +92,7 @@
 */
 static TTEntry* vg_tt = NULL;
 
-/* Count of non-empty, non-deleted TT entries. */
+/* Count of non-empty TT entries.  This includes deleted ones. */
 static Int vg_tt_used = 0;
 
 /* Fast helper for the TT.  A direct-mapped cache which holds a
@@ -135,6 +141,10 @@
    if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
       return;
 
+#  ifdef DEBUG_TRANSTAB
+   VG_(sanity_check_tc_tt)();
+#  endif
+
    VGP_PUSHCC(VgpDoLRU);
    /*   
    VG_(printf)(
@@ -157,8 +167,9 @@
       vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
 
    for (i = 0; i < VG_TT_SIZE; i++) {
-      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
-          vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY 
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) 
+            continue;
       j = vg_tt[i].mru_epoch;
       vg_assert(j <= VG_(current_epoch));
       j = VG_(current_epoch) - j;
@@ -200,11 +211,11 @@
       recently used at most thresh epochs ago.  Traverse the TT and
       mark such entries as deleted. */
    for (i = 0; i < VG_TT_SIZE; i++) {
-      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
-         vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY 
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) 
+         continue;
       if (vg_tt[i].mru_epoch <= thresh) {
          vg_tt[i].orig_addr = VG_TTE_DELETED;
-         vg_tt_used--;
 	 VG_(this_epoch_out_count) ++;
 	 VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
 	 VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
@@ -214,9 +225,6 @@
       }
    }
 
-   vg_assert(vg_tt_used >= 0);
-   vg_assert(vg_tt_used <= tt_target);
-
    /* Now compact the TC, sliding live entries downwards to fill spaces
       left by deleted entries.  In this loop, r is the offset in TC of
       the current translation under consideration, and w is the next
@@ -241,6 +249,9 @@
             vg_tc[w+i] = vg_tc[r+i];
          tte->trans_addr = (Addr)&vg_tc[w+4];
          w += 4+tte->trans_size;
+      } else {
+         tte->orig_addr = VG_TTE_EMPTY;
+         vg_tt_used--;
       }
       r += 4+tte->trans_size;
    }
@@ -252,6 +263,9 @@
    vg_assert(w <= tc_target);
    vg_tc_used = w;
 
+   vg_assert(vg_tt_used >= 0);
+   vg_assert(vg_tt_used <= tt_target);
+
    /* Invalidate the fast cache, since it is now out of date.  It will get
       reconstructed incrementally when the client resumes. */
    VG_(invalidate_tt_fast)();
@@ -274,6 +288,11 @@
       );
 
    /* Reconstruct the SMC detection structures. */
+#  ifdef DEBUG_TRANSTAB
+   for (i = 0; i < VG_TT_SIZE; i++)
+      vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
+#  endif
+   VG_(sanity_check_tc_tt)();
 
    VGP_POPCC;
 }
@@ -290,7 +309,6 @@
    for (i = 0; i < VG_TT_SIZE; i++) {
       tte = &vg_tt[i];
       if (tte->orig_addr == VG_TTE_EMPTY) continue;
-      if (tte->orig_addr == VG_TTE_DELETED) continue;
       vg_assert(tte->mru_epoch >= 0);
       vg_assert(tte->mru_epoch <= VG_(current_epoch));
       counted_entries++;
@@ -323,8 +341,7 @@
    while (True) {
       if (vg_tt[i].orig_addr == tte->orig_addr)
          VG_(panic)("add_to_trans_tab: duplicate");
-      if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
-          vg_tt[i].orig_addr == VG_TTE_EMPTY) {
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY) {
          /* Put it here, and set the back pointer. */
          vg_tt[i] = *tte;
          VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
@@ -377,8 +394,8 @@
 */
 static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
 {
-  //static Int queries = 0;
-  //static Int probes = 0;
+   //static Int queries = 0;
+   //static Int probes = 0;
    Int i;
    /* Hash to get initial probe point. */
    //   if (queries == 10000) {
@@ -388,7 +405,7 @@
    //queries++;
    i = ((UInt)orig_addr) % VG_TT_SIZE;
    while (True) {
-     //probes++;
+      //probes++;
       if (vg_tt[i].orig_addr == orig_addr)
          return &vg_tt[i];
       if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
@@ -426,228 +443,58 @@
 }
 
 
-/*------------------------------------------------------------*/
-/*--- Detecting and handling self-modifying code.          ---*/
-/*------------------------------------------------------------*/
-
-/* This mechanism uses two data structures:
-
-   vg_oldmap -- array[64k] of Bool, which approximately records
-   parts of the address space corresponding to code for which
-   a translation exists in the translation table.  vg_oldmap is
-   consulted at each write, to determine whether that write might
-   be writing a code address; if so, the program is stopped at 
-   the next jump, and the corresponding translations are invalidated.
-
-   Precise semantics: vg_oldmap[(a >> 8) & 0xFFFF] is true for all
-   addresses a containing a code byte which has been translated.  So
-   it acts kind-of like a direct-mapped cache with 64k entries.
-
-   The second structure is vg_CAW, a small array of addresses at which
-   vg_oldmap indicates a code write may have happened.  This is
-   (effectively) checked at each control transfer (jump), so that
-   translations can be discarded before going on.  An array is
-   somewhat overkill, since it strikes me as very unlikely that a
-   single basic block will do more than one code write.  Nevertheless
-   ...  
-
-   ToDo: make this comment up-to-date.
+/* Invalidate translations of original code [start .. start + range - 1].
+   This is slow, so you *really* don't want to call it very often. 
 */
-
-
-/* Definitions for the self-modifying-code detection cache, intended
-   as a fast check which clears the vast majority of writes.  */
-
-#define VG_SMC_CACHE_HASH(aaa) \
-   ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK)
-
-Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE];
-
-
-/* Definitions for the fallback mechanism, which, more slowly,
-   provides a precise record of which words in the address space
-   belong to original code. */
-
-typedef struct { UChar chars[2048]; } VgSmcSecondary;
-
-static VgSmcSecondary* vg_smc_primary[65536];
-
-static VgSmcSecondary* vg_smc_new_secondary ( void )
+void VG_(invalidate_translations) ( Addr start, UInt range )
 {
-   Int i;
-   VgSmcSecondary* sec 
-      = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) );
-   for (i = 0; i < 2048; i++)
-      sec->chars[i] = 0;
-   return sec;
-}
+   Addr  i_start, i_end, o_start, o_end;
+   UInt  out_count, out_osize, out_tsize;
+   Int   i;
 
-#define GET_BIT_ARRAY(arr,indx)                      \
-   (1 & (  ((UChar*)arr)[((UInt)indx) / 8]           \
-           >> ( ((UInt)indx) % 8) ) )
-
-#define SET_BIT_ARRAY(arr,indx)                      \
-   ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8)
-
-
-/* Finally, a place to record the original-code-write addresses
-   detected in a basic block. */
-
-#define VG_ORIGWRITES_SIZE 10
-
-static Addr vg_origwrites[VG_ORIGWRITES_SIZE];
-static Int  vg_origwrites_used;
-
-
-/* Call here to check a written address. */
-
-void VG_(smc_check4) ( Addr a )
-{
-   UInt bit_index;
-   VgSmcSecondary* smc_secondary;
-
-#  if VG_SMC_FASTCHECK_IN_C
-   VG_(smc_total_check4s)++;
-
-   /* Try the fast check first. */
-   if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return;
+#  ifdef DEBUG_TRANSTAB
+   VG_(sanity_check_tc_tt)();
 #  endif
+   i_start = start;
+   i_end   = start + range - 1;
+   out_count = out_osize = out_tsize = 0;
 
-   VG_(smc_cache_passed)++;
-
-   /* Need to do a slow check. */
-   smc_secondary = vg_smc_primary[a >> 16];
-   if (smc_secondary == NULL) return;
-
-   bit_index = (a & 0xFFFF) >> 2;
-   if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return;
-
-   VG_(smc_fancy_passed)++;
-
-   /* Detected a Real Live write to code which has been translated.
-      Note it. */
-   if (vg_origwrites_used == VG_ORIGWRITES_SIZE)
-      VG_(panic)("VG_ORIGWRITES_SIZE is too small; "
-                 "increase and recompile.");
-   vg_origwrites[vg_origwrites_used] = a;
-   vg_origwrites_used++;
-
-   VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a);
-
-   /* Force an exit before the next basic block, so the translation
-      cache can be flushed appropriately. */
-   //   VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr);
-   //VG_(dispatch_ctr)       = 1;
-   //VG_(interrupt_reason)   = VG_Y_SMC;
-}
-
-
-/* Mark an address range as containing an original translation,
-   updating both the fast-check cache and the slow-but-correct data
-   structure.  
-*/
-void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size )
-{
-   Addr a;
-   VgSmcSecondary* smc_secondary;
-   UInt bit_index;
-
-   for (a = orig_addr; a < orig_addr+orig_size; a++) {
-
-      VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True;
-
-      smc_secondary = vg_smc_primary[a >> 16];
-      if (smc_secondary == NULL)
-         smc_secondary = 
-         vg_smc_primary[a >> 16] = vg_smc_new_secondary();
-
-      bit_index = (a & 0xFFFF) >> 2;
-      SET_BIT_ARRAY(smc_secondary->chars, bit_index);      
+   for (i = 0; i < VG_TT_SIZE; i++) {
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY
+          || vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      o_start = vg_tt[i].orig_addr;
+      o_end = o_start + vg_tt[i].orig_size - 1;
+      if (o_end < i_start || o_start > i_end)
+         continue;
+      if (VG_(clo_cachesim))
+         VG_(cachesim_notify_discard)( & vg_tt[i] );
+      vg_tt[i].orig_addr = VG_TTE_DELETED;
+      VG_(this_epoch_out_count) ++;
+      VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
+      VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
+      VG_(overall_out_count) ++;
+      VG_(overall_out_osize) += vg_tt[i].orig_size;
+      VG_(overall_out_tsize) += vg_tt[i].trans_size;
+      out_count ++;
+      out_osize += vg_tt[i].orig_size;
+      out_tsize += vg_tt[i].trans_size;
    }
-}
 
-
-/* Discard any translations whose original code overlaps with the
-   range w_addr .. w_addr+3 inclusive. 
-*/
-__attribute__ ((unused))
-static void discard_translations_bracketing ( Addr w_addr )
-{
-#  if 0
-   Int      i, rd, wr;
-   Addr     o_start, o_end;
-   TTEntry* tt;
-
-   for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) {
-      tt = vg_transtab[i];
-      wr = 0;
-      for (rd = 0; rd < vg_transtab_used[i]; rd++) {
-         o_start = tt[rd].orig_addr;
-         o_end   = o_start + tt[rd].orig_size;
-         if (w_addr > o_end || (w_addr+3) < o_start) {
-            /* No collision possible; keep this translation */
-            VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size );
-            if (wr < rd) vg_transtab[wr] = vg_transtab[rd];
-            wr++;
-	 } else {
-            /* Possible collision; discard. */
-            vg_smc_discards++;
-            VG_(message) (Vg_DebugMsg, 
-                             "discarding translation of %p .. %p",
-                             tt[rd].orig_addr, 
-                             tt[rd].orig_addr + tt[rd].orig_size - 1);
-            VG_(free)((void*)tt[rd].trans_addr);
-         }         
+   if (out_count > 0) {
+      VG_(invalidate_tt_fast)();
+      VG_(sanity_check_tc_tt)();
+#     ifdef DEBUG_TRANSTAB
+      { Addr aa;
+        for (aa = i_start; aa <= i_end; aa++)
+           vg_assert(search_trans_table ( aa ) == NULL);
       }
-      vg_transtab_used[i] = wr;
-   }
-#  endif   
-}
-
-
-/* Top-level function in charge of discarding out-of-date translations
-   following the discovery of a (potential) original-code-write. 
-*/
-void VG_(flush_transtab) ( void )
-{
-#  if 0
-   Addr w_addr;
-   Int  i, j;
-
-   /* We shouldn't be here unless a code write was detected. */
-   vg_assert(vg_origwrites_used > 0);
-
-   /* Instead of incrementally fixing up the translation table cache,
-      just invalidate the whole darn thing.  Pray this doesn't happen
-      very often :) */
-   for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++)
-      VG_(transtab_cache_orig)[i] = 
-      VG_(transtab_cache_trans)[i] = (Addr)0;
-
-   /* Clear out the fast cache; discard_translations_bracketing
-      reconstructs it. */
-   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
-      VG_(smc_cache)[i] = False;
-
-   /* And also clear the slow-but-correct table. */
-   for (i = 0; i < 65536; i++) {
-      VgSmcSecondary* sec = vg_smc_primary[i];
-      if (sec)
-         for (j = 0; j < 2048; j++)
-            sec->chars[j] = 0;         
+#     endif
    }
 
-   /* This doesn't need to be particularly fast, since we (presumably)
-      don't have to handle particularly frequent writes to code
-      addresses. */
-   while (vg_origwrites_used > 0) {
-      vg_origwrites_used--;
-      w_addr = vg_origwrites[vg_origwrites_used];
-      discard_translations_bracketing ( w_addr );
-   }
-
-   vg_assert(vg_origwrites_used == 0);
-#  endif
+   if (1|| VG_(clo_verbosity) > 1)
+      VG_(message)(Vg_UserMsg,   
+         "discard %d (%d -> %d) translations in range %p .. %p",
+         out_count, out_osize, out_tsize, i_start, i_end );
 }
 
 
@@ -655,7 +502,7 @@
 /*--- Initialisation.                                      ---*/
 /*------------------------------------------------------------*/
 
-void VG_(init_transtab_and_SMC) ( void )
+void VG_(init_tt_tc) ( void )
 {
    Int i;
 
@@ -678,17 +525,6 @@
       at the first TT entry, which is, of course, empty. */
    for (i = 0; i < VG_TT_FAST_SIZE; i++)
       VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
-
-   /* No part of the address space has any translations. */
-   for (i = 0; i < 65536; i++)
-      vg_smc_primary[i] = NULL;
-
-   /* ... and the associated fast-check cache reflects this. */
-   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
-      VG_(smc_cache)[i] = False;
-
-   /* Finally, no original-code-writes have been recorded. */
-   vg_origwrites_used = 0;
 }
 
 /*--------------------------------------------------------------------*/