Initial revision


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@2 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/vg_transtab.c b/vg_transtab.c
new file mode 100644
index 0000000..b93fe40
--- /dev/null
+++ b/vg_transtab.c
@@ -0,0 +1,693 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Management of the translation table and cache.               ---*/
+/*---                                                vg_transtab.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+      Julian_Seward@muraroa.demon.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file LICENSE.
+*/
+
+#include "vg_include.h"
+#include "vg_constants.h"
+
+
+/*------------------------------------------------------------*/
+/*--- Management of the LRU-based translation table+cache. ---*/
+/*------------------------------------------------------------*/
+
+/* These sizes were set up so as to be able to debug large KDE 3
+   applications (are there any small ones?) without excessive amounts
+   of code retranslation.  */
+
+/* Size of the translation cache, in bytes. */
+#define VG_TC_SIZE 16000000 
+
+/* Do a LRU pass when the translation cache becomes this full. */
+#define VG_TC_LIMIT_PERCENT 95
+
+/* When doing an LRU pass, reduce TC fullness to this level. */
+#define VG_TC_TARGET_PERCENT 85
+
+/* Number of entries in the translation table.  This must be a prime
+   number in order to make the hashing work properly. */
+#define VG_TT_SIZE /*19997*/ /*29989*/ /*50497*/ /*75083*/ 100129
+
+/* Do an LRU pass when the translation table becomes this full. */
+#define VG_TT_LIMIT_PERCENT /*67*/ 80
+
+/* When doing an LRU pass, reduce TT fullness to this level. */
+#define VG_TT_TARGET_PERCENT /*60*/ 70
+
+/* The number of age steps we track.  0 means the current epoch,
+   N_EPOCHS-1 means last used N_EPOCHS-1 or more epochs ago.  */
+#define VG_N_EPOCHS /*2000*/ 4000
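+
+/* With these settings, an LRU pass triggers when the TC reaches
+   ~15.2MB (95% of 16MB) or the TT reaches ~80,103 entries (80% of
+   100129), and shrinks them back to ~13.6MB and ~70,090 entries
+   respectively. */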
+
+/* This TT entry is empty. */
+#define VG_TTE_EMPTY   ((Addr)1)
+/* This TT entry has been deleted. */
+#define VG_TTE_DELETED ((Addr)3)
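+
+/* In practice, addresses 1 and 3 lie in the never-mapped zero page,
+   so they cannot collide with the address of any genuine code. */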
+
+/* The TC.  This used to be statically allocated, but that forces many
+   SecMap arrays to be pointlessly allocated at startup, bloating the
+   process size by about 22M and making startup slow.  So now we
+   dynamically allocate it at startup time.
+   was: static UChar vg_tc[VG_TC_SIZE];
+*/
+static UChar* vg_tc = NULL;
+
+/* Count of bytes used in the TC. */
+static Int vg_tc_used = 0;
+
+/* The TT.  Like the TC, and for the same reason, it is dynamically
+   allocated at startup. 
+   was: static TTEntry vg_tt[VG_TT_SIZE];
+*/
+static TTEntry* vg_tt = NULL;
+
+/* Count of non-empty, non-deleted TT entries. */
+static Int vg_tt_used = 0;
+
+/* Fast helper for the TT.  A direct-mapped cache which holds a
+   pointer to a TT entry which may or may not be the correct one, but
+   which we hope usually is.  This array is referred to directly from
+   vg_dispatch.S. */
+Addr VG_(tt_fast)[VG_TT_FAST_SIZE];
+
+/* For reading/writing the misaligned TT-index word immediately
+   preceding every translation in the TC. */
+#define VG_READ_MISALIGNED_WORD(aaa) (*((UInt*)(aaa)))
+#define VG_WRITE_MISALIGNED_WORD(aaa,vvv) *((UInt*)(aaa)) = ((UInt)(vvv))
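+
+/* These are plain unaligned 32-bit accesses, which x86 permits (at a
+   small speed penalty when the word straddles a cache line). */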
+
+/* Used for figuring out an age threshold for translations. */
+static Int vg_bytes_in_epoch[VG_N_EPOCHS];
+static Int vg_entries_in_epoch[VG_N_EPOCHS];
+
+
+/* Just so these counts can be queried without making them globally
+   visible. */
+void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used )
+{
+   *tt_used = vg_tt_used;
+   *tc_used = vg_tc_used;
+}
+
+
+/* Do the LRU thing on TT/TC, clearing them back to the target limits
+   if they are over the threshold limits. 
+*/
+void VG_(maybe_do_lru_pass) ( void )
+{
+   Int i, j, r, w, thresh, ttno;
+   TTEntry* tte;
+
+   const Int tc_limit  = (Int)((VG_TC_SIZE * VG_TC_LIMIT_PERCENT) / 100.0);
+   const Int tt_limit  = (Int)((VG_TT_SIZE * VG_TT_LIMIT_PERCENT) / 100.0);
+   const Int tc_target = (Int)((VG_TC_SIZE * VG_TC_TARGET_PERCENT) / 100.0);
+   const Int tt_target = (Int)((VG_TT_SIZE * VG_TT_TARGET_PERCENT) / 100.0);
+
+   /* Quickly decide whether we need to do an LRU pass. */
+   if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
+      return;
+
+   VGP_PUSHCC(VgpDoLRU);
+   /*   
+   VG_(printf)(
+      "limits: tc_limit %d, tt_limit %d, tc_target %d, tt_target %d\n",
+      tc_limit, tt_limit, tc_target, tt_target);
+   */
+
+   if (VG_(clo_verbosity) > 2)
+      VG_(printf)(" pre-LRU: tc %d (target %d),  tt %d (target %d)\n",
+	          vg_tc_used, tc_target, vg_tt_used, tt_target);
+
+   /* Yes we do.  Figure out what threshold age is required in order to
+      shrink both the TC and TT occupancy below TC_TARGET_PERCENT and
+      TT_TARGET_PERCENT respectively. */
+
+   VG_(number_of_lrus)++;
+
+   /* Count the number of TC bytes and TT entries in each epoch. */
+   for (i = 0; i < VG_N_EPOCHS; i++)
+      vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
+
+   for (i = 0; i < VG_TT_SIZE; i++) {
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
+          vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      j = vg_tt[i].mru_epoch;
+      vg_assert(j <= VG_(current_epoch));
+      j = VG_(current_epoch) - j;
+      if (j >= VG_N_EPOCHS) j = VG_N_EPOCHS-1;
+      vg_assert(0 <= j && j < VG_N_EPOCHS);
+      /* Greater j now means older. */
+      vg_entries_in_epoch[j]++;
+      vg_bytes_in_epoch[j] += 4+vg_tt[i].trans_size;
+   }
+
+   /*
+   for (i = 0; i < VG_N_EPOCHS; i++)
+      VG_(printf)("epoch %d: ents %d, bytes %d\n", 
+                  i, vg_entries_in_epoch[i], vg_bytes_in_epoch[i]);
+   */
+
+   /* Cumulatise.  Make vg_{bytes,entries}_in_epoch[n] contain the
+      counts for itself and all younger epochs. */
+   for (i = 1; i < VG_N_EPOCHS; i++) {
+      vg_entries_in_epoch[i] += vg_entries_in_epoch[i-1];
+      vg_bytes_in_epoch[i] += vg_bytes_in_epoch[i-1];
+   }
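+   /* For example, if epochs 0,1,2 initially hold 10,5,3 entries, the
+      array now reads 10,15,18: element [n] counts all entries of age
+      n or younger. */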
+
+   for (thresh = 0; thresh < VG_N_EPOCHS; thresh++) {
+      if (vg_entries_in_epoch[thresh] > tt_target 
+          || vg_bytes_in_epoch[thresh] >= tc_target)
+         break;
+   }
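+   /* Retaining only entries of age < thresh (epochs 0 .. thresh-1)
+      therefore keeps both TT and TC within their targets. */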
+
+   if (VG_(clo_verbosity) > 2)
+      VG_(printf)(
+         "     LRU: discard translations %d or more epochs since last use\n",
+         thresh
+      );
+
+   thresh = VG_(current_epoch) - thresh;
+
+   /* Ok, so we will hit our targets if we retain all entries most
+      recently used at most thresh epochs ago.  Traverse the TT and
+      mark such entries as deleted. */
+   for (i = 0; i < VG_TT_SIZE; i++) {
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY || 
+         vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
+      if (vg_tt[i].mru_epoch <= thresh) {
+         vg_tt[i].orig_addr = VG_TTE_DELETED;
+         vg_tt_used--;
+         VG_(this_epoch_out_count) ++;
+         VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
+         VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
+         VG_(overall_out_count) ++;
+         VG_(overall_out_osize) += vg_tt[i].orig_size;
+         VG_(overall_out_tsize) += vg_tt[i].trans_size;
+      }
+   }
+
+   vg_assert(vg_tt_used >= 0);
+   vg_assert(vg_tt_used <= tt_target);
+
+   /* Now compact the TC, sliding live entries downwards to fill spaces
+      left by deleted entries.  In this loop, r is the offset in TC of
+      the current translation under consideration, and w is the next
+      allocation point. */
+   r = w = 0;
+   while (True) {
+      if (r >= vg_tc_used) break;
+      /* The first four bytes of every translation contain the index
+         of its TT entry.  The TT entry's .trans_addr field points at
+         the start of the code proper, not at this 4-byte index, so
+         that we don't constantly have to keep adding 4 in the main
+         lookup/dispatch loop. */
+      ttno = VG_READ_MISALIGNED_WORD(&vg_tc[r]);
+      vg_assert(ttno >= 0 && ttno < VG_TT_SIZE);
+      tte = & vg_tt[ ttno ];
+      vg_assert(tte->orig_addr != VG_TTE_EMPTY);
+      if (tte->orig_addr != VG_TTE_DELETED) {
+         /* We want to keep this one alive. */
+         /* Sanity check the pointer back to TC. */
+         vg_assert(tte->trans_addr == (Addr)&vg_tc[r+4]);
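+         /* Byte-by-byte forward copy; safe even though source and
+            destination may overlap, since w <= r throughout. */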
+         for (i = 0; i < 4+tte->trans_size; i++)
+            vg_tc[w+i] = vg_tc[r+i];
+         tte->trans_addr = (Addr)&vg_tc[w+4];
+         w += 4+tte->trans_size;
+      }
+      r += 4+tte->trans_size;
+   }
+   /* Should have traversed an exact number of translations, with no
+      slop at the end. */
+   vg_assert(w <= r);
+   vg_assert(r == vg_tc_used);
+   vg_assert(w <= tc_target);
+   vg_tc_used = w;
+
+   /* Invalidate the fast cache, since it is now out of date.  It will get
+      reconstructed incrementally when the client resumes. */
+   VG_(invalidate_tt_fast)();
+
+   if (VG_(clo_verbosity) > 2)
+      VG_(printf)("post-LRU: tc %d (target %d),  tt %d (target %d)\n",
+	          vg_tc_used, tc_target, vg_tt_used, tt_target);
+
+   if (VG_(clo_verbosity) > 1)
+      VG_(message)(Vg_UserMsg,   
+         "epoch %d (bb %luk): thresh %d, "
+         "out %d (%dk -> %dk), new TT %d, TC %dk",
+         VG_(current_epoch), 
+         VG_(bbs_done) / 1000,
+         VG_(current_epoch) - thresh, 
+         VG_(this_epoch_out_count),
+         VG_(this_epoch_out_osize) / 1000,
+         VG_(this_epoch_out_tsize) / 1000,
+         vg_tt_used, vg_tc_used / 1000
+      );
+
+   /* Reconstruct the SMC detection structures. */
+
+   VGP_POPCC;
+}
+
+
+/* Do a sanity check on TT/TC.
+*/
+void VG_(sanity_check_tc_tt) ( void )
+{
+   Int      i, counted_entries, counted_bytes;
+   TTEntry* tte;
+   counted_entries = 0;
+   counted_bytes   = 0;
+   for (i = 0; i < VG_TT_SIZE; i++) {
+      tte = &vg_tt[i];
+      if (tte->orig_addr == VG_TTE_EMPTY) continue;
+      if (tte->orig_addr == VG_TTE_DELETED) continue;
+      vg_assert(tte->mru_epoch >= 0);
+      vg_assert(tte->mru_epoch <= VG_(current_epoch));
+      counted_entries++;
+      counted_bytes += 4+tte->trans_size;
+      vg_assert(tte->trans_addr >= (Addr)&vg_tc[4]);
+      vg_assert(tte->trans_addr < (Addr)&vg_tc[vg_tc_used]);
+      vg_assert(VG_READ_MISALIGNED_WORD(tte->trans_addr-4) == i);
+   }
+   vg_assert(counted_entries == vg_tt_used);
+   vg_assert(counted_bytes == vg_tc_used);
+}
+
+
+/* Add this already-filled-in entry to the TT.  Assumes that the
+   relevant code chunk has been placed in the TC, preceded by a
+   4-byte hole for the back pointer, which is filled in here.
+*/
+void VG_(add_to_trans_tab) ( TTEntry* tte )
+{
+   Int i;
+   /*
+   VG_(printf)("add_to_trans_tab(%d) %x %d %x %d\n",
+               vg_tt_used, tte->orig_addr, tte->orig_size, 
+               tte->trans_addr, tte->trans_size);
+   */
+   vg_assert(tte->orig_addr != VG_TTE_DELETED 
+             && tte->orig_addr != VG_TTE_EMPTY);
+   /* Hash to get initial probe point. */
+   i = ((UInt)(tte->orig_addr)) % VG_TT_SIZE;
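+   /* Linear probing.  Guaranteed to terminate provided the table is
+      never completely full, which the LRU pass ensures by keeping
+      occupancy at or below VG_TT_LIMIT_PERCENT. */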
+   while (True) {
+      if (vg_tt[i].orig_addr == tte->orig_addr)
+         VG_(panic)("add_to_trans_tab: duplicate");
+      if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
+          vg_tt[i].orig_addr == VG_TTE_EMPTY) {
+         /* Put it here, and set the back pointer. */
+         vg_tt[i] = *tte;
+         VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
+         vg_tt_used++;
+         return;
+      }
+      i++;
+      if (i == VG_TT_SIZE) i = 0;
+   }
+}
+
+
+/* Copy a new translation's code into TC, leaving a 4-byte hole for
+   the back pointer, and returning a pointer to the code proper (not
+   the hole) in TC. 
+*/
+Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size )
+{
+   Int i;
+   Addr ret_addr;
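+   /* The preceding LRU pass is presumed to have left enough headroom
+      in the TC that this check never fails for any plausible
+      translation size. */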
+   if (4+trans_size > VG_TC_SIZE-vg_tc_used)
+      VG_(panic)("copy_to_transcache: not enough free space?!");
+   /* Leave a hole for the back pointer to the TT entry. */
+   vg_tc_used += 4;
+   ret_addr = (Addr)&vg_tc[vg_tc_used];
+   for (i = 0; i < trans_size; i++)
+      vg_tc[vg_tc_used+i] = ((UChar*)trans_addr)[i];
+   vg_tc_used += trans_size;
+   return ret_addr;
+}
+
+
+/* Invalidate the tt_fast cache, for whatever reason.  Tricky.  We
+   have to find a TTE_EMPTY slot to point all entries at. */
+void VG_(invalidate_tt_fast)( void )
+{
+   Int i, j;
+   for (i = 0; i < VG_TT_SIZE && vg_tt[i].orig_addr != VG_TTE_EMPTY; i++)
+      ;
+   vg_assert(i < VG_TT_SIZE 
+             && vg_tt[i].orig_addr == VG_TTE_EMPTY);
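+   /* Pointing every fast-cache line at an EMPTY entry guarantees a
+      miss on that line's next use, forcing a slow search and hence
+      incremental repopulation of the cache. */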
+   for (j = 0; j < VG_TT_FAST_SIZE; j++)
+      VG_(tt_fast)[j] = (Addr)&vg_tt[i];
+}
+
+
+/* Search the TT for the entry corresponding to the supplied original
+   address, returning NULL if there is none.  This routine is used
+   when we miss in VG_(tt_fast). 
+*/
+static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
+{
+  //static Int queries = 0;
+  //static Int probes = 0;
+   Int i;
+   /* Hash to get initial probe point. */
+   //   if (queries == 10000) {
+   //  VG_(printf)("%d queries, %d probes\n", queries, probes);
+   //  queries = probes = 0;
+   //}
+   //queries++;
+   i = ((UInt)orig_addr) % VG_TT_SIZE;
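+   /* Note that DELETED slots are stepped over rather than treated as
+      misses, since the entry we want may lie beyond one; only an
+      EMPTY slot terminates the probe chain. */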
+   while (True) {
+     //probes++;
+      if (vg_tt[i].orig_addr == orig_addr)
+         return &vg_tt[i];
+      if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
+         return NULL;
+      i++;
+      if (i == VG_TT_SIZE) i = 0;
+   }
+}
+
+
+/* Find the translation address for a given (original) code address.
+   If found, update VG_(tt_fast) so subsequent lookups are fast.  If
+   no translation can be found, return zero.  This is the only
+   routine called from vg_run_innerloop.  */
+Addr VG_(search_transtab) ( Addr original_addr )
+{
+   TTEntry* tte;
+   VGP_PUSHCC(VgpSlowFindT);
+   tte = search_trans_table ( original_addr );
+   if (tte == NULL) {
+      /* We didn't find it.  vg_run_innerloop will have to request a
+         translation. */
+      VGP_POPCC;
+      return (Addr)0;
+   } else {
+      /* Found it.  Put the search result into the fast cache now.
+         Also set the mru_epoch to mark this translation as used. */
+      UInt cno = (UInt)original_addr & VG_TT_FAST_MASK;
+      VG_(tt_fast)[cno] = (Addr)tte;
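+      /* The fast cache is direct-mapped on the low bits of the
+         original address; a later lookup mapping to the same line
+         simply overwrites this entry. */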
+      VG_(tt_fast_misses)++;
+      tte->mru_epoch = VG_(current_epoch);
+      VGP_POPCC;
+      return tte->trans_addr;
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Detecting and handling self-modifying code.          ---*/
+/*------------------------------------------------------------*/
+
+/* This mechanism uses two data structures:
+
+   VG_(smc_cache) -- an array of Bool which approximately records the
+   parts of the address space corresponding to code for which a
+   translation exists in the translation table.  It is consulted at
+   each write, to determine whether that write might be to a code
+   address; if so, the program is stopped at the next jump, and the
+   corresponding translations are invalidated.
+
+   Precise semantics: VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] is True
+   for every address a containing a code byte which has been
+   translated.  So it acts kind-of like a direct-mapped cache.
+
+   The second structure is vg_origwrites, a small array of addresses
+   at which the cache indicates a code write may have happened.  This
+   is (effectively) checked at each control transfer (jump), so that
+   translations can be discarded before going on.  An array is
+   somewhat overkill, since it strikes me as very unlikely that a
+   single basic block will do more than one code write.  Nevertheless
+   ...  
+*/
+
+
+/* Definitions for the self-modifying-code detection cache, intended
+   as a fast check which clears the vast majority of writes.  */
+
+#define VG_SMC_CACHE_HASH(aaa) \
+   ((((UInt)(aaa)) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK)
+
+Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE];
+
+
+/* Definitions for the fallback mechanism, which, more slowly,
+   provides a precise record of which words in the address space
+   belong to original code. */
+
+typedef struct { UChar chars[2048]; } VgSmcSecondary;
+
+static VgSmcSecondary* vg_smc_primary[65536];
+
+static VgSmcSecondary* vg_smc_new_secondary ( void )
+{
+   Int i;
+   VgSmcSecondary* sec 
+      = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) );
+   for (i = 0; i < 2048; i++)
+      sec->chars[i] = 0;
+   return sec;
+}
+
+#define GET_BIT_ARRAY(arr,indx)                      \
+   (1 & (  ((UChar*)arr)[((UInt)indx) / 8]           \
+           >> ( ((UInt)indx) % 8) ) )
+
+#define SET_BIT_ARRAY(arr,indx)                      \
+   ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8)
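+
+/* So the slow map is a two-level structure: vg_smc_primary[a >> 16]
+   selects the 64KB region containing address a, and within it, bit
+   ((a & 0xFFFF) >> 2) of the 16384-bit secondary array covers the
+   aligned 4-byte word containing a. */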
+
+
+/* Finally, a place to record the original-code-write addresses
+   detected in a basic block. */
+
+#define VG_ORIGWRITES_SIZE 10
+
+static Addr vg_origwrites[VG_ORIGWRITES_SIZE];
+static Int  vg_origwrites_used;
+
+
+/* Call here to check a written address. */
+
+void VG_(smc_check4) ( Addr a )
+{
+   UInt bit_index;
+   VgSmcSecondary* smc_secondary;
+
+#  if VG_SMC_FASTCHECK_IN_C
+   VG_(smc_total_check4s)++;
+
+   /* Try the fast check first. */
+   if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return;
+#  endif
+
+   VG_(smc_cache_passed)++;
+
+   /* Need to do a slow check. */
+   smc_secondary = vg_smc_primary[a >> 16];
+   if (smc_secondary == NULL) return;
+
+   bit_index = (a & 0xFFFF) >> 2;
+   if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return;
+
+   VG_(smc_fancy_passed)++;
+
+   /* Detected a Real Live write to code which has been translated.
+      Note it. */
+   if (vg_origwrites_used == VG_ORIGWRITES_SIZE)
+      VG_(panic)("VG_ORIGWRITES_SIZE is too small; "
+                 "increase and recompile.");
+   vg_origwrites[vg_origwrites_used] = a;
+   vg_origwrites_used++;
+
+   VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a);
+
+   /* Force an exit before the next basic block, so the translation
+      cache can be flushed appropriately. */
+   VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr);
+   VG_(dispatch_ctr)       = 1;
+   VG_(interrupt_reason)   = VG_Y_SMC;
+}
+
+
+/* Mark an address range as containing an original translation,
+   updating both the fast-check cache and the slow-but-correct data
+   structure.  
+*/
+void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size )
+{
+   Addr a;
+   VgSmcSecondary* smc_secondary;
+   UInt bit_index;
+
+   for (a = orig_addr; a < orig_addr+orig_size; a++) {
+
+      VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True;
+
+      smc_secondary = vg_smc_primary[a >> 16];
+      if (smc_secondary == NULL)
+         smc_secondary = 
+         vg_smc_primary[a >> 16] = vg_smc_new_secondary();
+
+      bit_index = (a & 0xFFFF) >> 2;
+      SET_BIT_ARRAY(smc_secondary->chars, bit_index);      
+   }
+}
+
+
+/* Discard any translations whose original code overlaps with the
+   range w_addr .. w_addr+3 inclusive. 
+*/
+__attribute__ ((unused))
+static void discard_translations_bracketing ( Addr w_addr )
+{
+#  if 0
+   Int      i, rd, wr;
+   Addr     o_start, o_end;
+   TTEntry* tt;
+
+   for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) {
+      tt = vg_transtab[i];
+      wr = 0;
+      for (rd = 0; rd < vg_transtab_used[i]; rd++) {
+         o_start = tt[rd].orig_addr;
+         o_end   = o_start + tt[rd].orig_size;
+         if (w_addr > o_end || (w_addr+3) < o_start) {
+            /* No collision possible; keep this translation */
+            VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size );
+            if (wr < rd) vg_transtab[wr] = vg_transtab[rd];
+            wr++;
+         } else {
+            /* Possible collision; discard. */
+            vg_smc_discards++;
+            VG_(message) (Vg_DebugMsg, 
+                             "discarding translation of %p .. %p",
+                             tt[rd].orig_addr, 
+                             tt[rd].orig_addr + tt[rd].orig_size - 1);
+            VG_(free)((void*)tt[rd].trans_addr);
+         }         
+      }
+      vg_transtab_used[i] = wr;
+   }
+#  endif   
+}
+
+
+/* Top-level function in charge of discarding out-of-date translations
+   following the discovery of a (potential) original-code-write. 
+*/
+void VG_(flush_transtab) ( void )
+{
+#  if 0
+   Addr w_addr;
+   Int  i, j;
+
+   /* We shouldn't be here unless a code write was detected. */
+   vg_assert(vg_origwrites_used > 0);
+
+   /* Instead of incrementally fixing up the translation table cache,
+      just invalidate the whole darn thing.  Pray this doesn't happen
+      very often :) */
+   for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++)
+      VG_(transtab_cache_orig)[i] = 
+      VG_(transtab_cache_trans)[i] = (Addr)0;
+
+   /* Clear out the fast cache; discard_translations_bracketing
+      reconstructs it. */
+   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
+      VG_(smc_cache)[i] = False;
+
+   /* And also clear the slow-but-correct table. */
+   for (i = 0; i < 65536; i++) {
+      VgSmcSecondary* sec = vg_smc_primary[i];
+      if (sec)
+         for (j = 0; j < 2048; j++)
+            sec->chars[j] = 0;         
+   }
+
+   /* This doesn't need to be particularly fast, since we (presumably)
+      don't have to handle particularly frequent writes to code
+      addresses. */
+   while (vg_origwrites_used > 0) {
+      vg_origwrites_used--;
+      w_addr = vg_origwrites[vg_origwrites_used];
+      discard_translations_bracketing ( w_addr );
+   }
+
+   vg_assert(vg_origwrites_used == 0);
+#  endif
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Initialisation.                                      ---*/
+/*------------------------------------------------------------*/
+
+void VG_(init_transtab_and_SMC) ( void )
+{
+   Int i;
+
+   /* Allocate the translation table and translation cache. */
+   vg_assert(vg_tc == NULL);
+   vg_tc = VG_(get_memory_from_mmap) ( VG_TC_SIZE * sizeof(UChar) );
+   vg_assert(vg_tc != NULL);
+
+   vg_assert(vg_tt == NULL);
+   vg_tt = VG_(get_memory_from_mmap) ( VG_TT_SIZE * sizeof(TTEntry) );
+   vg_assert(vg_tt != NULL);
+
+   /* The main translation table is empty. */
+   vg_tt_used = 0;
+   for (i = 0; i < VG_TT_SIZE; i++) {
+      vg_tt[i].orig_addr = VG_TTE_EMPTY;
+   }
+
+   /* The translation table's fast cache is empty.  Point all entries
+      at the first TT entry, which is, of course, empty. */
+   for (i = 0; i < VG_TT_FAST_SIZE; i++)
+      VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
+
+   /* No part of the address space has any translations. */
+   for (i = 0; i < 65536; i++)
+      vg_smc_primary[i] = NULL;
+
+   /* ... and the associated fast-check cache reflects this. */
+   for (i = 0; i < VG_SMC_CACHE_SIZE; i++) 
+      VG_(smc_cache)[i] = False;
+
+   /* Finally, no original-code-writes have been recorded. */
+   vg_origwrites_used = 0;
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_transtab.c ---*/
+/*--------------------------------------------------------------------*/