/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache. ---*/
/*--- vg_transtab.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, an x86 protected-mode emulator
designed for debugging and profiling binaries on x86-Unixes.
Copyright (C) 2000-2002 Julian Seward
jseward@acm.org
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file LICENSE.
*/
#include "vg_include.h"
#include "vg_constants.h"
/* #define DEBUG_TRANSTAB */
/*------------------------------------------------------------*/
/*--- Management of the LRU-based translation table+cache. ---*/
/*------------------------------------------------------------*/
/* These sizes were set up so as to be able to debug large KDE 3
applications (are there any small ones?) without excessive amounts
of code retranslation. */
/* Size of the translation cache, in bytes. */
#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/
/* Do an LRU pass when the translation cache becomes this full. */
#define VG_TC_LIMIT_PERCENT 98
/* When doing an LRU pass, reduce TC fullness to this level. */
#define VG_TC_TARGET_PERCENT 85
/* Number of entries in the translation table. This must be a prime
number in order to make the hashing work properly. */
#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/
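/* The TT is an open-addressed hash table: an entry's home slot is
   (orig_addr % VG_TT_SIZE) and collisions are resolved by linear
   probing -- see VG_(add_to_trans_tab) and search_trans_table below.
   A prime modulus helps spread the probe sequences evenly. */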
/* Do an LRU pass when the translation table becomes this full. */
#define VG_TT_LIMIT_PERCENT /*67*/ 80
/* When doing an LRU pass, reduce TT fullness to this level. */
#define VG_TT_TARGET_PERCENT /*60*/ 70
/* The number of age steps we track.  0 means used in the current
   epoch; N_EPOCHS-1 means last used N_EPOCHS-1 or more epochs ago. */
#define VG_N_EPOCHS /*2000*/ /*4000*/ 20000
/* This TT entry is empty. There is no associated TC storage. */
#define VG_TTE_EMPTY ((Addr)1)
/* This TT entry has been deleted, in the sense that it does not
contribute to the orig->trans mapping. However, the ex-translation
it points at still occupies space in TC. This slot cannot be
re-used without doing an LRU pass. */
#define VG_TTE_DELETED ((Addr)3)
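/* So a TT slot is in one of three states: VG_TTE_EMPTY (unused, or
   reclaimed by an LRU pass), VG_TTE_DELETED (mapping dead, but its
   code still occupies TC space until the next LRU compaction), or
   live (orig_addr is a genuine original-code address).  The values 1
   and 3 are assumed never to be real code addresses, which makes them
   safe sentinels. */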
/* The TC. This used to be statically allocated, but that forces many
SecMap arrays to be pointlessly allocated at startup, bloating the
process size by about 22M and making startup slow. So now we
dynamically allocate it at startup time.
was: static UChar vg_tc[VG_TC_SIZE];
*/
static UChar* vg_tc = NULL;
/* Count of bytes used in the TC. This includes those pointed to from
VG_TTE_DELETED entries. */
static Int vg_tc_used = 0;
/* The TT. Like TC, for the same reason, is dynamically allocated at
startup.
was: static TTEntry vg_tt[VG_TT_SIZE];
*/
static TTEntry* vg_tt = NULL;
/* Count of non-empty TT entries. This includes deleted ones. */
static Int vg_tt_used = 0;
/* Fast helper for the TT. A direct-mapped cache which holds a
pointer to a TT entry which may or may not be the correct one, but
which we hope usually is. This array is referred to directly from
vg_dispatch.S. */
Addr VG_(tt_fast)[VG_TT_FAST_SIZE];
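/* The fast cache is indexed by (orig_addr & VG_TT_FAST_MASK) -- see
   VG_(search_transtab) below, which refills a slot after a miss.
   Since a slot may hold the wrong entry, the fast-path lookup in
   vg_dispatch.S presumably checks the cached entry's orig_addr and
   falls back to VG_(search_transtab) on a mismatch. */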
/* For reading/writing the misaligned TT-index word immediately
   preceding every translation in TC. */
#define VG_READ_MISALIGNED_WORD(aaa) (*((UInt*)(aaa)))
#define VG_WRITE_MISALIGNED_WORD(aaa,vvv) *((UInt*)(aaa)) = ((UInt)(vvv))
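/* These are plain dereferences; x86 permits misaligned 32-bit loads
   and stores, so no byte-at-a-time access is needed. */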
/* Used for figuring out an age threshold for translations. */
static Int vg_bytes_in_epoch[VG_N_EPOCHS];
static Int vg_entries_in_epoch[VG_N_EPOCHS];
/* Just so these counts can be queried without making them globally
visible. */
void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used )
{
*tt_used = vg_tt_used;
*tc_used = vg_tc_used;
}
/* Do the LRU thing on TT/TC, clearing them back to the target limits
if they are over the threshold limits.
*/
void VG_(maybe_do_lru_pass) ( void )
{
Int i, j, r, w, thresh, ttno;
TTEntry* tte;
const Int tc_limit = (Int)(((double)VG_TC_SIZE * (double)VG_TC_LIMIT_PERCENT)
/ (double)100.0);
const Int tt_limit = (Int)(((double)VG_TT_SIZE * (double)VG_TT_LIMIT_PERCENT)
/ (double)100.0);
const Int tc_target = (Int)(((double)VG_TC_SIZE * (double)VG_TC_TARGET_PERCENT)
/ (double)100.0);
const Int tt_target = (Int)(((double)VG_TT_SIZE * (double)VG_TT_TARGET_PERCENT)
/ (double)100.0);
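/* With the current settings these work out to roughly:
     tc_limit  = 98% of 32000000 bytes  = 31360000
     tc_target = 85% of 32000000 bytes  = 27200000
     tt_limit  = 80% of 200191 entries  = 160152
     tt_target = 70% of 200191 entries  = 140133   */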
/* Quickly decide whether an LRU pass is needed at all. */
if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit)
return;
# ifdef DEBUG_TRANSTAB
VG_(sanity_check_tc_tt)();
# endif
VGP_PUSHCC(VgpDoLRU);
/*
VG_(printf)(
"limits: tc_limit %d, tt_limit %d, tc_target %d, tt_target %d\n",
tc_limit, tt_limit, tc_target, tt_target);
*/
if (VG_(clo_verbosity) > 2)
VG_(printf)(" pre-LRU: tc %d (target %d), tt %d (target %d)\n",
vg_tc_used, tc_target, vg_tt_used, tt_target);
/* Yes we do. Figure out what threshold age is required in order to
shrink both the TC and TT occupancy below TC_TARGET_PERCENT and
TT_TARGET_PERCENT respectively. */
VG_(number_of_lrus)++;
/* Count the number of TC bytes and TT entries in each epoch. */
for (i = 0; i < VG_N_EPOCHS; i++)
vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0;
for (i = 0; i < VG_TT_SIZE; i++) {
if (vg_tt[i].orig_addr == VG_TTE_EMPTY
|| vg_tt[i].orig_addr == VG_TTE_DELETED)
continue;
j = vg_tt[i].mru_epoch;
vg_assert(j <= VG_(current_epoch));
j = VG_(current_epoch) - j;
if (j >= VG_N_EPOCHS) j = VG_N_EPOCHS-1;
vg_assert(0 <= j && j < VG_N_EPOCHS);
/* Greater j now means older. */
vg_entries_in_epoch[j]++;
vg_bytes_in_epoch[j] += 4+vg_tt[i].trans_size;
}
/*
for (i = 0; i < VG_N_EPOCHS; i++)
VG_(printf)("epoch %d: ents %d, bytes %d\n",
i, vg_entries_in_epoch[i], vg_bytes_in_epoch[i]);
*/
/* Accumulate.  Make vg_{bytes,entries}_in_epoch[n] contain the
   counts for epoch n and all younger epochs. */
for (i = 1; i < VG_N_EPOCHS; i++) {
vg_entries_in_epoch[i] += vg_entries_in_epoch[i-1];
vg_bytes_in_epoch[i] += vg_bytes_in_epoch[i-1];
}
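/* vg_entries_in_epoch[n] / vg_bytes_in_epoch[n] now hold the totals
   for all live entries of age n or younger; the scan below finds the
   smallest age at which keeping that age and everything younger would
   already exceed a target. */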
for (thresh = 0; thresh < VG_N_EPOCHS; thresh++) {
if (vg_entries_in_epoch[thresh] > tt_target
|| vg_bytes_in_epoch[thresh] >= tc_target)
break;
}
if (VG_(clo_verbosity) > 2)
VG_(printf)(
" LRU: discard translations %d or more epochs since last use\n",
thresh
);
thresh = VG_(current_epoch) - thresh;
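/* thresh is now an absolute epoch number rather than an age.  For
   example (hypothetical numbers): if VG_(current_epoch) is 500 and
   the chosen age threshold was 3, thresh becomes 497. */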
/* Ok, so we will hit our targets if we discard every entry whose most
   recent use was at epoch thresh or earlier.  Traverse the TT and
   mark such entries as deleted. */
for (i = 0; i < VG_TT_SIZE; i++) {
if (vg_tt[i].orig_addr == VG_TTE_EMPTY
|| vg_tt[i].orig_addr == VG_TTE_DELETED)
continue;
if (vg_tt[i].mru_epoch <= thresh) {
vg_tt[i].orig_addr = VG_TTE_DELETED;
VG_(this_epoch_out_count) ++;
VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
VG_(overall_out_count) ++;
VG_(overall_out_osize) += vg_tt[i].orig_size;
VG_(overall_out_tsize) += vg_tt[i].trans_size;
}
}
/* Now compact the TC, sliding live entries downwards to fill spaces
left by deleted entries. In this loop, r is the offset in TC of
the current translation under consideration, and w is the next
allocation point. */
r = w = 0;
while (True) {
if (r >= vg_tc_used) break;
/* The first four bytes of every translation contain the index
of its TT entry. The TT entry's .trans_addr field points at
the start of the code proper, not at this 4-byte index, so
that we don't constantly have to keep adding 4 in the main
lookup/dispatch loop. */
ttno = VG_READ_MISALIGNED_WORD(&vg_tc[r]);
vg_assert(ttno >= 0 && ttno < VG_TT_SIZE);
tte = & vg_tt[ ttno ];
vg_assert(tte->orig_addr != VG_TTE_EMPTY);
if (tte->orig_addr != VG_TTE_DELETED) {
/* We want to keep this one alive. */
/* Sanity check the pointer back to TC. */
vg_assert(tte->trans_addr == (Addr)&vg_tc[r+4]);
for (i = 0; i < 4+tte->trans_size; i++)
vg_tc[w+i] = vg_tc[r+i];
tte->trans_addr = (Addr)&vg_tc[w+4];
w += 4+tte->trans_size;
} else {
tte->orig_addr = VG_TTE_EMPTY;
vg_tt_used--;
}
r += 4+tte->trans_size;
}
/* Should have traversed an exact number of translations, with no
   slop at the end. */
vg_assert(w <= r);
vg_assert(r == vg_tc_used);
vg_assert(w <= r);
vg_assert(w <= tc_target);
vg_tc_used = w;
vg_assert(vg_tt_used >= 0);
vg_assert(vg_tt_used <= tt_target);
/* Invalidate the fast cache, since it is now out of date. It will get
reconstructed incrementally when the client resumes. */
VG_(invalidate_tt_fast)();
if (VG_(clo_verbosity) > 2)
VG_(printf)("post-LRU: tc %d (target %d), tt %d (target %d)\n",
vg_tc_used, tc_target, vg_tt_used, tt_target);
if (VG_(clo_verbosity) > 1)
VG_(message)(Vg_UserMsg,
"epoch %d (bb %luk): thresh %d, "
"out %d (%dk -> %dk), new TT %d, TC %dk",
VG_(current_epoch),
VG_(bbs_done) / 1000,
VG_(current_epoch) - thresh,
VG_(this_epoch_out_count),
VG_(this_epoch_out_osize) / 1000,
VG_(this_epoch_out_tsize) / 1000,
vg_tt_used, vg_tc_used / 1000
);
/* Reconstruct the SMC detection structures. */
# ifdef DEBUG_TRANSTAB
for (i = 0; i < VG_TT_SIZE; i++)
vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
# endif
VG_(sanity_check_tc_tt)();
VGP_POPCC;
}
/* Do a sanity check on TT/TC.
*/
void VG_(sanity_check_tc_tt) ( void )
{
Int i, counted_entries, counted_bytes;
TTEntry* tte;
counted_entries = 0;
counted_bytes = 0;
for (i = 0; i < VG_TT_SIZE; i++) {
tte = &vg_tt[i];
if (tte->orig_addr == VG_TTE_EMPTY) continue;
vg_assert(tte->mru_epoch >= 0);
vg_assert(tte->mru_epoch <= VG_(current_epoch));
counted_entries++;
counted_bytes += 4+tte->trans_size;
vg_assert(tte->trans_addr >= (Addr)&vg_tc[4]);
vg_assert(tte->trans_addr < (Addr)&vg_tc[vg_tc_used]);
vg_assert(VG_READ_MISALIGNED_WORD(tte->trans_addr-4) == i);
}
vg_assert(counted_entries == vg_tt_used);
vg_assert(counted_bytes == vg_tc_used);
}
/* Add this already-filled-in entry to the TT.  Assumes that the
   relevant code chunk has been placed in TC, with a dummy back
   pointer in front of it; the real back pointer is written here.
*/
extern void VG_(add_to_trans_tab) ( TTEntry* tte )
{
Int i;
/*
VG_(printf)("add_to_trans_tab(%d) %x %d %x %d\n",
vg_tt_used, tte->orig_addr, tte->orig_size,
tte->trans_addr, tte->trans_size);
*/
vg_assert(tte->orig_addr != VG_TTE_DELETED
&& tte->orig_addr != VG_TTE_EMPTY);
/* Hash to get initial probe point. */
i = ((UInt)(tte->orig_addr)) % VG_TT_SIZE;
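/* Linear probing: scan forward, wrapping at VG_TT_SIZE, until an
   empty slot is found.  Note that VG_TTE_DELETED slots are not reused
   here; they are reclaimed only by an LRU pass.  The occupancy limits
   are well below 100%, so in practice an empty slot is always found. */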
while (True) {
if (vg_tt[i].orig_addr == tte->orig_addr)
VG_(panic)("add_to_trans_tab: duplicate");
if (vg_tt[i].orig_addr == VG_TTE_EMPTY) {
/* Put it here, and set the back pointer. */
vg_tt[i] = *tte;
VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
vg_tt_used++;
return;
}
i++;
if (i == VG_TT_SIZE) i = 0;
}
}
/* Copy a new translation's code into TC, leaving a 4-byte hole for
the back pointer, and returning a pointer to the code proper (not
the hole) in TC.
*/
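/* Layout of each chunk in TC (see also the comment in
   VG_(maybe_do_lru_pass)):
      | TT index (4 bytes) | translated code (trans_size bytes) |
   The returned address points at the code, not at the index word. */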
Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size )
{
Int i;
Addr ret_addr;
if (4+trans_size > VG_TC_SIZE-vg_tc_used)
VG_(panic)("copy_to_transcache: not enough free space?!");
/* Leave a hole for the back pointer to the TT entry. */
vg_tc_used += 4;
ret_addr = (Addr)&vg_tc[vg_tc_used];
for (i = 0; i < trans_size; i++)
vg_tc[vg_tc_used+i] = ((UChar*)trans_addr)[i];
vg_tc_used += trans_size;
return ret_addr;
}
/* Invalidate the tt_fast cache, for whatever reason. Tricky. We
have to find a TTE_EMPTY slot to point all entries at. */
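/* Rationale: a slot whose orig_addr is VG_TTE_EMPTY can never match a
   genuine original address, so after this every fast-cache lookup
   should miss and fall through to VG_(search_transtab), which rebuilds
   the cache incrementally (see the comment in VG_(maybe_do_lru_pass)). */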
void VG_(invalidate_tt_fast)( void )
{
Int i, j;
for (i = 0; i < VG_TT_SIZE && vg_tt[i].orig_addr != VG_TTE_EMPTY; i++)
;
vg_assert(i < VG_TT_SIZE
&& vg_tt[i].orig_addr == VG_TTE_EMPTY);
for (j = 0; j < VG_TT_FAST_SIZE; j++)
VG_(tt_fast)[j] = (Addr)&vg_tt[i];
}
/* Search TT to find the translated address of the supplied original,
or NULL if not found. This routine is used when we miss in
VG_(tt_fast).
*/
static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
{
//static Int queries = 0;
//static Int probes = 0;
Int i;
/* Hash to get initial probe point. */
// if (queries == 10000) {
// VG_(printf)("%d queries, %d probes\n", queries, probes);
// queries = probes = 0;
//}
//queries++;
i = ((UInt)orig_addr) % VG_TT_SIZE;
while (True) {
//probes++;
if (vg_tt[i].orig_addr == orig_addr)
return &vg_tt[i];
if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
return NULL;
i++;
if (i == VG_TT_SIZE) i = 0;
}
}
/* Find the translation address for a given (original) code address.
   If found, update VG_(tt_fast) so subsequent lookups are fast.  If
   no translation can be found, return zero.  This is the only routine
   called from vg_run_innerloop. */
Addr VG_(search_transtab) ( Addr original_addr )
{
TTEntry* tte;
VGP_PUSHCC(VgpSlowFindT);
tte = search_trans_table ( original_addr );
if (tte == NULL) {
/* We didn't find it. vg_run_innerloop will have to request a
translation. */
VGP_POPCC;
return (Addr)0;
} else {
/* Found it. Put the search result into the fast cache now.
Also set the mru_epoch to mark this translation as used. */
UInt cno = (UInt)original_addr & VG_TT_FAST_MASK;
VG_(tt_fast)[cno] = (Addr)tte;
VG_(tt_fast_misses)++;
tte->mru_epoch = VG_(current_epoch);
VGP_POPCC;
return tte->trans_addr;
}
}
/* Invalidate translations of original code [start .. start + range - 1].
This is slow, so you *really* don't want to call it very often.
*/
void VG_(invalidate_translations) ( Addr start, UInt range )
{
Addr i_start, i_end, o_start, o_end;
UInt out_count, out_osize, out_tsize;
Int i;
# ifdef DEBUG_TRANSTAB
VG_(sanity_check_tc_tt)();
# endif
i_start = start;
i_end = start + range - 1;
out_count = out_osize = out_tsize = 0;
for (i = 0; i < VG_TT_SIZE; i++) {
if (vg_tt[i].orig_addr == VG_TTE_EMPTY
|| vg_tt[i].orig_addr == VG_TTE_DELETED) continue;
o_start = vg_tt[i].orig_addr;
o_end = o_start + vg_tt[i].orig_size - 1;
if (o_end < i_start || o_start > i_end)
continue;
if (VG_(clo_cachesim))
VG_(cachesim_notify_discard)( & vg_tt[i] );
vg_tt[i].orig_addr = VG_TTE_DELETED;
VG_(this_epoch_out_count) ++;
VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
VG_(this_epoch_out_tsize) += vg_tt[i].trans_size;
VG_(overall_out_count) ++;
VG_(overall_out_osize) += vg_tt[i].orig_size;
VG_(overall_out_tsize) += vg_tt[i].trans_size;
out_count ++;
out_osize += vg_tt[i].orig_size;
out_tsize += vg_tt[i].trans_size;
}
if (out_count > 0) {
VG_(invalidate_tt_fast)();
VG_(sanity_check_tc_tt)();
# ifdef DEBUG_TRANSTAB
{ Addr aa;
for (aa = i_start; aa <= i_end; aa++)
vg_assert(search_trans_table ( aa ) == NULL);
}
# endif
}
if (1 || VG_(clo_verbosity) > 1)
VG_(message)(Vg_UserMsg,
"discard %d (%d -> %d) translations in range %p .. %p",
out_count, out_osize, out_tsize, i_start, i_end );
}
/*------------------------------------------------------------*/
/*--- Initialisation. ---*/
/*------------------------------------------------------------*/
void VG_(init_tt_tc) ( void )
{
Int i;
/* Allocate the translation table and translation cache. */
vg_assert(vg_tc == NULL);
vg_tc = VG_(get_memory_from_mmap) ( VG_TC_SIZE * sizeof(UChar) );
vg_assert(vg_tc != NULL);
vg_assert(vg_tt == NULL);
vg_tt = VG_(get_memory_from_mmap) ( VG_TT_SIZE * sizeof(TTEntry) );
vg_assert(vg_tt != NULL);
/* The main translation table is empty. */
vg_tt_used = 0;
for (i = 0; i < VG_TT_SIZE; i++) {
vg_tt[i].orig_addr = VG_TTE_EMPTY;
}
/* The translation table's fast cache is empty. Point all entries
at the first TT entry, which is, of course, empty. */
for (i = 0; i < VG_TT_FAST_SIZE; i++)
VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);
}
/*--------------------------------------------------------------------*/
/*--- end vg_transtab.c ---*/
/*--------------------------------------------------------------------*/