Get rid of all vestiges of translation-chaining, and generally clean
up and paranoidise the translation cache management.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@3134 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/arm/core_arch_asm.h b/coregrind/arm/core_arch_asm.h
index 67bc901..0806226 100644
--- a/coregrind/arm/core_arch_asm.h
+++ b/coregrind/arm/core_arch_asm.h
@@ -30,13 +30,6 @@
#ifndef __ARM_CORE_ARCH_ASM_H
#define __ARM_CORE_ARCH_ASM_H
-// XXX: ???
-/* maximum number of normal jumps which can appear in a basic block */
-#define VG_MAX_JUMPS 2
-
-/* Offset of code in a TCEntry */
-#define VG_CODE_OFFSET (8 + VG_MAX_JUMPS * 2)
-
#if 0
/* Debugging hack for assembly code ... sigh. */
#if 0
diff --git a/coregrind/core.h b/coregrind/core.h
index 344d180..097458b 100644
--- a/coregrind/core.h
+++ b/coregrind/core.h
@@ -1084,6 +1084,7 @@
extern void VG_(demangle) ( Char* orig, Char* result, Int result_size );
+
/* ---------------------------------------------------------------------
Exports of vg_from_ucode.c
------------------------------------------------------------------ */
@@ -1091,8 +1092,6 @@
extern void VG_(print_ccall_stats) ( void );
extern void VG_(print_UInstr_histogram) ( void );
-extern void VG_(unchain_jumpsite) ( Addr jumpsite );
-extern Addr VG_(get_jmp_dest) ( Addr jumpsite );
/* ---------------------------------------------------------------------
Exports of vg_to_ucode.c
@@ -1239,17 +1238,10 @@
/* Counts downwards in vg_run_innerloop. */
extern UInt VG_(dispatch_ctr);
-// Instruction pointer guest state offset, used by $VG_ARCH/dispatch.S.
+/* Instruction pointer guest state offset, used by $VG_ARCH/dispatch.S. */
extern UInt VG_(instr_ptr_offset);
-/* --- Counters, for informational purposes only. --- */
-
-// These counters must be declared here because they're maintained by
-// vg_dispatch.S.
-extern UInt VG_(bb_enchain_count); // Counts of chain operations done
-extern UInt VG_(bb_dechain_count); // Counts of unchain operations done
-extern UInt VG_(unchained_jumps_done); // Number of unchained jumps performed
-
+/* Stats ... */
extern void VG_(print_scheduler_stats) ( void );
@@ -1730,10 +1722,9 @@
Addr trans_addr, Int trans_size );
extern Addr VG_(search_transtab) ( Addr original_addr );
-extern void VG_(invalidate_translations) ( Addr start, UInt range,
- Bool unchain_blocks );
+extern void VG_(invalidate_translations) ( Addr start, UInt range );
-extern void VG_(sanity_check_tt_tc) ( void );
+extern void VG_(sanity_check_tt_tc) ( Char* caller );
extern void VG_(print_tt_tc_stats) ( void );
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index 681058e..022655b 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -164,11 +164,6 @@
/*=== Counters, for profiling purposes only ===*/
/*====================================================================*/
-// These ones maintained by vg_dispatch.S
-UInt VG_(bb_enchain_count) = 0; // Number of chain operations done
-UInt VG_(bb_dechain_count) = 0; // Number of unchain operations done
-UInt VG_(unchained_jumps_done) = 0; // Number of unchained jumps done
-
/* Counts pertaining to internal sanity checking. */
static UInt sanity_fast_count = 0;
static UInt sanity_slow_count = 0;
@@ -178,15 +173,7 @@
// Translation stats
VG_(print_tt_tc_stats)();
VG_(message)(Vg_DebugMsg,
- "chainings: %d chainings, %d unchainings.",
- VG_(bb_enchain_count), VG_(bb_dechain_count) );
- VG_(message)(Vg_DebugMsg,
- " dispatch: %llu jumps (bb entries); of them %u (%lu%%) unchained.",
- VG_(bbs_done),
- VG_(unchained_jumps_done),
- ((ULong)(100) * (ULong)(VG_(unchained_jumps_done)))
- / ( VG_(bbs_done)==0 ? 1 : VG_(bbs_done) )
- );
+ " dispatch: %llu jumps (bb entries).", VG_(bbs_done) );
// Scheduler stats
VG_(print_scheduler_stats)();
@@ -2286,7 +2273,7 @@
# endif
if ((sanity_fast_count % 250) == 0)
- VG_(sanity_check_tt_tc)();
+ VG_(sanity_check_tt_tc)("VG_(sanity_check_general)");
if (VG_(needs).sanity_checks) {
VGP_PUSHCC(VgpToolExpensiveSanity);
diff --git a/coregrind/vg_memory.c b/coregrind/vg_memory.c
index a65c290..4be304a 100644
--- a/coregrind/vg_memory.c
+++ b/coregrind/vg_memory.c
@@ -91,7 +91,7 @@
static void recycleseg(Segment *s)
{
if (s->flags & SF_CODE)
- VG_(invalidate_translations)(s->addr, s->len, False);
+ VG_(invalidate_translations)(s->addr, s->len);
if (s->filename != NULL)
VG_(arena_free)(VG_AR_CORE, (Char *)s->filename);
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index 64def69..5bde927 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -3243,7 +3243,7 @@
" addr %p, len %d\n",
(void*)arg[1], arg[2] );
- VG_(invalidate_translations)( arg[1], arg[2], True );
+ VG_(invalidate_translations)( arg[1], arg[2] );
SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
break;
diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c
index f67c19d..29648c8 100644
--- a/coregrind/vg_symtab2.c
+++ b/coregrind/vg_symtab2.c
@@ -2377,7 +2377,7 @@
" %s (%p -> %p)",
redir->from_sym, redir->from_addr, redir->to_addr );
}
- VG_(invalidate_translations)(redir->from_addr, 1, True);
+ VG_(invalidate_translations)(redir->from_addr, 1);
}
VG_(SkipList_Insert)(&sk_resolved_redir, redir);
@@ -2437,7 +2437,7 @@
redir->to_sym = VG_(arena_strdup)(VG_AR_SYMTAB, to_sym);
redir->to_addr = 0;
- if (0||VG_(clo_verbosity) >= 2)
+ if (0||VG_(clo_verbosity) > 2)
VG_(message)(Vg_UserMsg,
"REDIRECT %s(%s) to %s(%s)",
from_lib, from_sym, to_lib, to_sym);
@@ -2463,7 +2463,7 @@
redir->to_sym = NULL;
redir->to_addr = to_addr;
- if (0||VG_(clo_verbosity) >= 2)
+ if (0||VG_(clo_verbosity) > 2)
VG_(message)(Vg_UserMsg,
"REDIRECT %s(%s) to %p",
from_lib, from_sym, to_addr);
diff --git a/coregrind/vg_transtab.c b/coregrind/vg_transtab.c
index d7461d7..72ad0c0 100644
--- a/coregrind/vg_transtab.c
+++ b/coregrind/vg_transtab.c
@@ -48,7 +48,7 @@
/* Number of entries in the translation table. This must be a prime
number in order to make the hashing work properly. */
-#define VG_TT_SIZE /*5281*/ /*100129*/ /*200191*/ /*250829*/ 300007
+#define VG_TT_SIZE /*5281*/ /*100129*/ /*200191*/ 250829 /*300007*/
/* Do an LRU pass when the translation table becomes this full. */
#define VG_TT_LIMIT_PERCENT /*67*/ 80
@@ -58,20 +58,18 @@
/*------------------ TYPES ------------------*/
-#define CODE_ALIGNMENT sizeof(void*) // alignment of TCEntries
-#define CODE_ALIGN(a) (((a)+CODE_ALIGNMENT-1) & ~(CODE_ALIGNMENT-1))
-#define IS_ALIGNED(a) (((a) & (CODE_ALIGNMENT-1)) == 0)
-
-/* An entry in TC. Payload always is always padded out to a word-aligned
- quantity so that these structs are always word-aligned. */
+/* An entry in TC. Payload is always padded out to a
+ word-aligned quantity so that these structs are always
+ word-aligned. Note, the layout of this is known by
+ <arch>/dispatch.S, so do not change it unless you change them
+ too. */
typedef
struct {
/* 32-bit or 64-bit offsets */
/* +0 or 0 */ Addr orig_addr;
/* +4 or 8 */ UShort orig_size;
/* +6 or 10 */ UShort trans_size;
- /* +8 or 12 */ UShort jump_sites[VG_MAX_JUMPS];
- /* +VG_CODE_OFFSET */ UChar payload[0];
+ /* +8 or 12 */ UChar payload[0];
}
TCEntry;
@@ -83,6 +81,14 @@
}
TTEntry;
+
+#define PAYLOAD_OFFSET (sizeof(void*)==8 ? 12 : 8)
+#define CODE_ALIGNMENT sizeof(void*) // alignment of TCEntries
+#define CODE_ALIGN(a) (((a)+CODE_ALIGNMENT-1) & ~(CODE_ALIGNMENT-1))
+#define IS_ALIGNED(a) (((a) & (CODE_ALIGNMENT-1)) == 0)
+
+
+
/* Denotes an empty TT slot, when TTEntry.orig_addr holds this
value. */
#define VG_TTE_EMPTY ((Addr)1)
@@ -152,57 +158,6 @@
static void for_each_tc(Int sector, void (*fn)(TCEntry *));
-/*------------------ T-CHAINING HELPERS ------------------*/
-#if 0
-static
-void for_each_jumpsite(TCEntry *tce, void (*fn)(Addr))
-{
- Int i;
- for(i = 0; i < VG_MAX_JUMPS; i++) {
- Addr a;
- UShort idx = tce->jump_sites[i];
-
- if (idx == (UShort)-1)
- continue;
-
- a = (Addr)&tce->payload[idx];
-
- (*fn)(a);
- }
-}
-
-static inline
-void unchain_tce(TCEntry *tce)
-{
- for_each_jumpsite(tce, VG_(unchain_jumpsite));
-}
-
-/* Unchain any jumps pointing to a sector we're about to free */
-static Addr sector_base;
-static Addr sector_len;
-
-static
-void unchain_site_for_sector(Addr a) {
- Addr jmp = VG_(get_jmp_dest)(a);
- if (jmp >= sector_base && jmp < (sector_base+sector_len))
- VG_(unchain_jumpsite)(a);
-}
-
-static
-void unchain_tce_for_sector(TCEntry *tce) {
- for_each_jumpsite(tce, unchain_site_for_sector);
-}
-
-static
-void unchain_sector(Int s, Addr base, UInt len)
-{
- sector_base = base;
- sector_len = len;
-
- for_each_tc(s, unchain_tce_for_sector);
-}
-#endif
-
/*------------------ TT HELPERS ------------------*/
static
@@ -216,7 +171,7 @@
tc_used += vg_tc_used[s];
VG_(message)(Vg_DebugMsg,
- "%lluk bbs: tt %d, tc %d, %s",
+ "%lluk bbs: tt %d, tc %d: %s",
VG_(bbs_done) / 1000,
vg_tt_used, tc_used, submsg );
}
@@ -293,7 +248,7 @@
static
void rebuild_TT ( void )
{
- Int s;
+ Int s;
/* Throw away TT. */
initialise_tt();
@@ -303,6 +258,10 @@
for_each_tc(s, add_tt_entry);
}
pp_tt_tc_status ( "after rebuild of TC" );
+# if 1 /* def DEBUG_TRANSTAB */
+ VG_(sanity_check_tt_tc)("rebuild_TT");
+# endif
+
}
@@ -354,17 +313,9 @@
Char msg[100];
Int s = find_oldest_sector();
if (s != -1) {
- //Int i;
-
vg_assert(s >= 0 && s < VG_TC_N_SECTORS);
VG_(sprintf)(msg, "before discard of sector %d (%d bytes)",
s, vg_tc_used[s]);
-
- //for(i = 0; i < VG_TC_N_SECTORS; i++) {
- // if (i != s && vg_tc[i] != NULL)
- // unchain_sector(i, (Addr)vg_tc[s], vg_tc_used[s]);
- // }
-
pp_tt_tc_status ( msg );
overall_out_count += vg_tc_stats_count[s];
overall_out_osize += vg_tc_stats_osize[s];
@@ -392,35 +343,16 @@
"at time %d",
s, vg_tc_age[s]);
pp_tt_tc_status ( msg );
-# ifdef DEBUG_TRANSTAB
- VG_(sanity_check_tt_tc)();
+# if 1 /* def DEBUG_TRANSTAB */
+ VG_(sanity_check_tt_tc)("maybe_commission_sector");
# endif
return s;
}
}
for (s = 0; s < VG_TC_N_SECTORS; s++) {
if (vg_tc[s] == NULL) {
-#if 1
vg_tc[s] = VG_(get_memory_from_mmap)
( vg_tc_sector_szB, "trans-cache(sector)" );
-#else
- // Alternative: put translations in an mmap'd file. The main
- // reason is to help OProfile -- OProfile can assign time spent in
- // translations to a particular file. The file format doesn't
- // really matter, which is good because it's not really readable,
- // being generated code but not a proper ELF file.
- Char buf[20];
- static Int count = 0;
- Int fd;
-
- VG_(sprintf)(buf, ".transtab.%d", count++);
-
- fd = VG_(open)(buf, VKI_O_RDWR|VKI_O_CREAT|VKI_O_TRUNC, 0700);
- //VG_(unlink)(buf);
- VG_(do_syscall)(__NR_ftruncate, fd, PGROUNDUP(vg_tc_sector_szB));
- vg_tc[s] = VG_(mmap)(0, PGROUNDUP(vg_tc_sector_szB), VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC, VKI_MAP_SHARED, 0, fd, 0);
- VG_(close)(fd);
-#endif
vg_tc_used[s] = 0;
VG_(sprintf)(msg, "after allocation of sector %d (size %d)",
s, vg_tc_sector_szB );
@@ -435,13 +367,11 @@
static
UChar* allocate ( Int nBytes )
{
- Int i;
-
vg_assert(IS_ALIGNED(nBytes));
/* Ensure the TT is still OK. */
while (vg_tt_used >= VG_TT_LIMIT) {
- (void)discard_oldest_sector();
+ discard_oldest_sector();
rebuild_TT();
vg_assert(vg_tt_used < VG_TT_LIMIT);
}
@@ -463,10 +393,8 @@
if (vg_tc_current >= 0 && vg_tc_current < VG_TC_N_SECTORS)
return allocate(nBytes);
- /* That didn't work. We'll have to dump the oldest. We take the
- opportunity to dump the N oldest at once. */
- for (i = 0; i < 1; i++)
- (void)discard_oldest_sector();
+ /* That didn't work. We'll have to dump the oldest. */
+ discard_oldest_sector();
rebuild_TT();
vg_tc_current = maybe_commission_sector();
@@ -493,11 +421,27 @@
/* Do a sanity check on TT/TC.
*/
-void VG_(sanity_check_tt_tc) ( void )
+void VG_(sanity_check_tt_tc) ( Char* who )
{
- Int i, s;
+ Int i, s;
TTEntry* tte;
TCEntry* tce;
+ Char msg[200];
+
+ vg_assert(VG_(strlen)(who) < 50);
+ VG_(sprintf)(msg, "sanity_check_tt_tc: begin (%s)", who );
+ pp_tt_tc_status ( msg );
+
+ /* Some basic checks on the sector array. */
+ for (i = 0; i < VG_TC_N_SECTORS; i++) {
+ if (vg_tc[i] == NULL) {
+ vg_assert(vg_tc_used[i] == 0);
+ vg_assert(vg_tc_age[i] == 0);
+ } else {
+ vg_assert(vg_tc_used[i] <= vg_tc_sector_szB);
+ }
+ }
+
/* Checks:
- Each TT entry points to a valid and corresponding TC entry.
*/
@@ -510,17 +454,21 @@
vg_assert(IS_ALIGNED4_ADDR(tce));
/* does this point into a valid TC sector? */
for (s = 0; s < VG_TC_N_SECTORS; s++)
- if (vg_tc[s] != NULL
- && ((Addr)tce) >= (Addr)&vg_tc[s][0]
- && ((Addr)tce) < (Addr)&vg_tc[s][ vg_tc_used[s] ])
- break;
+ if (vg_tc[s] != NULL
+ && ((Addr)tce) >= (Addr)&vg_tc[s][0]
+ && ((Addr)tce) < (Addr)&vg_tc[s][ vg_tc_used[s] ])
+ break;
vg_assert(s < VG_TC_N_SECTORS);
/* It should agree with the TC entry on the orig_addr. This may
be VG_TTE_DELETED, or a real orig addr. */
vg_assert(tte->orig_addr == tce->orig_addr);
}
+
+ VG_(sprintf)(msg, "sanity_check_tt_tc: done (%s)", who );
+ pp_tt_tc_status ( msg );
}
+
static __inline__ Int safe_idiv(Int a, Int b)
{
return (b == 0 ? 0 : a / b);
@@ -564,7 +512,8 @@
tte->trans_addr, tte->trans_size);
*/
- vg_assert(offsetof(TCEntry, payload) == VG_CODE_OFFSET);
+ vg_assert(offsetof(TCEntry, payload) == PAYLOAD_OFFSET);
+ vg_assert(trans_size > 0);
/* figure out how many bytes we require. */
nBytes = CODE_ALIGN(trans_size + sizeof(TCEntry));
@@ -577,6 +526,13 @@
tce, &tce->payload[0]);
*/
vg_assert(vg_tc_current >= 0 && vg_tc_current < VG_TC_N_SECTORS);
+ vg_assert(vg_tc_sector_szB > 0);
+
+ /* Range check for writing in the trans cache. */
+ vg_assert( ((UChar*)(tce))
+ >= ((UChar*)(&vg_tc[vg_tc_current][0])) );
+ vg_assert( ((UChar*)(&tce->payload[trans_size_aligned-1]))
+ < ((UChar*)(&vg_tc[vg_tc_current][vg_tc_sector_szB])) );
tce->orig_addr = orig_addr;
tce->orig_size = (UShort)orig_size; /* what's the point of storing this? */
@@ -584,8 +540,7 @@
for (i = 0; i < trans_size; i++) {
tce->payload[i] = ((UChar*)trans_addr)[i];
}
-
- //unchain_tce(tce);
+
add_tt_entry(tce);
/* Update stats. */
@@ -626,14 +581,12 @@
/* Invalidate translations of original code [start .. start + range - 1].
This is slow, so you *really* don't want to call it very often.
- Set 'unchain_blocks' if the translation being invalidated may be chained
- to by other local blocks (which are NOT being discarded).
*/
-void VG_(invalidate_translations) ( Addr start, UInt range, Bool unchain_blocks )
+void VG_(invalidate_translations) ( Addr start, UInt range )
{
Addr i_start, i_end, o_start, o_end;
UInt out_count, out_osize, out_tsize;
- Int i; //, j;
+ Int i;
TCEntry* tce;
# ifdef DEBUG_TRANSTAB
VG_(sanity_check_tt_tc)();
@@ -658,14 +611,6 @@
vg_tt[i].orig_addr = VG_TTE_DELETED;
tce->orig_addr = VG_TTE_DELETED;
- // if (unchain_blocks) {
- // /* make sure no other blocks chain to the one we just discarded */
- // for(j = 0; j < VG_TC_N_SECTORS; j++) {
- // if (vg_tc[j] != NULL)
- // unchain_sector(j, (Addr)tce->payload, tce->trans_size);
- // }
- // }
-
overall_out_count ++;
overall_out_osize += tce->orig_size;
overall_out_tsize += tce->trans_size;
@@ -676,7 +621,7 @@
if (out_count > 0) {
vg_invalidate_tt_fast();
- VG_(sanity_check_tt_tc)();
+ VG_(sanity_check_tt_tc)("invalidate_translations");
# ifdef DEBUG_TRANSTAB
{ Addr aa;
for (aa = i_start; aa <= i_end; aa++)
@@ -700,12 +645,8 @@
{
Int s;
- /* Otherwise we wind up with non-32-bit-aligned code in
- TCEntries. */
- vg_assert((VG_MAX_JUMPS % 2) == 0);
-
- // Otherwise lots of things go wrong...
- vg_assert(VG_CODE_OFFSET == sizeof(TCEntry));
+ /* Otherwise lots of things go wrong... */
+ vg_assert(PAYLOAD_OFFSET == sizeof(TCEntry));
/* Figure out how big each sector should be. */
vg_tc_sector_szB
@@ -716,8 +657,8 @@
)
/ VG_TC_N_SECTORS;
/* Ensure the calculated value is not way crazy. */
- vg_assert(vg_tc_sector_szB >= 200000);
- vg_assert(vg_tc_sector_szB <= 8000000);
+ vg_assert(vg_tc_sector_szB >= 50000);
+ vg_assert(vg_tc_sector_szB <= 11500000);
for (s = 0; s < VG_TC_N_SECTORS; s++) {
vg_tc[s] = NULL;
diff --git a/coregrind/x86/core_arch_asm.h b/coregrind/x86/core_arch_asm.h
index ad5fcfc..a5177eb 100644
--- a/coregrind/x86/core_arch_asm.h
+++ b/coregrind/x86/core_arch_asm.h
@@ -30,13 +30,6 @@
#ifndef __X86_CORE_ARCH_ASM_H
#define __X86_CORE_ARCH_ASM_H
-/* maximum number of normal jumps which can appear in a basic block */
-#define VG_MAX_JUMPS 2
-
-/* Offset of code in a TCEntry */
-#define VG_CODE_OFFSET (8 + VG_MAX_JUMPS * 2)
-
-
#endif // __X86_CORE_ARCH_ASM_H
/*--------------------------------------------------------------------*/
diff --git a/coregrind/x86/dispatch.S b/coregrind/x86/dispatch.S
index fb72d13..ec74ede 100644
--- a/coregrind/x86/dispatch.S
+++ b/coregrind/x86/dispatch.S
@@ -102,9 +102,11 @@
/* try a fast lookup in the translation cache */
TT_LOOKUP(%ebx, fast_lookup_failed)
- /* Found a match. Call the tce.payload field (+VG_CODE_OFFSET) */
- addl $VG_CODE_OFFSET, %ebx
- call *%ebx
+ /* Found a match. Call the tce.payload field. The magic 8
+ value is offsetof(TCEntry,payload) on a 32-bit platform. */
+
+ addl $8, %ebx
+ call *%ebx
/*
%eax holds destination (original) address.