Files updated, added and removed in order to turn the ERASER branch into HEAD
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1086 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 05f4186..b21815e 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -1,7 +1,7 @@
/*--------------------------------------------------------------------*/
-/*--- The cache simulation framework: instrumentation, recording ---*/
-/*--- and results printing. ---*/
+/*--- The cache simulation skin: cache detection; instrumentation, ---*/
+/*--- recording and results printing. ---*/
/*--- vg_cachesim.c ---*/
/*--------------------------------------------------------------------*/
@@ -27,19 +27,32 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
- The GNU General Public License is contained in the file LICENSE.
+ The GNU General Public License is contained in the file COPYING.
*/
-#include "vg_include.h"
+#include "vg_skin.h"
+//#include "vg_profile.c"
+
+/* For cache simulation */
+typedef struct { /* one cache's geometry; all fields -1 means "not set" (UNDEFINED_CACHE) */
+ int size; /* bytes */
+ int assoc; /* presumably associativity (number of ways) -- TODO confirm */
+ int line_size; /* bytes */
+} cache_t;
#include "vg_cachesim_L2.c"
#include "vg_cachesim_I1.c"
#include "vg_cachesim_D1.c"
+/*------------------------------------------------------------*/
+/*--- Constants ---*/
+/*------------------------------------------------------------*/
/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
#define MAX_x86_INSTR_SIZE 16
+#define MIN_LINE_SIZE 16
+
/* Size of various buffers used for storing strings */
#define FILENAME_LEN 256
#define FN_NAME_LEN 256
@@ -48,33 +61,29 @@
#define RESULTS_BUF_LEN 128
#define LINE_BUF_LEN 64
-
/*------------------------------------------------------------*/
-/*--- Generic utility stuff ---*/
+/*--- Profiling events ---*/
/*------------------------------------------------------------*/
-Int VG_(log2) ( Int x )
-{
- Int i;
- /* Any more than 32 and we overflow anyway... */
- for (i = 0; i < 32; i++) {
- if (1 << i == x) return i;
- }
- return -1;
-}
-
+typedef
+ enum { /* skin-specific profiling events, passed to VGP_PUSHCC/VGP_POPCC */
+ VgpGetBBCC = VgpFini+1, /* numbering starts just after VgpFini */
+ VgpCacheSimulate, /* pushed/popped by the log_*_cache_access helpers */
+ VgpCacheResults
+ }
+ VgpSkinCC;
/*------------------------------------------------------------*/
/*--- Output file related stuff ---*/
/*------------------------------------------------------------*/
-#define OUT_FILE "cachegrind.out"
+Char cachegrind_out_file[FILENAME_LEN];
static void file_err()
{
VG_(message)(Vg_UserMsg,
"error: can't open cache simulation output file `%s'",
- OUT_FILE );
+ cachegrind_out_file );
VG_(exit)(1);
}
@@ -95,7 +104,15 @@
cc->m2 = 0;
}
-typedef enum { INSTR_CC, READ_CC, WRITE_CC, MOD_CC } CC_type;
+typedef /* kind of cost centre; the init_*CC() functions store it in 'tag' */
+ enum {
+ InstrCC, /* eg. mov %eax, %ebx */
+ ReadCC, /* eg. mov (%ecx), %esi */
+ WriteCC, /* eg. mov %eax, (%edx) */
+ ModCC, /* eg. incl (%eax) (read+write one addr) */
+ ReadWriteCC, /* eg. call*l (%esi), pushl 0x4(%ebx), movsw
+ (read+write two different addrs) */
+ } CC_type;
/* Instruction-level cost-centres. The typedefs for these structs are in
* vg_include.c
@@ -104,33 +121,53 @@
*
* This is because we use it to work out what kind of CC we're dealing with.
*/
-struct _iCC {
- /* word 1 */
- UChar tag;
- UChar instr_size;
- /* 2 bytes padding */
+typedef
+ struct {
+ /* word 1 */
+ UChar tag;
+ UChar instr_size;
+ /* 2 bytes padding */
- /* words 2+ */
- Addr instr_addr;
- CC I;
-};
+ /* words 2+ */
+ Addr instr_addr;
+ CC I;
+ }
+ iCC;
-struct _idCC {
- /* word 1 */
- UChar tag;
- UChar instr_size;
- UChar data_size;
- /* 1 byte padding */
+typedef
+ struct _idCC {
+ /* word 1 */
+ UChar tag;
+ UChar instr_size;
+ UChar data_size;
+ /* 1 byte padding */
- /* words 2+ */
- Addr instr_addr;
- CC I;
- CC D;
-};
+ /* words 2+ */
+ Addr instr_addr;
+ CC I;
+ CC D;
+ }
+ idCC;
+
+typedef
+ struct _iddCC { /* cost centre for an instruction touching two data addresses */
+ /* word 1 */
+ UChar tag; /* set to ReadWriteCC by init_iddCC() */
+ UChar instr_size;
+ UChar data_size; /* single size, used for both data accesses */
+ /* 1 byte padding */
+
+ /* words 2+ */
+ Addr instr_addr;
+ CC I; /* instruction-fetch counts */
+ CC Da; /* counts for the first data address given to the *_2D helpers */
+ CC Db; /* counts for the second data address given to the *_2D helpers */
+ }
+ iddCC;
static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
{
- cc->tag = INSTR_CC;
+ cc->tag = InstrCC;
cc->instr_size = instr_size;
cc->instr_addr = instr_addr;
initCC(&cc->I);
@@ -147,6 +184,18 @@
initCC(&cc->D);
}
+static void init_iddCC(iddCC* cc, Addr instr_addr, /* fill in a fresh iddCC */
+ UInt instr_size, UInt data_size)
+{
+ cc->tag = ReadWriteCC; /* an iddCC is always the two-address kind */
+ cc->instr_size = instr_size; /* UInt stored into a UChar field */
+ cc->data_size = data_size; /* UInt stored into a UChar field */
+ cc->instr_addr = instr_addr;
+ initCC(&cc->I); /* initCC() presumably zeroes a/m1/m2 (only its tail is visible) */
+ initCC(&cc->Da);
+ initCC(&cc->Db);
+}
+
#define ADD_CC_TO(CC_type, cc, total) \
total.a += ((CC_type*)BBCC_ptr)->cc.a; \
total.m1 += ((CC_type*)BBCC_ptr)->cc.m1; \
@@ -193,6 +242,22 @@
#endif
}
+/* Write the 9 I/Da/Db counts to buf (plus "# addr" if PRINT_INSTR_ADDRS). */
+static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
+{
+#if PRINT_INSTR_ADDRS
+   VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu # %x\n",
+                     cc->I.a, cc->I.m1, cc->I.m2,
+                     cc->Da.a, cc->Da.m1, cc->Da.m2,
+                     cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
+#else
+   VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+                     cc->I.a, cc->I.m1, cc->I.m2,
+                     cc->Da.a, cc->Da.m1, cc->Da.m2,
+                     cc->Db.a, cc->Db.m1, cc->Db.m2);
+#endif
+}
+
/*------------------------------------------------------------*/
/*--- BBCC hash table stuff ---*/
/*------------------------------------------------------------*/
@@ -257,11 +322,11 @@
static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
Char fn_name[FN_NAME_LEN], Int* line_num)
{
- Bool found1, found2, no_demangle = False;
+ Bool found1, found2;
- found1 = VG_(what_line_is_this)(instr_addr, filename,
- FILENAME_LEN, line_num);
- found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, FN_NAME_LEN);
+ found1 = VG_(get_filename_linenum)(instr_addr, filename,
+ FILENAME_LEN, line_num);
+ found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
if (!found1 && !found2) {
no_debug_BBs++;
@@ -290,8 +355,8 @@
file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
{
Int i;
- file_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(file_node));
- new->filename = VG_(strdup)(VG_AR_PRIVATE, filename);
+ file_node* new = VG_(malloc)(sizeof(file_node));
+ new->filename = VG_(strdup)(filename);
for (i = 0; i < N_FN_ENTRIES; i++) {
new->fns[i] = NULL;
}
@@ -303,8 +368,8 @@
fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
{
Int i;
- fn_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(fn_node));
- new->fn_name = VG_(strdup)(VG_AR_PRIVATE, fn_name);
+ fn_node* new = VG_(malloc)(sizeof(fn_node));
+ new->fn_name = VG_(strdup)(fn_name);
for (i = 0; i < N_BBCC_ENTRIES; i++) {
new->BBCCs[i] = NULL;
}
@@ -318,7 +383,7 @@
Int BBCC_array_size = compute_BBCC_array_size(cb);
BBCC* new;
- new = (BBCC*)VG_(malloc)(VG_AR_PRIVATE, sizeof(BBCC) + BBCC_array_size);
+ new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
new->orig_addr = bb_orig_addr;
new->array_size = BBCC_array_size;
new->next = next;
@@ -352,7 +417,7 @@
get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
- VGP_PUSHCC(VgpCacheGetBBCC);
+ VGP_PUSHCC(VgpGetBBCC);
filename_hash = hash(filename, N_FILE_ENTRIES);
curr_file_node = BBCC_table[filename_hash];
while (NULL != curr_file_node &&
@@ -410,7 +475,7 @@
BB_retranslations++;
}
}
- VGP_POPCC;
+ VGP_POPCC(VgpGetBBCC);
return curr_BBCC;
}
@@ -418,11 +483,12 @@
/*--- Cache simulation instrumentation phase ---*/
/*------------------------------------------------------------*/
+// SSS: do something about all these...
#define uInstr1 VG_(newUInstr1)
#define uInstr2 VG_(newUInstr2)
#define uInstr3 VG_(newUInstr3)
-#define dis VG_(disassemble)
#define uLiteral VG_(setLiteralField)
+#define uCCall VG_(setCCallFields)
#define newTemp VG_(getNewTemp)
static Int compute_BBCC_array_size(UCodeBlock* cb)
@@ -430,12 +496,12 @@
UInstr* u_in;
Int i, CC_size, BBCC_size = 0;
Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
+ Int t_read, t_write;
is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
+ t_read = t_write = INVALID_TEMPREG;
for (i = 0; i < cb->used; i++) {
- /* VG_(ppUInstr)(0, &cb->instrs[i]); */
-
u_in = &cb->instrs[i];
switch(u_in->opcode) {
@@ -449,8 +515,13 @@
case_for_end_of_instr:
- CC_size = (is_LOAD || is_STORE || is_FPU_R || is_FPU_W
- ? sizeof(idCC) : sizeof(iCC));
+ if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
+ t_read != t_write)
+ CC_size = sizeof(iddCC);
+ else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
+ CC_size = sizeof(idCC);
+ else
+ CC_size = sizeof(iCC);
BBCC_size += CC_size;
is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
@@ -461,22 +532,26 @@
/* Also, a STORE can come after a LOAD for bts/btr/btc */
vg_assert(/*!is_LOAD &&*/ /* !is_STORE && */
!is_FPU_R && !is_FPU_W);
+ t_read = u_in->val1;
is_LOAD = True;
break;
case STORE:
/* Multiple STOREs are possible for 'pushal' */
vg_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
+ t_write = u_in->val2;
is_STORE = True;
break;
case FPU_R:
vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
+ t_read = u_in->val2;
is_FPU_R = True;
break;
case FPU_W:
vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
+ t_write = u_in->val2;
is_FPU_W = True;
break;
@@ -488,41 +563,153 @@
return BBCC_size;
}
-/* Use this rather than eg. -1 because it's stored as a UInt. */
+static __attribute__ ((regparm (1)))
+void log_1I_0D_cache_access(iCC* cc) /* helper: one instr ref, no data refs */
+{
+ //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
+ // cc, cc->instr_addr, cc->instr_size)
+ VGP_PUSHCC(VgpCacheSimulate); /* profiling bracket, closed by VGP_POPCC below */
+ cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
+ cc->I.a++; /* count the access; m1/m2 were handed to cachesim_I1_doref() above */
+ VGP_POPCC(VgpCacheSimulate);
+}
+
+/* Difference between this function and log_1I_0D_cache_access() is that
+ this one can be passed any kind of CC, not just an iCC. So we have to
+ be careful to make sure we don't make any assumptions about CC layout.
+ (As it stands, they would be safe, but this will avoid potential heartache
+ if anyone else changes CC layout.)
+ Note that we only do the switch for the JIFZ version because if we always
+ called this switching version, things would run about 5% slower. */
+static __attribute__ ((regparm (1)))
+void log_1I_0D_cache_access_JIFZ(iCC* cc) /* cc may really be an idCC or iddCC */
+{
+ UChar instr_size;
+ Addr instr_addr;
+ CC* I; /* will point at the I counts inside whichever CC kind cc is */
+
+ //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
+ // cc, cc->instr_addr, cc->instr_size)
+ VGP_PUSHCC(VgpCacheSimulate);
+
+ switch(cc->tag) { /* tag read via iCC*; iddCC (and idCC per its '+' lines) also start with it */
+ case InstrCC:
+ instr_size = cc->instr_size;
+ instr_addr = cc->instr_addr;
+ I = &(cc->I);
+ break;
+ case ReadCC: /* these three kinds are all read through idCC */
+ case WriteCC:
+ case ModCC:
+ instr_size = ((idCC*)cc)->instr_size;
+ instr_addr = ((idCC*)cc)->instr_addr;
+ I = &( ((idCC*)cc)->I );
+ break;
+ case ReadWriteCC: /* the two-data-address kind uses the iddCC layout */
+ instr_size = ((iddCC*)cc)->instr_size;
+ instr_addr = ((iddCC*)cc)->instr_addr;
+ I = &( ((iddCC*)cc)->I );
+ break;
+ default:
+ VG_(panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
+ break; /* presumably unreachable: assumes VG_(panic) does not return -- TODO confirm */
+ }
+ cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
+ I->a++; /* count the single instruction access */
+ VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (2))) static
+void log_0I_1D_cache_access(idCC* cc, Addr data_addr) /* data ref only, no instr ref */
+{
+ //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
+ // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
+ VGP_PUSHCC(VgpCacheSimulate);
+ cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
+ cc->D.a++; /* count the data access; cc->I is deliberately left untouched */
+ VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (2))) static
+void log_1I_1D_cache_access(idCC* cc, Addr data_addr) /* one instr ref + one data ref */
+{
+ //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
+ // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
+ VGP_PUSHCC(VgpCacheSimulate);
+ cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
+ cc->I.a++; /* instruction access is logged first */
+
+ cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
+ cc->D.a++; /* then the data access at data_addr */
+ VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (3))) static
+void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2) /* two data refs only */
+{
+ //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
+ // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
+ VGP_PUSHCC(VgpCacheSimulate);
+ cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
+ cc->Da.a++; /* first address is counted in Da */
+ cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
+ cc->Db.a++; /* second address is counted in Db; both use the same data_size */
+ VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (3))) static
+void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2) /* one instr ref + two data refs */
+{
+ //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
+ // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
+ VGP_PUSHCC(VgpCacheSimulate);
+ cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
+ cc->I.a++; /* instruction access is logged first */
+
+ cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
+ cc->Da.a++; /* first address is counted in Da */
+ cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
+ cc->Db.a++; /* second address is counted in Db */
+ VGP_POPCC(VgpCacheSimulate);
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
+{
+/* Use this rather than eg. -1 because it's a UInt. */
#define INVALID_DATA_SIZE 999999
-UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr)
-{
UCodeBlock* cb;
Int i;
UInstr* u_in;
BBCC* BBCC_node;
- Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr;
+ Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
+ t_data_addr2, t_read, t_write;
Int CC_size = -1; /* Shut gcc warnings up */
- Addr instr_addr = orig_addr;
- UInt instr_size, data_size = INVALID_DATA_SIZE;
- Int helper = -1; /* Shut gcc warnings up */
+ Addr x86_instr_addr = orig_addr;
+ UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
+ Addr helper;
+ Int argc;
UInt stack_used;
- Bool BB_seen_before = False;
- Bool prev_instr_was_Jcond = False;
+ Bool BB_seen_before = False;
+ Bool instrumented_Jcond = False;
+ Bool has_rep_prefix = False;
Addr BBCC_ptr0, BBCC_ptr;
/* Get BBCC (creating if necessary -- requires a counting pass over the BB
* if it's the first time it's been seen), and point to start of the
* BBCC array. */
- BBCC_node = get_BBCC(orig_addr, cb_in, False, &BB_seen_before);
+ BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
cb = VG_(allocCodeBlock)();
cb->nextTemp = cb_in->nextTemp;
- t_CC_addr = t_read_addr = t_write_addr = t_data_addr = INVALID_TEMPREG;
+ t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
+ t_read = t_write = INVALID_TEMPREG;
for (i = 0; i < cb_in->used; i++) {
u_in = &cb_in->instrs[i];
- //VG_(ppUInstr)(0, u_in);
-
/* What this is all about: we want to instrument each x86 instruction
* translation. The end of these are marked in three ways. The three
* ways, and the way we instrument them, are as follows:
@@ -531,144 +718,33 @@
* 2. UCode, Juncond --> UCode, Instrumentation, Juncond
* 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
*
- * We must put the instrumentation before the jumps so that it is always
+ * The last UInstr in a basic block is always a Juncond. Jconds,
+ * when they appear, are always second last. We check this with
+ * various assertions.
+ *
+ * We must put the instrumentation before any jumps so that it is always
* executed. We don't have to put the instrumentation before the INCEIP
* (it could go after) but we do so for consistency.
*
- * Junconds are always the last instruction in a basic block. Jconds are
- * always the 2nd last, and must be followed by a Jcond. We check this
- * with various assertions.
+ * x86 instruction sizes are obtained from INCEIPs (for case 1) or
+ * from .extra4b field of the final JMP (for case 2 & 3).
*
- * Note that in VG_(disBB) we patched the `extra4b' field of the first
- * occurring JMP in a block with the size of its x86 instruction. This
- * is used now.
- *
- * Note that we don't have to treat JIFZ specially; unlike JMPs, JIFZ
- * occurs in the middle of a BB and gets an INCEIP after it.
+ * Note that JIFZ is treated differently.
*
* The instrumentation is just a call to the appropriate helper function,
* passing it the address of the instruction's CC.
*/
- if (prev_instr_was_Jcond) vg_assert(u_in->opcode == JMP);
+ if (instrumented_Jcond) vg_assert(u_in->opcode == JMP);
switch (u_in->opcode) {
-
- case INCEIP:
- instr_size = u_in->val1;
- goto case_for_end_of_x86_instr;
-
- case JMP:
- if (u_in->cond == CondAlways) {
- vg_assert(i+1 == cb_in->used);
-
- /* Don't instrument if previous instr was a Jcond. */
- if (prev_instr_was_Jcond) {
- vg_assert(0 == u_in->extra4b);
- VG_(copyUInstr)(cb, u_in);
- break;
- }
- prev_instr_was_Jcond = False;
-
- } else {
- vg_assert(i+2 == cb_in->used); /* 2nd last instr in block */
- prev_instr_was_Jcond = True;
- }
-
- /* Ah, the first JMP... instrument, please. */
- instr_size = u_in->extra4b;
- goto case_for_end_of_x86_instr;
-
- /* Shared code that is executed at the end of an x86 translation
- * block, marked by either an INCEIP or an unconditional JMP. */
- case_for_end_of_x86_instr:
-
-#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
-
- /* Initialise the CC in the BBCC array appropriately if it hasn't
- * been initialised before.
- * Then call appropriate sim function, passing it the CC address.
- * Note that CALLM_S/CALL_E aren't required here; by this point,
- * the checking related to them has already happened. */
- stack_used = 0;
-
- vg_assert(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE);
- vg_assert(0 != instr_addr);
-
- if (!IS_(read) && !IS_(write)) {
- iCC* CC_ptr = (iCC*)(BBCC_ptr);
- vg_assert(INVALID_DATA_SIZE == data_size);
- vg_assert(INVALID_TEMPREG == t_read_addr &&
- INVALID_TEMPREG == t_write_addr);
- CC_size = sizeof(iCC);
- if (!BB_seen_before)
- init_iCC(CC_ptr, instr_addr, instr_size);
-
- /* 1st arg: CC addr */
- t_CC_addr = newTemp(cb);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
- uLiteral(cb, BBCC_ptr);
-
- uInstr1(cb, CCALL_1_0, 0, TempReg, t_CC_addr);
- uLiteral(cb, VGOFF_(cachesim_log_non_mem_instr));
-
- } else {
- CC_type X_CC;
- idCC* CC_ptr = (idCC*)(BBCC_ptr);
-
- vg_assert(4 == data_size || 2 == data_size || 1 == data_size ||
- 8 == data_size || 10 == data_size);
-
- CC_size = sizeof(idCC);
- helper = VGOFF_(cachesim_log_mem_instr);
-
- if (IS_(read) && !IS_(write)) {
- X_CC = READ_CC;
- vg_assert(INVALID_TEMPREG != t_read_addr &&
- INVALID_TEMPREG == t_write_addr);
- t_data_addr = t_read_addr;
-
- } else if (!IS_(read) && IS_(write)) {
- X_CC = WRITE_CC;
- vg_assert(INVALID_TEMPREG == t_read_addr &&
- INVALID_TEMPREG != t_write_addr);
- t_data_addr = t_write_addr;
-
- } else {
- vg_assert(IS_(read) && IS_(write));
- X_CC = MOD_CC;
- vg_assert(INVALID_TEMPREG != t_read_addr &&
- INVALID_TEMPREG != t_write_addr);
- t_data_addr = t_read_addr;
- }
-#undef IS_
- if (!BB_seen_before)
- init_idCC(X_CC, CC_ptr, instr_addr, instr_size, data_size);
-
- /* 1st arg: CC addr */
- t_CC_addr = newTemp(cb);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
- uLiteral(cb, BBCC_ptr);
-
- uInstr2(cb, CCALL_2_0, 0, TempReg, t_CC_addr,
- TempReg, t_data_addr);
- uLiteral(cb, VGOFF_(cachesim_log_mem_instr));
- }
-
- VG_(copyUInstr)(cb, u_in);
-
- /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
- BBCC_ptr += CC_size;
- instr_addr += instr_size;
- t_CC_addr = t_read_addr = t_write_addr =
- t_data_addr = INVALID_TEMPREG;
- data_size = INVALID_DATA_SIZE;
+ case NOP: case CALLM_E: case CALLM_S:
break;
-
/* For memory-ref instrs, copy the data_addr into a temporary to be
- * passed to the cachesim_log_function at the end of the instruction.
+ * passed to the cachesim_* helper at the end of the instruction.
*/
case LOAD:
+ t_read = u_in->val1;
t_read_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
data_size = u_in->size;
@@ -676,26 +752,216 @@
break;
case FPU_R:
+ t_read = u_in->val2;
t_read_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
- data_size = u_in->size;
+ data_size = ( u_in->size <= MIN_LINE_SIZE
+ ? u_in->size
+ : MIN_LINE_SIZE);
VG_(copyUInstr)(cb, u_in);
break;
/* Note that we must set t_write_addr even for mod instructions;
- * that's how the code above determines whether it does a write;
- * without it, it would think a mod instruction is a read.
+ * That's how the code below determines whether it does a write.
+ * Without it, it would think a mod instruction is a read.
* As for the MOV, if it's a mod instruction it's redundant, but it's
* not expensive and mod instructions are rare anyway. */
case STORE:
case FPU_W:
+ t_write = u_in->val2;
t_write_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
- data_size = u_in->size;
+ /* 28 and 108 B data-sized instructions will be done
+ * inaccurately but they're very rare and this avoids errors
+ * from hitting more than two cache lines in the simulation. */
+ data_size = ( u_in->size <= MIN_LINE_SIZE
+ ? u_in->size
+ : MIN_LINE_SIZE);
VG_(copyUInstr)(cb, u_in);
break;
- case NOP: case CALLM_E: case CALLM_S:
+
+ /* For rep-prefixed instructions, log a single I-cache access
+ * before the UCode loop that implements the repeated part, which
+ * is where the multiple D-cache accesses are logged. */
+ case JIFZ:
+ has_rep_prefix = True;
+
+ /* Setup 1st and only arg: CC addr */
+ t_CC_addr = newTemp(cb);
+ uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
+ uLiteral(cb, BBCC_ptr);
+
+ /* Call helper */
+ uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
+ uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
+ VG_(copyUInstr)(cb, u_in);
+ break;
+
+
+ /* INCEIP: insert instrumentation */
+ case INCEIP:
+ x86_instr_size = u_in->val1;
+ goto instrument_x86_instr;
+
+ /* JMP: insert instrumentation if the first JMP */
+ case JMP:
+ if (instrumented_Jcond) {
+ vg_assert(CondAlways == u_in->cond);
+ vg_assert(i+1 == cb_in->used);
+ VG_(copyUInstr)(cb, u_in);
+ instrumented_Jcond = False; /* reset */
+ break;
+ }
+ /* The first JMP... instrument. */
+ if (CondAlways != u_in->cond) {
+ vg_assert(i+2 == cb_in->used);
+ instrumented_Jcond = True;
+ } else {
+ vg_assert(i+1 == cb_in->used);
+ }
+
+ /* Get x86 instr size from final JMP. */
+ x86_instr_size = LAST_UINSTR(cb_in).extra4b;
+ goto instrument_x86_instr;
+
+
+ /* Code executed at the end of each x86 instruction. */
+ instrument_x86_instr:
+
+ /* Initialise the CC in the BBCC array appropriately if it
+ * hasn't been initialised before. Then call appropriate sim
+ * function, passing it the CC address. */
+ stack_used = 0;
+
+ vg_assert(x86_instr_size >= 1 &&
+ x86_instr_size <= MAX_x86_INSTR_SIZE);
+
+#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
+
+ if (!IS_(read) && !IS_(write)) {
+ vg_assert(INVALID_DATA_SIZE == data_size);
+ vg_assert(INVALID_TEMPREG == t_read_addr &&
+ INVALID_TEMPREG == t_read &&
+ INVALID_TEMPREG == t_write_addr &&
+ INVALID_TEMPREG == t_write);
+ CC_size = sizeof(iCC);
+ if (!BB_seen_before)
+ init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
+ helper = ( has_rep_prefix
+ ? (Addr)0 /* no extra log needed */
+ : (Addr) & log_1I_0D_cache_access
+ );
+ argc = 1;
+
+ } else {
+ vg_assert(4 == data_size || 2 == data_size || 1 == data_size ||
+ 8 == data_size || 10 == data_size ||
+ MIN_LINE_SIZE == data_size);
+
+ if (IS_(read) && !IS_(write)) {
+ CC_size = sizeof(idCC);
+ /* If it uses 'rep', we've already logged the I-cache
+ * access at the JIFZ UInstr (see JIFZ case above) so
+ * don't do it here */
+ helper = ( has_rep_prefix
+ ? (Addr) & log_0I_1D_cache_access
+ : (Addr) & log_1I_1D_cache_access
+ );
+ argc = 2;
+ if (!BB_seen_before)
+ init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
+ x86_instr_size, data_size);
+ vg_assert(INVALID_TEMPREG != t_read_addr &&
+ INVALID_TEMPREG != t_read &&
+ INVALID_TEMPREG == t_write_addr &&
+ INVALID_TEMPREG == t_write);
+ t_data_addr1 = t_read_addr;
+
+ } else if (!IS_(read) && IS_(write)) {
+ CC_size = sizeof(idCC);
+ helper = ( has_rep_prefix
+ ? (Addr) & log_0I_1D_cache_access
+ : (Addr) & log_1I_1D_cache_access
+ );
+ argc = 2;
+ if (!BB_seen_before)
+ init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
+ x86_instr_size, data_size);
+ vg_assert(INVALID_TEMPREG == t_read_addr &&
+ INVALID_TEMPREG == t_read &&
+ INVALID_TEMPREG != t_write_addr &&
+ INVALID_TEMPREG != t_write);
+ t_data_addr1 = t_write_addr;
+
+ } else {
+ vg_assert(IS_(read) && IS_(write));
+ vg_assert(INVALID_TEMPREG != t_read_addr &&
+ INVALID_TEMPREG != t_read &&
+ INVALID_TEMPREG != t_write_addr &&
+ INVALID_TEMPREG != t_write);
+ if (t_read == t_write) {
+ CC_size = sizeof(idCC);
+ helper = ( has_rep_prefix
+ ? (Addr) & log_0I_1D_cache_access
+ : (Addr) & log_1I_1D_cache_access
+ );
+ argc = 2;
+ if (!BB_seen_before)
+ init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
+ x86_instr_size, data_size);
+ t_data_addr1 = t_read_addr;
+ } else {
+ CC_size = sizeof(iddCC);
+ helper = ( has_rep_prefix
+ ? (Addr) & log_0I_2D_cache_access
+ : (Addr) & log_1I_2D_cache_access
+ );
+ argc = 3;
+ if (!BB_seen_before)
+ init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
+ x86_instr_size, data_size);
+ t_data_addr1 = t_read_addr;
+ t_data_addr2 = t_write_addr;
+ }
+ }
+#undef IS_
+ }
+
+ /* Call the helper, if necessary */
+ if ((Addr)0 != helper) {
+
+ /* Setup 1st arg: CC addr */
+ t_CC_addr = newTemp(cb);
+ uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
+ uLiteral(cb, BBCC_ptr);
+
+ /* Call the helper */
+ if (1 == argc)
+ uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
+ else if (2 == argc)
+ uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
+ TempReg, t_data_addr1);
+ else if (3 == argc)
+ uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
+ TempReg, t_data_addr1,
+ TempReg, t_data_addr2);
+ else
+ VG_(panic)("argc... not 1 or 2 or 3?");
+
+ uCCall(cb, helper, argc, argc, False);
+ }
+
+ /* Copy original UInstr (INCEIP or JMP) */
+ VG_(copyUInstr)(cb, u_in);
+
+ /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
+ BBCC_ptr += CC_size;
+ x86_instr_addr += x86_instr_size;
+ t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
+ t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
+ data_size = INVALID_DATA_SIZE;
+ has_rep_prefix = False;
break;
default:
@@ -709,19 +975,25 @@
VG_(freeCodeBlock)(cb_in);
return cb;
+
+#undef INVALID_DATA_SIZE
}
/*------------------------------------------------------------*/
-/*--- Cache simulation stuff ---*/
+/*--- Automagic cache initialisation stuff ---*/
/*------------------------------------------------------------*/
-#define MIN_LINE_SIZE 16
-
/* Total reads/writes/misses. Calculated during CC traversal at the end. */
static CC Ir_total;
static CC Dr_total;
static CC Dw_total;
+#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
+
+static cache_t clo_I1_cache = UNDEFINED_CACHE;
+static cache_t clo_D1_cache = UNDEFINED_CACHE;
+static cache_t clo_L2_cache = UNDEFINED_CACHE;
+
/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
/* Probably only works for Intel and AMD chips, and probably only for some of
* them.
@@ -739,7 +1011,7 @@
static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
{
VG_(message)(Vg_DebugMsg,
- "warning: Pentium with %d K micro_op instruction trace cache",
+ "warning: Pentium with %d K micro-op instruction trace cache",
actual_size);
VG_(message)(Vg_DebugMsg,
" Simulating a %d KB cache with %d B lines",
@@ -755,6 +1027,7 @@
{
UChar info[16];
Int i, trials;
+ Bool L2_found = False;
if (level < 2) {
VG_(message)(Vg_DebugMsg,
@@ -782,8 +1055,9 @@
case 0x0: /* ignore zeros */
break;
- case 0x01: case 0x02: case 0x03: case 0x04: /* TLB info, ignore */
- case 0x90: case 0x96: case 0x9b:
+ /* TLB info, ignore */
+ case 0x01: case 0x02: case 0x03: case 0x04:
+ case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
break;
case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
@@ -792,22 +1066,35 @@
case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
+ /* IA-64 info -- panic! */
+ case 0x10: case 0x15: case 0x1a:
+ case 0x88: case 0x89: case 0x8a: case 0x8d:
+ case 0x90: case 0x96: case 0x9b:
+ VG_(message)(Vg_DebugMsg,
+ "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
+ VG_(panic)("IA-64 detected");
+
case 0x22: case 0x23: case 0x25: case 0x29:
- case 0x88: case 0x89: case 0x8a:
VG_(message)(Vg_DebugMsg,
"warning: L3 cache detected but ignored\n");
break;
- case 0x40:
- VG_(message)(Vg_DebugMsg,
- "warning: L2 cache not installed, ignore L2 results.");
+ /* These are sectored, whatever that means */
+ case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
+ case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
+
+ /* If a P6 core, this means "no L2 cache".
+ If a P4 core, this means "no L3 cache".
+ We don't know what core it is, so don't issue a warning. To detect
+ a missing L2 cache, we use 'L2_found'. */
+ case 0x40:
break;
- case 0x41: *L2c = (cache_t) { 128, 4, 32 }; break;
- case 0x42: *L2c = (cache_t) { 256, 4, 32 }; break;
- case 0x43: *L2c = (cache_t) { 512, 4, 32 }; break;
- case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; break;
- case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; break;
+ case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
+ case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
+ case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
+ case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
+ case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
/* These are sectored, whatever that means */
case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
@@ -832,24 +1119,31 @@
micro_ops_warn(32, 32, 32);
break;
- case 0x79: *L2c = (cache_t) { 128, 8, 64 }; break; /* sectored */
- case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; break; /* sectored */
- case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; break; /* sectored */
- case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; break; /* sectored */
+ /* These are sectored, whatever that means */
+ case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
+ case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
+ case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
+ case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
+ case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
- case 0x81: *L2c = (cache_t) { 128, 8, 32 }; break;
- case 0x82: *L2c = (cache_t) { 256, 8, 32 }; break;
- case 0x83: *L2c = (cache_t) { 512, 8, 32 }; break;
- case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; break;
- case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; break;
+ case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
+ case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
+ case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
+ case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
+ case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
default:
VG_(message)(Vg_DebugMsg,
"warning: Unknown Intel cache config value "
- "(0x%x), ignoring\n", info[i]);
+ "(0x%x), ignoring", info[i]);
break;
}
}
+
+ if (!L2_found)
+ VG_(message)(Vg_DebugMsg,
+ "warning: L2 cache not installed, ignore L2 results.");
+
return 0;
}
@@ -871,12 +1165,16 @@
* #3 The AMD K7 processor's L2 cache must be configured prior to relying
* upon this information. (Whatever that means -- njn)
*
+ * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
+ * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
+ * so we detect that.
+ *
* Returns 0 on success, non-zero on failure.
*/
static
Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
{
- Int dummy, ext_level;
+ Int dummy, model, ext_level;
Int I1i, D1i, L2i;
cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
@@ -891,6 +1189,16 @@
cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
+ cpuid(0x1, &model, &dummy, &dummy, &dummy);
+ /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
+
+ /* Check for Duron bug */
+ if (model == 0x630) {
+ VG_(message)(Vg_UserMsg,
+ "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
+ L2i = (64 << 16) | (L2i & 0xffff);
+ }
+
D1c->size = (D1i >> 24) & 0xff;
D1c->assoc = (D1i >> 16) & 0xff;
D1c->line_size = (D1i >> 0) & 0xff;
@@ -1044,14 +1352,14 @@
cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
-#define CMD_LINE_DEFINED(L) \
- (-1 != VG_(clo_##L##_cache).size || \
- -1 != VG_(clo_##L##_cache).assoc || \
- -1 != VG_(clo_##L##_cache).line_size)
+#define CMD_LINE_DEFINED(L) \
+ (-1 != clo_##L##_cache.size || \
+ -1 != clo_##L##_cache.assoc || \
+ -1 != clo_##L##_cache.line_size)
- *I1c = VG_(clo_I1_cache);
- *D1c = VG_(clo_D1_cache);
- *L2c = VG_(clo_L2_cache);
+ *I1c = clo_I1_cache;
+ *D1c = clo_D1_cache;
+ *L2c = clo_L2_cache;
/* If any undefined on command-line, try CPUID */
if (! CMD_LINE_DEFINED(I1) ||
@@ -1061,9 +1369,9 @@
/* Overwrite CPUID result for any cache defined on command-line */
if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
- if (CMD_LINE_DEFINED(I1)) *I1c = VG_(clo_I1_cache);
- if (CMD_LINE_DEFINED(D1)) *D1c = VG_(clo_D1_cache);
- if (CMD_LINE_DEFINED(L2)) *L2c = VG_(clo_L2_cache);
+ if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
+ if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
+ if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
/* CPUID failed, use defaults for each undefined by command-line */
} else {
@@ -1071,9 +1379,9 @@
"Couldn't detect cache configuration, using one "
"or more defaults ");
- *I1c = (CMD_LINE_DEFINED(I1) ? VG_(clo_I1_cache) : I1_dflt);
- *D1c = (CMD_LINE_DEFINED(D1) ? VG_(clo_D1_cache) : D1_dflt);
- *L2c = (CMD_LINE_DEFINED(L2) ? VG_(clo_L2_cache) : L2_dflt);
+ *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
+ *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
+ *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
}
}
#undef CMD_LINE_DEFINED
@@ -1093,65 +1401,8 @@
}
}
-void VG_(init_cachesim)(void)
-{
- cache_t I1c, D1c, L2c;
-
- /* Make sure the output file can be written. */
- Int fd = VG_(open_write)(OUT_FILE);
- if (-1 == fd) {
- fd = VG_(create_and_write)(OUT_FILE);
- if (-1 == fd) {
- file_err();
- }
- }
- VG_(close)(fd);
-
- initCC(&Ir_total);
- initCC(&Dr_total);
- initCC(&Dw_total);
-
- initCC(&Ir_discards);
- initCC(&Dr_discards);
- initCC(&Dw_discards);
-
- get_caches(&I1c, &D1c, &L2c);
-
- cachesim_I1_initcache(I1c);
- //cachesim_I1_initcache();
- cachesim_D1_initcache(D1c);
- //cachesim_D1_initcache();
- cachesim_L2_initcache(L2c);
- //cachesim_L2_initcache();
-
- init_BBCC_table();
-}
-
-void VG_(cachesim_log_non_mem_instr)(iCC* cc)
-{
- //VG_(printf)("sim I: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
- // cc, cc->instr_addr, cc->instr_size)
- VGP_PUSHCC(VgpCacheSimulate);
- cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
- cc->I.a++;
- VGP_POPCC;
-}
-
-void VG_(cachesim_log_mem_instr)(idCC* cc, Addr data_addr)
-{
- //VG_(printf)("sim D: CCaddr=0x%x, iaddr=0x%x, isize=%u, daddr=0x%x, dsize=%u\n",
- // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
- VGP_PUSHCC(VgpCacheSimulate);
- cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
- cc->I.a++;
-
- cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
- cc->D.a++;
- VGP_POPCC;
-}
-
/*------------------------------------------------------------*/
-/*--- Printing of output file and summary stats ---*/
+/*--- SK_(fini)() and related functions ---*/
/*------------------------------------------------------------*/
static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
@@ -1181,15 +1432,15 @@
Addr instr_addr;
switch ( ((iCC*)BBCC_ptr)->tag ) {
- case INSTR_CC:
+ case InstrCC:
instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
sprint_iCC(buf, (iCC*)BBCC_ptr);
ADD_CC_TO(iCC, I, Ir_total);
BBCC_ptr += sizeof(iCC);
break;
- case READ_CC:
- case MOD_CC:
+ case ReadCC:
+ case ModCC:
instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
ADD_CC_TO(idCC, I, Ir_total);
@@ -1197,7 +1448,7 @@
BBCC_ptr += sizeof(idCC);
break;
- case WRITE_CC:
+ case WriteCC:
instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
sprint_write_CC(buf, (idCC*)BBCC_ptr);
ADD_CC_TO(idCC, I, Ir_total);
@@ -1205,6 +1456,15 @@
BBCC_ptr += sizeof(idCC);
break;
+ case ReadWriteCC:
+ instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
+ sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
+ ADD_CC_TO(iddCC, I, Ir_total);
+ ADD_CC_TO(iddCC, Da, Dr_total);
+ ADD_CC_TO(iddCC, Db, Dw_total);
+ BBCC_ptr += sizeof(iddCC);
+ break;
+
default:
VG_(panic)("Unknown CC type in fprint_BBCC()\n");
break;
@@ -1223,7 +1483,7 @@
/* If the function name for this instruction doesn't match that of the
* first instruction in the BB, print warning. */
- if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
+ if (VG_(clo_verbosity) > 2 && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
VG_(printf)("Mismatched function names\n");
VG_(printf)(" filenames: BB:%s, instr:%s;"
" fn_names: BB:%s, instr:%s;"
@@ -1251,8 +1511,7 @@
vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
}
-static void fprint_BBCC_table_and_calc_totals(Int client_argc,
- Char** client_argv)
+static void fprint_BBCC_table_and_calc_totals(void)
{
Int fd;
Char buf[BUF_LEN];
@@ -1261,8 +1520,8 @@
BBCC *curr_BBCC;
Int i,j,k;
- VGP_PUSHCC(VgpCacheDump);
- fd = VG_(open_write)(OUT_FILE);
+ VGP_PUSHCC(VgpCacheResults);
+ fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
if (-1 == fd) { file_err(); }
/* "desc:" lines (giving I1/D1/L2 cache configuration) */
@@ -1276,8 +1535,8 @@
/* "cmd:" line */
VG_(strcpy)(buf, "cmd:");
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
- for (i = 0; i < client_argc; i++) {
- VG_(sprintf)(buf, " %s", client_argv[i]);
+ for (i = 0; i < VG_(client_argc); i++) {
+ VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
}
/* "events:" line */
@@ -1395,6 +1654,7 @@
VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
len = VG_(strlen)(buf);
space = field_width - len;
+ if (space < 0) space = 0; /* Allow for v. small field_width */
i = len;
/* Right justify in field */
@@ -1402,7 +1662,7 @@
for (i = 0; i < space; i++) buf[i] = ' ';
}
-void VG_(do_cachesim_results)(Int client_argc, Char** client_argv)
+void SK_(fini)(void)
{
CC D_total;
ULong L2_total_m, L2_total_mr, L2_total_mw,
@@ -1413,7 +1673,7 @@
Int l1, l2, l3;
Int p;
- fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
+ fprint_BBCC_table_and_calc_totals();
if (VG_(clo_verbosity) == 0)
return;
@@ -1431,6 +1691,7 @@
p = 100;
+ if (0 == Ir_total.a) Ir_total.a = 1;
percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
@@ -1464,6 +1725,9 @@
p = 10;
+ if (0 == D_total.a) D_total.a = 1;
+ if (0 == Dr_total.a) Dr_total.a = 1;
+ if (0 == Dw_total.a) Dw_total.a = 1;
percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
@@ -1525,7 +1789,7 @@
VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
}
- VGP_POPCC;
+ VGP_POPCC(VgpCacheResults);
}
@@ -1534,19 +1798,18 @@
*
* Finds the BBCC in the table, removes it, adds the counts to the discard
* counters, and then frees the BBCC. */
-void VG_(cachesim_notify_discard) ( TTEntry* tte )
+void SK_(discard_basic_block_info) ( Addr a, UInt size )
{
BBCC *BBCC_node;
Addr BBCC_ptr0, BBCC_ptr;
Bool BB_seen_before;
if (0)
- VG_(printf)( "cachesim_notify_discard: %p for %d\n",
- tte->orig_addr, (Int)tte->orig_size);
+ VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
/* 2nd arg won't be used since BB should have been seen before (assertions
* ensure this). */
- BBCC_node = get_BBCC(tte->orig_addr, NULL, True, &BB_seen_before);
+ BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
vg_assert(True == BB_seen_before);
@@ -1559,33 +1822,182 @@
switch ( ((iCC*)BBCC_ptr)->tag ) {
- case INSTR_CC:
+ case InstrCC:
ADD_CC_TO(iCC, I, Ir_discards);
BBCC_ptr += sizeof(iCC);
break;
- case READ_CC:
- case MOD_CC:
+ case ReadCC:
+ case ModCC:
ADD_CC_TO(idCC, I, Ir_discards);
ADD_CC_TO(idCC, D, Dr_discards);
BBCC_ptr += sizeof(idCC);
break;
- case WRITE_CC:
+ case WriteCC:
ADD_CC_TO(idCC, I, Ir_discards);
ADD_CC_TO(idCC, D, Dw_discards);
BBCC_ptr += sizeof(idCC);
break;
+ case ReadWriteCC:
+ ADD_CC_TO(iddCC, I, Ir_discards);
+ ADD_CC_TO(iddCC, Da, Dr_discards);
+ ADD_CC_TO(iddCC, Db, Dw_discards);
+ BBCC_ptr += sizeof(iddCC);
+ break;
+
default:
- VG_(panic)("Unknown CC type in VG_(cachesim_notify_discard)()\n");
+ VG_(panic)("Unknown CC type in SK_(discard_basic_block_info)()\n");
break;
}
}
-
- VG_(free)(VG_AR_PRIVATE, BBCC_node);
+ VG_(free)(BBCC_node);
}
/*--------------------------------------------------------------------*/
+/*--- Command line processing ---*/
+/*--------------------------------------------------------------------*/
+
+static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
+{
+   /* Parse a cache option of the form "--I1=65536,2,64" into *cache.
+    * opt_len is the length of the "--I1=" prefix, i.e. the offset of the
+    * first number.  We work on a private copy: the commas are replaced by
+    * NULs to make three independent strings, which are then converted.
+    * Each of the three fields must hold at least one digit; any other
+    * shape is handed to VG_(bad_option)() and *cache is left untouched. */
+   int   start[3];
+   int   i = opt_len;
+   int   f;
+   char* opt = VG_(strdup)(orig_opt);
+
+   for (f = 0; f < 3; f++) {
+      start[f] = i;
+      while (VG_(isdigit)(opt[i])) i++;
+      if (i == start[f]) goto bad;           /* empty field, e.g. "--I1=,2,64" */
+      /* The first two fields end at a comma, the last at end of string. */
+      if (opt[i] != (f < 2 ? ',' : '\0')) goto bad;
+      if (f < 2) opt[i++] = '\0';
+   }
+
+   cache->size      = (Int)VG_(atoll)(opt + start[0]);
+   cache->assoc     = (Int)VG_(atoll)(opt + start[1]);
+   cache->line_size = (Int)VG_(atoll)(opt + start[2]);
+
+   VG_(free)(opt);
+   return;
+
+ bad:
+   /* Release the copy before reporting, so that nothing is leaked should
+    * VG_(bad_option)() return. */
+   VG_(free)(opt);
+   VG_(bad_option)(orig_opt);
+}
+
+Bool SK_(process_cmd_line_option)(Char* arg)
+{
+   /* Returns True iff arg starts with "--I1=", "--D1=" or "--L2=" (each
+    * prefix is 5 chars long); the rest is handed to parse_cache_opt(). */
+   cache_t* target = NULL;
+
+   if      (0 == VG_(strncmp)(arg, "--I1=", 5)) target = &clo_I1_cache;
+   else if (0 == VG_(strncmp)(arg, "--D1=", 5)) target = &clo_D1_cache;
+   else if (0 == VG_(strncmp)(arg, "--L2=", 5)) target = &clo_L2_cache;
+
+   if (NULL == target) return False;
+   parse_cache_opt(target, arg, 5);
+   return True;
+}
+
+Char* SK_(usage)(void)
+{
+   /* One help line per cache option accepted by this file's option parser. */
+   return " --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
+          " --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
+          " --L2=<size>,<assoc>,<line_size> set L2 cache manually\n";
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Setup ---*/
+/*--------------------------------------------------------------------*/
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* not_used)
+{
+   /* Fill in the skin's details and needs, then register each of the
+    * log_*_cache_access routines as a compact helper, in the order listed. */
+   Addr helpers[6] = {
+      (Addr) & log_1I_0D_cache_access, (Addr) & log_1I_0D_cache_access_JIFZ,
+      (Addr) & log_0I_1D_cache_access, (Addr) & log_1I_1D_cache_access,
+      (Addr) & log_0I_2D_cache_access, (Addr) & log_1I_2D_cache_access };
+   Int k;
+   needs->name                 = "cachegrind";
+   needs->description          = "an I1/D1/L2 cache profiler";
+   needs->basic_block_discards = True;
+   needs->command_line_options = True;
+   for (k = 0; k < 6; k++) VG_(register_compact_helper)(helpers[k]);
+}
+
+void SK_(post_clo_init)(void)
+{
+   cache_t I1c, D1c, L2c;
+   Int fd;
+
+   /* Set output file name: cachegrind.out.<pid> */
+   VG_(sprintf)(cachegrind_out_file, "cachegrind.out.%d", VG_(getpid)());
+
+   /* Make sure the output file can be written.  One open() with
+    * O_CREAT|O_TRUNC handles both cases: an existing file is emptied and
+    * a missing one is created, readable and writable by its owner only. */
+   fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
+                  VKI_S_IRUSR|VKI_S_IWUSR);
+   if (-1 == fd) {
+      file_err();
+   }
+   VG_(close)(fd);
+
+   initCC(&Ir_total);
+   initCC(&Dr_total);
+   initCC(&Dw_total);
+
+   initCC(&Ir_discards);
+   initCC(&Dr_discards);
+   initCC(&Dw_discards);
+
+   /* Cache geometry: command line, CPUID, or built-in defaults. */
+   get_caches(&I1c, &D1c, &L2c);
+
+   cachesim_I1_initcache(I1c);
+   cachesim_D1_initcache(D1c);
+   cachesim_L2_initcache(L2c);
+
+   VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
+   VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
+   VGP_(register_profile_event)(VgpCacheResults, "cache-results");
+
+   init_BBCC_table();
+}
+
+#if 0 /* Compiled out: sanity-check hooks; the expensive one calls get_BBCC() with remove=True on vg_tt entries */
+Bool SK_(cheap_sanity_check)(void) { return True; }
+
+extern TTEntry* vg_tt;
+
+Bool SK_(expensive_sanity_check)(void)
+{
+ Int i;
+ Bool dummy;
+ for (i = 0; i < 200191; i++) {
+ if (vg_tt[i].orig_addr != (Addr)1 &&
+ vg_tt[i].orig_addr != (Addr)3) {
+ VG_(printf)(".");
+ get_BBCC(vg_tt[i].orig_addr, NULL, /*remove=*/True, &dummy);
+ }
+ }
+ return True;
+}
+#endif
+
+/*--------------------------------------------------------------------*/
/*--- end vg_cachesim.c ---*/
/*--------------------------------------------------------------------*/