New files:
- vg_cachesim.c
- vg_cachesim_{I1,D1,L2}.c
- vg_annotate.in
- vg_cachegen.in
Changes to existing files:
- valgrind/valgrind.in, added option:
--cachesim=no|yes [no]
- Makefile/Makefile.am:
* added vg_cachesim.c to valgrind_so_SOURCES var
* added vg_cachesim_I1.c, vg_cachesim_D1.c, vg_cachesim_L2.c to
noinst_HEADERS var
* added vg_annotate, vg_cachegen to 'bin_SCRIPTS' var, and added empty
targets for them
- vg_main.c:
* added two offsets for cache sim functions (put in positions 17a,17b)
* added option handling (detection of --cachesim=yes, which turns off
--instrument);
* added calls to cachesim initialisation/finalisation functions
- vg_mylibc: added some system call wrappers (for chmod, open_write, etc) for
file writing
- vg_symtab2.c:
* allow it to read symbols if either of --instrument or --cachesim is
used
* made vg_symtab2.c:vg_what_{line,fn}_is_this extern, renaming it as
VG_(what_line_is_this) (and added to vg_include.h)
* completely rewrote the read loop in vg_read_lib_symbols, fixing
several bugs. Much better now, although probably not perfect. It's
also relatively fragile -- I'm using the "die immediately if anything
unexpected happens" approach.
- vg_to_ucode.c:
* in VG_(disBB), patching in x86 instruction size into extra4b field of
JMP instructions at the end of basic blocks if --cachesim=yes.
Shifted things around to do this; also had to fiddle around with
single-step stuff to get this to work, by not sticking extra JMPs on
the end of the single-instruction block if there was already one
there (to avoid breaking an assertion in vg_cachesim.c). Did a
similar thing to avoid an extra JMP on huge basic blocks that are
split.
- vg_translate.c:
* if --cachesim=yes call the cachesim instrumentation phase
* made some functions extern and renamed:
allocCodeBlock() --> VG_(allocCodeBlock)()
freeCodeBlock() --> VG_(freeCodeBlock)()
copyUInstr() --> VG_(copyUInstr)()
(added to vg_include.h too)
- vg_include.h: declared
* cachesim offsets
* exports of vg_cachesim.c
* added four new profiling events (increasing VGP_M_CCS to 24 -- I kept
the spare ones)
* added comment about UInstr.extra4b field being used for instr size in
JMPs for cache simulation
- docs/manual.html:
* Added --cachesim option to section 2.5.
* Added cache profiling stuff as section 7.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@168 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/cg_sim_L2.c b/cachegrind/cg_sim_L2.c
new file mode 100644
index 0000000..bb68532
--- /dev/null
+++ b/cachegrind/cg_sim_L2.c
@@ -0,0 +1,250 @@
+/* L2 cache simulator, generated by vg_cachegen.
+ * total size = 262144 bytes
+ * line size = 64 bytes
+ * associativity = 8-way associative
+ *
+ * This file should be #include-d into vg_cachesim.c
+ */
+
+static char L2_desc_line[] =   /* One-line text summary of the simulated L2
+                                * geometry (total size, line size,
+                                * associativity).  Not referenced in this
+                                * file -- presumably emitted by the including
+                                * vg_cachesim.c; TODO confirm. */
+ "desc: L2 cache: 262144 B, 64 B, 8-way associative\n";
+
+static UInt L2_tags[512][8];   /* Tag store: 512 sets x 8 ways
+                                * (262144 B / 64 B per line / 8 ways = 512).
+                                * Within a set, index 0 holds the most
+                                * recently used tag and index 7 the least
+                                * recently used; cachesim_L2_doref maintains
+                                * that ordering on every reference. */
+
+/* Reset the simulated L2 cache to its start-up state: every way of every
+ * set holds tag 0.  The tag store has no separate valid bit, so 0 doubles
+ * as the "empty" value. */
+static void cachesim_L2_initcache(void)
+{
+   /* Derive the geometry from the array itself rather than repeating it. */
+   const UInt n_sets = sizeof(L2_tags)    / sizeof(L2_tags[0]);
+   const UInt n_ways = sizeof(L2_tags[0]) / sizeof(L2_tags[0][0]);
+   UInt s;
+
+   for (s = 0; s < n_sets; s++) {
+      UInt *ways = L2_tags[s];
+      UInt  w    = n_ways;
+
+      /* Clear the set back to front; the order is irrelevant here. */
+      while (w > 0)
+         ways[--w] = 0;
+   }
+}
+
+/* Look up `tag` in set `set` of the L2 tag store and make it the most
+ * recently used entry of that set.
+ *
+ * Each set is kept ordered by recency: way 0 is the most recently used tag,
+ * way 7 the least recently used.  On a hit at way k, ways 0..k-1 slide down
+ * one place and the tag moves to way 0.  On a miss every way slides down
+ * one place (dropping the tag in way 7) and the new tag is installed in
+ * way 0.
+ *
+ * Returns True if the reference missed, False if it hit.
+ */
+static __inline__
+Bool cachesim_L2_touch(UInt set, UInt tag)
+{
+   UInt *ways = L2_tags[set];
+   Bool  miss = True;
+   UInt  pos;
+
+   /* Find the way currently holding the tag, if any. */
+   for (pos = 0; pos < 8; pos++) {
+      if (ways[pos] == tag) {
+         miss = False;
+         break;
+      }
+   }
+
+   /* On a miss the victim is the least recently used way. */
+   if (miss)
+      pos = 8 - 1;
+
+   /* Age everything that was more recent than `pos`, then install the tag
+    * in the most-recently-used slot. */
+   for (; pos > 0; pos--)
+      ways[pos] = ways[pos - 1];
+   ways[0] = tag;
+
+   return miss;
+}
+
+/* Simulate one reference of `size` bytes at address `a` against the L2
+ * cache (512 sets, 64-byte lines, 8 ways, LRU replacement).
+ *
+ *   a    - address of the first byte referenced
+ *   size - number of bytes referenced
+ *   m2   - miss counter; incremented by exactly one if the reference
+ *          misses, left untouched if it hits
+ *
+ * A reference that lies within one line touches one set.  A reference that
+ * straddles two consecutive lines touches both sets and is charged a single
+ * miss if either line misses.  A reference whose first and last bytes fall
+ * in non-adjacent sets is reported and causes a panic.
+ *
+ * The set and tag of the second line are computed from the address of the
+ * LAST byte referenced (a + size - 1), not from one past the end.  This
+ * means that
+ *   - a reference ending exactly on a line boundary is not mistaken for a
+ *     straddling one, and
+ *   - when the reference wraps from set 511 to set 0, the second line is
+ *     looked up with its own tag, which is one greater than the first's.
+ */
+static __inline__
+void cachesim_L2_doref(Addr a, UChar size, ULong *m2)
+{
+   /* Address of the last byte touched; a zero-sized reference is treated
+    * as touching only the line containing `a`. */
+   Addr last = (size > 0) ? (a + size - 1) : a;
+
+   UInt set1 = (a    >> 6) & (512-1);
+   UInt set2 = (last >> 6) & (512-1);
+   UInt tag1 = a    >> (6 + 9);
+   UInt tag2 = last >> (6 + 9);
+
+   if (set1 == set2) {
+
+      /* Reference is contained in a single line. */
+      if (cachesim_L2_touch(set1, tag1))
+         (*m2)++;
+
+   } else if ((set1 + 1) % 512 == set2) {
+
+      /* Reference straddles two consecutive lines.  Both sets must be
+       * updated unconditionally, so evaluate both before combining. */
+      Bool miss1 = cachesim_L2_touch(set1, tag1);
+      Bool miss2 = cachesim_L2_touch(set2, tag2);
+
+      /* Miss treatment: at most one miss is charged per reference. */
+      if (miss1 || miss2)
+         (*m2)++;
+
+   } else {
+      VG_(printf)("\nERROR: Data item 0x%x of size %u bytes is in two non-adjacent\n", a, size);
+      VG_(printf)("sets %d and %d.\n", set1, set2);
+      VG_(panic)("L2 cache set mismatch");
+   }
+}