nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- Cache simulation cg_sim.c ---*/ |
| 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Cachegrind, a Valgrind tool for cache |
| 8 | profiling programs. |
| 9 | |
njn | 5361242 | 2005-03-12 16:22:54 +0000 | [diff] [blame] | 10 | Copyright (C) 2002-2005 Nicholas Nethercote |
njn | 2bc1012 | 2005-05-08 02:10:27 +0000 | [diff] [blame] | 11 | njn@valgrind.org |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 12 | |
| 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 26 | 02111-1307, USA. |
| 27 | |
| 28 | The GNU General Public License is contained in the file COPYING. |
| 29 | */ |
| 30 | |
| 31 | /* Notes: |
| 32 | - simulates a write-allocate cache |
| 33 | - (block --> set) hash function uses simple bit selection |
| 34 | - handling of references straddling two cache blocks: |
| 35 | - counts as only one cache access (not two) |
| 36 | - both blocks hit --> one hit |
| 37 | - one block hits, the other misses --> one miss |
| 38 | - both blocks miss --> one miss (not two) |
| 39 | */ |
| 40 | |
| 41 | typedef struct { |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 42 | Int size; /* bytes */ |
| 43 | Int assoc; |
| 44 | Int line_size; /* bytes */ |
| 45 | Int sets; |
| 46 | Int sets_min_1; |
| 47 | Int assoc_bits; |
| 48 | Int line_size_bits; |
| 49 | Int tag_shift; |
| 50 | Char desc_line[128]; |
njn | b619ca7 | 2005-10-10 16:18:09 +0000 | [diff] [blame] | 51 | UWord* tags; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 52 | } cache_t2; |
| 53 | |
| 54 | /* By this point, the size/assoc/line_size has been checked. */ |
| 55 | static void cachesim_initcache(cache_t config, cache_t2* c) |
| 56 | { |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 57 | Int i; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 58 | |
| 59 | c->size = config.size; |
| 60 | c->assoc = config.assoc; |
| 61 | c->line_size = config.line_size; |
| 62 | |
| 63 | c->sets = (c->size / c->line_size) / c->assoc; |
| 64 | c->sets_min_1 = c->sets - 1; |
| 65 | c->assoc_bits = VG_(log2)(c->assoc); |
| 66 | c->line_size_bits = VG_(log2)(c->line_size); |
| 67 | c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); |
| 68 | |
| 69 | if (c->assoc == 1) { |
| 70 | VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", |
| 71 | c->size, c->line_size); |
| 72 | } else { |
| 73 | VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", |
| 74 | c->size, c->line_size, c->assoc); |
| 75 | } |
| 76 | |
njn | b619ca7 | 2005-10-10 16:18:09 +0000 | [diff] [blame] | 77 | c->tags = VG_(malloc)(sizeof(UWord) * c->sets * c->assoc); |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 78 | |
| 79 | for (i = 0; i < c->sets * c->assoc; i++) |
| 80 | c->tags[i] = 0; |
| 81 | } |
| 82 | |
| 83 | #if 0 |
| 84 | static void print_cache(cache_t2* c) |
| 85 | { |
| 86 | UInt set, way, i; |
| 87 | |
| 88 | /* Note initialisation and update of 'i'. */ |
| 89 | for (i = 0, set = 0; set < c->sets; set++) { |
| 90 | for (way = 0; way < c->assoc; way++, i++) { |
njn | b619ca7 | 2005-10-10 16:18:09 +0000 | [diff] [blame] | 91 | VG_(printf)("%16lx ", c->tags[i]); |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 92 | } |
| 93 | VG_(printf)("\n"); |
| 94 | } |
| 95 | } |
| 96 | #endif |
| 97 | |
| 98 | /* This is done as a macro rather than by passing in the cache_t2 as an |
| 99 | * arg because it slows things down by a small amount (3-5%) due to all |
| 100 | * that extra indirection. */ |
| 101 | |
| 102 | #define CACHESIM(L, MISS_TREATMENT) \ |
| 103 | /* The cache and associated bits and pieces. */ \ |
| 104 | static cache_t2 L; \ |
| 105 | \ |
| 106 | static void cachesim_##L##_initcache(cache_t config) \ |
| 107 | { \ |
| 108 | cachesim_initcache(config, &L); \ |
| 109 | } \ |
| 110 | \ |
njn | fcd0488 | 2005-11-13 17:57:32 +0000 | [diff] [blame] | 111 | /* This attribute forces GCC to inline this function, even though it's */ \ |
| 112 | /* bigger than its usual limit. Inlining gains around 5--10% speedup. */ \ |
| 113 | __attribute__((always_inline)) \ |
| 114 | static __inline__ \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 115 | void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2) \ |
| 116 | { \ |
njn | b619ca7 | 2005-10-10 16:18:09 +0000 | [diff] [blame] | 117 | register UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ |
| 118 | register UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ |
| 119 | register UWord tag = a >> L.tag_shift; \ |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 120 | Int i, j; \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 121 | Bool is_miss = False; \ |
njn | b619ca7 | 2005-10-10 16:18:09 +0000 | [diff] [blame] | 122 | UWord* set; \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 123 | \ |
| 124 | /* First case: word entirely within line. */ \ |
| 125 | if (set1 == set2) { \ |
| 126 | \ |
| 127 | /* Shifting is a bit faster than multiplying */ \ |
| 128 | set = &(L.tags[set1 << L.assoc_bits]); \ |
| 129 | \ |
| 130 | /* This loop is unrolled for just the first case, which is the most */\ |
| 131 | /* common. We can't unroll any further because it would screw up */\ |
| 132 | /* if we have a direct-mapped (1-way) cache. */\ |
| 133 | if (tag == set[0]) { \ |
| 134 | return; \ |
| 135 | } \ |
| 136 | /* If the tag is one other than the MRU, move it into the MRU spot */\ |
| 137 | /* and shuffle the rest down. */\ |
| 138 | for (i = 1; i < L.assoc; i++) { \ |
| 139 | if (tag == set[i]) { \ |
| 140 | for (j = i; j > 0; j--) { \ |
| 141 | set[j] = set[j - 1]; \ |
| 142 | } \ |
| 143 | set[0] = tag; \ |
| 144 | return; \ |
| 145 | } \ |
| 146 | } \ |
| 147 | \ |
| 148 | /* A miss; install this tag as MRU, shuffle rest down. */ \ |
| 149 | for (j = L.assoc - 1; j > 0; j--) { \ |
| 150 | set[j] = set[j - 1]; \ |
| 151 | } \ |
| 152 | set[0] = tag; \ |
| 153 | MISS_TREATMENT; \ |
| 154 | return; \ |
| 155 | \ |
| 156 | /* Second case: word straddles two lines. */ \ |
| 157 | /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ |
| 158 | } else if (((set1 + 1) & (L.sets-1)) == set2) { \ |
| 159 | set = &(L.tags[set1 << L.assoc_bits]); \ |
| 160 | if (tag == set[0]) { \ |
| 161 | goto block2; \ |
| 162 | } \ |
| 163 | for (i = 1; i < L.assoc; i++) { \ |
| 164 | if (tag == set[i]) { \ |
| 165 | for (j = i; j > 0; j--) { \ |
| 166 | set[j] = set[j - 1]; \ |
| 167 | } \ |
| 168 | set[0] = tag; \ |
| 169 | goto block2; \ |
| 170 | } \ |
| 171 | } \ |
| 172 | for (j = L.assoc - 1; j > 0; j--) { \ |
| 173 | set[j] = set[j - 1]; \ |
| 174 | } \ |
| 175 | set[0] = tag; \ |
| 176 | is_miss = True; \ |
| 177 | block2: \ |
| 178 | set = &(L.tags[set2 << L.assoc_bits]); \ |
| 179 | if (tag == set[0]) { \ |
| 180 | goto miss_treatment; \ |
| 181 | } \ |
| 182 | for (i = 1; i < L.assoc; i++) { \ |
| 183 | if (tag == set[i]) { \ |
| 184 | for (j = i; j > 0; j--) { \ |
| 185 | set[j] = set[j - 1]; \ |
| 186 | } \ |
| 187 | set[0] = tag; \ |
| 188 | goto miss_treatment; \ |
| 189 | } \ |
| 190 | } \ |
| 191 | for (j = L.assoc - 1; j > 0; j--) { \ |
| 192 | set[j] = set[j - 1]; \ |
| 193 | } \ |
| 194 | set[0] = tag; \ |
| 195 | is_miss = True; \ |
| 196 | miss_treatment: \ |
| 197 | if (is_miss) { MISS_TREATMENT; } \ |
| 198 | \ |
| 199 | } else { \ |
| 200 | VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2); \ |
njn | 6799325 | 2004-11-22 18:02:32 +0000 | [diff] [blame] | 201 | VG_(tool_panic)("item straddles more than two cache sets"); \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 202 | } \ |
| 203 | return; \ |
| 204 | } |
| 205 | |
| 206 | CACHESIM(L2, (*m2)++ ); |
| 207 | CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); |
| 208 | CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); |
| 209 | |
| 210 | /*--------------------------------------------------------------------*/ |
| 211 | /*--- end cg_sim.c ---*/ |
| 212 | /*--------------------------------------------------------------------*/ |
| 213 | |