nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- Cache simulation cg_sim.c ---*/ |
| 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Cachegrind, a Valgrind tool for cache |
| 8 | profiling programs. |
| 9 | |
sewardj | 0f157dd | 2013-10-18 14:27:36 +0000 | [diff] [blame] | 10 | Copyright (C) 2002-2013 Nicholas Nethercote |
njn | 2bc1012 | 2005-05-08 02:10:27 +0000 | [diff] [blame] | 11 | njn@valgrind.org |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 12 | |
| 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 26 | 02111-1307, USA. |
| 27 | |
| 28 | The GNU General Public License is contained in the file COPYING. |
| 29 | */ |
| 30 | |
| 31 | /* Notes: |
| 32 | - simulates a write-allocate cache |
| 33 | - (block --> set) hash function uses simple bit selection |
| 34 | - handling of references straddling two cache blocks: |
| 35 | - counts as only one cache access (not two) |
| 36 | - both blocks hit --> one hit |
| 37 | - one block hits, the other misses --> one miss |
| 38 | - both blocks miss --> one miss (not two) |
| 39 | */ |
| 40 | |
| 41 | typedef struct { |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 42 | Int size; /* bytes */ |
| 43 | Int assoc; |
| 44 | Int line_size; /* bytes */ |
| 45 | Int sets; |
| 46 | Int sets_min_1; |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 47 | Int line_size_bits; |
| 48 | Int tag_shift; |
florian | fed3c04 | 2014-09-30 22:15:05 +0000 | [diff] [blame^] | 49 | HChar desc_line[128]; /* large enough */ |
njn | b619ca7 | 2005-10-10 16:18:09 +0000 | [diff] [blame] | 50 | UWord* tags; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 51 | } cache_t2; |
| 52 | |
| 53 | /* By this point, the size/assoc/line_size has been checked. */ |
| 54 | static void cachesim_initcache(cache_t config, cache_t2* c) |
| 55 | { |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 56 | Int i; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 57 | |
| 58 | c->size = config.size; |
| 59 | c->assoc = config.assoc; |
| 60 | c->line_size = config.line_size; |
| 61 | |
| 62 | c->sets = (c->size / c->line_size) / c->assoc; |
| 63 | c->sets_min_1 = c->sets - 1; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 64 | c->line_size_bits = VG_(log2)(c->line_size); |
| 65 | c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); |
| 66 | |
| 67 | if (c->assoc == 1) { |
| 68 | VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", |
| 69 | c->size, c->line_size); |
| 70 | } else { |
| 71 | VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", |
| 72 | c->size, c->line_size, c->assoc); |
| 73 | } |
| 74 | |
sewardj | 9c606bd | 2008-09-18 18:12:50 +0000 | [diff] [blame] | 75 | c->tags = VG_(malloc)("cg.sim.ci.1", |
| 76 | sizeof(UWord) * c->sets * c->assoc); |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 77 | |
| 78 | for (i = 0; i < c->sets * c->assoc; i++) |
| 79 | c->tags[i] = 0; |
| 80 | } |
| 81 | |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 82 | /* This attribute forces GCC to inline the function, getting rid of a |
| 83 | * lot of indirection around the cache_t2 pointer, if it is known to be |
| 84 | * constant in the caller (the caller is inlined itself). |
| 85 | * Without inlining of simulator functions, cachegrind can get 40% slower. |
| 86 | */ |
| 87 | __attribute__((always_inline)) |
weidendo | b0909b4 | 2012-10-29 21:28:03 +0000 | [diff] [blame] | 88 | static __inline__ |
| 89 | Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag) |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 90 | { |
| 91 | int i, j; |
| 92 | UWord *set; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 93 | |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 94 | set = &(c->tags[set_no * c->assoc]); |
| 95 | |
| 96 | /* This loop is unrolled for just the first case, which is the most */ |
| 97 | /* common. We can't unroll any further because it would screw up */ |
| 98 | /* if we have a direct-mapped (1-way) cache. */ |
| 99 | if (tag == set[0]) |
| 100 | return False; |
| 101 | |
| 102 | /* If the tag is one other than the MRU, move it into the MRU spot */ |
| 103 | /* and shuffle the rest down. */ |
| 104 | for (i = 1; i < c->assoc; i++) { |
| 105 | if (tag == set[i]) { |
| 106 | for (j = i; j > 0; j--) { |
| 107 | set[j] = set[j - 1]; |
| 108 | } |
| 109 | set[0] = tag; |
| 110 | |
| 111 | return False; |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | /* A miss; install this tag as MRU, shuffle rest down. */ |
| 116 | for (j = c->assoc - 1; j > 0; j--) { |
| 117 | set[j] = set[j - 1]; |
| 118 | } |
| 119 | set[0] = tag; |
| 120 | |
| 121 | return True; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 122 | } |
| 123 | |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 124 | __attribute__((always_inline)) |
weidendo | b0909b4 | 2012-10-29 21:28:03 +0000 | [diff] [blame] | 125 | static __inline__ |
| 126 | Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size) |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 127 | { |
weidendo | 3694c9f | 2012-10-05 23:58:23 +0000 | [diff] [blame] | 128 | /* A memory block has the size of a cache line */ |
| 129 | UWord block1 = a >> c->line_size_bits; |
| 130 | UWord block2 = (a+size-1) >> c->line_size_bits; |
| 131 | UInt set1 = block1 & c->sets_min_1; |
| 132 | |
| 133 | /* Tags used in real caches are minimal to save space. |
| 134 | * As the last bits of the block number of addresses mapping |
| 135 | * into one cache set are the same, real caches use as tag |
| 136 | * tag = block >> log2(#sets) |
| 137 | * But using the memory block as more specific tag is fine, |
| 138 | * and saves instructions. |
| 139 | */ |
| 140 | UWord tag1 = block1; |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 141 | |
| 142 | /* Access entirely within line. */ |
weidendo | 3694c9f | 2012-10-05 23:58:23 +0000 | [diff] [blame] | 143 | if (block1 == block2) |
| 144 | return cachesim_setref_is_miss(c, set1, tag1); |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 145 | |
| 146 | /* Access straddles two lines. */ |
weidendo | 3694c9f | 2012-10-05 23:58:23 +0000 | [diff] [blame] | 147 | else if (block1 + 1 == block2) { |
| 148 | UInt set2 = block2 & c->sets_min_1; |
| 149 | UWord tag2 = block2; |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 150 | |
| 151 | /* always do both, as state is updated as side effect */ |
weidendo | 3694c9f | 2012-10-05 23:58:23 +0000 | [diff] [blame] | 152 | if (cachesim_setref_is_miss(c, set1, tag1)) { |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 153 | cachesim_setref_is_miss(c, set2, tag2); |
| 154 | return True; |
| 155 | } |
| 156 | return cachesim_setref_is_miss(c, set2, tag2); |
| 157 | } |
weidendo | 3694c9f | 2012-10-05 23:58:23 +0000 | [diff] [blame] | 158 | VG_(printf)("addr: %lx size: %u blocks: %ld %ld", |
| 159 | a, size, block1, block2); |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 160 | VG_(tool_panic)("item straddles more than two cache sets"); |
| 161 | /* not reached */ |
| 162 | return True; |
| 163 | } |
| 164 | |
| 165 | |
| 166 | static cache_t2 LL; |
| 167 | static cache_t2 I1; |
| 168 | static cache_t2 D1; |
| 169 | |
| 170 | static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc) |
| 171 | { |
| 172 | cachesim_initcache(I1c, &I1); |
| 173 | cachesim_initcache(D1c, &D1); |
| 174 | cachesim_initcache(LLc, &LL); |
| 175 | } |
| 176 | |
| 177 | __attribute__((always_inline)) |
weidendo | b0909b4 | 2012-10-29 21:28:03 +0000 | [diff] [blame] | 178 | static __inline__ |
weidendo | 6fc0de0 | 2012-10-30 00:28:29 +0000 | [diff] [blame] | 179 | void cachesim_I1_doref_Gen(Addr a, UChar size, ULong* m1, ULong *mL) |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 180 | { |
| 181 | if (cachesim_ref_is_miss(&I1, a, size)) { |
| 182 | (*m1)++; |
| 183 | if (cachesim_ref_is_miss(&LL, a, size)) |
| 184 | (*mL)++; |
| 185 | } |
| 186 | } |
| 187 | |
weidendo | 6fc0de0 | 2012-10-30 00:28:29 +0000 | [diff] [blame] | 188 | // common special case IrNoX |
| 189 | __attribute__((always_inline)) |
| 190 | static __inline__ |
| 191 | void cachesim_I1_doref_NoX(Addr a, UChar size, ULong* m1, ULong *mL) |
| 192 | { |
| 193 | UWord block = a >> I1.line_size_bits; |
| 194 | UInt I1_set = block & I1.sets_min_1; |
| 195 | |
| 196 | // use block as tag |
| 197 | if (cachesim_setref_is_miss(&I1, I1_set, block)) { |
| 198 | UInt LL_set = block & LL.sets_min_1; |
| 199 | (*m1)++; |
| 200 | // can use block as tag as L1I and LL cache line sizes are equal |
| 201 | if (cachesim_setref_is_miss(&LL, LL_set, block)) |
| 202 | (*mL)++; |
| 203 | } |
| 204 | } |
| 205 | |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 206 | __attribute__((always_inline)) |
weidendo | b0909b4 | 2012-10-29 21:28:03 +0000 | [diff] [blame] | 207 | static __inline__ |
| 208 | void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL) |
weidendo | c1e9426 | 2012-10-05 23:58:17 +0000 | [diff] [blame] | 209 | { |
| 210 | if (cachesim_ref_is_miss(&D1, a, size)) { |
| 211 | (*m1)++; |
| 212 | if (cachesim_ref_is_miss(&LL, a, size)) |
| 213 | (*mL)++; |
| 214 | } |
| 215 | } |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 216 | |
weidendo | 6fc0de0 | 2012-10-30 00:28:29 +0000 | [diff] [blame] | 217 | /* Check for special case IrNoX. Called at instrumentation time. |
| 218 | * |
| 219 | * Does this Ir only touch one cache line, and are L1I/LL cache |
| 220 | * line sizes the same? This allows to get rid of a runtime check. |
| 221 | * |
| 222 | * Returning false is always fine, as this calls the generic case |
| 223 | */ |
| 224 | static Bool cachesim_is_IrNoX(Addr a, UChar size) |
| 225 | { |
| 226 | UWord block1, block2; |
| 227 | |
| 228 | if (I1.line_size_bits != LL.line_size_bits) return False; |
| 229 | block1 = a >> I1.line_size_bits; |
| 230 | block2 = (a+size-1) >> I1.line_size_bits; |
| 231 | if (block1 != block2) return False; |
| 232 | |
| 233 | return True; |
| 234 | } |
| 235 | |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 236 | /*--------------------------------------------------------------------*/ |
| 237 | /*--- end cg_sim.c ---*/ |
| 238 | /*--------------------------------------------------------------------*/ |
| 239 | |