nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- Cache simulation cg_sim.c ---*/ |
| 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Cachegrind, a Valgrind tool for cache |
| 8 | profiling programs. |
| 9 | |
sewardj | 03f8d3f | 2012-08-05 15:46:46 +0000 | [diff] [blame] | 10 | Copyright (C) 2002-2012 Nicholas Nethercote |
njn | 2bc1012 | 2005-05-08 02:10:27 +0000 | [diff] [blame] | 11 | njn@valgrind.org |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 12 | |
| 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 26 | 02111-1307, USA. |
| 27 | |
| 28 | The GNU General Public License is contained in the file COPYING. |
| 29 | */ |
| 30 | |
| 31 | /* Notes: |
| 32 | - simulates a write-allocate cache |
| 33 | - (block --> set) hash function uses simple bit selection |
| 34 | - handling of references straddling two cache blocks: |
| 35 | - counts as only one cache access (not two) |
| 36 | - both blocks hit --> one hit |
| 37 | - one block hits, the other misses --> one miss |
| 38 | - both blocks miss --> one miss (not two) |
| 39 | */ |
| 40 | |
| 41 | typedef struct { |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 42 | Int size; /* bytes */ |
| 43 | Int assoc; |
| 44 | Int line_size; /* bytes */ |
| 45 | Int sets; |
| 46 | Int sets_min_1; |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 47 | Int line_size_bits; |
| 48 | Int tag_shift; |
| 49 | Char desc_line[128]; |
njn | b619ca7 | 2005-10-10 16:18:09 +0000 | [diff] [blame] | 50 | UWord* tags; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 51 | } cache_t2; |
| 52 | |
| 53 | /* By this point, the size/assoc/line_size has been checked. */ |
| 54 | static void cachesim_initcache(cache_t config, cache_t2* c) |
| 55 | { |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 56 | Int i; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 57 | |
| 58 | c->size = config.size; |
| 59 | c->assoc = config.assoc; |
| 60 | c->line_size = config.line_size; |
| 61 | |
| 62 | c->sets = (c->size / c->line_size) / c->assoc; |
| 63 | c->sets_min_1 = c->sets - 1; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 64 | c->line_size_bits = VG_(log2)(c->line_size); |
| 65 | c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); |
| 66 | |
| 67 | if (c->assoc == 1) { |
| 68 | VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", |
| 69 | c->size, c->line_size); |
| 70 | } else { |
| 71 | VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", |
| 72 | c->size, c->line_size, c->assoc); |
| 73 | } |
| 74 | |
sewardj | 9c606bd | 2008-09-18 18:12:50 +0000 | [diff] [blame] | 75 | c->tags = VG_(malloc)("cg.sim.ci.1", |
| 76 | sizeof(UWord) * c->sets * c->assoc); |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 77 | |
| 78 | for (i = 0; i < c->sets * c->assoc; i++) |
| 79 | c->tags[i] = 0; |
| 80 | } |
| 81 | |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 82 | /* This is done as a macro rather than by passing in the cache_t2 as an |
| 83 | * arg because it slows things down by a small amount (3-5%) due to all |
| 84 | * that extra indirection. */ |
| 85 | |
| 86 | #define CACHESIM(L, MISS_TREATMENT) \ |
| 87 | /* The cache and associated bits and pieces. */ \ |
| 88 | static cache_t2 L; \ |
| 89 | \ |
| 90 | static void cachesim_##L##_initcache(cache_t config) \ |
| 91 | { \ |
| 92 | cachesim_initcache(config, &L); \ |
| 93 | } \ |
| 94 | \ |
njn | fcd0488 | 2005-11-13 17:57:32 +0000 | [diff] [blame] | 95 | /* This attribute forces GCC to inline this function, even though it's */ \ |
| 96 | /* bigger than its usual limit. Inlining gains around 5--10% speedup. */ \ |
| 97 | __attribute__((always_inline)) \ |
| 98 | static __inline__ \ |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 99 | void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *mL) \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 100 | { \ |
njn | cbdfcd6 | 2006-11-22 11:38:07 +0000 | [diff] [blame] | 101 | UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ |
| 102 | UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ |
| 103 | UWord tag = a >> L.tag_shift; \ |
| 104 | UWord tag2; \ |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 105 | Int i, j; \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 106 | Bool is_miss = False; \ |
njn | b619ca7 | 2005-10-10 16:18:09 +0000 | [diff] [blame] | 107 | UWord* set; \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 108 | \ |
| 109 | /* First case: word entirely within line. */ \ |
| 110 | if (set1 == set2) { \ |
| 111 | \ |
weidendo | 144b76c | 2009-01-26 22:56:14 +0000 | [diff] [blame] | 112 | set = &(L.tags[set1 * L.assoc]); \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 113 | \ |
| 114 | /* This loop is unrolled for just the first case, which is the most */\ |
| 115 | /* common. We can't unroll any further because it would screw up */\ |
| 116 | /* if we have a direct-mapped (1-way) cache. */\ |
| 117 | if (tag == set[0]) { \ |
| 118 | return; \ |
| 119 | } \ |
| 120 | /* If the tag is one other than the MRU, move it into the MRU spot */\ |
| 121 | /* and shuffle the rest down. */\ |
| 122 | for (i = 1; i < L.assoc; i++) { \ |
| 123 | if (tag == set[i]) { \ |
| 124 | for (j = i; j > 0; j--) { \ |
| 125 | set[j] = set[j - 1]; \ |
| 126 | } \ |
| 127 | set[0] = tag; \ |
| 128 | return; \ |
| 129 | } \ |
| 130 | } \ |
| 131 | \ |
| 132 | /* A miss; install this tag as MRU, shuffle rest down. */ \ |
| 133 | for (j = L.assoc - 1; j > 0; j--) { \ |
| 134 | set[j] = set[j - 1]; \ |
| 135 | } \ |
| 136 | set[0] = tag; \ |
| 137 | MISS_TREATMENT; \ |
| 138 | return; \ |
| 139 | \ |
| 140 | /* Second case: word straddles two lines. */ \ |
| 141 | /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ |
tom | 7c0f639 | 2011-06-09 12:26:42 +0000 | [diff] [blame] | 142 | } else if (((set1 + 1) & (L.sets_min_1)) == set2) { \ |
weidendo | 144b76c | 2009-01-26 22:56:14 +0000 | [diff] [blame] | 143 | set = &(L.tags[set1 * L.assoc]); \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 144 | if (tag == set[0]) { \ |
| 145 | goto block2; \ |
| 146 | } \ |
| 147 | for (i = 1; i < L.assoc; i++) { \ |
| 148 | if (tag == set[i]) { \ |
| 149 | for (j = i; j > 0; j--) { \ |
| 150 | set[j] = set[j - 1]; \ |
| 151 | } \ |
| 152 | set[0] = tag; \ |
| 153 | goto block2; \ |
| 154 | } \ |
| 155 | } \ |
| 156 | for (j = L.assoc - 1; j > 0; j--) { \ |
| 157 | set[j] = set[j - 1]; \ |
| 158 | } \ |
| 159 | set[0] = tag; \ |
| 160 | is_miss = True; \ |
| 161 | block2: \ |
weidendo | 144b76c | 2009-01-26 22:56:14 +0000 | [diff] [blame] | 162 | set = &(L.tags[set2 * L.assoc]); \ |
njn | cbdfcd6 | 2006-11-22 11:38:07 +0000 | [diff] [blame] | 163 | tag2 = (a+size-1) >> L.tag_shift; \ |
| 164 | if (tag2 == set[0]) { \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 165 | goto miss_treatment; \ |
| 166 | } \ |
| 167 | for (i = 1; i < L.assoc; i++) { \ |
njn | cbdfcd6 | 2006-11-22 11:38:07 +0000 | [diff] [blame] | 168 | if (tag2 == set[i]) { \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 169 | for (j = i; j > 0; j--) { \ |
| 170 | set[j] = set[j - 1]; \ |
| 171 | } \ |
njn | cbdfcd6 | 2006-11-22 11:38:07 +0000 | [diff] [blame] | 172 | set[0] = tag2; \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 173 | goto miss_treatment; \ |
| 174 | } \ |
| 175 | } \ |
| 176 | for (j = L.assoc - 1; j > 0; j--) { \ |
| 177 | set[j] = set[j - 1]; \ |
| 178 | } \ |
njn | cbdfcd6 | 2006-11-22 11:38:07 +0000 | [diff] [blame] | 179 | set[0] = tag2; \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 180 | is_miss = True; \ |
| 181 | miss_treatment: \ |
| 182 | if (is_miss) { MISS_TREATMENT; } \ |
| 183 | \ |
| 184 | } else { \ |
njn | 8a7b41b | 2007-09-23 00:51:24 +0000 | [diff] [blame] | 185 | VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);\ |
njn | 6799325 | 2004-11-22 18:02:32 +0000 | [diff] [blame] | 186 | VG_(tool_panic)("item straddles more than two cache sets"); \ |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 187 | } \ |
| 188 | return; \ |
| 189 | } |
| 190 | |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 191 | CACHESIM(LL, (*mL)++ ); |
| 192 | CACHESIM(I1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } ); |
| 193 | CACHESIM(D1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } ); |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 194 | |
| 195 | /*--------------------------------------------------------------------*/ |
| 196 | /*--- end cg_sim.c ---*/ |
| 197 | /*--------------------------------------------------------------------*/ |
| 198 | |