nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- Cache simulation cg_sim.c ---*/ |
| 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Cachegrind, a Valgrind tool for cache |
| 8 | profiling programs. |
| 9 | |
sewardj | 4d474d0 | 2008-02-11 11:34:59 +0000 | [diff] [blame] | 10 | Copyright (C) 2002-2008 Nicholas Nethercote |
njn | 2bc1012 | 2005-05-08 02:10:27 +0000 | [diff] [blame] | 11 | njn@valgrind.org |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 12 | |
| 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 26 | 02111-1307, USA. |
| 27 | |
| 28 | The GNU General Public License is contained in the file COPYING. |
| 29 | */ |
| 30 | |
| 31 | /* Notes: |
| 32 | - simulates a write-allocate cache |
| 33 | - (block --> set) hash function uses simple bit selection |
| 34 | - handling of references straddling two cache blocks: |
| 35 | - counts as only one cache access (not two) |
| 36 | - both blocks hit --> one hit |
| 37 | - one block hits, the other misses --> one miss |
| 38 | - both blocks miss --> one miss (not two) |
| 39 | */ |
| 40 | |
| 41 | typedef struct { |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 42 | Int size; /* bytes */ |
| 43 | Int assoc; |
| 44 | Int line_size; /* bytes */ |
| 45 | Int sets; |
| 46 | Int sets_min_1; |
| 47 | Int assoc_bits; |
| 48 | Int line_size_bits; |
| 49 | Int tag_shift; |
| 50 | Char desc_line[128]; |
njn | b619ca7 | 2005-10-10 16:18:09 +0000 | [diff] [blame] | 51 | UWord* tags; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 52 | } cache_t2; |
| 53 | |
| 54 | /* By this point, the size/assoc/line_size has been checked. */ |
| 55 | static void cachesim_initcache(cache_t config, cache_t2* c) |
| 56 | { |
njn | 0103de5 | 2005-10-10 16:49:01 +0000 | [diff] [blame] | 57 | Int i; |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 58 | |
| 59 | c->size = config.size; |
| 60 | c->assoc = config.assoc; |
| 61 | c->line_size = config.line_size; |
| 62 | |
| 63 | c->sets = (c->size / c->line_size) / c->assoc; |
| 64 | c->sets_min_1 = c->sets - 1; |
| 65 | c->assoc_bits = VG_(log2)(c->assoc); |
| 66 | c->line_size_bits = VG_(log2)(c->line_size); |
| 67 | c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); |
| 68 | |
| 69 | if (c->assoc == 1) { |
| 70 | VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", |
| 71 | c->size, c->line_size); |
| 72 | } else { |
| 73 | VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", |
| 74 | c->size, c->line_size, c->assoc); |
| 75 | } |
| 76 | |
sewardj | 9c606bd | 2008-09-18 18:12:50 +0000 | [diff] [blame^] | 77 | c->tags = VG_(malloc)("cg.sim.ci.1", |
| 78 | sizeof(UWord) * c->sets * c->assoc); |
nethercote | 27fc1da | 2004-01-04 16:56:57 +0000 | [diff] [blame] | 79 | |
| 80 | for (i = 0; i < c->sets * c->assoc; i++) |
| 81 | c->tags[i] = 0; |
| 82 | } |
| 83 | |
/* CACHESIM(L, MISS_TREATMENT) instantiates one simulated cache named L:
 *
 *   static cache_t2 L;
 *       the cache's state.
 *   cachesim_<L>_initcache(config)
 *       initialises L via cachesim_initcache().
 *   cachesim_<L>_setref_is_miss(set_no, tag)
 *       looks `tag` up in one set, makes it that set's most recently used
 *       entry, and reports whether it was absent.
 *   cachesim_<L>_doref(a, size, m1, m2)
 *       simulates one reference to the `size` bytes starting at address `a`.
 *
 * MISS_TREATMENT is the statement run when a reference misses; it is run at
 * most once per reference and may use doref's parameters (a, size, m1, m2).
 *
 * A reference that straddles two cache lines looks up (and, if necessary,
 * installs) both lines, but counts as a single access: it is a miss if
 * either line misses.  Whether a reference straddles is decided from its
 * first and last block numbers rather than from their set indices, so that
 * two different lines mapping to the same set (e.g. in a cache with only
 * one set) are both simulated.
 *
 * This is done as a macro rather than as functions taking a cache_t2*,
 * because passing the cache in as an argument slows things down by a small
 * amount (3-5%) due to all that extra indirection.
 */

#define CACHESIM(L, MISS_TREATMENT)                                         \
/* The cache and associated bits and pieces. */                             \
static cache_t2 L;                                                          \
                                                                            \
static void cachesim_##L##_initcache(cache_t config)                        \
{                                                                           \
   cachesim_initcache(config, &L);                                          \
}                                                                           \
                                                                            \
/* Looks up `tag` in set `set_no`, leaving it as that set's MRU entry. */   \
/* Returns True if the tag was not present (a miss), False on a hit.   */   \
/* Forced inline for the same reason as the doref function below.      */   \
__attribute__((always_inline))                                              \
static __inline__                                                           \
Bool cachesim_##L##_setref_is_miss(UInt set_no, UWord tag)                  \
{                                                                           \
   /* Shifting is a bit faster than multiplying. */                         \
   UWord* set = &(L.tags[set_no << L.assoc_bits]);                          \
   Int    i, j;                                                             \
                                                                            \
   /* The MRU slot is tested on its own because it is the most common  */   \
   /* case.  No further slots can be tested outside the loop because   */   \
   /* a direct-mapped (1-way) cache has only this one.                 */   \
   if (tag == set[0]) {                                                     \
      return False;                                                         \
   }                                                                        \
   /* If the tag is one other than the MRU, move it into the MRU spot  */   \
   /* and shuffle the rest down.                                       */   \
   for (i = 1; i < L.assoc; i++) {                                          \
      if (tag == set[i]) {                                                  \
         for (j = i; j > 0; j--) {                                          \
            set[j] = set[j - 1];                                            \
         }                                                                  \
         set[0] = tag;                                                      \
         return False;                                                      \
      }                                                                     \
   }                                                                        \
                                                                            \
   /* A miss; install this tag as MRU, shuffle rest down (the LRU      */   \
   /* entry falls off the end).                                        */   \
   for (j = L.assoc - 1; j > 0; j--) {                                      \
      set[j] = set[j - 1];                                                  \
   }                                                                        \
   set[0] = tag;                                                            \
   return True;                                                             \
}                                                                           \
                                                                            \
/* This attribute forces GCC to inline this function, even though it's */   \
/* bigger than its usual limit.  Inlining gains around 5--10% speedup. */   \
__attribute__((always_inline))                                              \
static __inline__                                                           \
void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2)         \
{                                                                           \
   /* Block (line) numbers of the first and last byte referenced. */        \
   UWord block1 =  a          >> L.line_size_bits;                          \
   UWord block2 = (a+size-1)  >> L.line_size_bits;                          \
   UInt  set1   = block1 & (L.sets_min_1);                                  \
   UInt  set2   = block2 & (L.sets_min_1);                                  \
   UWord tag    =  a          >> L.tag_shift;                               \
   UWord tag2   = (a+size-1)  >> L.tag_shift;                               \
   Bool  miss1, miss2;                                                      \
                                                                            \
   /* First case: word entirely within line.  Block numbers, not set   */   \
   /* indices, are compared: two different lines can share a set.      */   \
   if (block1 == block2) {                                                  \
      if (cachesim_##L##_setref_is_miss(set1, tag)) {                       \
         MISS_TREATMENT;                                                    \
      }                                                                     \
      return;                                                               \
                                                                            \
   /* Second case: word straddles two lines. */                             \
   /* Nb: this is a fast way of doing ((set1+1) % L.sets) */                \
   } else if (((set1 + 1) & (L.sets_min_1)) == set2) {                      \
      /* Always do both lookups, in address order: each one updates   */    \
      /* the cache state as a side effect.                            */    \
      miss1 = cachesim_##L##_setref_is_miss(set1, tag);                     \
      miss2 = cachesim_##L##_setref_is_miss(set2, tag2);                    \
      /* One miss at most, however many of the two lines missed. */         \
      if (miss1 || miss2) {                                                 \
         MISS_TREATMENT;                                                    \
      }                                                                     \
                                                                            \
   } else {                                                                 \
      VG_(printf)("addr: %lx size: %u sets: %u %u", a, size, set1, set2);   \
      VG_(tool_panic)("item straddles more than two cache sets");           \
   }                                                                        \
   return;                                                                  \
}
| 193 | |
| 194 | CACHESIM(L2, (*m2)++ ); |
| 195 | CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); |
| 196 | CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); |
| 197 | |
| 198 | /*--------------------------------------------------------------------*/ |
| 199 | /*--- end cg_sim.c ---*/ |
| 200 | /*--------------------------------------------------------------------*/ |
| 201 | |