sewardj | 07133bf | 2002-06-13 10:25:56 +0000 | [diff] [blame] | 1 | |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- Generic stuff shared by all cache simulation files. ---*/ |
njn25 | cac76cb | 2002-09-23 11:21:57 +0000 | [diff] [blame] | 4 | /*--- cg_sim_gen.c ---*/ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 5 | /*--------------------------------------------------------------------*/ |
| 6 | |
| 7 | /* |
nethercote | 137bc55 | 2003-11-14 17:47:54 +0000 | [diff] [blame] | 8 | This file is part of Cachegrind, a Valgrind tool for cache |
njn | c953984 | 2002-10-02 13:26:35 +0000 | [diff] [blame] | 9 | profiling programs. |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 10 | |
njn | 0e1b514 | 2003-04-15 14:58:06 +0000 | [diff] [blame] | 11 | Copyright (C) 2002-2003 Nicholas Nethercote |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 12 | njn25@cam.ac.uk |
| 13 | |
| 14 | This program is free software; you can redistribute it and/or |
| 15 | modify it under the terms of the GNU General Public License as |
| 16 | published by the Free Software Foundation; either version 2 of the |
| 17 | License, or (at your option) any later version. |
| 18 | |
| 19 | This program is distributed in the hope that it will be useful, but |
| 20 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 22 | General Public License for more details. |
| 23 | |
| 24 | You should have received a copy of the GNU General Public License |
| 25 | along with this program; if not, write to the Free Software |
| 26 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 27 | 02111-1307, USA. |
| 28 | |
njn25 | e49d8e7 | 2002-09-23 09:36:25 +0000 | [diff] [blame] | 29 | The GNU General Public License is contained in the file COPYING. |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 30 | */ |
| 31 | |
| 32 | /* Notes: |
| 33 | - simulates a write-allocate cache |
| 34 | - (block --> set) hash function uses simple bit selection |
| 35 | - handling of references straddling two cache blocks: |
| 36 | - counts as only one cache access (not two) |
| 37 | - both blocks hit --> one hit |
| 38 | - one block hits, the other misses --> one miss |
| 39 | - both blocks miss --> one miss (not two) |
| 40 | */ |
| 41 | |
njn25 | cac76cb | 2002-09-23 11:21:57 +0000 | [diff] [blame] | 42 | #ifndef __CG_SIM_GEN_C |
| 43 | #define __CG_SIM_GEN_C |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 44 | |
| 45 | typedef struct { |
| 46 | int size; /* bytes */ |
| 47 | int assoc; |
| 48 | int line_size; /* bytes */ |
| 49 | int sets; |
| 50 | int sets_min_1; |
| 51 | int assoc_bits; |
| 52 | int line_size_bits; |
| 53 | int tag_shift; |
| 54 | char desc_line[128]; |
| 55 | int* tags; |
| 56 | } cache_t2; |
| 57 | |
| 58 | /* By this point, the size/assoc/line_size has been checked. */ |
| 59 | static void cachesim_initcache(cache_t config, cache_t2* c) |
| 60 | { |
| 61 | int i; |
| 62 | |
| 63 | c->size = config.size; |
| 64 | c->assoc = config.assoc; |
| 65 | c->line_size = config.line_size; |
| 66 | |
| 67 | c->sets = (c->size / c->line_size) / c->assoc; |
| 68 | c->sets_min_1 = c->sets - 1; |
sewardj | 07133bf | 2002-06-13 10:25:56 +0000 | [diff] [blame] | 69 | c->assoc_bits = VG_(log2)(c->assoc); |
| 70 | c->line_size_bits = VG_(log2)(c->line_size); |
| 71 | c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 72 | |
| 73 | if (c->assoc == 1) { |
| 74 | VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", |
| 75 | c->size, c->line_size); |
| 76 | } else { |
| 77 | VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", |
| 78 | c->size, c->line_size, c->assoc); |
| 79 | } |
| 80 | |
njn25 | e49d8e7 | 2002-09-23 09:36:25 +0000 | [diff] [blame] | 81 | c->tags = VG_(malloc)(sizeof(UInt) * c->sets * c->assoc); |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 82 | |
| 83 | for (i = 0; i < c->sets * c->assoc; i++) |
| 84 | c->tags[i] = 0; |
| 85 | } |
| 86 | |
| 87 | #if 0 |
| 88 | static void print_cache(cache_t2* c) |
| 89 | { |
| 90 | UInt set, way, i; |
| 91 | |
| 92 | /* Note initialisation and update of 'i'. */ |
| 93 | for (i = 0, set = 0; set < c->sets; set++) { |
| 94 | for (way = 0; way < c->assoc; way++, i++) { |
| 95 | VG_(printf)("%8x ", c->tags[i]); |
| 96 | } |
| 97 | VG_(printf)("\n"); |
| 98 | } |
| 99 | } |
| 100 | #endif |
| 101 | |
njn25 | e49d8e7 | 2002-09-23 09:36:25 +0000 | [diff] [blame] | 102 | /* This is done as a macro rather than by passing in the cache_t2 as an |
| 103 | * arg because it slows things down by a small amount (3-5%) due to all |
| 104 | * that extra indirection. */ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 105 | |
| 106 | #define CACHESIM(L, MISS_TREATMENT) \ |
| 107 | /* The cache and associated bits and pieces. */ \ |
sewardj | b7ad5a9 | 2002-06-13 11:11:05 +0000 | [diff] [blame] | 108 | static cache_t2 L; \ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 109 | \ |
sewardj | 07133bf | 2002-06-13 10:25:56 +0000 | [diff] [blame] | 110 | static void cachesim_##L##_initcache(cache_t config) \ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 111 | { \ |
| 112 | cachesim_initcache(config, &L); \ |
| 113 | } \ |
| 114 | \ |
sewardj | 5686735 | 2003-10-12 10:27:06 +0000 | [diff] [blame] | 115 | static /* __inline__ */ \ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 116 | void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2) \ |
| 117 | { \ |
| 118 | register UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ |
| 119 | register UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ |
| 120 | register UInt tag = a >> L.tag_shift; \ |
| 121 | int i, j; \ |
| 122 | Bool is_miss = False; \ |
| 123 | int* set; \ |
| 124 | \ |
sewardj | 07133bf | 2002-06-13 10:25:56 +0000 | [diff] [blame] | 125 | /* First case: word entirely within line. */ \ |
| 126 | if (set1 == set2) { \ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 127 | \ |
| 128 | /* Shifting is a bit faster than multiplying */ \ |
sewardj | 07133bf | 2002-06-13 10:25:56 +0000 | [diff] [blame] | 129 | set = &(L.tags[set1 << L.assoc_bits]); \ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 130 | \ |
| 131 | /* This loop is unrolled for just the first case, which is the most */\ |
| 132 | /* common. We can't unroll any further because it would screw up */\ |
| 133 | /* if we have a direct-mapped (1-way) cache. */\ |
| 134 | if (tag == set[0]) { \ |
| 135 | return; \ |
| 136 | } \ |
| 137 | /* If the tag is one other than the MRU, move it into the MRU spot */\ |
| 138 | /* and shuffle the rest down. */\ |
| 139 | for (i = 1; i < L.assoc; i++) { \ |
| 140 | if (tag == set[i]) { \ |
| 141 | for (j = i; j > 0; j--) { \ |
| 142 | set[j] = set[j - 1]; \ |
| 143 | } \ |
| 144 | set[0] = tag; \ |
| 145 | return; \ |
| 146 | } \ |
| 147 | } \ |
| 148 | \ |
| 149 | /* A miss; install this tag as MRU, shuffle rest down. */ \ |
| 150 | for (j = L.assoc - 1; j > 0; j--) { \ |
| 151 | set[j] = set[j - 1]; \ |
| 152 | } \ |
| 153 | set[0] = tag; \ |
| 154 | MISS_TREATMENT; \ |
| 155 | return; \ |
| 156 | \ |
| 157 | /* Second case: word straddles two lines. */ \ |
| 158 | /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ |
| 159 | } else if (((set1 + 1) & (L.sets-1)) == set2) { \ |
| 160 | set = &(L.tags[set1 << L.assoc_bits]); \ |
| 161 | if (tag == set[0]) { \ |
| 162 | goto block2; \ |
| 163 | } \ |
| 164 | for (i = 1; i < L.assoc; i++) { \ |
| 165 | if (tag == set[i]) { \ |
| 166 | for (j = i; j > 0; j--) { \ |
| 167 | set[j] = set[j - 1]; \ |
| 168 | } \ |
| 169 | set[0] = tag; \ |
| 170 | goto block2; \ |
| 171 | } \ |
| 172 | } \ |
| 173 | for (j = L.assoc - 1; j > 0; j--) { \ |
| 174 | set[j] = set[j - 1]; \ |
| 175 | } \ |
| 176 | set[0] = tag; \ |
| 177 | is_miss = True; \ |
| 178 | block2: \ |
| 179 | set = &(L.tags[set2 << L.assoc_bits]); \ |
| 180 | if (tag == set[0]) { \ |
| 181 | goto miss_treatment; \ |
| 182 | } \ |
| 183 | for (i = 1; i < L.assoc; i++) { \ |
| 184 | if (tag == set[i]) { \ |
| 185 | for (j = i; j > 0; j--) { \ |
| 186 | set[j] = set[j - 1]; \ |
| 187 | } \ |
| 188 | set[0] = tag; \ |
| 189 | goto miss_treatment; \ |
| 190 | } \ |
| 191 | } \ |
| 192 | for (j = L.assoc - 1; j > 0; j--) { \ |
| 193 | set[j] = set[j - 1]; \ |
| 194 | } \ |
| 195 | set[0] = tag; \ |
| 196 | is_miss = True; \ |
| 197 | miss_treatment: \ |
| 198 | if (is_miss) { MISS_TREATMENT; } \ |
| 199 | \ |
| 200 | } else { \ |
njn | 6431be7 | 2002-07-28 09:53:34 +0000 | [diff] [blame] | 201 | VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2); \ |
njn | e427a66 | 2002-10-02 11:08:25 +0000 | [diff] [blame] | 202 | VG_(skin_panic)("item straddles more than two cache sets"); \ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 203 | } \ |
| 204 | return; \ |
| 205 | } |
| 206 | |
njn25 | cac76cb | 2002-09-23 11:21:57 +0000 | [diff] [blame] | 207 | #endif /* ndef __CG_SIM_GEN_C */ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 208 | |
| 209 | /*--------------------------------------------------------------------*/ |
njn25 | cac76cb | 2002-09-23 11:21:57 +0000 | [diff] [blame] | 210 | /*--- end cg_sim_gen.c ---*/ |
njn | 05b7dfd | 2002-06-12 08:49:04 +0000 | [diff] [blame] | 211 | /*--------------------------------------------------------------------*/ |
| 212 | |