Blame - cachegrind/cg_sim.c - platform/external/valgrind

nethercote

27fc1da

2004-01-04 16:56:57 +0000

[diff] [blame]

1

2

/*--------------------------------------------------------------------*/

3

/*--- Cache simulation cg_sim.c ---*/

4

/*--------------------------------------------------------------------*/

5

6

/*

7

This file is part of Cachegrind, a Valgrind tool for cache

8

profiling programs.

9

sewardj

e4b0bf0

2006-06-05 23:21:15 +0000

[diff] [blame]

10

njn

2bc1012

2005-05-08 02:10:27 +0000

[diff] [blame]

11

njn@valgrind.org

nethercote

27fc1da

2004-01-04 16:56:57 +0000

[diff] [blame]

12

13

This program is free software; you can redistribute it and/or

14

modify it under the terms of the GNU General Public License as

15

published by the Free Software Foundation; either version 2 of the

16

License, or (at your option) any later version.

17

18

This program is distributed in the hope that it will be useful, but

19

WITHOUT ANY WARRANTY; without even the implied warranty of

20

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

21

General Public License for more details.

22

23

You should have received a copy of the GNU General Public License

24

along with this program; if not, write to the Free Software

25

Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA

26

02111-1307, USA.

27

28

The GNU General Public License is contained in the file COPYING.

*/

/* Notes:

- simulates a write-allocate cache

33

- (block --> set) hash function uses simple bit selection

34

- handling of references straddling two cache blocks:

35

- counts as only one cache access (not two)

36

- both blocks hit --> one hit

37

- one block hits, the other misses --> one miss

38

- both blocks miss --> one miss (not two)

39

*/

40

41

typedef struct {

njn

0103de5

2005-10-10 16:49:01 +0000

[diff] [blame]

42

Int size; /* bytes */

43

Int assoc;

44

Int line_size; /* bytes */

Int sets;

Int sets_min_1;

Int assoc_bits;

Int line_size_bits;

Int tag_shift;

Char desc_line[128];

njn

b619ca7

2005-10-10 16:18:09 +0000

[diff] [blame]

51

UWord* tags;

nethercote

27fc1da

2004-01-04 16:56:57 +0000

[diff] [blame]

52

} cache_t2;

53

54

/* By this point, the size/assoc/line_size has been checked. */

55

static void cachesim_initcache(cache_t config, cache_t2* c)

56

{

njn

0103de5

2005-10-10 16:49:01 +0000

[diff] [blame]

57

Int i;

nethercote

27fc1da

2004-01-04 16:56:57 +0000

[diff] [blame]

58

59

c->size = config.size;

60

c->assoc = config.assoc;

61

c->line_size = config.line_size;

62

63

c->sets = (c->size / c->line_size) / c->assoc;

64

c->sets_min_1 = c->sets - 1;

65

c->assoc_bits = VG_(log2)(c->assoc);

66

c->line_size_bits = VG_(log2)(c->line_size);

67

c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);

68

69

if (c->assoc == 1) {

70

VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",

71

c->size, c->line_size);

72

} else {

73

VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",

74

c->size, c->line_size, c->assoc);

75

}

76

njn

b619ca7

2005-10-10 16:18:09 +0000

[diff] [blame]

77

c->tags = VG_(malloc)(sizeof(UWord) * c->sets * c->assoc);

nethercote

27fc1da

2004-01-04 16:56:57 +0000

[diff] [blame]

78

79

for (i = 0; i < c->sets * c->assoc; i++)

c->tags[i] = 0;

}

#if 0
/* Debugging aid (currently compiled out): print every tag in the cache,
   one set per output line, ways in storage order. */
static void print_cache(cache_t2* c)
{
   /* Signed, to match the Int fields (sets, assoc) they are compared with;
      this avoids a signed/unsigned comparison. */
   Int set, way, i;

   /* Note initialisation and update of 'i': it runs over the flat tag
      array while 'set' and 'way' track the two-dimensional position. */
   for (i = 0, set = 0; set < c->sets; set++) {
      for (way = 0; way < c->assoc; way++, i++) {
         VG_(printf)("%16lx ", c->tags[i]);
      }
      VG_(printf)("\n");
   }
}
#endif

/* This is done as a macro rather than as a function that takes the cache_t2
 * as an argument, because passing it in slows things down by a small amount
 * (3-5%) due to all that extra indirection.
 *
 * CACHESIM(L, MISS_TREATMENT) defines:
 *   - a static cache_t2 named L;
 *   - cachesim_<L>_initcache(config), which initialises L;
 *   - cachesim_<L>_doref(a, size, m1, m2), which simulates one reference of
 *     'size' bytes at address 'a'.  Each set is kept in most-recently-used
 *     order: a hit moves the matching tag to slot 0, a miss installs the tag
 *     in slot 0 and drops the tag in the last slot.  MISS_TREATMENT is
 *     executed on a miss and may use a, size, m1 and m2.
 *
 * A reference that straddles two lines looks up both lines but executes
 * MISS_TREATMENT at most once.  The second line is looked up with its own
 * tag, derived from the address of the last byte referenced; it differs
 * from the first line's tag when the first line lives in the last set and
 * the second wraps round to set 0.
 */

#define CACHESIM(L, MISS_TREATMENT)                                         \
   /* The cache and associated bits and pieces. */                          \
   static cache_t2 L;                                                       \
                                                                            \
   static void cachesim_##L##_initcache(cache_t config)                     \
   {                                                                        \
      cachesim_initcache(config, &L);                                       \
   }                                                                        \
                                                                            \
   /* This attribute forces GCC to inline this function, even though it's */\
   /* bigger than its usual limit.  Inlining gains around 5--10% speedup. */\
   __attribute__((always_inline))                                           \
   static __inline__                                                        \
   void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2)      \
   {                                                                        \
      register UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1); \
      register UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
      register UWord tag  = a >> L.tag_shift;                               \
      register UWord tag2;                                                  \
      Int i, j;                                                             \
      Bool is_miss = False;                                                 \
      UWord* set;                                                           \
                                                                            \
      /* First case: word entirely within line. */                          \
      if (set1 == set2) {                                                   \
                                                                            \
         /* Shifting is a bit faster than multiplying */                    \
         set = &(L.tags[set1 << L.assoc_bits]);                             \
                                                                            \
         /* This loop is unrolled for just the first case, which is the */  \
         /* most common.  We can't unroll any further because it would  */  \
         /* screw up if we have a direct-mapped (1-way) cache.          */  \
         if (tag == set[0]) {                                               \
            return;                                                         \
         }                                                                  \
         /* If the tag is one other than the MRU, move it into the MRU  */  \
         /* spot and shuffle the rest down.                             */  \
         for (i = 1; i < L.assoc; i++) {                                    \
            if (tag == set[i]) {                                            \
               for (j = i; j > 0; j--) {                                    \
                  set[j] = set[j - 1];                                      \
               }                                                            \
               set[0] = tag;                                                \
               return;                                                      \
            }                                                               \
         }                                                                  \
                                                                            \
         /* A miss; install this tag as MRU, shuffle rest down. */          \
         for (j = L.assoc - 1; j > 0; j--) {                                \
            set[j] = set[j - 1];                                            \
         }                                                                  \
         set[0] = tag;                                                      \
         MISS_TREATMENT;                                                    \
         return;                                                            \
                                                                            \
      /* Second case: word straddles two lines. */                          \
      /* Nb: this is a fast way of doing ((set1+1) % L.sets) */             \
      } else if (((set1 + 1) & (L.sets_min_1)) == set2) {                   \
         /* Look up the first line. */                                      \
         set = &(L.tags[set1 << L.assoc_bits]);                             \
         if (tag == set[0]) {                                               \
            goto block2;                                                    \
         }                                                                  \
         for (i = 1; i < L.assoc; i++) {                                    \
            if (tag == set[i]) {                                            \
               for (j = i; j > 0; j--) {                                    \
                  set[j] = set[j - 1];                                      \
               }                                                            \
               set[0] = tag;                                                \
               goto block2;                                                 \
            }                                                               \
         }                                                                  \
         for (j = L.assoc - 1; j > 0; j--) {                                \
            set[j] = set[j - 1];                                            \
         }                                                                  \
         set[0] = tag;                                                      \
         is_miss = True;                                                    \
block2:                                                                     \
         /* Look up the second line, using the tag of the last byte     */  \
         /* referenced rather than the tag of the first byte.           */  \
         set  = &(L.tags[set2 << L.assoc_bits]);                            \
         tag2 = (a+size-1) >> L.tag_shift;                                  \
         if (tag2 == set[0]) {                                              \
            goto miss_treatment;                                            \
         }                                                                  \
         for (i = 1; i < L.assoc; i++) {                                    \
            if (tag2 == set[i]) {                                           \
               for (j = i; j > 0; j--) {                                    \
                  set[j] = set[j - 1];                                      \
               }                                                            \
               set[0] = tag2;                                               \
               goto miss_treatment;                                         \
            }                                                               \
         }                                                                  \
         for (j = L.assoc - 1; j > 0; j--) {                                \
            set[j] = set[j - 1];                                            \
         }                                                                  \
         set[0] = tag2;                                                     \
         is_miss = True;                                                    \
miss_treatment:                                                             \
         /* At most one miss is charged, however many lines missed. */      \
         if (is_miss) { MISS_TREATMENT; }                                   \
                                                                            \
      } else {                                                              \
         /* 'a' is a word-sized Addr and the set indices are unsigned,  */  \
         /* so they are printed with %lx and %u respectively.           */  \
         VG_(printf)("addr: %lx size: %u sets: %u %u", a, size, set1, set2);\
         VG_(tool_panic)("item straddles more than two cache sets");        \
      }                                                                     \
      return;                                                               \
   }

/* Instantiate the simulator for the cache named L2: a miss increments the
   counter that m2 points to.  This must come before the I1 and D1
   instantiations, whose miss treatment calls cachesim_L2_doref. */
CACHESIM(L2, (*m2)++ );

207

/* Instantiate the simulator for the cache named I1: a miss increments the
   counter that m1 points to and then replays the same reference (same
   address and size) against L2. */
CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );

208

/* Instantiate the simulator for the cache named D1: a miss increments the
   counter that m1 points to and then replays the same reference (same
   address and size) against L2. */
CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );

209

210

/*--------------------------------------------------------------------*/

211

/*--- end cg_sim.c ---*/

212

/*--------------------------------------------------------------------*/

213