Blame - cachegrind/cg_sim.c - platform/external/valgrind

nethercote

27fc1da

2004-01-04 16:56:57 +0000

[diff] [blame]

1

2

/*--------------------------------------------------------------------*/
/*--- Cache simulation                                    cg_sim.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Cachegrind, a Valgrind tool for cache
   profiling programs.

      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* Notes:
   - simulates a write-allocate cache
   - (block --> set) hash function uses simple bit selection
   - handling of references straddling two cache blocks:
       - counts as only one cache access (not two)
       - both blocks hit                  --> one hit
       - one block hits, the other misses --> one miss
       - both blocks miss                 --> one miss (not two)
*/

40

41

typedef struct {

njn

0103de5

2005-10-10 16:49:01 +0000

[diff] [blame]

42

Int size; /* bytes */

43

Int assoc;

44

Int line_size; /* bytes */

Int sets;

Int sets_min_1;

Int assoc_bits;

Int line_size_bits;

Int tag_shift;

Char desc_line[128];

njn

b619ca7

2005-10-10 16:18:09 +0000

[diff] [blame]

51

UWord* tags;

nethercote

27fc1da

2004-01-04 16:56:57 +0000

[diff] [blame]

52

} cache_t2;

53

54

/* By this point, the size/assoc/line_size has been checked. */

55

static void cachesim_initcache(cache_t config, cache_t2* c)

56

{

njn

0103de5

2005-10-10 16:49:01 +0000

[diff] [blame]

57

Int i;

nethercote

27fc1da

2004-01-04 16:56:57 +0000

[diff] [blame]

58

59

c->size = config.size;

60

c->assoc = config.assoc;

61

c->line_size = config.line_size;

62

63

c->sets = (c->size / c->line_size) / c->assoc;

64

c->sets_min_1 = c->sets - 1;

65

c->assoc_bits = VG_(log2)(c->assoc);

66

c->line_size_bits = VG_(log2)(c->line_size);

67

c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);

68

69

if (c->assoc == 1) {

70

VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",

71

c->size, c->line_size);

72

} else {

73

VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",

74

c->size, c->line_size, c->assoc);

75

}

76

sewardj

9c606bd

2008-09-18 18:12:50 +0000

[diff] [blame^]

77

c->tags = VG_(malloc)("cg.sim.ci.1",

78

sizeof(UWord) * c->sets * c->assoc);

nethercote

27fc1da

2004-01-04 16:56:57 +0000

[diff] [blame]

79

80

for (i = 0; i < c->sets * c->assoc; i++)

c->tags[i] = 0;

}

nethercote

2004-01-04 16:56:57 +0000

[diff] [blame]

84

/* CACHESIM(L, MISS_TREATMENT) generates a complete simulator for one
 * cache whose state lives in a file-static cache_t2 named L:
 *
 *    cachesim_<L>_initcache(config)       initialise L from 'config'
 *    cachesim_<L>_doref(a, size, m1, m2)  simulate one reference to the
 *                                         'size' bytes starting at 'a'
 *
 * MISS_TREATMENT is pasted into doref and runs at most once per reference,
 * when at least one of the (at most two) lines touched by the reference
 * misses.  It may use doref's parameters a, size, m1 and m2.
 *
 * The 'assoc' tags of a set are kept in recency order, most recently used
 * first: a hit moves the tag to index 0, a miss installs the new tag at
 * index 0, and in both cases the entries in front of it shuffle down one
 * place (on a miss the last entry is dropped).
 *
 * This is done as a macro rather than by passing in the cache_t2 as an
 * arg because the extra indirection slows things down by a small amount
 * (3-5%). */

#define CACHESIM(L, MISS_TREATMENT)                                         \
   /* The cache and associated bits and pieces. */                          \
   static cache_t2 L;                                                       \
                                                                            \
   static void cachesim_##L##_initcache(cache_t config)                     \
   {                                                                        \
      cachesim_initcache(config, &L);                                       \
   }                                                                        \
                                                                            \
   /* always_inline makes GCC inline this function even though it is */     \
   /* bigger than its usual limit; inlining gains around 5--10%.     */     \
   __attribute__((always_inline))                                           \
   static __inline__                                                        \
   void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2)      \
   {                                                                        \
      UInt   set1   = ( a         >> L.line_size_bits) & (L.sets_min_1);    \
      UInt   set2   = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);    \
      UWord  tag1   = a >> L.tag_shift;                                     \
      UWord  tag2;                                                          \
      UWord* row;                                                           \
      Int    way;                                                           \
      Bool   missed = False;                                                \
                                                                            \
      /* First case: reference lies entirely within one line. */            \
      if (set1 == set2) {                                                   \
         /* Shifting is a bit faster than multiplying by assoc. */          \
         row = L.tags + (set1 << L.assoc_bits);                             \
                                                                            \
         /* Most common outcome: hit on the MRU entry, nothing to do. */    \
         /* Testing only entry 0 up front stays valid for a 1-way cache. */ \
         if (row[0] == tag1)                                                \
            return;                                                         \
                                                                            \
         /* Find the tag among the other ways; way == assoc: not found. */  \
         way = 1;                                                           \
         while (way < L.assoc && row[way] != tag1)                          \
            way++;                                                          \
         if (way == L.assoc) {                                              \
            missed = True;                                                  \
            way    = L.assoc - 1;                                           \
         }                                                                  \
         /* Put the tag in the MRU slot, shuffling the others down. */      \
         while (way > 0) {                                                  \
            row[way] = row[way - 1];                                        \
            way--;                                                          \
         }                                                                  \
         row[0] = tag1;                                                     \
         if (missed) { MISS_TREATMENT; }                                    \
         return;                                                            \
      }                                                                     \
                                                                            \
      /* Otherwise the reference must end in the following set.  The    */  \
      /* mask is a fast way of computing (set1 + 1) % sets.             */  \
      if (((set1 + 1) & (L.sets_min_1)) != set2) {                          \
         VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);\
         VG_(tool_panic)("item straddles more than two cache sets");        \
         return;                                                            \
      }                                                                     \
                                                                            \
      /* Second case: reference straddles two lines.  Both lines are    */  \
      /* looked up and updated, but at most one miss is reported.       */  \
                                                                            \
      /* Line holding the first byte. */                                    \
      row = L.tags + (set1 << L.assoc_bits);                                \
      if (row[0] != tag1) {                                                 \
         way = 1;                                                           \
         while (way < L.assoc && row[way] != tag1)                          \
            way++;                                                          \
         if (way == L.assoc) {                                              \
            missed = True;                                                  \
            way    = L.assoc - 1;                                           \
         }                                                                  \
         while (way > 0) {                                                  \
            row[way] = row[way - 1];                                        \
            way--;                                                          \
         }                                                                  \
         row[0] = tag1;                                                     \
      }                                                                     \
                                                                            \
      /* Line holding the last byte; its tag comes from that address. */    \
      row  = L.tags + (set2 << L.assoc_bits);                               \
      tag2 = (a+size-1) >> L.tag_shift;                                     \
      if (row[0] != tag2) {                                                 \
         way = 1;                                                           \
         while (way < L.assoc && row[way] != tag2)                          \
            way++;                                                          \
         if (way == L.assoc) {                                              \
            missed = True;                                                  \
            way    = L.assoc - 1;                                           \
         }                                                                  \
         while (way > 0) {                                                  \
            row[way] = row[way - 1];                                        \
            way--;                                                          \
         }                                                                  \
         row[0] = tag2;                                                     \
      }                                                                     \
                                                                            \
      if (missed) { MISS_TREATMENT; }                                       \
      return;                                                               \
   }

/* Instantiate the three simulated caches.

   L2 is generated first because the miss handlers of I1 and D1 call
   cachesim_L2_doref():
     - an L2 miss just bumps the second-level miss counter (*m2);
     - an I1 or D1 miss bumps the first-level miss counter (*m1) and then
       replays the same reference against L2.

   No ';' follows the invocations: every expansion ends with a function
   body, so a trailing ';' would be an empty declaration at file scope,
   which ISO C does not allow. */
CACHESIM(L2, (*m2)++ )
CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } )
CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } )

197

198

/*--------------------------------------------------------------------*/
/*--- end                                                 cg_sim.c ---*/
/*--------------------------------------------------------------------*/

201