sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1 | |
| 2 | //--------------------------------------------------------------------*/ |
| 3 | //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---*/ |
| 4 | //--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of DHAT, a Valgrind tool for profiling the |
| 8 | heap usage of programs. |
| 9 | |
sewardj | 03f8d3f | 2012-08-05 15:46:46 +0000 | [diff] [blame] | 10 | Copyright (C) 2010-2012 Mozilla Inc |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 11 | |
| 12 | This program is free software; you can redistribute it and/or |
| 13 | modify it under the terms of the GNU General Public License as |
| 14 | published by the Free Software Foundation; either version 2 of the |
| 15 | License, or (at your option) any later version. |
| 16 | |
| 17 | This program is distributed in the hope that it will be useful, but |
| 18 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 20 | General Public License for more details. |
| 21 | |
| 22 | You should have received a copy of the GNU General Public License |
| 23 | along with this program; if not, write to the Free Software |
| 24 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 25 | 02111-1307, USA. |
| 26 | |
| 27 | The GNU General Public License is contained in the file COPYING. |
| 28 | */ |
| 29 | |
| 30 | /* Contributed by Julian Seward <jseward@acm.org> */ |
| 31 | |
| 32 | |
| 33 | #include "pub_tool_basics.h" |
| 34 | #include "pub_tool_libcbase.h" |
| 35 | #include "pub_tool_libcassert.h" |
| 36 | #include "pub_tool_libcprint.h" |
| 37 | #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry) |
| 38 | #include "pub_tool_mallocfree.h" |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 39 | #include "pub_tool_options.h" |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 40 | #include "pub_tool_replacemalloc.h" |
| 41 | #include "pub_tool_tooliface.h" |
| 42 | #include "pub_tool_wordfm.h" |
| 43 | |
// Blocks whose requested size exceeds this many bytes get no
// per-byte access histogram, to bound the tool's memory overhead.
#define HISTOGRAM_SIZE_LIMIT 1024
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 45 | |
| 46 | |
| 47 | //------------------------------------------------------------// |
| 48 | //--- Globals ---// |
| 49 | //------------------------------------------------------------// |
| 50 | |
// Number of guest instructions executed so far.  This is
// incremented directly from the generated code.
static ULong g_guest_instrs_executed = 0;

// Summary statistics for the entire run.
static ULong g_tot_blocks = 0;   // total blocks allocated
static ULong g_tot_bytes  = 0;   // total bytes allocated

static ULong g_cur_blocks_live = 0; // curr # blocks live
static ULong g_cur_bytes_live  = 0; // curr # bytes live

// Note: g_max_blocks_live is the number of blocks live at the point
// of maximum *byte* liveness, not the all-time maximum block count.
static ULong g_max_blocks_live = 0; // bytes and blocks at
static ULong g_max_bytes_live  = 0; // the max residency point
| 65 | |
| 66 | //------------------------------------------------------------// |
| 67 | //--- an Interval Tree of live blocks ---// |
| 68 | //------------------------------------------------------------// |
| 69 | |
/* Tracks information about live blocks. */
typedef
   struct {
      Addr payload;        /* start address of the client payload */
      SizeT req_szB;       /* client-requested size; always > 0 */
      ExeContext* ap;      /* allocation ec */
      ULong allocd_at;     /* instruction number */
      ULong n_reads;       /* total bytes read from this block */
      ULong n_writes;      /* total bytes written to this block */
      /* Approx histogram, one 16-bit counter per payload byte.
         Counts latch up therefore at 0xFFFF.  Can be NULL if the
         block is resized or if the block is larger than
         HISTOGRAM_SIZE_LIMIT. */
      UShort* histoW; /* [0 .. req_szB-1] */
   }
   Block;
| 85 | |
/* May not contain zero-sized blocks.  May not contain
   overlapping blocks.  Keys are Block*; no values are stored
   (lookups assert the value word is 0). */
static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
| 89 | |
| 90 | /* Here's the comparison function. Since the tree is required |
| 91 | to contain non-zero sized, non-overlapping blocks, it's good |
| 92 | enough to consider any overlap as a match. */ |
| 93 | static Word interval_tree_Cmp ( UWord k1, UWord k2 ) |
| 94 | { |
| 95 | Block* b1 = (Block*)k1; |
| 96 | Block* b2 = (Block*)k2; |
| 97 | tl_assert(b1->req_szB > 0); |
| 98 | tl_assert(b2->req_szB > 0); |
| 99 | if (b1->payload + b1->req_szB <= b2->payload) return -1; |
| 100 | if (b2->payload + b2->req_szB <= b1->payload) return 1; |
| 101 | return 0; |
| 102 | } |
| 103 | |
// 2-entry cache for find_Block_containing; slot 0 holds the most
// recently found block.  Both entries must be cleared whenever the
// interval tree is modified (add/delete), since they may go stale.
static Block* fbc_cache0 = NULL;
static Block* fbc_cache1 = NULL;

// hit/miss counters for the cache above
static UWord stats__n_fBc_cached = 0;
static UWord stats__n_fBc_uncached = 0;
static UWord stats__n_fBc_notfound = 0;
| 111 | |
/* Find the live Block whose payload contains address 'a', or NULL
   if there is none.  Consults the 2-entry MRU cache before falling
   back to an interval-tree search. */
static Block* find_Block_containing ( Addr a )
{
   // Fast path: most-recently-used entry first.
   if (LIKELY(fbc_cache0
              && fbc_cache0->payload <= a
              && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
      // found at 0
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   if (LIKELY(fbc_cache1
              && fbc_cache1->payload <= a
              && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
      // found at 1; swap 0 and 1
      Block* tmp = fbc_cache0;
      fbc_cache0 = fbc_cache1;
      fbc_cache1 = tmp;
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   // Slow path: probe the tree with a 1-byte fake block at 'a';
   // interval_tree_Cmp treats any overlap as a match, so this finds
   // whichever real block contains 'a'.
   Block fake;
   fake.payload = a;
   fake.req_szB = 1;
   UWord foundkey = 1;
   UWord foundval = 1;
   Bool found = VG_(lookupFM)( interval_tree,
                               &foundkey, &foundval, (UWord)&fake );
   if (!found) {
      stats__n_fBc_notfound++;
      return NULL;
   }
   tl_assert(foundval == 0); // we don't store vals in the interval tree
   tl_assert(foundkey != 1);
   Block* res = (Block*)foundkey;
   tl_assert(res != &fake);
   // put at the top position
   fbc_cache1 = fbc_cache0;
   fbc_cache0 = res;
   stats__n_fBc_uncached++;
   return res;
}
| 152 | |
| 153 | // delete a block; asserts if not found. (viz, 'a' must be |
| 154 | // known to be present.) |
| 155 | static void delete_Block_starting_at ( Addr a ) |
| 156 | { |
| 157 | Block fake; |
| 158 | fake.payload = a; |
| 159 | fake.req_szB = 1; |
| 160 | Bool found = VG_(delFromFM)( interval_tree, |
| 161 | NULL, NULL, (Addr)&fake ); |
| 162 | tl_assert(found); |
sewardj | 41aa79d | 2010-12-06 10:56:09 +0000 | [diff] [blame] | 163 | fbc_cache0 = fbc_cache1 = NULL; |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 164 | } |
| 165 | |
| 166 | |
| 167 | //------------------------------------------------------------// |
| 168 | //--- a FM of allocation points (APs) ---// |
| 169 | //------------------------------------------------------------// |
| 170 | |
typedef
   struct {
      // the allocation point that we're summarising stats for
      ExeContext* ap;
      // used when printing results
      Bool shown;
      // The current number of blocks and bytes live for this AP
      ULong cur_blocks_live;
      ULong cur_bytes_live;
      // The number of blocks and bytes live at the max-liveness
      // point.  Note this is a bit subtle.  max_blocks_live is not
      // the maximum number of live blocks, but rather the number of
      // blocks live at the point of maximum byte liveness.  These are
      // not necessarily the same thing.
      ULong max_blocks_live;
      ULong max_bytes_live;
      // Total number of blocks and bytes allocated by this AP.
      ULong tot_blocks;
      ULong tot_bytes;
      // Sum of death ages (in guest instructions) for all blocks
      // allocated by this AP, that have subsequently been freed.
      // Mean death age = death_ages_sum / deaths.
      ULong death_ages_sum;
      ULong deaths;
      // Total number of reads and writes in all blocks allocated
      // by this AP.
      ULong n_reads;
      ULong n_writes;
      /* Histogram information.  We maintain a histogram aggregated for
         all retiring Blocks allocated by this AP, but only if:
         - this AP has only ever allocated objects of one size
         - that size is <= HISTOGRAM_SIZE_LIMIT
         What we need therefore is a mechanism to see if this AP
         has only ever allocated blocks of one size.

         3 states:
            Unknown          because no retirement yet
            Exactly xsize    all retiring blocks are of this size
            Mixed            multiple different sizes seen
      */
      // NOTE(review): Unknown=999 presumably so that a zero-filled
      // tag (e.g. from memset) matches no valid state -- confirm.
      enum { Unknown=999, Exactly, Mixed } xsize_tag;
      SizeT xsize;
      UInt* histo; /* [0 .. xsize-1] */
   }
   APInfo;

/* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
   .ap field in the values. */
static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */
| 219 | |
| 220 | |
/* 'bk' is being introduced (has just been allocated).  Find the
   relevant APInfo entry for it, or create one, based on the block's
   allocation EC.  Then, update the APInfo to the extent that we
   actually can, to reflect the allocation. */
static void intro_Block ( Block* bk )
{
   tl_assert(bk);
   tl_assert(bk->ap);

   APInfo* api = NULL;
   UWord keyW = 0;
   UWord valW = 0;
   Bool found = VG_(lookupFM)( apinfo,
                               &keyW, &valW, (UWord)bk->ap );
   if (found) {
      api = (APInfo*)valW;
      tl_assert(keyW == (UWord)bk->ap);
   } else {
      // First allocation ever seen from this AP: create a zeroed
      // APInfo for it and register it in the map.
      api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
      VG_(memset)(api, 0, sizeof(*api));
      api->ap = bk->ap;
      Bool present = VG_(addToFM)( apinfo,
                                   (UWord)bk->ap, (UWord)api );
      tl_assert(!present);
      // histo stuff: no block from this AP has retired yet, so its
      // common size (xsize) is not yet known.
      tl_assert(api->deaths == 0);
      api->xsize_tag = Unknown;
      api->xsize = 0;
      if (0) VG_(printf)("api %p --> Unknown\n", api);
   }

   tl_assert(api->ap == bk->ap);

   /* So: update stats to reflect an allocation */

   // # live blocks
   api->cur_blocks_live++;

   // # live bytes
   api->cur_bytes_live += bk->req_szB;
   if (api->cur_bytes_live > api->max_bytes_live) {
      // new byte-liveness high-water mark for this AP; snapshot the
      // block count at the same moment (see APInfo comment)
      api->max_bytes_live  = api->cur_bytes_live;
      api->max_blocks_live = api->cur_blocks_live;
   }

   // total blocks and bytes allocated here
   api->tot_blocks++;
   api->tot_bytes += bk->req_szB;

   // update summary globals
   g_tot_blocks++;
   g_tot_bytes += bk->req_szB;

   g_cur_blocks_live++;
   g_cur_bytes_live += bk->req_szB;
   if (g_cur_bytes_live > g_max_bytes_live) {
      g_max_bytes_live = g_cur_bytes_live;
      g_max_blocks_live = g_cur_blocks_live;
   }
}
| 281 | |
| 282 | |
/* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
   it, which must already exist.  Then, fold info from 'bk' into that
   entry.  'because_freed' is True if the block is retiring because
   the client has freed it.  If it is False then the block is retiring
   because the program has finished, in which case we want to skip the
   updates of the total blocks live etc for this AP, but still fold in
   the access counts and histo data that have so far accumulated for
   the block. */
static void retire_Block ( Block* bk, Bool because_freed )
{
   tl_assert(bk);
   tl_assert(bk->ap);

   APInfo* api = NULL;
   UWord keyW = 0;
   UWord valW = 0;
   Bool found = VG_(lookupFM)( apinfo,
                               &keyW, &valW, (UWord)bk->ap );

   // the entry must exist: intro_Block created it at allocation time
   tl_assert(found);
   api = (APInfo*)valW;
   tl_assert(api->ap == bk->ap);

   // update stats following this free.
   if (0)
   VG_(printf)("ec %p api->c_by_l %llu bk->rszB %llu\n",
               bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);

   // update total blocks live etc for this AP
   if (because_freed) {
      tl_assert(api->cur_blocks_live >= 1);
      tl_assert(api->cur_bytes_live >= bk->req_szB);
      api->cur_blocks_live--;
      api->cur_bytes_live -= bk->req_szB;

      api->deaths++;

      // death age = how many guest instructions the block lived for
      tl_assert(bk->allocd_at <= g_guest_instrs_executed);
      api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);

      // update global summary stats
      tl_assert(g_cur_blocks_live > 0);
      g_cur_blocks_live--;
      tl_assert(g_cur_bytes_live >= bk->req_szB);
      g_cur_bytes_live -= bk->req_szB;
   }

   // access counts
   api->n_reads += bk->n_reads;
   api->n_writes += bk->n_writes;

   // histo stuff.  First, do state transitions for xsize/xsize_tag:
   //   Unknown -> Exactly   on the first retirement (fixes xsize)
   //   Exactly -> Mixed     if a different-sized block retires
   //   Mixed stays Mixed.
   switch (api->xsize_tag) {

      case Unknown:
         tl_assert(api->xsize == 0);
         tl_assert(api->deaths == 1 || api->deaths == 0);
         tl_assert(!api->histo);
         api->xsize_tag = Exactly;
         api->xsize = bk->req_szB;
         if (0) VG_(printf)("api %p --> Exactly(%lu)\n", api, api->xsize);
         // and allocate the histo
         if (bk->histoW) {
            api->histo = VG_(malloc)("dh.main.retire_Block.1",
                                     api->xsize * sizeof(UInt));
            VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
         }
         break;

      case Exactly:
         //tl_assert(api->deaths > 1);
         if (bk->req_szB != api->xsize) {
            if (0) VG_(printf)("api %p --> Mixed(%lu -> %lu)\n",
                               api, api->xsize, bk->req_szB);
            api->xsize_tag = Mixed;
            api->xsize = 0;
            // deallocate the histo, if any
            if (api->histo) {
               VG_(free)(api->histo);
               api->histo = NULL;
            }
         }
         break;

      case Mixed:
         //tl_assert(api->deaths > 1);
         break;

      default:
         tl_assert(0);
   }

   // See if we can fold the histo data from this block into
   // the data for the AP
   if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
      tl_assert(api->xsize == bk->req_szB);
      UWord i;
      for (i = 0; i < api->xsize; i++) {
         // FIXME: do something better in case of overflow of api->histo[..]
         // Right now, at least don't let it overflow/wrap around
         if (api->histo[i] <= 0xFFFE0000)
            api->histo[i] += (UInt)bk->histoW[i];
      }
      if (0) VG_(printf)("fold in, AP = %p\n", api);
   }


   // NOTE(review): dead debug code below references 'histoB', a field
   // that does not exist in Block (the field is 'histoW') -- stale.
#if 0
   if (bk->histoB) {
      VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
      UWord i;
      for (i = 0; i < bk->req_szB; i++)
        VG_(printf)("%u ", (UInt)bk->histoB[i]);
      VG_(printf)("\n");
   } else {
      VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
   }
#endif
}
| 403 | |
| 404 | /* This handles block resizing. When a block with AP 'ec' has a |
| 405 | size change of 'delta', call here to update the APInfo. */ |
| 406 | static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta ) |
| 407 | { |
| 408 | APInfo* api = NULL; |
| 409 | UWord keyW = 0; |
| 410 | UWord valW = 0; |
| 411 | Bool found = VG_(lookupFM)( apinfo, |
| 412 | &keyW, &valW, (UWord)ec ); |
| 413 | |
| 414 | tl_assert(found); |
| 415 | api = (APInfo*)valW; |
| 416 | tl_assert(api->ap == ec); |
| 417 | |
| 418 | if (delta < 0) { |
| 419 | tl_assert(api->cur_bytes_live >= -delta); |
| 420 | tl_assert(g_cur_bytes_live >= -delta); |
| 421 | } |
| 422 | |
| 423 | // adjust current live size |
| 424 | api->cur_bytes_live += delta; |
| 425 | g_cur_bytes_live += delta; |
| 426 | |
| 427 | if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) { |
| 428 | api->max_bytes_live = api->cur_bytes_live; |
| 429 | api->max_blocks_live = api->cur_blocks_live; |
| 430 | } |
| 431 | |
| 432 | // update global summary stats |
| 433 | if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) { |
| 434 | g_max_bytes_live = g_cur_bytes_live; |
| 435 | g_max_blocks_live = g_cur_blocks_live; |
| 436 | } |
sewardj | 41aa79d | 2010-12-06 10:56:09 +0000 | [diff] [blame] | 437 | if (delta > 0) |
| 438 | g_tot_bytes += delta; |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 439 | |
| 440 | // adjust total allocation size |
| 441 | if (delta > 0) |
| 442 | api->tot_bytes += delta; |
| 443 | } |
| 444 | |
| 445 | |
| 446 | //------------------------------------------------------------// |
| 447 | //--- update both Block and APInfos after {m,re}alloc/free ---// |
| 448 | //------------------------------------------------------------// |
| 449 | |
/* Core allocation path shared by the malloc-family wrappers.
   Allocates req_szB bytes aligned to req_alignB on behalf of 'tid',
   optionally zeroing them, records a Block in the interval tree and
   updates AP/global stats via intro_Block.  Returns the payload
   pointer, or NULL on failure.  'p' must be NULL on entry (custom
   allocators are not handled). */
static
void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
                  Bool is_zeroed )
{
   tl_assert(p == NULL); // don't handle custom allocators right now
   SizeT actual_szB /*, slop_szB*/;

   // reject sizes so large they look negative when signed
   if ((SSizeT)req_szB < 0) return NULL;

   if (req_szB == 0)
      req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */

   // Allocate and zero if necessary
   if (!p) {
      p = VG_(cli_malloc)( req_alignB, req_szB );
      if (!p) {
         return NULL;
      }
      if (is_zeroed) VG_(memset)(p, 0, req_szB);
      actual_szB = VG_(malloc_usable_size)(p);
      tl_assert(actual_szB >= req_szB);
      /* slop_szB = actual_szB - req_szB; */
   } else {
      /* slop_szB = 0; */
   }

   // Make new HP_Chunk node, add to malloc_list
   Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
   bk->payload = (Addr)p;
   bk->req_szB = req_szB;
   bk->ap = VG_(record_ExeContext)(tid, 0/*first word delta*/);
   bk->allocd_at = g_guest_instrs_executed;
   bk->n_reads = 0;
   bk->n_writes = 0;
   // set up histogram array, if the block isn't too large
   bk->histoW = NULL;
   if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
      bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
      VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
   }

   // insert into the tree; the find-cache may now be stale, so clear it
   Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
   tl_assert(!present);
   fbc_cache0 = fbc_cache1 = NULL;

   intro_Block(bk);

   if (0) VG_(printf)("ALLOC %ld -> %p\n", req_szB, p);

   return p;
}
| 501 | |
| 502 | static |
| 503 | void die_block ( void* p, Bool custom_free ) |
| 504 | { |
| 505 | tl_assert(!custom_free); // at least for now |
| 506 | |
| 507 | Block* bk = find_Block_containing( (Addr)p ); |
| 508 | |
| 509 | if (!bk) { |
| 510 | return; // bogus free |
| 511 | } |
| 512 | |
| 513 | tl_assert(bk->req_szB > 0); |
| 514 | // assert the block finder is behaving sanely |
| 515 | tl_assert(bk->payload <= (Addr)p); |
| 516 | tl_assert( (Addr)p < bk->payload + bk->req_szB ); |
| 517 | |
| 518 | if (bk->payload != (Addr)p) { |
| 519 | return; // bogus free |
| 520 | } |
| 521 | |
| 522 | if (0) VG_(printf)(" FREE %p %llu\n", |
| 523 | p, g_guest_instrs_executed - bk->allocd_at); |
| 524 | |
sewardj | 41aa79d | 2010-12-06 10:56:09 +0000 | [diff] [blame] | 525 | retire_Block(bk, True/*because_freed*/); |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 526 | |
| 527 | VG_(cli_free)( (void*)bk->payload ); |
| 528 | delete_Block_starting_at( bk->payload ); |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 529 | if (bk->histoW) { |
| 530 | VG_(free)( bk->histoW ); |
| 531 | bk->histoW = NULL; |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 532 | } |
| 533 | VG_(free)( bk ); |
| 534 | } |
| 535 | |
| 536 | |
/* Core realloc path.  Resizes the block starting at 'p_old' to
   'new_req_szB' bytes.  Shrinking reuses the block in place; growing
   allocates a new block, copies the contents and re-inserts it into
   the interval tree.  Returns the (possibly moved) payload, or NULL
   for a bogus realloc or allocation failure. */
static
void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
{
   if (0) VG_(printf)("REALL %p %ld\n", p_old, new_req_szB);
   void* p_new = NULL;

   tl_assert(new_req_szB > 0); // map 0 to 1

   // Find the old block.
   Block* bk = find_Block_containing( (Addr)p_old );
   if (!bk) {
      return NULL;   // bogus realloc
   }

   tl_assert(bk->req_szB > 0);
   // assert the block finder is behaving sanely
   tl_assert(bk->payload <= (Addr)p_old);
   tl_assert( (Addr)p_old < bk->payload + bk->req_szB );

   if (bk->payload != (Addr)p_old) {
      return NULL;   // bogus realloc: not the block's start address
   }

   // Keeping the histogram alive in any meaningful way across
   // block resizing is too darn complicated.  Just throw it away.
   if (bk->histoW) {
      VG_(free)(bk->histoW);
      bk->histoW = NULL;
   }

   // Actually do the allocation, if necessary.
   if (new_req_szB <= bk->req_szB) {

      // New size is smaller or same; block not moved.
      apinfo_change_cur_bytes_live(bk->ap,
                                   (Long)new_req_szB - (Long)bk->req_szB);
      bk->req_szB = new_req_szB;
      return p_old;

   } else {

      // New size is bigger;  make new block, copy shared contents, free old.
      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
      if (!p_new) {
         // Nb: if realloc fails, NULL is returned but the old block is not
         // touched.  What an awful function.
         return NULL;
      }
      tl_assert(p_new != p_old);

      VG_(memcpy)(p_new, p_old, bk->req_szB);
      VG_(cli_free)(p_old);

      // Since the block has moved, we need to re-insert it into the
      // interval tree at the new place.  Do this by removing
      // and re-adding it.
      delete_Block_starting_at( (Addr)p_old );
      // now 'bk' is no longer in the tree, but the Block itself
      // is still alive

      // Update the metadata.
      apinfo_change_cur_bytes_live(bk->ap,
                                   (Long)new_req_szB - (Long)bk->req_szB);
      bk->payload = (Addr)p_new;
      bk->req_szB = new_req_szB;

      // and re-add (this also re-clears the find-cache)
      Bool present
         = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
      tl_assert(!present);
      fbc_cache0 = fbc_cache1 = NULL;

      return p_new;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
| 614 | |
| 615 | |
| 616 | //------------------------------------------------------------// |
| 617 | //--- malloc() et al replacement wrappers ---// |
| 618 | //------------------------------------------------------------// |
| 619 | |
// malloc() replacement.
static void* dh_malloc ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
| 624 | |
// C++ operator new replacement.
static void* dh___builtin_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
| 629 | |
// C++ operator new[] replacement.
static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
| 634 | |
| 635 | static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB ) |
| 636 | { |
| 637 | return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True ); |
| 638 | } |
| 639 | |
// memalign() replacement; honours the client-requested alignment.
static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
{
   return new_block( tid, NULL, szB, alignB, False );
}
| 644 | |
// free() replacement.
static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
{
   die_block( p, /*custom_free*/False );
}
| 649 | |
// C++ operator delete replacement.
static void dh___builtin_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False);
}
| 654 | |
// C++ operator delete[] replacement.
static void dh___builtin_vec_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False );
}
| 659 | |
// realloc() replacement.  Follows the C convention: a NULL old
// pointer behaves like malloc, and a new size of 0 behaves like free.
static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
{
   if (p_old == NULL) {
      return dh_malloc(tid, new_szB);
   }
   if (new_szB == 0) {
      dh_free(tid, p_old);
      return NULL;
   }
   return renew_block(tid, p_old, new_szB);
}
| 671 | |
// malloc_usable_size() replacement.  Not implemented: aborts if the
// client ever calls it.  The commented-out sketch below is from a
// hash-table-based design this tool no longer uses.
static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
{
   tl_assert(0);
   //zz   HP_Chunk* hc = VG_(HT_lookup)( malloc_list, (UWord)p );
   //zz
   //zz   return ( hc ? hc->req_szB + hc->slop_szB : 0 );
}
| 679 | |
| 680 | //------------------------------------------------------------// |
| 681 | //--- memory references ---// |
| 682 | //------------------------------------------------------------// |
| 683 | |
| 684 | static |
| 685 | void inc_histo_for_block ( Block* bk, Addr addr, UWord szB ) |
| 686 | { |
| 687 | UWord i, offMin, offMax1; |
| 688 | offMin = addr - bk->payload; |
| 689 | tl_assert(offMin < bk->req_szB); |
| 690 | offMax1 = offMin + szB; |
| 691 | if (offMax1 > bk->req_szB) |
| 692 | offMax1 = bk->req_szB; |
| 693 | //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB); |
| 694 | for (i = offMin; i < offMax1; i++) { |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 695 | UShort n = bk->histoW[i]; |
| 696 | if (n < 0xFFFF) n++; |
| 697 | bk->histoW[i] = n; |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 698 | } |
| 699 | } |
| 700 | |
| 701 | static VG_REGPARM(2) |
| 702 | void dh_handle_write ( Addr addr, UWord szB ) |
| 703 | { |
| 704 | Block* bk = find_Block_containing(addr); |
| 705 | if (bk) { |
| 706 | bk->n_writes += szB; |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 707 | if (bk->histoW) |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 708 | inc_histo_for_block(bk, addr, szB); |
| 709 | } |
| 710 | } |
| 711 | |
| 712 | static VG_REGPARM(2) |
| 713 | void dh_handle_read ( Addr addr, UWord szB ) |
| 714 | { |
| 715 | Block* bk = find_Block_containing(addr); |
| 716 | if (bk) { |
| 717 | bk->n_reads += szB; |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 718 | if (bk->histoW) |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 719 | inc_histo_for_block(bk, addr, szB); |
| 720 | } |
| 721 | } |
| 722 | |
| 723 | |
| 724 | // Handle reads and writes by syscalls (read == kernel |
| 725 | // reads user space, write == kernel writes user space). |
| 726 | // Assumes no such read or write spans a heap block |
| 727 | // boundary and so we can treat it just as one giant |
| 728 | // read or write. |
| 729 | static |
| 730 | void dh_handle_noninsn_read ( CorePart part, ThreadId tid, Char* s, |
| 731 | Addr base, SizeT size ) |
| 732 | { |
| 733 | switch (part) { |
| 734 | case Vg_CoreSysCall: |
| 735 | dh_handle_read(base, size); |
| 736 | break; |
| 737 | case Vg_CoreSysCallArgInMem: |
| 738 | break; |
| 739 | case Vg_CoreTranslate: |
| 740 | break; |
| 741 | default: |
| 742 | tl_assert(0); |
| 743 | } |
| 744 | } |
| 745 | |
| 746 | static |
| 747 | void dh_handle_noninsn_write ( CorePart part, ThreadId tid, |
| 748 | Addr base, SizeT size ) |
| 749 | { |
| 750 | switch (part) { |
| 751 | case Vg_CoreSysCall: |
| 752 | dh_handle_write(base, size); |
| 753 | break; |
| 754 | case Vg_CoreSignal: |
| 755 | break; |
| 756 | default: |
| 757 | tl_assert(0); |
| 758 | } |
| 759 | } |
| 760 | |
| 761 | |
| 762 | //------------------------------------------------------------// |
| 763 | //--- Instrumentation ---// |
| 764 | //------------------------------------------------------------// |
| 765 | |
sewardj | 41aa79d | 2010-12-06 10:56:09 +0000 | [diff] [blame] | 766 | #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2)) |
| 767 | #define mkexpr(_tmp) IRExpr_RdTmp((_tmp)) |
| 768 | #define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) |
| 769 | #define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) |
| 770 | #define assign(_t, _e) IRStmt_WrTmp((_t), (_e)) |
| 771 | |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 772 | static |
| 773 | void add_counter_update(IRSB* sbOut, Int n) |
| 774 | { |
| 775 | #if defined(VG_BIGENDIAN) |
| 776 | # define END Iend_BE |
| 777 | #elif defined(VG_LITTLEENDIAN) |
| 778 | # define END Iend_LE |
| 779 | #else |
| 780 | # error "Unknown endianness" |
| 781 | #endif |
| 782 | // Add code to increment 'g_guest_instrs_executed' by 'n', like this: |
| 783 | // WrTmp(t1, Load64(&g_guest_instrs_executed)) |
| 784 | // WrTmp(t2, Add64(RdTmp(t1), Const(n))) |
| 785 | // Store(&g_guest_instrs_executed, t2) |
| 786 | IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64); |
| 787 | IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64); |
| 788 | IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed ); |
| 789 | |
sewardj | 41aa79d | 2010-12-06 10:56:09 +0000 | [diff] [blame] | 790 | IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr)); |
| 791 | IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n))); |
| 792 | IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2)); |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 793 | |
| 794 | addStmtToIRSB( sbOut, st1 ); |
| 795 | addStmtToIRSB( sbOut, st2 ); |
| 796 | addStmtToIRSB( sbOut, st3 ); |
| 797 | } |
| 798 | |
/* Emit IR into 'sbOut' which calls dh_handle_read or dh_handle_write
   (per 'isWrite') for an access of 'szB' bytes at address expression
   'addr'.  'goff_sp' is the guest-state offset of the stack pointer;
   the emitted call is guarded so that accesses near SP are skipped,
   since they cannot be heap accesses. */
static
void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
                 Int goff_sp)
{
   IRType tyAddr = Ity_INVALID;
   HChar* hName = NULL;
   void* hAddr = NULL;
   IRExpr** argv = NULL;
   IRDirty* di = NULL;

   // Window around SP within which accesses are treated as
   // stack accesses and hence not reported.
   const Int THRESH = 4096 * 4; // somewhat arbitrary
   const Int rz_szB = VG_STACK_REDZONE_SZB;

   tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

   // Select the handler to call.
   if (isWrite) {
      hName = "dh_handle_write";
      hAddr = &dh_handle_write;
   } else {
      hName = "dh_handle_read";
      hAddr = &dh_handle_read;
   }

   argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );

   /* Add the helper. */
   tl_assert(hName);
   tl_assert(hAddr);
   tl_assert(argv);
   di = unsafeIRDirty_0_N( 2/*regparms*/,
                           hName, VG_(fnptr_to_fnentry)( hAddr ),
                           argv );

   /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
      some arbitrary N.  If that fails then addr is in the range (SP -
      RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
      addr is within a page of SP and so can't possibly be a heap
      access, and so can be skipped. */
   // sp := current guest SP
   IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));

   // sp_minus_rz := SP - redzone (the true lower bound of live stack)
   IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(sp_minus_rz,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
                : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
   );

   // diff := addr - (SP - RZ); unsigned comparison below makes the
   // guard a single range check.
   IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(diff,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
                : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
   );

   // guard := THRESH <u diff  (i.e. the access is NOT near SP)
   IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
   addStmtToIRSB(
      sbOut,
      assign(guard,
             tyAddr == Ity_I32
                ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
                : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
   );
   di->guard = mkexpr(guard);

   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
}
| 871 | |
/* The instrumentation pass.  Walks the incoming superblock 'sbIn'
   and produces an output superblock which is 'sbIn' plus:
   - updates of g_guest_instrs_executed (one per run of IMarks), and
   - guarded calls to dh_handle_read/dh_handle_write (via addMemEvent)
     for every memory access the block performs. */
static
IRSB* dh_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   Int i, n = 0;
   IRSB* sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;

   // Guest-state offset of the stack pointer, needed by addMemEvent
   // to filter out stack accesses.
   const Int goff_sp = layout->offset_SP;

   // We increment the instruction count in two places:
   // - just before any Ist_Exit statements;
   // - just before the IRSB's end.
   // In the former case, we zero 'n' and then continue instrumenting.

   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];

      if (!st || st->tag == Ist_NoOp) continue;

      switch (st->tag) {

         case Ist_IMark: {
            // One guest instruction; count it.
            n++;
            break;
         }

         case Ist_Exit: {
            if (n > 0) {
               // Add an increment before the Exit statement, then reset 'n'.
               add_counter_update(sbOut, n);
               n = 0;
            }
            break;
         }

         case Ist_WrTmp: {
            // A load appears as a WrTmp whose rhs is an Iex_Load.
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored. I guess
               // that's not interesting.
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(data->Iex.Load.ty),
                            aexpr, goff_sp );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addMemEvent( sbOut, True/*isWrite*/,
                         sizeofIRType(typeOfIRExpr(tyenv, data)),
                         aexpr, goff_sp );
            break;
         }

         case Ist_Dirty: {
            // Dirty helpers may read/write/modify memory; report
            // their declared access, if any.
            Int dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory. Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, False/*!isWrite*/,
                               dataSize, d->mAddr, goff_sp );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, True/*isWrite*/,
                               dataSize, d->mAddr, goff_sp );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location. I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            addMemEvent( sbOut, False/*!isWrite*/,
                         dataSize, cas->addr, goff_sp );
            addMemEvent( sbOut, True/*isWrite*/,
                         dataSize, cas->addr, goff_sp );
            break;
         }

         case Ist_LLSC: {
            // Load-linked counts as a read; store-conditional as a
            // write (its success/failure is not modelled here).
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addMemEvent( sbOut, True/*isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            }
            break;
         }

         default:
            break;
      }

      // Always pass the original statement through unchanged.
      addStmtToIRSB( sbOut, st );
   }

   if (n > 0) {
      // Add an increment before the SB end.
      add_counter_update(sbOut, n);
   }
   return sbOut;
}
| 1018 | |
sewardj | 41aa79d | 2010-12-06 10:56:09 +0000 | [diff] [blame] | 1019 | #undef binop |
| 1020 | #undef mkexpr |
| 1021 | #undef mkU32 |
| 1022 | #undef mkU64 |
| 1023 | #undef assign |
| 1024 | |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1025 | |
| 1026 | //------------------------------------------------------------// |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 1027 | //--- Command line args ---// |
| 1028 | //------------------------------------------------------------// |
| 1029 | |
| 1030 | // FORWARDS |
| 1031 | static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*), |
| 1032 | /*OUT*/Bool* increasingP, |
| 1033 | Char* metric_name ); |
| 1034 | |
| 1035 | static Int clo_show_top_n = 10; |
| 1036 | static HChar* clo_sort_by = "max-bytes-live"; |
| 1037 | |
/* Process one DHAT-specific command line option.  Returns True if
   the option was accepted, False otherwise (which causes the core to
   complain about it).  Unrecognised options are delegated to the
   malloc-replacement option handler. */
static Bool dh_process_cmd_line_option(Char* arg)
{
   // The VG_*_CLO macros expand to an 'if' condition which both
   // matches the option name and assigns the parsed value.
   if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}

   else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
      // Validate the metric name now, so bad values are rejected at
      // startup rather than at shutdown.
      ULong (*dummyFn)(APInfo*);
      Bool dummyB;
      Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
      if (!ok)
         return False;
      // otherwise it's OK, in which case leave it alone.
      // show_top_n_apinfos will later convert the string by a
      // second call to identify_metric.
   }

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;
}
| 1058 | |
| 1059 | |
/* Print DHAT's command-line option summary (shown by --help). */
static void dh_print_usage(void)
{
   VG_(printf)(
"   --show-top-n=number show the top <number> alloc points [10]\n"
"   --sort-by=string\n"
" sort the allocation points by the metric\n"
" defined by <string>, thusly:\n"
" max-bytes-live maximum live bytes [default]\n"
" tot-bytes-allocd total allocation (turnover)\n"
" max-blocks-live maximum live blocks\n"
   );
}
| 1072 | |
/* Print DHAT's debug-option summary (shown by --help-debug).
   DHAT currently has no debug options. */
static void dh_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
| 1079 | |
| 1080 | |
| 1081 | //------------------------------------------------------------// |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1082 | //--- Finalisation ---// |
| 1083 | //------------------------------------------------------------// |
| 1084 | |
| 1085 | static void show_N_div_100( /*OUT*/HChar* buf, ULong n ) |
| 1086 | { |
| 1087 | ULong nK = n / 100; |
| 1088 | ULong nR = n % 100; |
| 1089 | VG_(sprintf)(buf, "%llu.%s%llu", nK, |
| 1090 | nR < 10 ? "0" : "", |
| 1091 | nR); |
| 1092 | } |
| 1093 | |
| 1094 | static void show_APInfo ( APInfo* api ) |
| 1095 | { |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 1096 | HChar bufA[80]; |
| 1097 | VG_(memset)(bufA, 0, sizeof(bufA)); |
| 1098 | if (api->tot_blocks > 0) { |
| 1099 | show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL) |
| 1100 | / (ULong)api->tot_blocks ); |
| 1101 | } else { |
| 1102 | bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N'; |
| 1103 | } |
| 1104 | |
sewardj | eb795f8 | 2010-10-13 14:04:25 +0000 | [diff] [blame] | 1105 | VG_(umsg)("max-live: %'llu in %'llu blocks\n", |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1106 | api->max_bytes_live, api->max_blocks_live); |
sewardj | eb795f8 | 2010-10-13 14:04:25 +0000 | [diff] [blame] | 1107 | VG_(umsg)("tot-alloc: %'llu in %'llu blocks (avg size %s)\n", |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 1108 | api->tot_bytes, api->tot_blocks, bufA); |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1109 | |
| 1110 | tl_assert(api->tot_blocks >= api->max_blocks_live); |
| 1111 | tl_assert(api->tot_bytes >= api->max_bytes_live); |
| 1112 | |
| 1113 | if (api->deaths > 0) { |
sewardj | 41aa79d | 2010-12-06 10:56:09 +0000 | [diff] [blame] | 1114 | // Average Age at Death |
| 1115 | ULong aad = api->deaths == 0 |
| 1116 | ? 0 : (api->death_ages_sum / api->deaths); |
| 1117 | // AAD as a fraction of the total program lifetime (so far) |
| 1118 | // measured in ten-thousand-ths (aad_frac_10k == 10000 means the |
| 1119 | // complete lifetime of the program. |
| 1120 | ULong aad_frac_10k |
| 1121 | = g_guest_instrs_executed == 0 |
| 1122 | ? 0 : (10000ULL * aad) / g_guest_instrs_executed; |
| 1123 | HChar buf[16]; |
| 1124 | show_N_div_100(buf, aad_frac_10k); |
| 1125 | VG_(umsg)("deaths: %'llu, at avg age %'llu " |
| 1126 | "(%s%% of prog lifetime)\n", |
| 1127 | api->deaths, aad, buf ); |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1128 | } else { |
| 1129 | VG_(umsg)("deaths: none (none of these blocks were freed)\n"); |
| 1130 | } |
| 1131 | |
| 1132 | HChar bufR[80], bufW[80]; |
| 1133 | VG_(memset)(bufR, 0, sizeof(bufR)); |
| 1134 | VG_(memset)(bufW, 0, sizeof(bufW)); |
| 1135 | if (api->tot_bytes > 0) { |
| 1136 | show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes); |
| 1137 | show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes); |
| 1138 | } else { |
| 1139 | VG_(strcat)(bufR, "Inf"); |
| 1140 | VG_(strcat)(bufW, "Inf"); |
| 1141 | } |
| 1142 | |
| 1143 | VG_(umsg)("acc-ratios: %s rd, %s wr " |
| 1144 | " (%'llu b-read, %'llu b-written)\n", |
| 1145 | bufR, bufW, |
| 1146 | api->n_reads, api->n_writes); |
| 1147 | |
| 1148 | VG_(pp_ExeContext)(api->ap); |
| 1149 | |
| 1150 | if (api->histo && api->xsize_tag == Exactly) { |
sewardj | eb795f8 | 2010-10-13 14:04:25 +0000 | [diff] [blame] | 1151 | VG_(umsg)("\nAggregated access counts by offset:\n"); |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1152 | VG_(umsg)("\n"); |
| 1153 | UWord i; |
| 1154 | if (api->xsize > 0) |
| 1155 | VG_(umsg)("[ 0] "); |
| 1156 | for (i = 0; i < api->xsize; i++) { |
| 1157 | if (i > 0 && (i % 16) == 0 && i != api->xsize-1) { |
| 1158 | VG_(umsg)("\n"); |
| 1159 | VG_(umsg)("[%4lu] ", i); |
| 1160 | } |
| 1161 | VG_(umsg)("%u ", api->histo[i]); |
| 1162 | } |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 1163 | VG_(umsg)("\n"); |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1164 | } |
| 1165 | } |
| 1166 | |
| 1167 | |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 1168 | /* Metric-access functions for APInfos. */ |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1169 | static ULong get_metric__max_bytes_live ( APInfo* api ) { |
| 1170 | return api->max_bytes_live; |
| 1171 | } |
| 1172 | static ULong get_metric__tot_bytes ( APInfo* api ) { |
| 1173 | return api->tot_bytes; |
| 1174 | } |
| 1175 | static ULong get_metric__max_blocks_live ( APInfo* api ) { |
| 1176 | return api->max_blocks_live; |
| 1177 | } |
| 1178 | |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 1179 | /* Given a string, return the metric-access function and also a Bool |
| 1180 | indicating whether we want increasing or decreasing values of the |
| 1181 | metric. This is used twice, once in command line processing, and |
| 1182 | then again in show_top_n_apinfos. Returns False if the given |
| 1183 | string could not be identified.*/ |
| 1184 | static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*), |
| 1185 | /*OUT*/Bool* increasingP, |
| 1186 | Char* metric_name ) |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1187 | { |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 1188 | if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) { |
| 1189 | *get_metricP = get_metric__max_bytes_live; |
| 1190 | *increasingP = False; |
| 1191 | return True; |
| 1192 | } |
| 1193 | if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) { |
| 1194 | *get_metricP = get_metric__tot_bytes; |
| 1195 | *increasingP = False; |
| 1196 | return True; |
| 1197 | } |
| 1198 | if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) { |
| 1199 | *get_metricP = get_metric__max_blocks_live; |
| 1200 | *increasingP = False; |
| 1201 | return True; |
| 1202 | } |
| 1203 | return False; |
| 1204 | } |
sewardj | 4d7d8f5 | 2010-10-12 10:09:15 +0000 | [diff] [blame] | 1205 | |
sewardj | 5fd79df | 2010-10-12 18:08:33 +0000 | [diff] [blame] | 1206 | |
/* Print the top 'clo_show_top_n' allocation points, ordered by the
   metric named in 'clo_sort_by'.  Uses the APInfo.shown flag to do a
   repeated selection-scan of the 'apinfo' map: each of the N passes
   picks the best not-yet-shown entry. */
static void show_top_n_apinfos ( void )
{
   Int i;
   UWord keyW, valW;
   ULong (*get_metric)(APInfo*);
   Bool increasing;

   HChar* metric_name = clo_sort_by;
   tl_assert(metric_name); // ensured by clo processing

   Bool ok = identify_metric( &get_metric, &increasing, metric_name );
   tl_assert(ok); // ensured by clo processing

   VG_(umsg)("\n");
   VG_(umsg)("======== ORDERED BY %s \"%s\": "
             "top %d allocators ========\n",
             increasing ? "increasing" : "decreasing",
             metric_name, clo_show_top_n );

   // Clear all .shown bits
   VG_(initIterFM)( apinfo );
   while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
      APInfo* api = (APInfo*)valW;
      tl_assert(api && api->ap == (ExeContext*)keyW);
      api->shown = False;
   }
   VG_(doneIterFM)( apinfo );

   // Now print the top N entries.  Each one requires a
   // complete scan of the set.  Duh.
   for (i = 0; i < clo_show_top_n; i++) {
      // 'increasing' wants the smallest metric first, so start the
      // running best at the appropriate extreme.
      ULong best_metric = increasing ? ~0ULL : 0ULL;
      APInfo* best_api = NULL;

      VG_(initIterFM)( apinfo );
      while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
         APInfo* api = (APInfo*)valW;
         if (api->shown)
            continue;
         ULong metric = get_metric(api);
         if (increasing ? (metric < best_metric) : (metric > best_metric)) {
            best_metric = metric;
            best_api = api;
         }
      }
      VG_(doneIterFM)( apinfo );

      if (!best_api)
         break; // all APIs have been shown.  Stop.

      VG_(umsg)("\n");
      VG_(umsg)("-------------------- %d of %d --------------------\n",
                i+1, clo_show_top_n );
      show_APInfo(best_api);
      best_api->shown = True;
   }

   VG_(umsg)("\n");
}
| 1266 | |
| 1267 | |
/* Tool finalisation: fold still-live blocks into the per-allocation-
   point summaries, then print the global statistics, the top-N
   allocation points, usage hints, and (with --stats) internal
   counters.  'exit_status' is unused. */
static void dh_fini(Int exit_status)
{
   // Before printing statistics, we must harvest access counts for
   // all the blocks that are still alive.  Not doing so gives
   // access ratios which are too low (zero, in the worst case)
   // for such blocks, since the accesses that do get made will
   // (if we skip this step) not get folded into the AP summaries.
   UWord keyW, valW;
   VG_(initIterFM)( interval_tree );
   while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
      Block* bk = (Block*)keyW;
      tl_assert(valW == 0);
      tl_assert(bk);
      retire_Block(bk, False/*!because_freed*/);
   }
   VG_(doneIterFM)( interval_tree );

   // show results
   VG_(umsg)("======== SUMMARY STATISTICS ========\n");
   VG_(umsg)("\n");
   VG_(umsg)("guest_insns: %'llu\n", g_guest_instrs_executed);
   VG_(umsg)("\n");
   VG_(umsg)("max_live: %'llu in %'llu blocks\n",
             g_max_bytes_live, g_max_blocks_live);
   VG_(umsg)("\n");
   VG_(umsg)("tot_alloc: %'llu in %'llu blocks\n",
             g_tot_bytes, g_tot_blocks);
   VG_(umsg)("\n");
   // Guard against division by zero when nothing was allocated.
   if (g_tot_bytes > 0) {
      VG_(umsg)("insns per allocated byte: %'llu\n",
                g_guest_instrs_executed / g_tot_bytes);
      VG_(umsg)("\n");
   }

   show_top_n_apinfos();

   VG_(umsg)("\n");
   VG_(umsg)("\n");
   VG_(umsg)("==============================================================\n");
   VG_(umsg)("\n");
   VG_(umsg)("Some hints: (see --help for command line option details):\n");
   VG_(umsg)("\n");
   VG_(umsg)("* summary stats for whole program are at the top of this output\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --show-top-n= controls how many alloc points are shown.\n");
   VG_(umsg)("  You probably want to set it much higher than\n");
   VG_(umsg)("  the default value (10)\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --sort-by= specifies the sort key for output.\n");
   VG_(umsg)("  See --help for details.\n");
   VG_(umsg)("\n");
   VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
   VG_(umsg)("  a separate alloc point.  This causes the data to be spread out\n");
   VG_(umsg)("  over far too many alloc points.  I strongly suggest using\n");
   VG_(umsg)("  --num-callers=4 or some such, to reduce the spreading.\n");
   VG_(umsg)("\n");

   // Cache effectiveness of find_Block_containing, for tuning.
   if (VG_(clo_stats)) {
      VG_(dmsg)(" dhat: find_Block_containing:\n");
      VG_(dmsg)(" found: %'lu (%'lu cached + %'lu uncached)\n",
                stats__n_fBc_cached + stats__n_fBc_uncached,
                stats__n_fBc_cached,
                stats__n_fBc_uncached);
      VG_(dmsg)(" notfound: %'lu\n", stats__n_fBc_notfound);
      VG_(dmsg)("\n");
   }
}
| 1335 | |
| 1336 | |
| 1337 | //------------------------------------------------------------// |
| 1338 | //--- Initialisation ---// |
| 1339 | //------------------------------------------------------------// |
| 1340 | |
/* Called after command line options are processed.  DHAT has no
   post-option initialisation to do. */
static void dh_post_clo_init(void)
{
}
| 1344 | |
/* Tool registration entry point, run before command line processing.
   Declares tool details, registers the instrumentation/finalisation
   callbacks, the malloc replacements and memory-event trackers, and
   creates the two global maps (interval_tree, apinfo). */
static void dh_pre_clo_init(void)
{
   VG_(details_name) ("DHAT");
   VG_(details_version) (NULL);
   VG_(details_description) ("a dynamic heap analysis tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2010-2012, and GNU GPL'd, by Mozilla Inc");
   VG_(details_bug_reports_to) (VG_BUGS_TO);

   // Basic functions.
   VG_(basic_tool_funcs) (dh_post_clo_init,
                          dh_instrument,
                          dh_fini);
//zz
   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_command_line_options)(dh_process_cmd_line_option,
                                   dh_print_usage,
                                   dh_print_debug_usage);
//zz VG_(needs_client_requests) (dh_handle_client_request);
//zz VG_(needs_sanity_checks) (dh_cheap_sanity_check,
//zz dh_expensive_sanity_check);
   VG_(needs_malloc_replacement) (dh_malloc,
                                  dh___builtin_new,
                                  dh___builtin_vec_new,
                                  dh_memalign,
                                  dh_calloc,
                                  dh_free,
                                  dh___builtin_delete,
                                  dh___builtin_vec_delete,
                                  dh_realloc,
                                  dh_malloc_usable_size,
                                  0 );

   // Non-instruction accesses (e.g. by syscalls).
   VG_(track_pre_mem_read) ( dh_handle_noninsn_read );
   //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_post_mem_write) ( dh_handle_noninsn_write );

   // The global maps must not already exist.
   tl_assert(!interval_tree);
   tl_assert(!fbc_cache0);
   tl_assert(!fbc_cache1);

   // Live blocks, keyed (and ordered) by address interval.
   interval_tree = VG_(newFM)( VG_(malloc),
                               "dh.main.interval_tree.1",
                               VG_(free),
                               interval_tree_Cmp );

   // Per-allocation-point summaries, keyed by ExeContext pointer.
   apinfo = VG_(newFM)( VG_(malloc),
                        "dh.main.apinfo.1",
                        VG_(free),
                        NULL/*unboxedcmp*/ );
}
| 1397 | |
| 1398 | VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init) |
| 1399 | |
| 1400 | //--------------------------------------------------------------------// |
| 1401 | //--- end dh_main.c ---// |
| 1402 | //--------------------------------------------------------------------// |