blob: c23134b2c6006ea68408d081f7bf216ada1dceb0 [file] [log] [blame]
sewardj4d7d8f52010-10-12 10:09:15 +00001
2//--------------------------------------------------------------------*/
3//--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---*/
4//--------------------------------------------------------------------*/
5
6/*
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
9
Elliott Hughesed398002017-06-21 14:41:24 -070010 Copyright (C) 2010-2017 Mozilla Inc
sewardj4d7d8f52010-10-12 10:09:15 +000011
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 02111-1307, USA.
26
27 The GNU General Public License is contained in the file COPYING.
28*/
29
30/* Contributed by Julian Seward <jseward@acm.org> */
31
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcbase.h"
35#include "pub_tool_libcassert.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
38#include "pub_tool_mallocfree.h"
sewardj5fd79df2010-10-12 18:08:33 +000039#include "pub_tool_options.h"
sewardj4d7d8f52010-10-12 10:09:15 +000040#include "pub_tool_replacemalloc.h"
41#include "pub_tool_tooliface.h"
42#include "pub_tool_wordfm.h"
43
sewardj4e437402011-01-10 14:56:59 +000044#define HISTOGRAM_SIZE_LIMIT 1024
sewardj4d7d8f52010-10-12 10:09:15 +000045
46
47//------------------------------------------------------------//
48//--- Globals ---//
49//------------------------------------------------------------//
50
51// Number of guest instructions executed so far. This is
52// incremented directly from the generated code.
53static ULong g_guest_instrs_executed = 0;
54
55// Summary statistics for the entire run.
56static ULong g_tot_blocks = 0; // total blocks allocated
57static ULong g_tot_bytes = 0; // total bytes allocated
58
59static ULong g_cur_blocks_live = 0; // curr # blocks live
60static ULong g_cur_bytes_live = 0; // curr # bytes live
61
62static ULong g_max_blocks_live = 0; // bytes and blocks at
63static ULong g_max_bytes_live = 0; // the max residency point
64
65
66//------------------------------------------------------------//
67//--- an Interval Tree of live blocks ---//
68//------------------------------------------------------------//
69
70/* Tracks information about live blocks. */
71typedef
72 struct {
73 Addr payload;
74 SizeT req_szB;
75 ExeContext* ap; /* allocation ec */
76 ULong allocd_at; /* instruction number */
77 ULong n_reads;
78 ULong n_writes;
79 /* Approx histogram, one byte per payload byte. Counts latch up
sewardj5fd79df2010-10-12 18:08:33 +000080 therefore at 0xFFFF. Can be NULL if the block is resized or if
sewardj4d7d8f52010-10-12 10:09:15 +000081 the block is larger than HISTOGRAM_SIZE_LIMIT. */
sewardj5fd79df2010-10-12 18:08:33 +000082 UShort* histoW; /* [0 .. req_szB-1] */
sewardj4d7d8f52010-10-12 10:09:15 +000083 }
84 Block;
85
86/* May not contain zero-sized blocks. May not contain
87 overlapping blocks. */
88static WordFM* interval_tree = NULL; /* WordFM* Block* void */
89
90/* Here's the comparison function. Since the tree is required
91to contain non-zero sized, non-overlapping blocks, it's good
92enough to consider any overlap as a match. */
93static Word interval_tree_Cmp ( UWord k1, UWord k2 )
94{
95 Block* b1 = (Block*)k1;
96 Block* b2 = (Block*)k2;
97 tl_assert(b1->req_szB > 0);
98 tl_assert(b2->req_szB > 0);
99 if (b1->payload + b1->req_szB <= b2->payload) return -1;
100 if (b2->payload + b2->req_szB <= b1->payload) return 1;
101 return 0;
102}
103
sewardj41aa79d2010-12-06 10:56:09 +0000104// 2-entry cache for find_Block_containing
105static Block* fbc_cache0 = NULL;
106static Block* fbc_cache1 = NULL;
107
108static UWord stats__n_fBc_cached = 0;
109static UWord stats__n_fBc_uncached = 0;
110static UWord stats__n_fBc_notfound = 0;
111
sewardj4d7d8f52010-10-12 10:09:15 +0000112static Block* find_Block_containing ( Addr a )
113{
sewardj41aa79d2010-12-06 10:56:09 +0000114 if (LIKELY(fbc_cache0
115 && fbc_cache0->payload <= a
116 && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
117 // found at 0
118 stats__n_fBc_cached++;
119 return fbc_cache0;
120 }
121 if (LIKELY(fbc_cache1
122 && fbc_cache1->payload <= a
123 && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
124 // found at 1; swap 0 and 1
125 Block* tmp = fbc_cache0;
126 fbc_cache0 = fbc_cache1;
127 fbc_cache1 = tmp;
128 stats__n_fBc_cached++;
129 return fbc_cache0;
130 }
sewardj4d7d8f52010-10-12 10:09:15 +0000131 Block fake;
132 fake.payload = a;
133 fake.req_szB = 1;
134 UWord foundkey = 1;
135 UWord foundval = 1;
136 Bool found = VG_(lookupFM)( interval_tree,
137 &foundkey, &foundval, (UWord)&fake );
sewardj41aa79d2010-12-06 10:56:09 +0000138 if (!found) {
139 stats__n_fBc_notfound++;
sewardj4d7d8f52010-10-12 10:09:15 +0000140 return NULL;
sewardj41aa79d2010-12-06 10:56:09 +0000141 }
sewardj4d7d8f52010-10-12 10:09:15 +0000142 tl_assert(foundval == 0); // we don't store vals in the interval tree
143 tl_assert(foundkey != 1);
sewardjeb795f82010-10-13 14:04:25 +0000144 Block* res = (Block*)foundkey;
145 tl_assert(res != &fake);
sewardj41aa79d2010-12-06 10:56:09 +0000146 // put at the top position
147 fbc_cache1 = fbc_cache0;
148 fbc_cache0 = res;
149 stats__n_fBc_uncached++;
sewardjeb795f82010-10-13 14:04:25 +0000150 return res;
sewardj4d7d8f52010-10-12 10:09:15 +0000151}
152
153// delete a block; asserts if not found. (viz, 'a' must be
154// known to be present.)
155static void delete_Block_starting_at ( Addr a )
156{
157 Block fake;
158 fake.payload = a;
159 fake.req_szB = 1;
160 Bool found = VG_(delFromFM)( interval_tree,
161 NULL, NULL, (Addr)&fake );
162 tl_assert(found);
sewardj41aa79d2010-12-06 10:56:09 +0000163 fbc_cache0 = fbc_cache1 = NULL;
sewardj4d7d8f52010-10-12 10:09:15 +0000164}
165
166
167//------------------------------------------------------------//
168//--- a FM of allocation points (APs) ---//
169//------------------------------------------------------------//
170
171typedef
172 struct {
173 // the allocation point that we're summarising stats for
174 ExeContext* ap;
175 // used when printing results
176 Bool shown;
177 // The current number of blocks and bytes live for this AP
178 ULong cur_blocks_live;
179 ULong cur_bytes_live;
180 // The number of blocks and bytes live at the max-liveness
181 // point. Note this is a bit subtle. max_blocks_live is not
182 // the maximum number of live blocks, but rather the number of
183 // blocks live at the point of maximum byte liveness. These are
184 // not necessarily the same thing.
185 ULong max_blocks_live;
186 ULong max_bytes_live;
187 // Total number of blocks and bytes allocated by this AP.
188 ULong tot_blocks;
189 ULong tot_bytes;
190 // Sum of death ages for all blocks allocated by this AP,
191 // that have subsequently been freed.
192 ULong death_ages_sum;
193 ULong deaths;
194 // Total number of reads and writes in all blocks allocated
195 // by this AP.
196 ULong n_reads;
197 ULong n_writes;
198 /* Histogram information. We maintain a histogram aggregated for
199 all retiring Blocks allocated by this AP, but only if:
200 - this AP has only ever allocated objects of one size
201 - that size is <= HISTOGRAM_SIZE_LIMIT
202 What we need therefore is a mechanism to see if this AP
203 has only ever allocated blocks of one size.
204
205 3 states:
206 Unknown because no retirement yet
207 Exactly xsize all retiring blocks are of this size
208 Mixed multiple different sizes seen
209 */
210 enum { Unknown=999, Exactly, Mixed } xsize_tag;
211 SizeT xsize;
212 UInt* histo; /* [0 .. xsize-1] */
213 }
214 APInfo;
215
216/* maps ExeContext*'s to APInfo*'s. Note that the keys must match the
217 .ap field in the values. */
218static WordFM* apinfo = NULL; /* WordFM* ExeContext* APInfo* */
219
220
221/* 'bk' is being introduced (has just been allocated). Find the
222 relevant APInfo entry for it, or create one, based on the block's
223 allocation EC. Then, update the APInfo to the extent that we
224 actually can, to reflect the allocation. */
225static void intro_Block ( Block* bk )
226{
227 tl_assert(bk);
228 tl_assert(bk->ap);
229
230 APInfo* api = NULL;
231 UWord keyW = 0;
232 UWord valW = 0;
233 Bool found = VG_(lookupFM)( apinfo,
234 &keyW, &valW, (UWord)bk->ap );
235 if (found) {
236 api = (APInfo*)valW;
237 tl_assert(keyW == (UWord)bk->ap);
238 } else {
239 api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
240 VG_(memset)(api, 0, sizeof(*api));
241 api->ap = bk->ap;
242 Bool present = VG_(addToFM)( apinfo,
243 (UWord)bk->ap, (UWord)api );
244 tl_assert(!present);
245 // histo stuff
246 tl_assert(api->deaths == 0);
247 api->xsize_tag = Unknown;
248 api->xsize = 0;
sewardj5fd79df2010-10-12 18:08:33 +0000249 if (0) VG_(printf)("api %p --> Unknown\n", api);
sewardj4d7d8f52010-10-12 10:09:15 +0000250 }
251
252 tl_assert(api->ap == bk->ap);
253
254 /* So: update stats to reflect an allocation */
255
256 // # live blocks
257 api->cur_blocks_live++;
258
259 // # live bytes
260 api->cur_bytes_live += bk->req_szB;
261 if (api->cur_bytes_live > api->max_bytes_live) {
262 api->max_bytes_live = api->cur_bytes_live;
263 api->max_blocks_live = api->cur_blocks_live;
264 }
265
266 // total blocks and bytes allocated here
267 api->tot_blocks++;
268 api->tot_bytes += bk->req_szB;
269
270 // update summary globals
271 g_tot_blocks++;
272 g_tot_bytes += bk->req_szB;
273
274 g_cur_blocks_live++;
275 g_cur_bytes_live += bk->req_szB;
276 if (g_cur_bytes_live > g_max_bytes_live) {
277 g_max_bytes_live = g_cur_bytes_live;
278 g_max_blocks_live = g_cur_blocks_live;
279 }
280}
281
282
283/* 'bk' is retiring (being freed). Find the relevant APInfo entry for
284 it, which must already exist. Then, fold info from 'bk' into that
sewardj41aa79d2010-12-06 10:56:09 +0000285 entry. 'because_freed' is True if the block is retiring because
286 the client has freed it. If it is False then the block is retiring
287 because the program has finished, in which case we want to skip the
288 updates of the total blocks live etc for this AP, but still fold in
289 the access counts and histo data that have so far accumulated for
290 the block. */
291static void retire_Block ( Block* bk, Bool because_freed )
sewardj4d7d8f52010-10-12 10:09:15 +0000292{
293 tl_assert(bk);
294 tl_assert(bk->ap);
295
296 APInfo* api = NULL;
297 UWord keyW = 0;
298 UWord valW = 0;
299 Bool found = VG_(lookupFM)( apinfo,
300 &keyW, &valW, (UWord)bk->ap );
301
302 tl_assert(found);
303 api = (APInfo*)valW;
304 tl_assert(api->ap == bk->ap);
305
306 // update stats following this free.
307 if (0)
308 VG_(printf)("ec %p api->c_by_l %llu bk->rszB %llu\n",
309 bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);
310
sewardj41aa79d2010-12-06 10:56:09 +0000311 // update total blocks live etc for this AP
312 if (because_freed) {
313 tl_assert(api->cur_blocks_live >= 1);
314 tl_assert(api->cur_bytes_live >= bk->req_szB);
315 api->cur_blocks_live--;
316 api->cur_bytes_live -= bk->req_szB;
sewardj4d7d8f52010-10-12 10:09:15 +0000317
sewardj41aa79d2010-12-06 10:56:09 +0000318 api->deaths++;
sewardj4d7d8f52010-10-12 10:09:15 +0000319
sewardj41aa79d2010-12-06 10:56:09 +0000320 tl_assert(bk->allocd_at <= g_guest_instrs_executed);
321 api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);
sewardj4d7d8f52010-10-12 10:09:15 +0000322
sewardj41aa79d2010-12-06 10:56:09 +0000323 // update global summary stats
324 tl_assert(g_cur_blocks_live > 0);
325 g_cur_blocks_live--;
326 tl_assert(g_cur_bytes_live >= bk->req_szB);
327 g_cur_bytes_live -= bk->req_szB;
328 }
329
330 // access counts
sewardj4d7d8f52010-10-12 10:09:15 +0000331 api->n_reads += bk->n_reads;
332 api->n_writes += bk->n_writes;
333
sewardj4d7d8f52010-10-12 10:09:15 +0000334 // histo stuff. First, do state transitions for xsize/xsize_tag.
335 switch (api->xsize_tag) {
336
337 case Unknown:
338 tl_assert(api->xsize == 0);
sewardj41aa79d2010-12-06 10:56:09 +0000339 tl_assert(api->deaths == 1 || api->deaths == 0);
sewardj4d7d8f52010-10-12 10:09:15 +0000340 tl_assert(!api->histo);
341 api->xsize_tag = Exactly;
342 api->xsize = bk->req_szB;
sewardj5fd79df2010-10-12 18:08:33 +0000343 if (0) VG_(printf)("api %p --> Exactly(%lu)\n", api, api->xsize);
sewardj4d7d8f52010-10-12 10:09:15 +0000344 // and allocate the histo
sewardj5fd79df2010-10-12 18:08:33 +0000345 if (bk->histoW) {
sewardj41aa79d2010-12-06 10:56:09 +0000346 api->histo = VG_(malloc)("dh.main.retire_Block.1",
347 api->xsize * sizeof(UInt));
sewardj4d7d8f52010-10-12 10:09:15 +0000348 VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
349 }
350 break;
351
352 case Exactly:
sewardj41aa79d2010-12-06 10:56:09 +0000353 //tl_assert(api->deaths > 1);
sewardj4d7d8f52010-10-12 10:09:15 +0000354 if (bk->req_szB != api->xsize) {
sewardj5fd79df2010-10-12 18:08:33 +0000355 if (0) VG_(printf)("api %p --> Mixed(%lu -> %lu)\n",
356 api, api->xsize, bk->req_szB);
sewardj4d7d8f52010-10-12 10:09:15 +0000357 api->xsize_tag = Mixed;
358 api->xsize = 0;
359 // deallocate the histo, if any
360 if (api->histo) {
361 VG_(free)(api->histo);
362 api->histo = NULL;
363 }
364 }
365 break;
366
367 case Mixed:
sewardj41aa79d2010-12-06 10:56:09 +0000368 //tl_assert(api->deaths > 1);
sewardj4d7d8f52010-10-12 10:09:15 +0000369 break;
370
371 default:
372 tl_assert(0);
373 }
374
375 // See if we can fold the histo data from this block into
376 // the data for the AP
sewardj5fd79df2010-10-12 18:08:33 +0000377 if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
sewardj4d7d8f52010-10-12 10:09:15 +0000378 tl_assert(api->xsize == bk->req_szB);
379 UWord i;
380 for (i = 0; i < api->xsize; i++) {
sewardj5fd79df2010-10-12 18:08:33 +0000381 // FIXME: do something better in case of overflow of api->histo[..]
382 // Right now, at least don't let it overflow/wrap around
383 if (api->histo[i] <= 0xFFFE0000)
384 api->histo[i] += (UInt)bk->histoW[i];
sewardj4d7d8f52010-10-12 10:09:15 +0000385 }
sewardj5fd79df2010-10-12 18:08:33 +0000386 if (0) VG_(printf)("fold in, AP = %p\n", api);
sewardj4d7d8f52010-10-12 10:09:15 +0000387 }
388
389
390
391#if 0
392 if (bk->histoB) {
393 VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
394 UWord i;
395 for (i = 0; i < bk->req_szB; i++)
396 VG_(printf)("%u ", (UInt)bk->histoB[i]);
397 VG_(printf)("\n");
398 } else {
399 VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
400 }
401#endif
402}
403
404/* This handles block resizing. When a block with AP 'ec' has a
405 size change of 'delta', call here to update the APInfo. */
406static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta )
407{
408 APInfo* api = NULL;
409 UWord keyW = 0;
410 UWord valW = 0;
411 Bool found = VG_(lookupFM)( apinfo,
412 &keyW, &valW, (UWord)ec );
413
414 tl_assert(found);
415 api = (APInfo*)valW;
416 tl_assert(api->ap == ec);
417
418 if (delta < 0) {
419 tl_assert(api->cur_bytes_live >= -delta);
420 tl_assert(g_cur_bytes_live >= -delta);
421 }
422
423 // adjust current live size
424 api->cur_bytes_live += delta;
425 g_cur_bytes_live += delta;
426
427 if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) {
428 api->max_bytes_live = api->cur_bytes_live;
429 api->max_blocks_live = api->cur_blocks_live;
430 }
431
432 // update global summary stats
433 if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) {
434 g_max_bytes_live = g_cur_bytes_live;
435 g_max_blocks_live = g_cur_blocks_live;
436 }
sewardj41aa79d2010-12-06 10:56:09 +0000437 if (delta > 0)
438 g_tot_bytes += delta;
sewardj4d7d8f52010-10-12 10:09:15 +0000439
440 // adjust total allocation size
441 if (delta > 0)
442 api->tot_bytes += delta;
443}
444
445
446//------------------------------------------------------------//
447//--- update both Block and APInfos after {m,re}alloc/free ---//
448//------------------------------------------------------------//
449
450static
451void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
452 Bool is_zeroed )
453{
454 tl_assert(p == NULL); // don't handle custom allocators right now
sewardjc7ffc942011-03-28 16:26:42 +0000455 SizeT actual_szB /*, slop_szB*/;
sewardj4d7d8f52010-10-12 10:09:15 +0000456
457 if ((SSizeT)req_szB < 0) return NULL;
458
459 if (req_szB == 0)
460 req_szB = 1; /* can't allow zero-sized blocks in the interval tree */
461
462 // Allocate and zero if necessary
463 if (!p) {
464 p = VG_(cli_malloc)( req_alignB, req_szB );
465 if (!p) {
466 return NULL;
467 }
468 if (is_zeroed) VG_(memset)(p, 0, req_szB);
floriane464e802014-09-11 20:15:23 +0000469 actual_szB = VG_(cli_malloc_usable_size)(p);
sewardj4d7d8f52010-10-12 10:09:15 +0000470 tl_assert(actual_szB >= req_szB);
sewardjc7ffc942011-03-28 16:26:42 +0000471 /* slop_szB = actual_szB - req_szB; */
sewardj4d7d8f52010-10-12 10:09:15 +0000472 } else {
sewardjc7ffc942011-03-28 16:26:42 +0000473 /* slop_szB = 0; */
sewardj4d7d8f52010-10-12 10:09:15 +0000474 }
475
476 // Make new HP_Chunk node, add to malloc_list
477 Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
478 bk->payload = (Addr)p;
479 bk->req_szB = req_szB;
480 bk->ap = VG_(record_ExeContext)(tid, 0/*first word delta*/);
481 bk->allocd_at = g_guest_instrs_executed;
482 bk->n_reads = 0;
483 bk->n_writes = 0;
484 // set up histogram array, if the block isn't too large
sewardj5fd79df2010-10-12 18:08:33 +0000485 bk->histoW = NULL;
sewardj4d7d8f52010-10-12 10:09:15 +0000486 if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
sewardj5fd79df2010-10-12 18:08:33 +0000487 bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
488 VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
sewardj4d7d8f52010-10-12 10:09:15 +0000489 }
490
491 Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
492 tl_assert(!present);
sewardj41aa79d2010-12-06 10:56:09 +0000493 fbc_cache0 = fbc_cache1 = NULL;
sewardj4d7d8f52010-10-12 10:09:15 +0000494
495 intro_Block(bk);
496
florian16eef852015-08-03 21:05:20 +0000497 if (0) VG_(printf)("ALLOC %lu -> %p\n", req_szB, p);
sewardj4d7d8f52010-10-12 10:09:15 +0000498
499 return p;
500}
501
502static
503void die_block ( void* p, Bool custom_free )
504{
505 tl_assert(!custom_free); // at least for now
506
507 Block* bk = find_Block_containing( (Addr)p );
508
509 if (!bk) {
510 return; // bogus free
511 }
512
513 tl_assert(bk->req_szB > 0);
514 // assert the block finder is behaving sanely
515 tl_assert(bk->payload <= (Addr)p);
516 tl_assert( (Addr)p < bk->payload + bk->req_szB );
517
518 if (bk->payload != (Addr)p) {
519 return; // bogus free
520 }
521
522 if (0) VG_(printf)(" FREE %p %llu\n",
523 p, g_guest_instrs_executed - bk->allocd_at);
524
sewardj41aa79d2010-12-06 10:56:09 +0000525 retire_Block(bk, True/*because_freed*/);
sewardj4d7d8f52010-10-12 10:09:15 +0000526
527 VG_(cli_free)( (void*)bk->payload );
528 delete_Block_starting_at( bk->payload );
sewardj5fd79df2010-10-12 18:08:33 +0000529 if (bk->histoW) {
530 VG_(free)( bk->histoW );
531 bk->histoW = NULL;
sewardj4d7d8f52010-10-12 10:09:15 +0000532 }
533 VG_(free)( bk );
534}
535
536
537static
538void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
539{
florian16eef852015-08-03 21:05:20 +0000540 if (0) VG_(printf)("REALL %p %lu\n", p_old, new_req_szB);
sewardj4d7d8f52010-10-12 10:09:15 +0000541 void* p_new = NULL;
542
543 tl_assert(new_req_szB > 0); // map 0 to 1
544
545 // Find the old block.
546 Block* bk = find_Block_containing( (Addr)p_old );
547 if (!bk) {
548 return NULL; // bogus realloc
549 }
550
551 tl_assert(bk->req_szB > 0);
552 // assert the block finder is behaving sanely
553 tl_assert(bk->payload <= (Addr)p_old);
554 tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
555
556 if (bk->payload != (Addr)p_old) {
557 return NULL; // bogus realloc
558 }
559
sewardj5fd79df2010-10-12 18:08:33 +0000560 // Keeping the histogram alive in any meaningful way across
561 // block resizing is too darn complicated. Just throw it away.
562 if (bk->histoW) {
563 VG_(free)(bk->histoW);
564 bk->histoW = NULL;
565 }
sewardj4d7d8f52010-10-12 10:09:15 +0000566
567 // Actually do the allocation, if necessary.
568 if (new_req_szB <= bk->req_szB) {
569
570 // New size is smaller or same; block not moved.
571 apinfo_change_cur_bytes_live(bk->ap,
572 (Long)new_req_szB - (Long)bk->req_szB);
573 bk->req_szB = new_req_szB;
574 return p_old;
575
576 } else {
577
578 // New size is bigger; make new block, copy shared contents, free old.
579 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
580 if (!p_new) {
581 // Nb: if realloc fails, NULL is returned but the old block is not
582 // touched. What an awful function.
583 return NULL;
584 }
585 tl_assert(p_new != p_old);
586
587 VG_(memcpy)(p_new, p_old, bk->req_szB);
588 VG_(cli_free)(p_old);
589
590 // Since the block has moved, we need to re-insert it into the
591 // interval tree at the new place. Do this by removing
592 // and re-adding it.
593 delete_Block_starting_at( (Addr)p_old );
594 // now 'bk' is no longer in the tree, but the Block itself
595 // is still alive
596
597 // Update the metadata.
598 apinfo_change_cur_bytes_live(bk->ap,
599 (Long)new_req_szB - (Long)bk->req_szB);
600 bk->payload = (Addr)p_new;
601 bk->req_szB = new_req_szB;
602
603 // and re-add
604 Bool present
605 = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
606 tl_assert(!present);
sewardj41aa79d2010-12-06 10:56:09 +0000607 fbc_cache0 = fbc_cache1 = NULL;
sewardj4d7d8f52010-10-12 10:09:15 +0000608
609 return p_new;
610 }
611 /*NOTREACHED*/
612 tl_assert(0);
613}
614
615
616//------------------------------------------------------------//
617//--- malloc() et al replacement wrappers ---//
618//------------------------------------------------------------//
619
620static void* dh_malloc ( ThreadId tid, SizeT szB )
621{
622 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
623}
624
625static void* dh___builtin_new ( ThreadId tid, SizeT szB )
626{
627 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
628}
629
630static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
631{
632 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
633}
634
635static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
636{
637 return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
638}
639
640static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
641{
642 return new_block( tid, NULL, szB, alignB, False );
643}
644
645static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
646{
647 die_block( p, /*custom_free*/False );
648}
649
650static void dh___builtin_delete ( ThreadId tid, void* p )
651{
652 die_block( p, /*custom_free*/False);
653}
654
655static void dh___builtin_vec_delete ( ThreadId tid, void* p )
656{
657 die_block( p, /*custom_free*/False );
658}
659
660static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
661{
662 if (p_old == NULL) {
663 return dh_malloc(tid, new_szB);
664 }
665 if (new_szB == 0) {
666 dh_free(tid, p_old);
667 return NULL;
668 }
669 return renew_block(tid, p_old, new_szB);
670}
671
672static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
673{
sewardj79660072014-11-14 10:13:49 +0000674 Block* bk = find_Block_containing( (Addr)p );
675 return bk ? bk->req_szB : 0;
sewardj4d7d8f52010-10-12 10:09:15 +0000676}
677
sewardj79660072014-11-14 10:13:49 +0000678
sewardj4d7d8f52010-10-12 10:09:15 +0000679//------------------------------------------------------------//
680//--- memory references ---//
681//------------------------------------------------------------//
682
683static
684void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
685{
686 UWord i, offMin, offMax1;
687 offMin = addr - bk->payload;
688 tl_assert(offMin < bk->req_szB);
689 offMax1 = offMin + szB;
690 if (offMax1 > bk->req_szB)
691 offMax1 = bk->req_szB;
692 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
693 for (i = offMin; i < offMax1; i++) {
sewardj5fd79df2010-10-12 18:08:33 +0000694 UShort n = bk->histoW[i];
695 if (n < 0xFFFF) n++;
696 bk->histoW[i] = n;
sewardj4d7d8f52010-10-12 10:09:15 +0000697 }
698}
699
700static VG_REGPARM(2)
701void dh_handle_write ( Addr addr, UWord szB )
702{
703 Block* bk = find_Block_containing(addr);
704 if (bk) {
705 bk->n_writes += szB;
sewardj5fd79df2010-10-12 18:08:33 +0000706 if (bk->histoW)
sewardj4d7d8f52010-10-12 10:09:15 +0000707 inc_histo_for_block(bk, addr, szB);
708 }
709}
710
711static VG_REGPARM(2)
712void dh_handle_read ( Addr addr, UWord szB )
713{
714 Block* bk = find_Block_containing(addr);
715 if (bk) {
716 bk->n_reads += szB;
sewardj5fd79df2010-10-12 18:08:33 +0000717 if (bk->histoW)
sewardj4d7d8f52010-10-12 10:09:15 +0000718 inc_histo_for_block(bk, addr, szB);
719 }
720}
721
722
723// Handle reads and writes by syscalls (read == kernel
724// reads user space, write == kernel writes user space).
725// Assumes no such read or write spans a heap block
726// boundary and so we can treat it just as one giant
727// read or write.
728static
floriane543f302012-10-21 19:43:43 +0000729void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
sewardj4d7d8f52010-10-12 10:09:15 +0000730 Addr base, SizeT size )
731{
732 switch (part) {
733 case Vg_CoreSysCall:
734 dh_handle_read(base, size);
735 break;
736 case Vg_CoreSysCallArgInMem:
737 break;
738 case Vg_CoreTranslate:
739 break;
740 default:
741 tl_assert(0);
742 }
743}
744
745static
746void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
747 Addr base, SizeT size )
748{
749 switch (part) {
750 case Vg_CoreSysCall:
751 dh_handle_write(base, size);
752 break;
753 case Vg_CoreSignal:
754 break;
755 default:
756 tl_assert(0);
757 }
758}
759
760
761//------------------------------------------------------------//
762//--- Instrumentation ---//
763//------------------------------------------------------------//
764
/* Shorthands for building VEX IR; expansions identical to the
   long-hand constructors. */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
770
sewardj4d7d8f52010-10-12 10:09:15 +0000771static
772void add_counter_update(IRSB* sbOut, Int n)
773{
774 #if defined(VG_BIGENDIAN)
775 # define END Iend_BE
776 #elif defined(VG_LITTLEENDIAN)
777 # define END Iend_LE
778 #else
779 # error "Unknown endianness"
780 #endif
781 // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
782 // WrTmp(t1, Load64(&g_guest_instrs_executed))
783 // WrTmp(t2, Add64(RdTmp(t1), Const(n)))
784 // Store(&g_guest_instrs_executed, t2)
785 IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
786 IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
787 IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed );
788
sewardj41aa79d2010-12-06 10:56:09 +0000789 IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
790 IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
791 IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
sewardj4d7d8f52010-10-12 10:09:15 +0000792
793 addStmtToIRSB( sbOut, st1 );
794 addStmtToIRSB( sbOut, st2 );
795 addStmtToIRSB( sbOut, st3 );
796}
797
798static
sewardj41aa79d2010-12-06 10:56:09 +0000799void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
800 Int goff_sp)
sewardj4d7d8f52010-10-12 10:09:15 +0000801{
802 IRType tyAddr = Ity_INVALID;
florian19f91bb2012-11-10 22:29:54 +0000803 const HChar* hName= NULL;
sewardj4d7d8f52010-10-12 10:09:15 +0000804 void* hAddr = NULL;
805 IRExpr** argv = NULL;
806 IRDirty* di = NULL;
807
sewardj41aa79d2010-12-06 10:56:09 +0000808 const Int THRESH = 4096 * 4; // somewhat arbitrary
809 const Int rz_szB = VG_STACK_REDZONE_SZB;
810
sewardj4d7d8f52010-10-12 10:09:15 +0000811 tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
812 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
813
814 if (isWrite) {
815 hName = "dh_handle_write";
816 hAddr = &dh_handle_write;
817 } else {
818 hName = "dh_handle_read";
819 hAddr = &dh_handle_read;
820 }
821
822 argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
823
824 /* Add the helper. */
825 tl_assert(hName);
826 tl_assert(hAddr);
827 tl_assert(argv);
828 di = unsafeIRDirty_0_N( 2/*regparms*/,
829 hName, VG_(fnptr_to_fnentry)( hAddr ),
830 argv );
831
sewardj41aa79d2010-12-06 10:56:09 +0000832 /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
833 some arbitrary N. If that fails then addr is in the range (SP -
834 RZ .. SP + N - RZ). If N is smallish (a page?) then we can say
835 addr is within a page of SP and so can't possibly be a heap
836 access, and so can be skipped. */
837 IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
838 addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
839
840 IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
841 addStmtToIRSB(
842 sbOut,
843 assign(sp_minus_rz,
844 tyAddr == Ity_I32
845 ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
846 : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
847 );
848
849 IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
850 addStmtToIRSB(
851 sbOut,
852 assign(diff,
853 tyAddr == Ity_I32
854 ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
855 : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
856 );
857
858 IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
859 addStmtToIRSB(
860 sbOut,
861 assign(guard,
862 tyAddr == Ity_I32
863 ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
864 : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
865 );
866 di->guard = mkexpr(guard);
867
sewardj4d7d8f52010-10-12 10:09:15 +0000868 addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
869}
870
/* Instrument one superblock.  Two kinds of instrumentation are added:
   - an increment of the global guest-instruction counter, placed just
     before each Ist_Exit and just before the superblock's end;
   - a call to a memory-event helper for every load, store,
     memory-touching dirty helper, CAS and LL/SC, so that accesses can
     later be attributed to heap blocks / allocation points.
   Returns the instrumented copy of 'sbIn'. */
static
IRSB* dh_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int i, n = 0;
   IRSB* sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;

   // Guest-state offset of the stack pointer, handed to addMemEvent so
   // that SP-relative accesses can be recognised there.
   const Int goff_sp = layout->offset_SP;

   // We increment the instruction count in two places:
   // - just before any Ist_Exit statements;
   // - just before the IRSB's end.
   // In the former case, we zero 'n' and then continue instrumenting.

   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];

      if (!st || st->tag == Ist_NoOp) continue;

      switch (st->tag) {

         case Ist_IMark: {
            // One guest instruction starts here; count it.
            n++;
            break;
         }

         case Ist_Exit: {
            if (n > 0) {
               // Add an increment before the Exit statement, then reset 'n'.
               add_counter_update(sbOut, n);
               n = 0;
            }
            break;
         }

         case Ist_WrTmp: {
            // Loads appear as WrTmp of an Iex_Load expression.
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(data->Iex.Load.ty),
                            aexpr, goff_sp );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addMemEvent( sbOut, True/*isWrite*/,
                         sizeofIRType(typeOfIRExpr(tyenv, data)),
                         aexpr, goff_sp );
            break;
         }

         case Ist_Dirty: {
            Int dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, False/*!isWrite*/,
                               dataSize, d->mAddr, goff_sp );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, True/*isWrite*/,
                               dataSize, d->mAddr, goff_sp );
            } else {
               // A dirty helper that claims no memory effects must not
               // carry an address or size either.
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            addMemEvent( sbOut, False/*!isWrite*/,
                         dataSize, cas->addr, goff_sp );
            addMemEvent( sbOut, True/*isWrite*/,
                         dataSize, cas->addr, goff_sp );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addMemEvent( sbOut, True/*isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            }
            break;
         }

         default:
            break;
      }

      // Pass the original statement through unchanged.
      addStmtToIRSB( sbOut, st );
   }

   if (n > 0) {
      // Add an increment before the SB end.
      add_counter_update(sbOut, n);
   }
   return sbOut;
}
1018
// Retire the IR-building shorthand macros defined earlier; they are
// only needed by the instrumentation code above.
#undef binop
#undef mkexpr
#undef mkU32
#undef mkU64
#undef assign
sewardj4d7d8f52010-10-12 10:09:15 +00001025
1026//------------------------------------------------------------//
sewardj5fd79df2010-10-12 18:08:33 +00001027//--- Command line args ---//
1028//------------------------------------------------------------//
1029
// FORWARDS
static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
                              /*OUT*/Bool* increasingP,
                              const HChar* metric_name );

// Number of allocation points to show in the final report (--show-top-n).
static Int clo_show_top_n = 10;
// Sort key for the final report (--sort-by); must be a name accepted by
// identify_metric.
static const HChar *clo_sort_by = "max-bytes-live";
sewardj5fd79df2010-10-12 18:08:33 +00001037
/* Process one command line option.  Returns True if 'arg' was accepted
   (either by DHAT or by the malloc-replacement machinery), False if it
   is malformed (e.g. an unrecognised --sort-by metric). */
static Bool dh_process_cmd_line_option(const HChar* arg)
{
   if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}

   else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
      // Validate the metric name now; the results are discarded.
      ULong (*dummyFn)(APInfo*);
      Bool dummyB;
      Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
      if (!ok)
         return False;
      // otherwise it's OK, in which case leave it alone.
      // show_top_n_apinfos will later convert the string by a
      // second call to identify_metric.
   }

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;
}
1058
1059
// Print the user-visible command line options (part of --help output).
static void dh_print_usage(void)
{
   VG_(printf)(
" --show-top-n=number show the top <number> alloc points [10]\n"
" --sort-by=string\n"
" sort the allocation points by the metric\n"
" defined by <string>, thusly:\n"
" max-bytes-live maximum live bytes [default]\n"
" tot-bytes-allocd bytes allocated in total (turnover)\n"
" max-blocks-live maximum live blocks\n"
" tot-blocks-allocd blocks allocated in total (turnover)\n"
   );
}
1073
// Print debugging-only command line options; DHAT has none.
static void dh_print_debug_usage(void)
{
   VG_(printf)(
" (none)\n"
   );
}
1080
1081
1082//------------------------------------------------------------//
sewardj4d7d8f52010-10-12 10:09:15 +00001083//--- Finalisation ---//
1084//------------------------------------------------------------//
1085
1086static void show_N_div_100( /*OUT*/HChar* buf, ULong n )
1087{
1088 ULong nK = n / 100;
1089 ULong nR = n % 100;
1090 VG_(sprintf)(buf, "%llu.%s%llu", nK,
1091 nR < 10 ? "0" : "",
1092 nR);
1093}
1094
/* Print the statistics for one allocation point: peak/total byte and
   block counts, deallocation ("death") ages, read/write access ratios,
   the allocating stack, and -- for fixed-size allocation points -- the
   aggregated per-offset access histogram. */
static void show_APInfo ( APInfo* api )
{
   HChar bufA[80]; // large enough
   VG_(memset)(bufA, 0, sizeof(bufA));
   if (api->tot_blocks > 0) {
      // Average block size, printed with two decimal places.
      show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL)
                            / (ULong)api->tot_blocks );
   } else {
      bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N';
   }

   VG_(umsg)("max-live: %'llu in %'llu blocks\n",
             api->max_bytes_live, api->max_blocks_live);
   VG_(umsg)("tot-alloc: %'llu in %'llu blocks (avg size %s)\n",
             api->tot_bytes, api->tot_blocks, bufA);

   tl_assert(api->tot_blocks >= api->max_blocks_live);
   tl_assert(api->tot_bytes >= api->max_bytes_live);

   if (api->deaths > 0) {
      // Average Age at Death
      // (the "== 0" guard is redundant inside this branch, but harmless)
      ULong aad = api->deaths == 0
                  ? 0 : (api->death_ages_sum / api->deaths);
      // AAD as a fraction of the total program lifetime (so far)
      // measured in ten-thousand-ths (aad_frac_10k == 10000 means the
      // complete lifetime of the program.
      ULong aad_frac_10k
         = g_guest_instrs_executed == 0
           ? 0 : (10000ULL * aad) / g_guest_instrs_executed;
      HChar buf[80]; // large enough
      show_N_div_100(buf, aad_frac_10k);
      VG_(umsg)("deaths: %'llu, at avg age %'llu "
                "(%s%% of prog lifetime)\n",
                api->deaths, aad, buf );
   } else {
      VG_(umsg)("deaths: none (none of these blocks were freed)\n");
   }

   HChar bufR[80], bufW[80]; // large enough
   VG_(memset)(bufR, 0, sizeof(bufR));
   VG_(memset)(bufW, 0, sizeof(bufW));
   if (api->tot_bytes > 0) {
      // Reads/writes per allocated byte, expressed as percentages.
      show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes);
      show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes);
   } else {
      VG_(strcat)(bufR, "Inf");
      VG_(strcat)(bufW, "Inf");
   }

   VG_(umsg)("acc-ratios: %s rd, %s wr "
             " (%'llu b-read, %'llu b-written)\n",
             bufR, bufW,
             api->n_reads, api->n_writes);

   VG_(pp_ExeContext)(api->ap);

   // The histogram only exists for allocation points all of whose
   // blocks have one fixed size (xsize_tag == Exactly).
   if (api->histo && api->xsize_tag == Exactly) {
      VG_(umsg)("\nAggregated access counts by offset:\n");
      VG_(umsg)("\n");
      UWord i;
      if (api->xsize > 0)
         VG_(umsg)("[ 0] ");
      for (i = 0; i < api->xsize; i++) {
         // Start a new labelled row every 16 entries.
         if (i > 0 && (i % 16) == 0 && i != api->xsize-1) {
            VG_(umsg)("\n");
            VG_(umsg)("[%4lu] ", i);
         }
         VG_(umsg)("%u ", api->histo[i]);
      }
      VG_(umsg)("\n");
   }
}
1167
1168
/* Metric-access functions for APInfos.  Each extracts one summary
   statistic from an allocation-point record; they are used as
   interchangeable sort keys by identify_metric and
   show_top_n_apinfos. */
// Peak number of simultaneously-live bytes.
static ULong get_metric__max_bytes_live ( APInfo* api ) {
   return api->max_bytes_live;
}
// Total bytes ever allocated at this point (turnover).
static ULong get_metric__tot_bytes ( APInfo* api ) {
   return api->tot_bytes;
}
// Peak number of simultaneously-live blocks.
static ULong get_metric__max_blocks_live ( APInfo* api ) {
   return api->max_blocks_live;
}
// Total blocks ever allocated at this point (turnover).
static ULong get_metric__tot_blocks ( APInfo* api ) {
   return api->tot_blocks;
}
sewardj4d7d8f52010-10-12 10:09:15 +00001182
sewardj5fd79df2010-10-12 18:08:33 +00001183/* Given a string, return the metric-access function and also a Bool
1184 indicating whether we want increasing or decreasing values of the
1185 metric. This is used twice, once in command line processing, and
1186 then again in show_top_n_apinfos. Returns False if the given
1187 string could not be identified.*/
1188static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
1189 /*OUT*/Bool* increasingP,
florian19f91bb2012-11-10 22:29:54 +00001190 const HChar* metric_name )
sewardj4d7d8f52010-10-12 10:09:15 +00001191{
sewardj5fd79df2010-10-12 18:08:33 +00001192 if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) {
1193 *get_metricP = get_metric__max_bytes_live;
1194 *increasingP = False;
1195 return True;
1196 }
1197 if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) {
1198 *get_metricP = get_metric__tot_bytes;
1199 *increasingP = False;
1200 return True;
1201 }
1202 if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) {
1203 *get_metricP = get_metric__max_blocks_live;
1204 *increasingP = False;
1205 return True;
1206 }
Elliott Hughesa0664b92017-04-18 17:46:52 -07001207 if (0 == VG_(strcmp)(metric_name, "tot-blocks-allocd")) {
1208 *get_metricP = get_metric__tot_blocks;
1209 *increasingP = False;
1210 return True;
1211 }
sewardj5fd79df2010-10-12 18:08:33 +00001212 return False;
1213}
sewardj4d7d8f52010-10-12 10:09:15 +00001214
sewardj5fd79df2010-10-12 18:08:33 +00001215
1216static void show_top_n_apinfos ( void )
1217{
1218 Int i;
sewardj4d7d8f52010-10-12 10:09:15 +00001219 UWord keyW, valW;
sewardj5fd79df2010-10-12 18:08:33 +00001220 ULong (*get_metric)(APInfo*);
1221 Bool increasing;
1222
florian19f91bb2012-11-10 22:29:54 +00001223 const HChar* metric_name = clo_sort_by;
sewardj5fd79df2010-10-12 18:08:33 +00001224 tl_assert(metric_name); // ensured by clo processing
1225
1226 Bool ok = identify_metric( &get_metric, &increasing, metric_name );
1227 tl_assert(ok); // ensured by clo processing
sewardj4d7d8f52010-10-12 10:09:15 +00001228
1229 VG_(umsg)("\n");
1230 VG_(umsg)("======== ORDERED BY %s \"%s\": "
1231 "top %d allocators ========\n",
1232 increasing ? "increasing" : "decreasing",
sewardj5fd79df2010-10-12 18:08:33 +00001233 metric_name, clo_show_top_n );
sewardj4d7d8f52010-10-12 10:09:15 +00001234
1235 // Clear all .shown bits
1236 VG_(initIterFM)( apinfo );
1237 while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
1238 APInfo* api = (APInfo*)valW;
1239 tl_assert(api && api->ap == (ExeContext*)keyW);
1240 api->shown = False;
1241 }
1242 VG_(doneIterFM)( apinfo );
1243
1244 // Now print the top N entries. Each one requires a
1245 // complete scan of the set. Duh.
sewardj5fd79df2010-10-12 18:08:33 +00001246 for (i = 0; i < clo_show_top_n; i++) {
sewardj4d7d8f52010-10-12 10:09:15 +00001247 ULong best_metric = increasing ? ~0ULL : 0ULL;
1248 APInfo* best_api = NULL;
1249
1250 VG_(initIterFM)( apinfo );
1251 while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
1252 APInfo* api = (APInfo*)valW;
1253 if (api->shown)
1254 continue;
1255 ULong metric = get_metric(api);
1256 if (increasing ? (metric < best_metric) : (metric > best_metric)) {
1257 best_metric = metric;
1258 best_api = api;
1259 }
1260 }
1261 VG_(doneIterFM)( apinfo );
1262
1263 if (!best_api)
1264 break; // all APIs have been shown. Stop.
1265
1266 VG_(umsg)("\n");
sewardj5fd79df2010-10-12 18:08:33 +00001267 VG_(umsg)("-------------------- %d of %d --------------------\n",
1268 i+1, clo_show_top_n );
sewardj4d7d8f52010-10-12 10:09:15 +00001269 show_APInfo(best_api);
1270 best_api->shown = True;
1271 }
1272
1273 VG_(umsg)("\n");
1274}
1275
1276
/* Finalisation, called once at client exit.  First folds access counts
   of still-live blocks into their allocation-point summaries, then
   prints whole-program statistics, the top-N allocation points, usage
   hints, and (with --stats=yes) internal lookup-cache statistics. */
static void dh_fini(Int exit_status)
{
   // Before printing statistics, we must harvest access counts for
   // all the blocks that are still alive.  Not doing so gives
   // access ratios which are too low (zero, in the worst case)
   // for such blocks, since the accesses that do get made will
   // (if we skip this step) not get folded into the AP summaries.
   UWord keyW, valW;
   VG_(initIterFM)( interval_tree );
   while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
      Block* bk = (Block*)keyW;
      tl_assert(valW == 0);
      tl_assert(bk);
      retire_Block(bk, False/*!because_freed*/);
   }
   VG_(doneIterFM)( interval_tree );

   // show results
   VG_(umsg)("======== SUMMARY STATISTICS ========\n");
   VG_(umsg)("\n");
   VG_(umsg)("guest_insns: %'llu\n", g_guest_instrs_executed);
   VG_(umsg)("\n");
   VG_(umsg)("max_live: %'llu in %'llu blocks\n",
             g_max_bytes_live, g_max_blocks_live);
   VG_(umsg)("\n");
   VG_(umsg)("tot_alloc: %'llu in %'llu blocks\n",
             g_tot_bytes, g_tot_blocks);
   VG_(umsg)("\n");
   if (g_tot_bytes > 0) {
      VG_(umsg)("insns per allocated byte: %'llu\n",
                g_guest_instrs_executed / g_tot_bytes);
      VG_(umsg)("\n");
   }

   show_top_n_apinfos();

   VG_(umsg)("\n");
   VG_(umsg)("\n");
   VG_(umsg)("==============================================================\n");
   VG_(umsg)("\n");
   VG_(umsg)("Some hints: (see --help for command line option details):\n");
   VG_(umsg)("\n");
   VG_(umsg)("* summary stats for whole program are at the top of this output\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --show-top-n= controls how many alloc points are shown.\n");
   VG_(umsg)(" You probably want to set it much higher than\n");
   VG_(umsg)(" the default value (10)\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --sort-by= specifies the sort key for output.\n");
   VG_(umsg)(" See --help for details.\n");
   VG_(umsg)("\n");
   VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
   VG_(umsg)(" a separate alloc point. This causes the data to be spread out\n");
   VG_(umsg)(" over far too many alloc points. I strongly suggest using\n");
   VG_(umsg)(" --num-callers=4 or some such, to reduce the spreading.\n");
   VG_(umsg)("\n");

   // Internal statistics for the find_Block_containing lookup cache.
   if (VG_(clo_stats)) {
      VG_(dmsg)(" dhat: find_Block_containing:\n");
      VG_(dmsg)(" found: %'lu (%'lu cached + %'lu uncached)\n",
                stats__n_fBc_cached + stats__n_fBc_uncached,
                stats__n_fBc_cached,
                stats__n_fBc_uncached);
      VG_(dmsg)(" notfound: %'lu\n", stats__n_fBc_notfound);
      VG_(dmsg)("\n");
   }
}
1344
1345
1346//------------------------------------------------------------//
1347//--- Initialisation ---//
1348//------------------------------------------------------------//
1349
// Called by the core after command line processing; DHAT needs no
// post-clo setup, so this is intentionally empty.
static void dh_post_clo_init(void)
{
}
1353
/* Tool registration, called before command line processing.  Sets the
   tool details, registers core callbacks (instrumentation, finaliser,
   clo handling, malloc replacement, non-instruction memory tracking),
   and creates the two global maps: the live-block interval tree and
   the allocation-point map. */
static void dh_pre_clo_init(void)
{
   VG_(details_name) ("DHAT");
   VG_(details_version) (NULL);
   VG_(details_description) ("a dynamic heap analysis tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2010-2017, and GNU GPL'd, by Mozilla Inc");
   VG_(details_bug_reports_to) (VG_BUGS_TO);

   // Basic functions.
   VG_(basic_tool_funcs) (dh_post_clo_init,
                          dh_instrument,
                          dh_fini);
//zz
   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_cxx_freeres)();
   VG_(needs_command_line_options)(dh_process_cmd_line_option,
                                   dh_print_usage,
                                   dh_print_debug_usage);
//zz   VG_(needs_client_requests)     (dh_handle_client_request);
//zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
//zz                                   dh_expensive_sanity_check);
   VG_(needs_malloc_replacement) (dh_malloc,
                                  dh___builtin_new,
                                  dh___builtin_vec_new,
                                  dh_memalign,
                                  dh_calloc,
                                  dh_free,
                                  dh___builtin_delete,
                                  dh___builtin_vec_delete,
                                  dh_realloc,
                                  dh_malloc_usable_size,
                                  0 );

   VG_(track_pre_mem_read) ( dh_handle_noninsn_read );
   //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_post_mem_write) ( dh_handle_noninsn_write );

   // The global structures must not exist yet.
   tl_assert(!interval_tree);
   tl_assert(!fbc_cache0);
   tl_assert(!fbc_cache1);

   interval_tree = VG_(newFM)( VG_(malloc),
                               "dh.main.interval_tree.1",
                               VG_(free),
                               interval_tree_Cmp );

   apinfo = VG_(newFM)( VG_(malloc),
                        "dh.main.apinfo.1",
                        VG_(free),
                        NULL/*unboxedcmp*/ );
}
1407
1408VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1409
1410//--------------------------------------------------------------------//
1411//--- end dh_main.c ---//
1412//--------------------------------------------------------------------//