blob: 1576b5fe6e1fb2d4539807bcb31a75aa75c3aa3d [file] [log] [blame]
nethercotec9f36922004-02-14 16:40:02 +00001
2/*--------------------------------------------------------------------*/
nethercote996901a2004-08-03 13:29:09 +00003/*--- Massif: a heap profiling tool. ms_main.c ---*/
nethercotec9f36922004-02-14 16:40:02 +00004/*--------------------------------------------------------------------*/
5
6/*
nethercote996901a2004-08-03 13:29:09 +00007 This file is part of Massif, a Valgrind tool for profiling memory
nethercotec9f36922004-02-14 16:40:02 +00008 usage of programs.
9
njn53612422005-03-12 16:22:54 +000010 Copyright (C) 2003-2005 Nicholas Nethercote
nethercotec9f36922004-02-14 16:40:02 +000011 njn25@cam.ac.uk
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31// Memory profiler. Produces a graph, gives lots of information about
32// allocation contexts, in terms of space.time values (ie. area under the
33// graph). Allocation context information is hierarchical, and can thus
34// be inspected step-wise to an appropriate depth. See comments on data
35// structures below for more info on how things work.
36
nethercote46063202004-09-02 08:51:43 +000037#include "tool.h"
nethercotec9f36922004-02-14 16:40:02 +000038//#include "vg_profile.c"
39
40#include "valgrind.h" // For {MALLOC,FREE}LIKE_BLOCK
41
42/*------------------------------------------------------------*/
43/*--- Overview of operation ---*/
44/*------------------------------------------------------------*/
45
46// Heap blocks are tracked, and the amount of space allocated by various
47// contexts (ie. lines of code, more or less) is also tracked.
48// Periodically, a census is taken, and the amount of space used, at that
49// point, by the most significant (highly allocating) contexts is recorded.
50// Census start off frequently, but are scaled back as the program goes on,
51// so that there are always a good number of them. At the end, overall
52// spacetimes for different contexts (of differing levels of precision) is
53// calculated, the graph is printed, and the text giving spacetimes for the
54// increasingly precise contexts is given.
55//
56// Measures the following:
57// - heap blocks
58// - heap admin bytes
59// - stack(s)
60// - code (code segments loaded at startup, and loaded with mmap)
61// - data (data segments loaded at startup, and loaded/created with mmap,
62// and brk()d segments)
63
64/*------------------------------------------------------------*/
65/*--- Main types ---*/
66/*------------------------------------------------------------*/
67
68// An XPt represents an "execution point", ie. a code address. Each XPt is
69// part of a tree of XPts (an "execution tree", or "XTree"). Each
70// top-to-bottom path through an XTree gives an execution context ("XCon"),
71// and is equivalent to a traditional Valgrind ExeContext.
72//
73// The XPt at the top of an XTree (but below "alloc_xpt") is called a
74// "top-XPt". The XPts are the bottom of an XTree (leaf nodes) are
75// "bottom-XPTs". The number of XCons in an XTree is equal to the number of
76// bottom-XPTs in that XTree.
77//
78// All XCons have the same top-XPt, "alloc_xpt", which represents all
79// allocation functions like malloc(). It's a bit of a fake XPt, though,
80// and is only used because it makes some of the code simpler.
81//
82// XTrees are bi-directional.
83//
84// > parent < Example: if child1() calls parent() and child2()
85// / | \ also calls parent(), and parent() calls malloc(),
86// | / \ | the XTree will look like this.
87// | v v |
88// child1 child2
89
90typedef struct _XPt XPt;
91
92struct _XPt {
93 Addr eip; // code address
94
95 // Bottom-XPts: space for the precise context.
96 // Other XPts: space of all the descendent bottom-XPts.
97 // Nb: this value goes up and down as the program executes.
98 UInt curr_space;
99
100 // An approximate space.time calculation used along the way for selecting
101 // which contexts to include at each census point.
102 // !!! top-XPTs only !!!
nethercote43a15ce2004-08-30 19:15:12 +0000103 ULong approx_ST;
nethercotec9f36922004-02-14 16:40:02 +0000104
nethercote43a15ce2004-08-30 19:15:12 +0000105 // exact_ST_dbld is an exact space.time calculation done at the end, and
nethercotec9f36922004-02-14 16:40:02 +0000106 // used in the results.
107 // Note that it is *doubled*, to avoid rounding errors.
108 // !!! not used for 'alloc_xpt' !!!
nethercote43a15ce2004-08-30 19:15:12 +0000109 ULong exact_ST_dbld;
nethercotec9f36922004-02-14 16:40:02 +0000110
111 // n_children and max_children are integers; a very big program might
112 // have more than 65536 allocation points (Konqueror startup has 1800).
113 XPt* parent; // pointer to parent XPt
114 UInt n_children; // number of children
115 UInt max_children; // capacity of children array
116 XPt** children; // pointers to children XPts
117};
118
119// Each census snapshots the most significant XTrees, each XTree having a
120// top-XPt as its root. The 'curr_space' element for each XPt is recorded
121// in the snapshot. The snapshot contains all the XTree's XPts, not in a
122// tree structure, but flattened into an array. This flat snapshot is used
nethercote43a15ce2004-08-30 19:15:12 +0000123// at the end for computing exact_ST_dbld for each XPt.
nethercotec9f36922004-02-14 16:40:02 +0000124//
125// Graph resolution, x-axis: no point having more than about 200 census
126// x-points; you can't see them on the graph. Therefore:
127//
128// - do a census every 1 ms for first 200 --> 200, all (200 ms)
129// - halve (drop half of them) --> 100, every 2nd (200 ms)
130// - do a census every 2 ms for next 200 --> 200, every 2nd (400 ms)
131// - halve --> 100, every 4th (400 ms)
132// - do a census every 4 ms for next 400 --> 200, every 4th (800 ms)
133// - etc.
134//
135// This isn't exactly right, because we actually drop (N/2)-1 when halving,
136// but it shows the basic idea.
137
138#define MAX_N_CENSI 200 // Keep it even, for simplicity
139
140// Graph resolution, y-axis: hp2ps only draws the 19 biggest (in space-time)
141// bands, rest get lumped into OTHERS. I only print the top N
142// (cumulative-so-far space-time) at each point. N should be a bit bigger
143// than 19 in case the cumulative space-time doesn't fit with the eventual
144// space-time computed by hp2ps (but it should be close if the samples are
145// evenly spread, since hp2ps does an approximate per-band space-time
146// calculation that just sums the totals; ie. it assumes all samples are
147// the same distance apart).
148
149#define MAX_SNAPSHOTS 32
150
151typedef
152 struct {
153 XPt* xpt;
154 UInt space;
155 }
156 XPtSnapshot;
157
158// An XTree snapshot is stored as an array of of XPt snapshots.
159typedef XPtSnapshot* XTreeSnapshot;
160
161typedef
162 struct {
163 Int ms_time; // Int: must allow -1
164 XTreeSnapshot xtree_snapshots[MAX_SNAPSHOTS+1]; // +1 for zero-termination
165 UInt others_space;
166 UInt heap_admin_space;
167 UInt stacks_space;
168 }
169 Census;
170
171// Metadata for heap blocks. Each one contains a pointer to a bottom-XPt,
172// which is a foothold into the XCon at which it was allocated. From
173// HP_Chunks, XPt 'space' fields are incremented (at allocation) and
174// decremented (at deallocation).
175//
176// Nb: first two fields must match core's VgHashNode.
177typedef
178 struct _HP_Chunk {
179 struct _HP_Chunk* next;
180 Addr data; // Ptr to actual block
nethercote7ac7f7b2004-11-02 12:36:02 +0000181 SizeT size; // Size requested
nethercotec9f36922004-02-14 16:40:02 +0000182 XPt* where; // Where allocated; bottom-XPt
183 }
184 HP_Chunk;
185
186/*------------------------------------------------------------*/
187/*--- Profiling events ---*/
188/*------------------------------------------------------------*/
189
190typedef
191 enum {
192 VgpGetXPt = VgpFini+1,
193 VgpGetXPtSearch,
194 VgpCensus,
195 VgpCensusHeap,
196 VgpCensusSnapshot,
197 VgpCensusTreeSize,
198 VgpUpdateXCon,
199 VgpCalcSpacetime2,
200 VgpPrintHp,
201 VgpPrintXPts,
202 }
njn4be0a692004-11-22 18:10:36 +0000203 VgpToolCC;
nethercotec9f36922004-02-14 16:40:02 +0000204
205/*------------------------------------------------------------*/
206/*--- Statistics ---*/
207/*------------------------------------------------------------*/
208
209// Konqueror startup, to give an idea of the numbers involved with a biggish
210// program, with default depth:
211//
212// depth=3 depth=40
213// - 310,000 allocations
214// - 300,000 frees
215// - 15,000 XPts 800,000 XPts
216// - 1,800 top-XPts
217
218static UInt n_xpts = 0;
219static UInt n_bot_xpts = 0;
220static UInt n_allocs = 0;
221static UInt n_zero_allocs = 0;
222static UInt n_frees = 0;
223static UInt n_children_reallocs = 0;
224static UInt n_snapshot_frees = 0;
225
226static UInt n_halvings = 0;
227static UInt n_real_censi = 0;
228static UInt n_fake_censi = 0;
229static UInt n_attempted_censi = 0;
230
231/*------------------------------------------------------------*/
232/*--- Globals ---*/
233/*------------------------------------------------------------*/
234
235#define FILENAME_LEN 256
236
237#define SPRINTF(zz_buf, fmt, args...) \
238 do { Int len = VG_(sprintf)(zz_buf, fmt, ## args); \
239 VG_(write)(fd, (void*)zz_buf, len); \
240 } while (0)
241
242#define BUF_LEN 1024 // general purpose
243static Char buf [BUF_LEN];
244static Char buf2[BUF_LEN];
245static Char buf3[BUF_LEN];
246
nethercote8b5f40c2004-11-02 13:29:50 +0000247static SizeT sigstacks_space = 0; // Current signal stacks space sum
nethercotec9f36922004-02-14 16:40:02 +0000248
249static VgHashTable malloc_list = NULL; // HP_Chunks
250
251static UInt n_heap_blocks = 0;
252
253
254#define MAX_ALLOC_FNS 32 // includes the builtin ones
255
nethercotec7469182004-05-11 09:21:08 +0000256// First few filled in, rest should be zeroed. Zero-terminated vector.
257static UInt n_alloc_fns = 11;
nethercotec9f36922004-02-14 16:40:02 +0000258static Char* alloc_fns[MAX_ALLOC_FNS] = {
259 "malloc",
260 "operator new(unsigned)",
261 "operator new[](unsigned)",
nethercoteeb479cb2004-05-11 16:37:17 +0000262 "operator new(unsigned, std::nothrow_t const&)",
263 "operator new[](unsigned, std::nothrow_t const&)",
nethercotec9f36922004-02-14 16:40:02 +0000264 "__builtin_new",
265 "__builtin_vec_new",
266 "calloc",
267 "realloc",
fitzhardinge51f3ff12004-03-04 22:42:03 +0000268 "memalign",
nethercotec9f36922004-02-14 16:40:02 +0000269};
270
271
272/*------------------------------------------------------------*/
273/*--- Command line args ---*/
274/*------------------------------------------------------------*/
275
276#define MAX_DEPTH 50
277
278typedef
279 enum {
280 XText, XHTML,
281 }
282 XFormat;
283
284static Bool clo_heap = True;
285static UInt clo_heap_admin = 8;
286static Bool clo_stacks = True;
287static Bool clo_depth = 3;
288static XFormat clo_format = XText;
289
njn26f02512004-11-22 18:33:15 +0000290Bool TL_(process_cmd_line_option)(Char* arg)
nethercotec9f36922004-02-14 16:40:02 +0000291{
nethercote27fec902004-06-16 21:26:32 +0000292 VG_BOOL_CLO("--heap", clo_heap)
293 else VG_BOOL_CLO("--stacks", clo_stacks)
nethercotec9f36922004-02-14 16:40:02 +0000294
nethercote27fec902004-06-16 21:26:32 +0000295 else VG_NUM_CLO ("--heap-admin", clo_heap_admin)
296 else VG_BNUM_CLO("--depth", clo_depth, 1, MAX_DEPTH)
nethercotec9f36922004-02-14 16:40:02 +0000297
298 else if (VG_CLO_STREQN(11, arg, "--alloc-fn=")) {
299 alloc_fns[n_alloc_fns] = & arg[11];
300 n_alloc_fns++;
301 if (n_alloc_fns >= MAX_ALLOC_FNS) {
302 VG_(printf)("Too many alloc functions specified, sorry");
303 VG_(bad_option)(arg);
304 }
305 }
306
307 else if (VG_CLO_STREQ(arg, "--format=text"))
308 clo_format = XText;
309 else if (VG_CLO_STREQ(arg, "--format=html"))
310 clo_format = XHTML;
311
312 else
313 return VG_(replacement_malloc_process_cmd_line_option)(arg);
nethercote27fec902004-06-16 21:26:32 +0000314
nethercotec9f36922004-02-14 16:40:02 +0000315 return True;
316}
317
njn26f02512004-11-22 18:33:15 +0000318void TL_(print_usage)(void)
nethercotec9f36922004-02-14 16:40:02 +0000319{
320 VG_(printf)(
321" --heap=no|yes profile heap blocks [yes]\n"
322" --heap-admin=<number> average admin bytes per heap block [8]\n"
323" --stacks=no|yes profile stack(s) [yes]\n"
324" --depth=<number> depth of contexts [3]\n"
325" --alloc-fn=<name> specify <fn> as an alloc function [empty]\n"
326" --format=text|html format of textual output [text]\n"
327 );
328 VG_(replacement_malloc_print_usage)();
329}
330
njn26f02512004-11-22 18:33:15 +0000331void TL_(print_debug_usage)(void)
nethercotec9f36922004-02-14 16:40:02 +0000332{
333 VG_(replacement_malloc_print_debug_usage)();
334}
335
336/*------------------------------------------------------------*/
337/*--- Execution contexts ---*/
338/*------------------------------------------------------------*/
339
340// Fake XPt representing all allocation functions like malloc(). Acts as
341// parent node to all top-XPts.
342static XPt* alloc_xpt;
343
344// Cheap allocation for blocks that never need to be freed. Saves about 10%
345// for Konqueror startup with --depth=40.
nethercote7ac7f7b2004-11-02 12:36:02 +0000346static void* perm_malloc(SizeT n_bytes)
nethercotec9f36922004-02-14 16:40:02 +0000347{
348 static Addr hp = 0; // current heap pointer
349 static Addr hp_lim = 0; // maximum usable byte in current block
350
351 #define SUPERBLOCK_SIZE (1 << 20) // 1 MB
352
353 if (hp + n_bytes > hp_lim) {
354 hp = (Addr)VG_(get_memory_from_mmap)(SUPERBLOCK_SIZE, "perm_malloc");
355 hp_lim = hp + SUPERBLOCK_SIZE - 1;
356 }
357
358 hp += n_bytes;
359
360 return (void*)(hp - n_bytes);
361}
362
363
364
365static XPt* new_XPt(Addr eip, XPt* parent, Bool is_bottom)
366{
367 XPt* xpt = perm_malloc(sizeof(XPt));
368 xpt->eip = eip;
369
nethercote43a15ce2004-08-30 19:15:12 +0000370 xpt->curr_space = 0;
371 xpt->approx_ST = 0;
372 xpt->exact_ST_dbld = 0;
nethercotec9f36922004-02-14 16:40:02 +0000373
374 xpt->parent = parent;
nethercotefc016352004-04-27 09:51:51 +0000375
376 // Check parent is not a bottom-XPt
njnca82cc02004-11-22 17:18:48 +0000377 tl_assert(parent == NULL || 0 != parent->max_children);
nethercotec9f36922004-02-14 16:40:02 +0000378
379 xpt->n_children = 0;
380
381 // If a bottom-XPt, don't allocate space for children. This can be 50%
382 // or more, although it tends to drop as --depth increases (eg. 10% for
383 // konqueror with --depth=20).
384 if ( is_bottom ) {
385 xpt->max_children = 0;
386 xpt->children = NULL;
387 n_bot_xpts++;
388 } else {
389 xpt->max_children = 4;
390 xpt->children = VG_(malloc)( xpt->max_children * sizeof(XPt*) );
391 }
392
393 // Update statistics
394 n_xpts++;
395
396 return xpt;
397}
398
399static Bool is_alloc_fn(Addr eip)
400{
401 Int i;
402
403 if ( VG_(get_fnname)(eip, buf, BUF_LEN) ) {
404 for (i = 0; i < n_alloc_fns; i++) {
405 if (VG_STREQ(buf, alloc_fns[i]))
406 return True;
407 }
408 }
409 return False;
410}
411
412// Returns an XCon, from the bottom-XPt. Nb: the XPt returned must be a
413// bottom-XPt now and must always remain a bottom-XPt. We go to some effort
414// to ensure this in certain cases. See comments below.
415static XPt* get_XCon( ThreadId tid, Bool custom_malloc )
416{
nethercoteacac2fd2004-11-04 13:49:28 +0000417 // Static to minimise stack size. +1 for added ~0 %eip.
nethercotec9f36922004-02-14 16:40:02 +0000418 static Addr eips[MAX_DEPTH + MAX_ALLOC_FNS + 1];
419
420 XPt* xpt = alloc_xpt;
421 UInt n_eips, L, A, B, nC;
422 UInt overestimate;
423 Bool reached_bottom;
424
425 VGP_PUSHCC(VgpGetXPt);
426
427 // Want at least clo_depth non-alloc-fn entries in the snapshot.
428 // However, because we have 1 or more (an unknown number, at this point)
429 // alloc-fns ignored, we overestimate the size needed for the stack
430 // snapshot. Then, if necessary, we repeatedly increase the size until
431 // it is enough.
432 overestimate = 2;
433 while (True) {
434 n_eips = VG_(stack_snapshot)( tid, eips, clo_depth + overestimate );
435
436 // Now we add a dummy "unknown" %eip at the end. This is only used if we
437 // run out of %eips before hitting clo_depth. It's done to ensure the
438 // XPt we return is (now and forever) a bottom-XPt. If the returned XPt
439 // wasn't a bottom-XPt (now or later) it would cause problems later (eg.
nethercote43a15ce2004-08-30 19:15:12 +0000440 // the parent's approx_ST wouldn't be equal [or almost equal] to the
441 // total of the childrens' approx_STs).
nethercoteacac2fd2004-11-04 13:49:28 +0000442 eips[ n_eips++ ] = ~((Addr)0);
nethercotec9f36922004-02-14 16:40:02 +0000443
444 // Skip over alloc functions in eips[].
445 for (L = 0; is_alloc_fn(eips[L]) && L < n_eips; L++) { }
446
447 // Must be at least one alloc function, unless client used
448 // MALLOCLIKE_BLOCK
njnca82cc02004-11-22 17:18:48 +0000449 if (!custom_malloc) tl_assert(L > 0);
nethercotec9f36922004-02-14 16:40:02 +0000450
451 // Should be at least one non-alloc function. If not, try again.
452 if (L == n_eips) {
453 overestimate += 2;
454 if (overestimate > MAX_ALLOC_FNS)
njn67993252004-11-22 18:02:32 +0000455 VG_(tool_panic)("No stk snapshot big enough to find non-alloc fns");
nethercotec9f36922004-02-14 16:40:02 +0000456 } else {
457 break;
458 }
459 }
460 A = L;
461 B = n_eips - 1;
462 reached_bottom = False;
463
464 // By this point, the eips we care about are in eips[A]..eips[B]
465
466 // Now do the search/insertion of the XCon. 'L' is the loop counter,
467 // being the index into eips[].
468 while (True) {
469 // Look for %eip in xpt's children.
470 // XXX: linear search, ugh -- about 10% of time for konqueror startup
471 // XXX: tried cacheing last result, only hit about 4% for konqueror
472 // Nb: this search hits about 98% of the time for konqueror
473 VGP_PUSHCC(VgpGetXPtSearch);
474
475 // If we've searched/added deep enough, or run out of EIPs, this is
476 // the bottom XPt.
477 if (L - A + 1 == clo_depth || L == B)
478 reached_bottom = True;
479
480 nC = 0;
481 while (True) {
482 if (nC == xpt->n_children) {
483 // not found, insert new XPt
njnca82cc02004-11-22 17:18:48 +0000484 tl_assert(xpt->max_children != 0);
485 tl_assert(xpt->n_children <= xpt->max_children);
nethercotec9f36922004-02-14 16:40:02 +0000486 // Expand 'children' if necessary
487 if (xpt->n_children == xpt->max_children) {
488 xpt->max_children *= 2;
489 xpt->children = VG_(realloc)( xpt->children,
490 xpt->max_children * sizeof(XPt*) );
491 n_children_reallocs++;
492 }
493 // Make new XPt for %eip, insert in list
494 xpt->children[ xpt->n_children++ ] =
495 new_XPt(eips[L], xpt, reached_bottom);
496 break;
497 }
498 if (eips[L] == xpt->children[nC]->eip) break; // found the %eip
499 nC++; // keep looking
500 }
501 VGP_POPCC(VgpGetXPtSearch);
502
503 // Return found/built bottom-XPt.
504 if (reached_bottom) {
njnca82cc02004-11-22 17:18:48 +0000505 tl_assert(0 == xpt->children[nC]->n_children); // Must be bottom-XPt
nethercotec9f36922004-02-14 16:40:02 +0000506 VGP_POPCC(VgpGetXPt);
507 return xpt->children[nC];
508 }
509
510 // Descend to next level in XTree, the newly found/built non-bottom-XPt
511 xpt = xpt->children[nC];
512 L++;
513 }
514}
515
516// Update 'curr_space' of every XPt in the XCon, by percolating upwards.
517static void update_XCon(XPt* xpt, Int space_delta)
518{
519 VGP_PUSHCC(VgpUpdateXCon);
520
njnca82cc02004-11-22 17:18:48 +0000521 tl_assert(True == clo_heap);
522 tl_assert(0 != space_delta);
523 tl_assert(NULL != xpt);
524 tl_assert(0 == xpt->n_children); // must be bottom-XPt
nethercotec9f36922004-02-14 16:40:02 +0000525
526 while (xpt != alloc_xpt) {
njnca82cc02004-11-22 17:18:48 +0000527 if (space_delta < 0) tl_assert(xpt->curr_space >= -space_delta);
nethercotec9f36922004-02-14 16:40:02 +0000528 xpt->curr_space += space_delta;
529 xpt = xpt->parent;
530 }
njnca82cc02004-11-22 17:18:48 +0000531 if (space_delta < 0) tl_assert(alloc_xpt->curr_space >= -space_delta);
nethercotec9f36922004-02-14 16:40:02 +0000532 alloc_xpt->curr_space += space_delta;
533
534 VGP_POPCC(VgpUpdateXCon);
535}
536
537// Actually want a reverse sort, biggest to smallest
nethercote43a15ce2004-08-30 19:15:12 +0000538static Int XPt_cmp_approx_ST(void* n1, void* n2)
nethercotec9f36922004-02-14 16:40:02 +0000539{
540 XPt* xpt1 = *(XPt**)n1;
541 XPt* xpt2 = *(XPt**)n2;
nethercote43a15ce2004-08-30 19:15:12 +0000542 return (xpt1->approx_ST < xpt2->approx_ST ? 1 : -1);
nethercotec9f36922004-02-14 16:40:02 +0000543}
544
nethercote43a15ce2004-08-30 19:15:12 +0000545static Int XPt_cmp_exact_ST_dbld(void* n1, void* n2)
nethercotec9f36922004-02-14 16:40:02 +0000546{
547 XPt* xpt1 = *(XPt**)n1;
548 XPt* xpt2 = *(XPt**)n2;
nethercote43a15ce2004-08-30 19:15:12 +0000549 return (xpt1->exact_ST_dbld < xpt2->exact_ST_dbld ? 1 : -1);
nethercotec9f36922004-02-14 16:40:02 +0000550}
551
552
553/*------------------------------------------------------------*/
554/*--- A generic Queue ---*/
555/*------------------------------------------------------------*/
556
557typedef
558 struct {
559 UInt head; // Index of first entry
560 UInt tail; // Index of final+1 entry, ie. next free slot
561 UInt max_elems;
562 void** elems;
563 }
564 Queue;
565
566static Queue* construct_queue(UInt size)
567{
568 UInt i;
569 Queue* q = VG_(malloc)(sizeof(Queue));
570 q->head = 0;
571 q->tail = 0;
572 q->max_elems = size;
573 q->elems = VG_(malloc)(size * sizeof(void*));
574 for (i = 0; i < size; i++)
575 q->elems[i] = NULL;
576
577 return q;
578}
579
580static void destruct_queue(Queue* q)
581{
582 VG_(free)(q->elems);
583 VG_(free)(q);
584}
585
586static void shuffle(Queue* dest_q, void** old_elems)
587{
588 UInt i, j;
589 for (i = 0, j = dest_q->head; j < dest_q->tail; i++, j++)
590 dest_q->elems[i] = old_elems[j];
591 dest_q->head = 0;
592 dest_q->tail = i;
593 for ( ; i < dest_q->max_elems; i++)
594 dest_q->elems[i] = NULL; // paranoia
595}
596
597// Shuffles elements down. If not enough slots free, increase size. (We
598// don't wait until we've completely run out of space, because there could
599// be lots of shuffling just before that point which would be slow.)
600static void adjust(Queue* q)
601{
602 void** old_elems;
603
njnca82cc02004-11-22 17:18:48 +0000604 tl_assert(q->tail == q->max_elems);
nethercotec9f36922004-02-14 16:40:02 +0000605 if (q->head < 10) {
606 old_elems = q->elems;
607 q->max_elems *= 2;
608 q->elems = VG_(malloc)(q->max_elems * sizeof(void*));
609 shuffle(q, old_elems);
610 VG_(free)(old_elems);
611 } else {
612 shuffle(q, q->elems);
613 }
614}
615
616static void enqueue(Queue* q, void* elem)
617{
618 if (q->tail == q->max_elems)
619 adjust(q);
620 q->elems[q->tail++] = elem;
621}
622
623static Bool is_empty_queue(Queue* q)
624{
625 return (q->head == q->tail);
626}
627
628static void* dequeue(Queue* q)
629{
630 if (is_empty_queue(q))
631 return NULL; // Queue empty
632 else
633 return q->elems[q->head++];
634}
635
636/*------------------------------------------------------------*/
637/*--- malloc() et al replacement wrappers ---*/
638/*------------------------------------------------------------*/
639
640static __inline__
641void add_HP_Chunk(HP_Chunk* hc)
642{
643 n_heap_blocks++;
644 VG_(HT_add_node) ( malloc_list, (VgHashNode*)hc );
645}
646
647static __inline__
648HP_Chunk* get_HP_Chunk(void* p, HP_Chunk*** prev_chunks_next_ptr)
649{
nethercote3d6b6112004-11-04 16:39:43 +0000650 return (HP_Chunk*)VG_(HT_get_node) ( malloc_list, (UWord)p,
nethercotec9f36922004-02-14 16:40:02 +0000651 (VgHashNode***)prev_chunks_next_ptr );
652}
653
654static __inline__
655void remove_HP_Chunk(HP_Chunk* hc, HP_Chunk** prev_chunks_next_ptr)
656{
njnca82cc02004-11-22 17:18:48 +0000657 tl_assert(n_heap_blocks > 0);
nethercotec9f36922004-02-14 16:40:02 +0000658 n_heap_blocks--;
659 *prev_chunks_next_ptr = hc->next;
660}
661
662// Forward declaration
663static void hp_census(void);
664
nethercote159dfef2004-09-13 13:27:30 +0000665static
njn57735902004-11-25 18:04:54 +0000666void* new_block ( ThreadId tid, void* p, SizeT size, SizeT align,
667 Bool is_zeroed )
nethercotec9f36922004-02-14 16:40:02 +0000668{
669 HP_Chunk* hc;
nethercote57e36b32004-07-10 14:56:28 +0000670 Bool custom_alloc = (NULL == p);
nethercotec9f36922004-02-14 16:40:02 +0000671 if (size < 0) return NULL;
672
673 VGP_PUSHCC(VgpCliMalloc);
674
675 // Update statistics
676 n_allocs++;
nethercote57e36b32004-07-10 14:56:28 +0000677 if (0 == size) n_zero_allocs++;
nethercotec9f36922004-02-14 16:40:02 +0000678
nethercote57e36b32004-07-10 14:56:28 +0000679 // Allocate and zero if necessary
680 if (!p) {
681 p = VG_(cli_malloc)( align, size );
682 if (!p) {
683 VGP_POPCC(VgpCliMalloc);
684 return NULL;
685 }
686 if (is_zeroed) VG_(memset)(p, 0, size);
687 }
688
689 // Make new HP_Chunk node, add to malloclist
690 hc = VG_(malloc)(sizeof(HP_Chunk));
691 hc->size = size;
692 hc->data = (Addr)p;
693 hc->where = NULL; // paranoia
694 if (clo_heap) {
njn57735902004-11-25 18:04:54 +0000695 hc->where = get_XCon( tid, custom_alloc );
nethercote57e36b32004-07-10 14:56:28 +0000696 if (0 != size)
697 update_XCon(hc->where, size);
698 }
699 add_HP_Chunk( hc );
700
701 // do a census!
702 hp_census();
nethercotec9f36922004-02-14 16:40:02 +0000703
704 VGP_POPCC(VgpCliMalloc);
705 return p;
706}
707
708static __inline__
709void die_block ( void* p, Bool custom_free )
710{
nethercote57e36b32004-07-10 14:56:28 +0000711 HP_Chunk *hc, **remove_handle;
nethercotec9f36922004-02-14 16:40:02 +0000712
713 VGP_PUSHCC(VgpCliMalloc);
714
715 // Update statistics
716 n_frees++;
717
nethercote57e36b32004-07-10 14:56:28 +0000718 // Remove HP_Chunk from malloclist
719 hc = get_HP_Chunk( p, &remove_handle );
nethercotec9f36922004-02-14 16:40:02 +0000720 if (hc == NULL)
721 return; // must have been a bogus free(), or p==NULL
njnca82cc02004-11-22 17:18:48 +0000722 tl_assert(hc->data == (Addr)p);
nethercote57e36b32004-07-10 14:56:28 +0000723 remove_HP_Chunk(hc, remove_handle);
nethercotec9f36922004-02-14 16:40:02 +0000724
725 if (clo_heap && hc->size != 0)
726 update_XCon(hc->where, -hc->size);
727
nethercote57e36b32004-07-10 14:56:28 +0000728 VG_(free)( hc );
729
730 // Actually free the heap block, if necessary
nethercotec9f36922004-02-14 16:40:02 +0000731 if (!custom_free)
732 VG_(cli_free)( p );
733
nethercote57e36b32004-07-10 14:56:28 +0000734 // do a census!
735 hp_census();
nethercotec9f36922004-02-14 16:40:02 +0000736
nethercotec9f36922004-02-14 16:40:02 +0000737 VGP_POPCC(VgpCliMalloc);
738}
739
740
njn57735902004-11-25 18:04:54 +0000741void* TL_(malloc) ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000742{
njn57735902004-11-25 18:04:54 +0000743 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000744}
745
njn57735902004-11-25 18:04:54 +0000746void* TL_(__builtin_new) ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000747{
njn57735902004-11-25 18:04:54 +0000748 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000749}
750
njn57735902004-11-25 18:04:54 +0000751void* TL_(__builtin_vec_new) ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000752{
njn57735902004-11-25 18:04:54 +0000753 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000754}
755
njn57735902004-11-25 18:04:54 +0000756void* TL_(calloc) ( ThreadId tid, SizeT m, SizeT size )
nethercotec9f36922004-02-14 16:40:02 +0000757{
njn57735902004-11-25 18:04:54 +0000758 return new_block( tid, NULL, m*size, VG_(clo_alignment), /*is_zeroed*/True );
nethercotec9f36922004-02-14 16:40:02 +0000759}
760
njn57735902004-11-25 18:04:54 +0000761void *TL_(memalign)( ThreadId tid, SizeT align, SizeT n )
fitzhardinge51f3ff12004-03-04 22:42:03 +0000762{
njn57735902004-11-25 18:04:54 +0000763 return new_block( tid, NULL, n, align, False );
fitzhardinge51f3ff12004-03-04 22:42:03 +0000764}
765
njn57735902004-11-25 18:04:54 +0000766void TL_(free) ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000767{
768 die_block( p, /*custom_free*/False );
769}
770
njn57735902004-11-25 18:04:54 +0000771void TL_(__builtin_delete) ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000772{
773 die_block( p, /*custom_free*/False);
774}
775
njn57735902004-11-25 18:04:54 +0000776void TL_(__builtin_vec_delete) ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000777{
778 die_block( p, /*custom_free*/False );
779}
780
njn57735902004-11-25 18:04:54 +0000781void* TL_(realloc) ( ThreadId tid, void* p_old, SizeT new_size )
nethercotec9f36922004-02-14 16:40:02 +0000782{
783 HP_Chunk* hc;
784 HP_Chunk** remove_handle;
785 Int i;
786 void* p_new;
nethercote7ac7f7b2004-11-02 12:36:02 +0000787 SizeT old_size;
nethercotec9f36922004-02-14 16:40:02 +0000788 XPt *old_where, *new_where;
789
790 VGP_PUSHCC(VgpCliMalloc);
791
792 // First try and find the block.
793 hc = get_HP_Chunk ( p_old, &remove_handle );
794 if (hc == NULL) {
795 VGP_POPCC(VgpCliMalloc);
796 return NULL; // must have been a bogus free()
797 }
798
njnca82cc02004-11-22 17:18:48 +0000799 tl_assert(hc->data == (Addr)p_old);
nethercotec9f36922004-02-14 16:40:02 +0000800 old_size = hc->size;
801
802 if (new_size <= old_size) {
803 // new size is smaller or same; block not moved
804 p_new = p_old;
805
806 } else {
807 // new size is bigger; make new block, copy shared contents, free old
808 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_size);
809
810 for (i = 0; i < old_size; i++)
811 ((UChar*)p_new)[i] = ((UChar*)p_old)[i];
812
813 VG_(cli_free)(p_old);
814 }
815
816 old_where = hc->where;
njn57735902004-11-25 18:04:54 +0000817 new_where = get_XCon( tid, /*custom_malloc*/False);
nethercotec9f36922004-02-14 16:40:02 +0000818
819 // Update HP_Chunk
820 hc->data = (Addr)p_new;
821 hc->size = new_size;
822 hc->where = new_where;
823
824 // Update XPt curr_space fields
825 if (clo_heap) {
826 if (0 != old_size) update_XCon(old_where, -old_size);
827 if (0 != new_size) update_XCon(new_where, new_size);
828 }
829
830 // If block has moved, have to remove and reinsert in the malloclist
831 // (since the updated 'data' field is the hash lookup key).
832 if (p_new != p_old) {
833 remove_HP_Chunk(hc, remove_handle);
834 add_HP_Chunk(hc);
835 }
836
837 VGP_POPCC(VgpCliMalloc);
838 return p_new;
839}
840
841
842/*------------------------------------------------------------*/
843/*--- Taking a census ---*/
844/*------------------------------------------------------------*/
845
846static Census censi[MAX_N_CENSI];
847static UInt curr_census = 0;
848
849// Must return False so that all stacks are traversed
thughes4ad52d02004-06-27 17:37:21 +0000850static Bool count_stack_size( Addr stack_min, Addr stack_max, void *cp )
nethercotec9f36922004-02-14 16:40:02 +0000851{
thughes4ad52d02004-06-27 17:37:21 +0000852 *(UInt *)cp += (stack_max - stack_min);
nethercotec9f36922004-02-14 16:40:02 +0000853 return False;
854}
855
856static UInt get_xtree_size(XPt* xpt, UInt ix)
857{
858 UInt i;
859
nethercote43a15ce2004-08-30 19:15:12 +0000860 // If no memory allocated at all, nothing interesting to record.
861 if (alloc_xpt->curr_space == 0) return 0;
862
863 // Ignore sub-XTrees that account for a miniscule fraction of current
864 // allocated space.
865 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002) {
nethercotec9f36922004-02-14 16:40:02 +0000866 ix++;
867
868 // Count all (non-zero) descendent XPts
869 for (i = 0; i < xpt->n_children; i++)
870 ix = get_xtree_size(xpt->children[i], ix);
871 }
872 return ix;
873}
874
875static
876UInt do_space_snapshot(XPt xpt[], XTreeSnapshot xtree_snapshot, UInt ix)
877{
878 UInt i;
879
nethercote43a15ce2004-08-30 19:15:12 +0000880 // Structure of this function mirrors that of get_xtree_size().
881
882 if (alloc_xpt->curr_space == 0) return 0;
883
884 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002) {
nethercotec9f36922004-02-14 16:40:02 +0000885 xtree_snapshot[ix].xpt = xpt;
886 xtree_snapshot[ix].space = xpt->curr_space;
887 ix++;
888
nethercotec9f36922004-02-14 16:40:02 +0000889 for (i = 0; i < xpt->n_children; i++)
890 ix = do_space_snapshot(xpt->children[i], xtree_snapshot, ix);
891 }
892 return ix;
893}
894
895static UInt ms_interval;
896static UInt do_every_nth_census = 30;
897
898// Weed out half the censi; we choose those that represent the smallest
899// time-spans, because that loses the least information.
900//
901// Algorithm for N censi: We find the census representing the smallest
902// timeframe, and remove it. We repeat this until (N/2)-1 censi are gone.
903// (It's (N/2)-1 because we never remove the first and last censi.)
904// We have to do this one census at a time, rather than finding the (N/2)-1
905// smallest censi in one hit, because when a census is removed, it's
906// neighbours immediately cover greater timespans. So it's N^2, but N only
907// equals 200, and this is only done every 100 censi, which is not too often.
908static void halve_censi(void)
909{
910 Int i, jp, j, jn, k;
911 Census* min_census;
912
913 n_halvings++;
914 if (VG_(clo_verbosity) > 1)
915 VG_(message)(Vg_UserMsg, "Halving censi...");
916
917 // Sets j to the index of the first not-yet-removed census at or after i
918 #define FIND_CENSUS(i, j) \
919 for (j = i; -1 == censi[j].ms_time; j++) { }
920
921 for (i = 2; i < MAX_N_CENSI; i += 2) {
922 // Find the censi representing the smallest timespan. The timespan
923 // for census n = d(N-1,N)+d(N,N+1), where d(A,B) is the time between
924 // censi A and B. We don't consider the first and last censi for
925 // removal.
926 Int min_span = 0x7fffffff;
927 Int min_j = 0;
928
929 // Initial triple: (prev, curr, next) == (jp, j, jn)
930 jp = 0;
931 FIND_CENSUS(1, j);
932 FIND_CENSUS(j+1, jn);
933 while (jn < MAX_N_CENSI) {
934 Int timespan = censi[jn].ms_time - censi[jp].ms_time;
njnca82cc02004-11-22 17:18:48 +0000935 tl_assert(timespan >= 0);
nethercotec9f36922004-02-14 16:40:02 +0000936 if (timespan < min_span) {
937 min_span = timespan;
938 min_j = j;
939 }
940 // Move on to next triple
941 jp = j;
942 j = jn;
943 FIND_CENSUS(jn+1, jn);
944 }
945 // We've found the least important census, now remove it
946 min_census = & censi[ min_j ];
947 for (k = 0; NULL != min_census->xtree_snapshots[k]; k++) {
948 n_snapshot_frees++;
949 VG_(free)(min_census->xtree_snapshots[k]);
950 min_census->xtree_snapshots[k] = NULL;
951 }
952 min_census->ms_time = -1;
953 }
954
955 // Slide down the remaining censi over the removed ones. The '<=' is
956 // because we are removing on (N/2)-1, rather than N/2.
957 for (i = 0, j = 0; i <= MAX_N_CENSI / 2; i++, j++) {
958 FIND_CENSUS(j, j);
959 if (i != j) {
960 censi[i] = censi[j];
961 }
962 }
963 curr_census = i;
964
965 // Double intervals
966 ms_interval *= 2;
967 do_every_nth_census *= 2;
968
969 if (VG_(clo_verbosity) > 1)
970 VG_(message)(Vg_UserMsg, "...done");
971}
972
973// Take a census. Census time seems to be insignificant (usually <= 0 ms,
974// almost always <= 1ms) so don't have to worry about subtracting it from
975// running time in any way.
976//
977// XXX: NOT TRUE! with bigger depths, konqueror censuses can easily take
978// 50ms!
979static void hp_census(void)
980{
981 static UInt ms_prev_census = 0;
982 static UInt ms_next_census = 0; // zero allows startup census
983
984 Int ms_time, ms_time_since_prev;
985 Int i, K;
986 Census* census;
987
988 VGP_PUSHCC(VgpCensus);
989
990 // Only do a census if it's time
991 ms_time = VG_(read_millisecond_timer)();
992 ms_time_since_prev = ms_time - ms_prev_census;
993 if (ms_time < ms_next_census) {
994 n_fake_censi++;
995 VGP_POPCC(VgpCensus);
996 return;
997 }
998 n_real_censi++;
999
1000 census = & censi[curr_census];
1001
1002 census->ms_time = ms_time;
1003
1004 // Heap: snapshot the K most significant XTrees -------------------
1005 if (clo_heap) {
1006 K = ( alloc_xpt->n_children < MAX_SNAPSHOTS
1007 ? alloc_xpt->n_children
1008 : MAX_SNAPSHOTS); // max out
1009
nethercote43a15ce2004-08-30 19:15:12 +00001010 // Update .approx_ST field (approximatively) for all top-XPts.
nethercotec9f36922004-02-14 16:40:02 +00001011 // We *do not* do it for any non-top-XPTs.
1012 for (i = 0; i < alloc_xpt->n_children; i++) {
1013 XPt* top_XPt = alloc_xpt->children[i];
nethercote43a15ce2004-08-30 19:15:12 +00001014 top_XPt->approx_ST += top_XPt->curr_space * ms_time_since_prev;
nethercotec9f36922004-02-14 16:40:02 +00001015 }
nethercote43a15ce2004-08-30 19:15:12 +00001016 // Sort top-XPts by approx_ST field.
nethercotec9f36922004-02-14 16:40:02 +00001017 VG_(ssort)(alloc_xpt->children, alloc_xpt->n_children, sizeof(XPt*),
nethercote43a15ce2004-08-30 19:15:12 +00001018 XPt_cmp_approx_ST);
nethercotec9f36922004-02-14 16:40:02 +00001019
1020 VGP_PUSHCC(VgpCensusHeap);
1021
1022 // For each significant top-level XPt, record space info about its
1023 // entire XTree, in a single census entry.
1024 // Nb: the xtree_size count/snapshot buffer allocation, and the actual
1025 // snapshot, take similar amounts of time (measured with the
nethercote43a15ce2004-08-30 19:15:12 +00001026 // millisecond counter).
nethercotec9f36922004-02-14 16:40:02 +00001027 for (i = 0; i < K; i++) {
1028 UInt xtree_size, xtree_size2;
nethercote43a15ce2004-08-30 19:15:12 +00001029// VG_(printf)("%7u ", alloc_xpt->children[i]->approx_ST);
1030 // Count how many XPts are in the XTree
nethercotec9f36922004-02-14 16:40:02 +00001031 VGP_PUSHCC(VgpCensusTreeSize);
1032 xtree_size = get_xtree_size( alloc_xpt->children[i], 0 );
1033 VGP_POPCC(VgpCensusTreeSize);
nethercote43a15ce2004-08-30 19:15:12 +00001034
1035 // If no XPts counted (ie. alloc_xpt.curr_space==0 or XTree
1036 // insignificant) then don't take any more snapshots.
1037 if (0 == xtree_size) break;
1038
1039 // Make array of the appropriate size (+1 for zero termination,
1040 // which calloc() does for us).
nethercotec9f36922004-02-14 16:40:02 +00001041 census->xtree_snapshots[i] =
1042 VG_(calloc)(xtree_size+1, sizeof(XPtSnapshot));
jseward612e8362004-03-07 10:23:20 +00001043 if (0 && VG_(clo_verbosity) > 1)
nethercotec9f36922004-02-14 16:40:02 +00001044 VG_(printf)("calloc: %d (%d B)\n", xtree_size+1,
1045 (xtree_size+1) * sizeof(XPtSnapshot));
1046
1047 // Take space-snapshot: copy 'curr_space' for every XPt in the
1048 // XTree into the snapshot array, along with pointers to the XPts.
1049 // (Except for ones with curr_space==0, which wouldn't contribute
nethercote43a15ce2004-08-30 19:15:12 +00001050 // to the final exact_ST_dbld calculation anyway; excluding them
nethercotec9f36922004-02-14 16:40:02 +00001051 // saves a lot of memory and up to 40% time with big --depth valus.
1052 VGP_PUSHCC(VgpCensusSnapshot);
1053 xtree_size2 = do_space_snapshot(alloc_xpt->children[i],
1054 census->xtree_snapshots[i], 0);
njnca82cc02004-11-22 17:18:48 +00001055 tl_assert(xtree_size == xtree_size2);
nethercotec9f36922004-02-14 16:40:02 +00001056 VGP_POPCC(VgpCensusSnapshot);
1057 }
1058// VG_(printf)("\n\n");
1059 // Zero-terminate 'xtree_snapshot' array
1060 census->xtree_snapshots[i] = NULL;
1061
1062 VGP_POPCC(VgpCensusHeap);
1063
1064 //VG_(printf)("printed %d censi\n", K);
1065
1066 // Lump the rest into a single "others" entry.
1067 census->others_space = 0;
1068 for (i = K; i < alloc_xpt->n_children; i++) {
1069 census->others_space += alloc_xpt->children[i]->curr_space;
1070 }
1071 }
1072
1073 // Heap admin -------------------------------------------------------
1074 if (clo_heap_admin > 0)
1075 census->heap_admin_space = clo_heap_admin * n_heap_blocks;
1076
1077 // Stack(s) ---------------------------------------------------------
1078 if (clo_stacks) {
thughes4ad52d02004-06-27 17:37:21 +00001079 census->stacks_space = sigstacks_space;
nethercotec9f36922004-02-14 16:40:02 +00001080 // slightly abusing this function
thughes4ad52d02004-06-27 17:37:21 +00001081 VG_(first_matching_thread_stack)( count_stack_size, &census->stacks_space );
nethercotec9f36922004-02-14 16:40:02 +00001082 i++;
1083 }
1084
1085 // Finish, update interval if necessary -----------------------------
1086 curr_census++;
1087 census = NULL; // don't use again now that curr_census changed
1088
1089 // Halve the entries, if our census table is full
1090 if (MAX_N_CENSI == curr_census) {
1091 halve_censi();
1092 }
1093
1094 // Take time for next census from now, rather than when this census
1095 // should have happened. Because, if there's a big gap due to a kernel
1096 // operation, there's no point doing catch-up censi every BB for a while
1097 // -- that would just give N censi at almost the same time.
1098 if (VG_(clo_verbosity) > 1) {
1099 VG_(message)(Vg_UserMsg, "census: %d ms (took %d ms)", ms_time,
1100 VG_(read_millisecond_timer)() - ms_time );
1101 }
1102 ms_prev_census = ms_time;
1103 ms_next_census = ms_time + ms_interval;
1104 //ms_next_census += ms_interval;
1105
1106 //VG_(printf)("Next: %d ms\n", ms_next_census);
1107
1108 VGP_POPCC(VgpCensus);
1109}
1110
1111/*------------------------------------------------------------*/
1112/*--- Tracked events ---*/
1113/*------------------------------------------------------------*/
1114
nethercote8b5f40c2004-11-02 13:29:50 +00001115static void new_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001116{
1117 sigstacks_space += len;
1118}
1119
nethercote8b5f40c2004-11-02 13:29:50 +00001120static void die_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001121{
njnca82cc02004-11-22 17:18:48 +00001122 tl_assert(sigstacks_space >= len);
nethercotec9f36922004-02-14 16:40:02 +00001123 sigstacks_space -= len;
1124}
1125
1126/*------------------------------------------------------------*/
1127/*--- Client Requests ---*/
1128/*------------------------------------------------------------*/
1129
njn26f02512004-11-22 18:33:15 +00001130Bool TL_(handle_client_request) ( ThreadId tid, UWord* argv, UWord* ret )
nethercotec9f36922004-02-14 16:40:02 +00001131{
1132 switch (argv[0]) {
1133 case VG_USERREQ__MALLOCLIKE_BLOCK: {
nethercote57e36b32004-07-10 14:56:28 +00001134 void* res;
nethercotec9f36922004-02-14 16:40:02 +00001135 void* p = (void*)argv[1];
nethercoted1b64b22004-11-04 18:22:28 +00001136 SizeT sizeB = argv[2];
nethercotec9f36922004-02-14 16:40:02 +00001137 *ret = 0;
njn57735902004-11-25 18:04:54 +00001138 res = new_block( tid, p, sizeB, /*align--ignored*/0, /*is_zeroed*/False );
njnca82cc02004-11-22 17:18:48 +00001139 tl_assert(res == p);
nethercotec9f36922004-02-14 16:40:02 +00001140 return True;
1141 }
1142 case VG_USERREQ__FREELIKE_BLOCK: {
1143 void* p = (void*)argv[1];
1144 *ret = 0;
1145 die_block( p, /*custom_free*/True );
1146 return True;
1147 }
1148 default:
1149 *ret = 0;
1150 return False;
1151 }
1152}
1153
1154/*------------------------------------------------------------*/
1155/*--- Initialisation ---*/
1156/*------------------------------------------------------------*/
1157
1158// Current directory at startup.
1159static Char* base_dir;
1160
njn0e742df2004-11-30 13:26:29 +00001161SizeT VG_(vg_malloc_redzone_szB) = 0;
nethercotec9f36922004-02-14 16:40:02 +00001162
njn26f02512004-11-22 18:33:15 +00001163void TL_(pre_clo_init)()
nethercotec9f36922004-02-14 16:40:02 +00001164{
1165 VG_(details_name) ("Massif");
nethercote29b02612004-03-16 19:41:14 +00001166 VG_(details_version) (NULL);
nethercotec9f36922004-02-14 16:40:02 +00001167 VG_(details_description) ("a space profiler");
1168 VG_(details_copyright_author)("Copyright (C) 2003, Nicholas Nethercote");
nethercote27645c72004-02-23 15:33:33 +00001169 VG_(details_bug_reports_to) (VG_BUGS_TO);
nethercotec9f36922004-02-14 16:40:02 +00001170
1171 // Needs
1172 VG_(needs_libc_freeres)();
1173 VG_(needs_command_line_options)();
1174 VG_(needs_client_requests) ();
1175
1176 // Events to track
1177 VG_(init_new_mem_stack_signal) ( new_mem_stack_signal );
1178 VG_(init_die_mem_stack_signal) ( die_mem_stack_signal );
1179
1180 // Profiling events
1181 VGP_(register_profile_event)(VgpGetXPt, "get-XPt");
1182 VGP_(register_profile_event)(VgpGetXPtSearch, "get-XPt-search");
1183 VGP_(register_profile_event)(VgpCensus, "census");
1184 VGP_(register_profile_event)(VgpCensusHeap, "census-heap");
1185 VGP_(register_profile_event)(VgpCensusSnapshot, "census-snapshot");
1186 VGP_(register_profile_event)(VgpCensusTreeSize, "census-treesize");
1187 VGP_(register_profile_event)(VgpUpdateXCon, "update-XCon");
nethercote43a15ce2004-08-30 19:15:12 +00001188 VGP_(register_profile_event)(VgpCalcSpacetime2, "calc-exact_ST_dbld");
nethercotec9f36922004-02-14 16:40:02 +00001189 VGP_(register_profile_event)(VgpPrintHp, "print-hp");
1190 VGP_(register_profile_event)(VgpPrintXPts, "print-XPts");
1191
1192 // HP_Chunks
1193 malloc_list = VG_(HT_construct)();
1194
1195 // Dummy node at top of the context structure.
1196 alloc_xpt = new_XPt(0, NULL, /*is_bottom*/False);
1197
njnca82cc02004-11-22 17:18:48 +00001198 tl_assert( VG_(getcwd_alloc)(&base_dir) );
nethercotec9f36922004-02-14 16:40:02 +00001199}
1200
njn26f02512004-11-22 18:33:15 +00001201void TL_(post_clo_init)(void)
nethercotec9f36922004-02-14 16:40:02 +00001202{
1203 ms_interval = 1;
1204
1205 // Do an initial sample for t = 0
1206 hp_census();
1207}
1208
1209/*------------------------------------------------------------*/
1210/*--- Instrumentation ---*/
1211/*------------------------------------------------------------*/
1212
sewardjd54babf2005-03-21 00:55:49 +00001213IRBB* TL_(instrument) ( IRBB* bb_in, VexGuestLayout* layout,
1214 IRType gWordTy, IRType hWordTy )
nethercotec9f36922004-02-14 16:40:02 +00001215{
sewardjd54babf2005-03-21 00:55:49 +00001216 /* XXX Will Massif work when gWordTy != hWordTy ? */
njnee8a5862004-11-22 21:08:46 +00001217 return bb_in;
nethercotec9f36922004-02-14 16:40:02 +00001218}
1219
1220/*------------------------------------------------------------*/
1221/*--- Spacetime recomputation ---*/
1222/*------------------------------------------------------------*/
1223
nethercote43a15ce2004-08-30 19:15:12 +00001224// Although we've been calculating space-time along the way, because the
1225// earlier calculations were done at a finer timescale, the .approx_ST field
nethercotec9f36922004-02-14 16:40:02 +00001226// might not agree with what hp2ps sees, because we've thrown away some of
1227// the information. So recompute it at the scale that hp2ps sees, so we can
1228// confidently determine which contexts hp2ps will choose for displaying as
1229// distinct bands. This recomputation only happens to the significant ones
1230// that get printed in the .hp file, so it's cheap.
1231//
nethercote43a15ce2004-08-30 19:15:12 +00001232// The approx_ST calculation:
nethercotec9f36922004-02-14 16:40:02 +00001233// ( a[0]*d(0,1) + a[1]*(d(0,1) + d(1,2)) + ... + a[N-1]*d(N-2,N-1) ) / 2
1234// where
1235// a[N] is the space at census N
1236// d(A,B) is the time interval between censi A and B
1237// and
1238// d(A,B) + d(B,C) == d(A,C)
1239//
1240// Key point: we can calculate the area for a census without knowing the
1241// previous or subsequent censi's space; because any over/underestimates
1242// for this census will be reversed in the next, balancing out. This is
1243// important, as getting the previous/next census entry for a particular
1244// AP is a pain with this data structure, but getting the prev/next
1245// census time is easy.
1246//
nethercote43a15ce2004-08-30 19:15:12 +00001247// Each heap calculation gets added to its context's exact_ST_dbld field.
nethercotec9f36922004-02-14 16:40:02 +00001248// The ULong* values are all running totals, hence the use of "+=" everywhere.
1249
1250// This does the calculations for a single census.
nethercote43a15ce2004-08-30 19:15:12 +00001251static void calc_exact_ST_dbld2(Census* census, UInt d_t1_t2,
nethercotec9f36922004-02-14 16:40:02 +00001252 ULong* twice_heap_ST,
1253 ULong* twice_heap_admin_ST,
1254 ULong* twice_stack_ST)
1255{
1256 UInt i, j;
1257 XPtSnapshot* xpt_snapshot;
1258
1259 // Heap --------------------------------------------------------
1260 if (clo_heap) {
1261 for (i = 0; NULL != census->xtree_snapshots[i]; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001262 // Compute total heap exact_ST_dbld for the entire XTree using only
1263 // the top-XPt (the first XPt in xtree_snapshot).
nethercotec9f36922004-02-14 16:40:02 +00001264 *twice_heap_ST += d_t1_t2 * census->xtree_snapshots[i][0].space;
1265
nethercote43a15ce2004-08-30 19:15:12 +00001266 // Increment exact_ST_dbld for every XPt in xtree_snapshot (inc.
1267 // top one)
nethercotec9f36922004-02-14 16:40:02 +00001268 for (j = 0; NULL != census->xtree_snapshots[i][j].xpt; j++) {
1269 xpt_snapshot = & census->xtree_snapshots[i][j];
nethercote43a15ce2004-08-30 19:15:12 +00001270 xpt_snapshot->xpt->exact_ST_dbld += d_t1_t2 * xpt_snapshot->space;
nethercotec9f36922004-02-14 16:40:02 +00001271 }
1272 }
1273 *twice_heap_ST += d_t1_t2 * census->others_space;
1274 }
1275
1276 // Heap admin --------------------------------------------------
1277 if (clo_heap_admin > 0)
1278 *twice_heap_admin_ST += d_t1_t2 * census->heap_admin_space;
1279
1280 // Stack(s) ----------------------------------------------------
1281 if (clo_stacks)
1282 *twice_stack_ST += d_t1_t2 * census->stacks_space;
1283}
1284
1285// This does the calculations for all censi.
nethercote43a15ce2004-08-30 19:15:12 +00001286static void calc_exact_ST_dbld(ULong* heap2, ULong* heap_admin2, ULong* stack2)
nethercotec9f36922004-02-14 16:40:02 +00001287{
1288 UInt i, N = curr_census;
1289
1290 VGP_PUSHCC(VgpCalcSpacetime2);
1291
1292 *heap2 = 0;
1293 *heap_admin2 = 0;
1294 *stack2 = 0;
1295
1296 if (N <= 1)
1297 return;
1298
nethercote43a15ce2004-08-30 19:15:12 +00001299 calc_exact_ST_dbld2( &censi[0], censi[1].ms_time - censi[0].ms_time,
1300 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001301
1302 for (i = 1; i <= N-2; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001303 calc_exact_ST_dbld2( & censi[i], censi[i+1].ms_time - censi[i-1].ms_time,
1304 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001305 }
1306
nethercote43a15ce2004-08-30 19:15:12 +00001307 calc_exact_ST_dbld2( & censi[N-1], censi[N-1].ms_time - censi[N-2].ms_time,
1308 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001309 // Now get rid of the halves. May lose a 0.5 on each, doesn't matter.
1310 *heap2 /= 2;
1311 *heap_admin2 /= 2;
1312 *stack2 /= 2;
1313
1314 VGP_POPCC(VgpCalcSpacetime2);
1315}
1316
1317/*------------------------------------------------------------*/
1318/*--- Writing the graph file ---*/
1319/*------------------------------------------------------------*/
1320
1321static Char* make_filename(Char* dir, Char* suffix)
1322{
1323 Char* filename;
1324
1325 /* Block is big enough for dir name + massif.<pid>.<suffix> */
1326 filename = VG_(malloc)((VG_(strlen)(dir) + 32)*sizeof(Char));
1327 VG_(sprintf)(filename, "%s/massif.%d%s", dir, VG_(getpid)(), suffix);
1328
1329 return filename;
1330}
1331
1332// Make string acceptable to hp2ps (sigh): remove spaces, escape parentheses.
1333static Char* clean_fnname(Char *d, Char* s)
1334{
1335 Char* dorig = d;
1336 while (*s) {
1337 if (' ' == *s) { *d = '%'; }
1338 else if ('(' == *s) { *d++ = '\\'; *d = '('; }
1339 else if (')' == *s) { *d++ = '\\'; *d = ')'; }
1340 else { *d = *s; };
1341 s++;
1342 d++;
1343 }
1344 *d = '\0';
1345 return dorig;
1346}
1347
1348static void file_err ( Char* file )
1349{
1350 VG_(message)(Vg_UserMsg, "error: can't open output file `%s'", file );
1351 VG_(message)(Vg_UserMsg, " ... so profile results will be missing.");
1352}
1353
1354/* Format, by example:
1355
1356 JOB "a.out -p"
1357 DATE "Fri Apr 17 11:43:45 1992"
1358 SAMPLE_UNIT "seconds"
1359 VALUE_UNIT "bytes"
1360 BEGIN_SAMPLE 0.00
1361 SYSTEM 24
1362 END_SAMPLE 0.00
1363 BEGIN_SAMPLE 1.00
1364 elim 180
1365 insert 24
1366 intersect 12
1367 disin 60
1368 main 12
1369 reduce 20
1370 SYSTEM 12
1371 END_SAMPLE 1.00
1372 MARK 1.50
1373 MARK 1.75
1374 MARK 1.80
1375 BEGIN_SAMPLE 2.00
1376 elim 192
1377 insert 24
1378 intersect 12
1379 disin 84
1380 main 12
1381 SYSTEM 24
1382 END_SAMPLE 2.00
1383 BEGIN_SAMPLE 2.82
1384 END_SAMPLE 2.82
1385 */
1386static void write_hp_file(void)
1387{
1388 Int i, j;
1389 Int fd, res;
1390 Char *hp_file, *ps_file, *aux_file;
1391 Char* cmdfmt;
1392 Char* cmdbuf;
1393 Int cmdlen;
1394
1395 VGP_PUSHCC(VgpPrintHp);
1396
1397 // Open file
1398 hp_file = make_filename( base_dir, ".hp" );
1399 ps_file = make_filename( base_dir, ".ps" );
1400 aux_file = make_filename( base_dir, ".aux" );
1401 fd = VG_(open)(hp_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1402 VKI_S_IRUSR|VKI_S_IWUSR);
1403 if (fd < 0) {
1404 file_err( hp_file );
1405 VGP_POPCC(VgpPrintHp);
1406 return;
1407 }
1408
1409 // File header, including command line
1410 SPRINTF(buf, "JOB \"");
1411 for (i = 0; i < VG_(client_argc); i++)
1412 SPRINTF(buf, "%s ", VG_(client_argv)[i]);
1413 SPRINTF(buf, /*" (%d ms/sample)\"\n"*/ "\"\n"
1414 "DATE \"\"\n"
1415 "SAMPLE_UNIT \"ms\"\n"
1416 "VALUE_UNIT \"bytes\"\n", ms_interval);
1417
1418 // Censi
1419 for (i = 0; i < curr_census; i++) {
1420 Census* census = & censi[i];
1421
1422 // Census start
1423 SPRINTF(buf, "MARK %d.0\n"
1424 "BEGIN_SAMPLE %d.0\n",
1425 census->ms_time, census->ms_time);
1426
1427 // Heap -----------------------------------------------------------
1428 if (clo_heap) {
1429 // Print all the significant XPts from that census
1430 for (j = 0; NULL != census->xtree_snapshots[j]; j++) {
1431 // Grab the jth top-XPt
1432 XTreeSnapshot xtree_snapshot = & census->xtree_snapshots[j][0];
1433 if ( ! VG_(get_fnname)(xtree_snapshot->xpt->eip, buf2, 16)) {
1434 VG_(sprintf)(buf2, "???");
1435 }
1436 SPRINTF(buf, "x%x:%s %d\n", xtree_snapshot->xpt->eip,
1437 clean_fnname(buf3, buf2), xtree_snapshot->space);
1438 }
1439
1440 // Remaining heap block alloc points, combined
1441 if (census->others_space > 0)
1442 SPRINTF(buf, "other %d\n", census->others_space);
1443 }
1444
1445 // Heap admin -----------------------------------------------------
1446 if (clo_heap_admin > 0 && census->heap_admin_space)
1447 SPRINTF(buf, "heap-admin %d\n", census->heap_admin_space);
1448
1449 // Stack(s) -------------------------------------------------------
1450 if (clo_stacks)
1451 SPRINTF(buf, "stack(s) %d\n", census->stacks_space);
1452
1453 // Census end
1454 SPRINTF(buf, "END_SAMPLE %d.0\n", census->ms_time);
1455 }
1456
1457 // Close file
njnca82cc02004-11-22 17:18:48 +00001458 tl_assert(fd >= 0);
nethercotec9f36922004-02-14 16:40:02 +00001459 VG_(close)(fd);
1460
1461 // Attempt to convert file using hp2ps
1462 cmdfmt = "%s/hp2ps -c -t1 %s";
1463 cmdlen = VG_(strlen)(VG_(libdir)) + VG_(strlen)(hp_file)
1464 + VG_(strlen)(cmdfmt);
1465 cmdbuf = VG_(malloc)( sizeof(Char) * cmdlen );
1466 VG_(sprintf)(cmdbuf, cmdfmt, VG_(libdir), hp_file);
1467 res = VG_(system)(cmdbuf);
1468 VG_(free)(cmdbuf);
1469 if (res != 0) {
1470 VG_(message)(Vg_UserMsg,
1471 "Conversion to PostScript failed. Try converting manually.");
1472 } else {
1473 // remove the .hp and .aux file
1474 VG_(unlink)(hp_file);
1475 VG_(unlink)(aux_file);
1476 }
1477
1478 VG_(free)(hp_file);
1479 VG_(free)(ps_file);
1480 VG_(free)(aux_file);
1481
1482 VGP_POPCC(VgpPrintHp);
1483}
1484
1485/*------------------------------------------------------------*/
1486/*--- Writing the XPt text/HTML file ---*/
1487/*------------------------------------------------------------*/
1488
1489static void percentify(Int n, Int pow, Int field_width, char xbuf[])
1490{
1491 int i, len, space;
1492
1493 VG_(sprintf)(xbuf, "%d.%d%%", n / pow, n % pow);
1494 len = VG_(strlen)(xbuf);
1495 space = field_width - len;
1496 if (space < 0) space = 0; /* Allow for v. small field_width */
1497 i = len;
1498
1499 /* Right justify in field */
1500 for ( ; i >= 0; i--) xbuf[i + space] = xbuf[i];
1501 for (i = 0; i < space; i++) xbuf[i] = ' ';
1502}
1503
1504// Nb: uses a static buffer, each call trashes the last string returned.
1505static Char* make_perc(ULong spacetime, ULong total_spacetime)
1506{
1507 static Char mbuf[32];
1508
1509 UInt p = 10;
njnca82cc02004-11-22 17:18:48 +00001510 tl_assert(0 != total_spacetime);
nethercotec9f36922004-02-14 16:40:02 +00001511 percentify(spacetime * 100 * p / total_spacetime, p, 5, mbuf);
1512 return mbuf;
1513}
1514
1515// Nb: passed in XPt is a lower-level XPt; %eips are grabbed from
1516// bottom-to-top of XCon, and then printed in the reverse order.
1517static UInt pp_XCon(Int fd, XPt* xpt)
1518{
1519 Addr rev_eips[clo_depth+1];
1520 Int i = 0;
1521 Int n = 0;
1522 Bool is_HTML = ( XHTML == clo_format );
1523 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1524 Char* maybe_indent = ( is_HTML ? "&nbsp;&nbsp;" : "" );
1525
njnca82cc02004-11-22 17:18:48 +00001526 tl_assert(NULL != xpt);
nethercotec9f36922004-02-14 16:40:02 +00001527
1528 while (True) {
1529 rev_eips[i] = xpt->eip;
1530 n++;
1531 if (alloc_xpt == xpt->parent) break;
1532 i++;
1533 xpt = xpt->parent;
1534 }
1535
1536 for (i = n-1; i >= 0; i--) {
1537 // -1 means point to calling line
1538 VG_(describe_eip)(rev_eips[i]-1, buf2, BUF_LEN);
1539 SPRINTF(buf, " %s%s%s\n", maybe_indent, buf2, maybe_br);
1540 }
1541
1542 return n;
1543}
1544
1545// Important point: for HTML, each XPt must be identified uniquely for the
1546// HTML links to all match up correctly. Using xpt->eip is not
1547// sufficient, because function pointers mean that you can call more than
1548// one other function from a single code location. So instead we use the
1549// address of the xpt struct itself, which is guaranteed to be unique.
1550
1551static void pp_all_XPts2(Int fd, Queue* q, ULong heap_spacetime,
1552 ULong total_spacetime)
1553{
1554 UInt i;
1555 XPt *xpt, *child;
1556 UInt L = 0;
1557 UInt c1 = 1;
1558 UInt c2 = 0;
1559 ULong sum = 0;
1560 UInt n;
1561 Char *eip_desc, *perc;
1562 Bool is_HTML = ( XHTML == clo_format );
1563 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1564 Char* maybe_p = ( is_HTML ? "<p>" : "" );
1565 Char* maybe_ul = ( is_HTML ? "<ul>" : "" );
1566 Char* maybe_li = ( is_HTML ? "<li>" : "" );
1567 Char* maybe_fli = ( is_HTML ? "</li>" : "" );
1568 Char* maybe_ful = ( is_HTML ? "</ul>" : "" );
1569 Char* end_hr = ( is_HTML ? "<hr>" :
1570 "=================================" );
1571 Char* depth = ( is_HTML ? "<code>--depth</code>" : "--depth" );
1572
nethercote43a15ce2004-08-30 19:15:12 +00001573 if (total_spacetime == 0) {
1574 SPRINTF(buf, "(No heap memory allocated)\n");
1575 return;
1576 }
1577
1578
nethercotec9f36922004-02-14 16:40:02 +00001579 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1580
1581 while (NULL != (xpt = (XPt*)dequeue(q))) {
nethercote43a15ce2004-08-30 19:15:12 +00001582 // Check that non-top-level XPts have a zero .approx_ST field.
njnca82cc02004-11-22 17:18:48 +00001583 if (xpt->parent != alloc_xpt) tl_assert( 0 == xpt->approx_ST );
nethercotec9f36922004-02-14 16:40:02 +00001584
nethercote43a15ce2004-08-30 19:15:12 +00001585 // Check that the sum of all children .exact_ST_dbld fields equals
1586 // parent's (unless alloc_xpt, when it should == 0).
nethercotec9f36922004-02-14 16:40:02 +00001587 if (alloc_xpt == xpt) {
njnca82cc02004-11-22 17:18:48 +00001588 tl_assert(0 == xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001589 } else {
1590 sum = 0;
1591 for (i = 0; i < xpt->n_children; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001592 sum += xpt->children[i]->exact_ST_dbld;
nethercotec9f36922004-02-14 16:40:02 +00001593 }
njnca82cc02004-11-22 17:18:48 +00001594 //tl_assert(sum == xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001595 // It's possible that not all the children were included in the
nethercote43a15ce2004-08-30 19:15:12 +00001596 // exact_ST_dbld calculations. Hopefully almost all of them were, and
nethercotec9f36922004-02-14 16:40:02 +00001597 // all the important ones.
njnca82cc02004-11-22 17:18:48 +00001598// tl_assert(sum <= xpt->exact_ST_dbld);
1599// tl_assert(sum * 1.05 > xpt->exact_ST_dbld );
nethercote43a15ce2004-08-30 19:15:12 +00001600// if (sum != xpt->exact_ST_dbld) {
1601// VG_(printf)("%ld, %ld\n", sum, xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001602// }
1603 }
1604
1605 if (xpt == alloc_xpt) {
1606 SPRINTF(buf, "Heap allocation functions accounted for "
1607 "%s of measured spacetime%s\n",
1608 make_perc(heap_spacetime, total_spacetime), maybe_br);
1609 } else {
nethercote43a15ce2004-08-30 19:15:12 +00001610 // Remember: exact_ST_dbld is space.time *doubled*
1611 perc = make_perc(xpt->exact_ST_dbld / 2, total_spacetime);
nethercotec9f36922004-02-14 16:40:02 +00001612 if (is_HTML) {
1613 SPRINTF(buf, "<a name=\"b%x\"></a>"
1614 "Context accounted for "
1615 "<a href=\"#a%x\">%s</a> of measured spacetime<br>\n",
1616 xpt, xpt, perc);
1617 } else {
1618 SPRINTF(buf, "Context accounted for %s of measured spacetime\n",
1619 perc);
1620 }
1621 n = pp_XCon(fd, xpt);
njnca82cc02004-11-22 17:18:48 +00001622 tl_assert(n == L);
nethercotec9f36922004-02-14 16:40:02 +00001623 }
1624
nethercote43a15ce2004-08-30 19:15:12 +00001625 // Sort children by exact_ST_dbld
nethercotec9f36922004-02-14 16:40:02 +00001626 VG_(ssort)(xpt->children, xpt->n_children, sizeof(XPt*),
nethercote43a15ce2004-08-30 19:15:12 +00001627 XPt_cmp_exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001628
1629 SPRINTF(buf, "%s\nCalled from:%s\n", maybe_p, maybe_ul);
1630 for (i = 0; i < xpt->n_children; i++) {
1631 child = xpt->children[i];
1632
1633 // Stop when <1% of total spacetime
nethercote43a15ce2004-08-30 19:15:12 +00001634 if (child->exact_ST_dbld * 1000 / (total_spacetime * 2) < 5) {
nethercotec9f36922004-02-14 16:40:02 +00001635 UInt n_insig = xpt->n_children - i;
1636 Char* s = ( n_insig == 1 ? "" : "s" );
1637 Char* and = ( 0 == i ? "" : "and " );
1638 Char* other = ( 0 == i ? "" : "other " );
1639 SPRINTF(buf, " %s%s%d %sinsignificant place%s%s\n\n",
1640 maybe_li, and, n_insig, other, s, maybe_fli);
1641 break;
1642 }
1643
nethercote43a15ce2004-08-30 19:15:12 +00001644 // Remember: exact_ST_dbld is space.time *doubled*
1645 perc = make_perc(child->exact_ST_dbld / 2, total_spacetime);
nethercotec9f36922004-02-14 16:40:02 +00001646 eip_desc = VG_(describe_eip)(child->eip-1, buf2, BUF_LEN);
1647 if (is_HTML) {
1648 SPRINTF(buf, "<li><a name=\"a%x\"></a>", child );
1649
1650 if (child->n_children > 0) {
1651 SPRINTF(buf, "<a href=\"#b%x\">%s</a>", child, perc);
1652 } else {
1653 SPRINTF(buf, "%s", perc);
1654 }
1655 SPRINTF(buf, ": %s\n", eip_desc);
1656 } else {
1657 SPRINTF(buf, " %6s: %s\n\n", perc, eip_desc);
1658 }
1659
1660 if (child->n_children > 0) {
1661 enqueue(q, (void*)child);
1662 c2++;
1663 }
1664 }
1665 SPRINTF(buf, "%s%s", maybe_ful, maybe_p);
1666 c1--;
1667
1668 // Putting markers between levels of the structure:
1669 // c1 tracks how many to go on this level, c2 tracks how many we've
1670 // queued up for the next level while finishing off this level.
1671 // When c1 gets to zero, we've changed levels, so print a marker,
1672 // move c2 into c1, and zero c2.
1673 if (0 == c1) {
1674 L++;
1675 c1 = c2;
1676 c2 = 0;
1677 if (! is_empty_queue(q) ) { // avoid empty one at end
1678 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1679 }
1680 } else {
1681 SPRINTF(buf, "---------------------------------%s\n", maybe_br);
1682 }
1683 }
1684 SPRINTF(buf, "%s\n\nEnd of information. Rerun with a bigger "
1685 "%s value for more.\n", end_hr, depth);
1686}
1687
1688static void pp_all_XPts(Int fd, XPt* xpt, ULong heap_spacetime,
1689 ULong total_spacetime)
1690{
1691 Queue* q = construct_queue(100);
nethercote43a15ce2004-08-30 19:15:12 +00001692
nethercotec9f36922004-02-14 16:40:02 +00001693 enqueue(q, xpt);
1694 pp_all_XPts2(fd, q, heap_spacetime, total_spacetime);
1695 destruct_queue(q);
1696}
1697
1698static void
1699write_text_file(ULong total_ST, ULong heap_ST)
1700{
1701 Int fd, i;
1702 Char* text_file;
1703 Char* maybe_p = ( XHTML == clo_format ? "<p>" : "" );
1704
1705 VGP_PUSHCC(VgpPrintXPts);
1706
1707 // Open file
1708 text_file = make_filename( base_dir,
1709 ( XText == clo_format ? ".txt" : ".html" ) );
1710
1711 fd = VG_(open)(text_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1712 VKI_S_IRUSR|VKI_S_IWUSR);
1713 if (fd < 0) {
1714 file_err( text_file );
1715 VGP_POPCC(VgpPrintXPts);
1716 return;
1717 }
1718
1719 // Header
1720 if (XHTML == clo_format) {
1721 SPRINTF(buf, "<html>\n"
1722 "<head>\n"
1723 "<title>%s</title>\n"
1724 "</head>\n"
1725 "<body>\n",
1726 text_file);
1727 }
1728
1729 // Command line
1730 SPRINTF(buf, "Command: ");
1731 for (i = 0; i < VG_(client_argc); i++)
1732 SPRINTF(buf, "%s ", VG_(client_argv)[i]);
1733 SPRINTF(buf, "\n%s\n", maybe_p);
1734
1735 if (clo_heap)
1736 pp_all_XPts(fd, alloc_xpt, heap_ST, total_ST);
1737
njnca82cc02004-11-22 17:18:48 +00001738 tl_assert(fd >= 0);
nethercotec9f36922004-02-14 16:40:02 +00001739 VG_(close)(fd);
1740
1741 VGP_POPCC(VgpPrintXPts);
1742}
1743
1744/*------------------------------------------------------------*/
1745/*--- Finalisation ---*/
1746/*------------------------------------------------------------*/
1747
1748static void
1749print_summary(ULong total_ST, ULong heap_ST, ULong heap_admin_ST,
1750 ULong stack_ST)
1751{
1752 VG_(message)(Vg_UserMsg, "Total spacetime: %,ld ms.B", total_ST);
1753
1754 // Heap --------------------------------------------------------------
1755 if (clo_heap)
1756 VG_(message)(Vg_UserMsg, "heap: %s",
nethercote43a15ce2004-08-30 19:15:12 +00001757 ( 0 == total_ST ? (Char*)"(n/a)"
1758 : make_perc(heap_ST, total_ST) ) );
nethercotec9f36922004-02-14 16:40:02 +00001759
1760 // Heap admin --------------------------------------------------------
1761 if (clo_heap_admin)
1762 VG_(message)(Vg_UserMsg, "heap admin: %s",
nethercote43a15ce2004-08-30 19:15:12 +00001763 ( 0 == total_ST ? (Char*)"(n/a)"
1764 : make_perc(heap_admin_ST, total_ST) ) );
nethercotec9f36922004-02-14 16:40:02 +00001765
njnca82cc02004-11-22 17:18:48 +00001766 tl_assert( VG_(HT_count_nodes)(malloc_list) == n_heap_blocks );
nethercotec9f36922004-02-14 16:40:02 +00001767
1768 // Stack(s) ----------------------------------------------------------
nethercote43a15ce2004-08-30 19:15:12 +00001769 if (clo_stacks) {
nethercotec9f36922004-02-14 16:40:02 +00001770 VG_(message)(Vg_UserMsg, "stack(s): %s",
sewardjb5f6f512005-03-10 23:59:00 +00001771 ( 0 == stack_ST ? (Char*)"0%"
1772 : make_perc(stack_ST, total_ST) ) );
nethercote43a15ce2004-08-30 19:15:12 +00001773 }
nethercotec9f36922004-02-14 16:40:02 +00001774
1775 if (VG_(clo_verbosity) > 1) {
njnca82cc02004-11-22 17:18:48 +00001776 tl_assert(n_xpts > 0); // always have alloc_xpt
nethercotec9f36922004-02-14 16:40:02 +00001777 VG_(message)(Vg_DebugMsg, " allocs: %u", n_allocs);
1778 VG_(message)(Vg_DebugMsg, "zeroallocs: %u (%d%%)", n_zero_allocs,
1779 n_zero_allocs * 100 / n_allocs );
1780 VG_(message)(Vg_DebugMsg, " frees: %u", n_frees);
1781 VG_(message)(Vg_DebugMsg, " XPts: %u (%d B)", n_xpts,
1782 n_xpts*sizeof(XPt));
1783 VG_(message)(Vg_DebugMsg, " bot-XPts: %u (%d%%)", n_bot_xpts,
1784 n_bot_xpts * 100 / n_xpts);
1785 VG_(message)(Vg_DebugMsg, " top-XPts: %u (%d%%)", alloc_xpt->n_children,
1786 alloc_xpt->n_children * 100 / n_xpts);
1787 VG_(message)(Vg_DebugMsg, "c-reallocs: %u", n_children_reallocs);
1788 VG_(message)(Vg_DebugMsg, "snap-frees: %u", n_snapshot_frees);
1789 VG_(message)(Vg_DebugMsg, "atmp censi: %u", n_attempted_censi);
1790 VG_(message)(Vg_DebugMsg, "fake censi: %u", n_fake_censi);
1791 VG_(message)(Vg_DebugMsg, "real censi: %u", n_real_censi);
1792 VG_(message)(Vg_DebugMsg, " halvings: %u", n_halvings);
1793 }
1794}
1795
njn26f02512004-11-22 18:33:15 +00001796void TL_(fini)(Int exit_status)
nethercotec9f36922004-02-14 16:40:02 +00001797{
1798 ULong total_ST = 0;
1799 ULong heap_ST = 0;
1800 ULong heap_admin_ST = 0;
1801 ULong stack_ST = 0;
1802
1803 // Do a final (empty) sample to show program's end
1804 hp_census();
1805
1806 // Redo spacetimes of significant contexts to match the .hp file.
nethercote43a15ce2004-08-30 19:15:12 +00001807 calc_exact_ST_dbld(&heap_ST, &heap_admin_ST, &stack_ST);
nethercotec9f36922004-02-14 16:40:02 +00001808 total_ST = heap_ST + heap_admin_ST + stack_ST;
1809 write_hp_file ( );
1810 write_text_file( total_ST, heap_ST );
1811 print_summary ( total_ST, heap_ST, heap_admin_ST, stack_ST );
1812}
1813
njn26f02512004-11-22 18:33:15 +00001814VG_DETERMINE_INTERFACE_VERSION(TL_(pre_clo_init), 0)
nethercotec9f36922004-02-14 16:40:02 +00001815
1816/*--------------------------------------------------------------------*/
1817/*--- end ms_main.c ---*/
1818/*--------------------------------------------------------------------*/
1819