blob: a0165cd8c5b87266b1800502c7eb514103ee316b [file] [log] [blame]
nethercotec9f36922004-02-14 16:40:02 +00001
2/*--------------------------------------------------------------------*/
3/*--- Massif: a heap profiling skin. ms_main.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Massif, a Valgrind skin for profiling memory
8 usage of programs.
9
nethercote2da914c2004-05-11 09:17:49 +000010 Copyright (C) 2003-2004 Nicholas Nethercote
nethercotec9f36922004-02-14 16:40:02 +000011 njn25@cam.ac.uk
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31// Memory profiler. Produces a graph, gives lots of information about
32// allocation contexts, in terms of space.time values (ie. area under the
33// graph). Allocation context information is hierarchical, and can thus
34// be inspected step-wise to an appropriate depth. See comments on data
35// structures below for more info on how things work.
36
37#include "vg_skin.h"
38//#include "vg_profile.c"
39
40#include "valgrind.h" // For {MALLOC,FREE}LIKE_BLOCK
41
42/*------------------------------------------------------------*/
43/*--- Overview of operation ---*/
44/*------------------------------------------------------------*/
45
46// Heap blocks are tracked, and the amount of space allocated by various
47// contexts (ie. lines of code, more or less) is also tracked.
48// Periodically, a census is taken, and the amount of space used, at that
49// point, by the most significant (highly allocating) contexts is recorded.
50// Census start off frequently, but are scaled back as the program goes on,
51// so that there are always a good number of them. At the end, overall
52// spacetimes for different contexts (of differing levels of precision) is
53// calculated, the graph is printed, and the text giving spacetimes for the
54// increasingly precise contexts is given.
55//
56// Measures the following:
57// - heap blocks
58// - heap admin bytes
59// - stack(s)
60// - code (code segments loaded at startup, and loaded with mmap)
61// - data (data segments loaded at startup, and loaded/created with mmap,
62// and brk()d segments)
63
64/*------------------------------------------------------------*/
65/*--- Main types ---*/
66/*------------------------------------------------------------*/
67
68// An XPt represents an "execution point", ie. a code address. Each XPt is
69// part of a tree of XPts (an "execution tree", or "XTree"). Each
70// top-to-bottom path through an XTree gives an execution context ("XCon"),
71// and is equivalent to a traditional Valgrind ExeContext.
72//
73// The XPt at the top of an XTree (but below "alloc_xpt") is called a
74// "top-XPt". The XPts are the bottom of an XTree (leaf nodes) are
75// "bottom-XPTs". The number of XCons in an XTree is equal to the number of
76// bottom-XPTs in that XTree.
77//
78// All XCons have the same top-XPt, "alloc_xpt", which represents all
79// allocation functions like malloc(). It's a bit of a fake XPt, though,
80// and is only used because it makes some of the code simpler.
81//
82// XTrees are bi-directional.
83//
84// > parent < Example: if child1() calls parent() and child2()
85// / | \ also calls parent(), and parent() calls malloc(),
86// | / \ | the XTree will look like this.
87// | v v |
88// child1 child2
89
90typedef struct _XPt XPt;
91
92struct _XPt {
93 Addr eip; // code address
94
95 // Bottom-XPts: space for the precise context.
96 // Other XPts: space of all the descendent bottom-XPts.
97 // Nb: this value goes up and down as the program executes.
98 UInt curr_space;
99
100 // An approximate space.time calculation used along the way for selecting
101 // which contexts to include at each census point.
102 // !!! top-XPTs only !!!
103 ULong spacetime;
104
105 // spacetime2 is an exact space.time calculation done at the end, and
106 // used in the results.
107 // Note that it is *doubled*, to avoid rounding errors.
108 // !!! not used for 'alloc_xpt' !!!
109 ULong spacetime2;
110
111 // n_children and max_children are integers; a very big program might
112 // have more than 65536 allocation points (Konqueror startup has 1800).
113 XPt* parent; // pointer to parent XPt
114 UInt n_children; // number of children
115 UInt max_children; // capacity of children array
116 XPt** children; // pointers to children XPts
117};
118
119// Each census snapshots the most significant XTrees, each XTree having a
120// top-XPt as its root. The 'curr_space' element for each XPt is recorded
121// in the snapshot. The snapshot contains all the XTree's XPts, not in a
122// tree structure, but flattened into an array. This flat snapshot is used
123// at the end for computing spacetime2 for each XPt.
124//
125// Graph resolution, x-axis: no point having more than about 200 census
126// x-points; you can't see them on the graph. Therefore:
127//
128// - do a census every 1 ms for first 200 --> 200, all (200 ms)
129// - halve (drop half of them) --> 100, every 2nd (200 ms)
130// - do a census every 2 ms for next 200 --> 200, every 2nd (400 ms)
131// - halve --> 100, every 4th (400 ms)
132// - do a census every 4 ms for next 400 --> 200, every 4th (800 ms)
133// - etc.
134//
135// This isn't exactly right, because we actually drop (N/2)-1 when halving,
136// but it shows the basic idea.
137
138#define MAX_N_CENSI 200 // Keep it even, for simplicity
139
140// Graph resolution, y-axis: hp2ps only draws the 19 biggest (in space-time)
141// bands, rest get lumped into OTHERS. I only print the top N
142// (cumulative-so-far space-time) at each point. N should be a bit bigger
143// than 19 in case the cumulative space-time doesn't fit with the eventual
144// space-time computed by hp2ps (but it should be close if the samples are
145// evenly spread, since hp2ps does an approximate per-band space-time
146// calculation that just sums the totals; ie. it assumes all samples are
147// the same distance apart).
148
149#define MAX_SNAPSHOTS 32
150
151typedef
152 struct {
153 XPt* xpt;
154 UInt space;
155 }
156 XPtSnapshot;
157
158// An XTree snapshot is stored as an array of of XPt snapshots.
159typedef XPtSnapshot* XTreeSnapshot;
160
161typedef
162 struct {
163 Int ms_time; // Int: must allow -1
164 XTreeSnapshot xtree_snapshots[MAX_SNAPSHOTS+1]; // +1 for zero-termination
165 UInt others_space;
166 UInt heap_admin_space;
167 UInt stacks_space;
168 }
169 Census;
170
171// Metadata for heap blocks. Each one contains a pointer to a bottom-XPt,
172// which is a foothold into the XCon at which it was allocated. From
173// HP_Chunks, XPt 'space' fields are incremented (at allocation) and
174// decremented (at deallocation).
175//
176// Nb: first two fields must match core's VgHashNode.
177typedef
178 struct _HP_Chunk {
179 struct _HP_Chunk* next;
180 Addr data; // Ptr to actual block
181 UInt size; // Size requested
182 XPt* where; // Where allocated; bottom-XPt
183 }
184 HP_Chunk;
185
186/*------------------------------------------------------------*/
187/*--- Profiling events ---*/
188/*------------------------------------------------------------*/
189
190typedef
191 enum {
192 VgpGetXPt = VgpFini+1,
193 VgpGetXPtSearch,
194 VgpCensus,
195 VgpCensusHeap,
196 VgpCensusSnapshot,
197 VgpCensusTreeSize,
198 VgpUpdateXCon,
199 VgpCalcSpacetime2,
200 VgpPrintHp,
201 VgpPrintXPts,
202 }
203 VgpSkinCC;
204
205/*------------------------------------------------------------*/
206/*--- Statistics ---*/
207/*------------------------------------------------------------*/
208
209// Konqueror startup, to give an idea of the numbers involved with a biggish
210// program, with default depth:
211//
212// depth=3 depth=40
213// - 310,000 allocations
214// - 300,000 frees
215// - 15,000 XPts 800,000 XPts
216// - 1,800 top-XPts
217
218static UInt n_xpts = 0;
219static UInt n_bot_xpts = 0;
220static UInt n_allocs = 0;
221static UInt n_zero_allocs = 0;
222static UInt n_frees = 0;
223static UInt n_children_reallocs = 0;
224static UInt n_snapshot_frees = 0;
225
226static UInt n_halvings = 0;
227static UInt n_real_censi = 0;
228static UInt n_fake_censi = 0;
229static UInt n_attempted_censi = 0;
230
231/*------------------------------------------------------------*/
232/*--- Globals ---*/
233/*------------------------------------------------------------*/
234
235#define FILENAME_LEN 256
236
237#define SPRINTF(zz_buf, fmt, args...) \
238 do { Int len = VG_(sprintf)(zz_buf, fmt, ## args); \
239 VG_(write)(fd, (void*)zz_buf, len); \
240 } while (0)
241
242#define BUF_LEN 1024 // general purpose
243static Char buf [BUF_LEN];
244static Char buf2[BUF_LEN];
245static Char buf3[BUF_LEN];
246
247static UInt sigstacks_space = 0; // Current signal stacks space sum
248
249static VgHashTable malloc_list = NULL; // HP_Chunks
250
251static UInt n_heap_blocks = 0;
252
253
254#define MAX_ALLOC_FNS 32 // includes the builtin ones
255
nethercotec7469182004-05-11 09:21:08 +0000256// First few filled in, rest should be zeroed. Zero-terminated vector.
257static UInt n_alloc_fns = 11;
nethercotec9f36922004-02-14 16:40:02 +0000258static Char* alloc_fns[MAX_ALLOC_FNS] = {
259 "malloc",
260 "operator new(unsigned)",
261 "operator new[](unsigned)",
nethercoteeb479cb2004-05-11 16:37:17 +0000262 "operator new(unsigned, std::nothrow_t const&)",
263 "operator new[](unsigned, std::nothrow_t const&)",
nethercotec9f36922004-02-14 16:40:02 +0000264 "__builtin_new",
265 "__builtin_vec_new",
266 "calloc",
267 "realloc",
268 "my_malloc", // from vg_libpthread.c
fitzhardinge51f3ff12004-03-04 22:42:03 +0000269 "memalign",
nethercotec9f36922004-02-14 16:40:02 +0000270};
271
272
273/*------------------------------------------------------------*/
274/*--- Command line args ---*/
275/*------------------------------------------------------------*/
276
277#define MAX_DEPTH 50
278
279typedef
280 enum {
281 XText, XHTML,
282 }
283 XFormat;
284
285static Bool clo_heap = True;
286static UInt clo_heap_admin = 8;
287static Bool clo_stacks = True;
288static Bool clo_depth = 3;
289static XFormat clo_format = XText;
290
291Bool SK_(process_cmd_line_option)(Char* arg)
292{
293 if (VG_CLO_STREQ(arg, "--heap=yes"))
294 clo_heap = True;
295 else if (VG_CLO_STREQ(arg, "--heap=no"))
296 clo_heap = False;
297
298 else if (VG_CLO_STREQN(13, arg, "--heap-admin=")) {
299 clo_heap_admin = (Int)VG_(atoll)(&arg[13]);
300 if (clo_heap_admin > 100) {
301 VG_(message)(Vg_UserMsg,
302 "Admin size for heap blocks too large");
303 VG_(bad_option)(arg);
304 }
305 }
306
307 else if (VG_CLO_STREQ(arg, "--stacks=yes"))
308 clo_stacks = True;
309 else if (VG_CLO_STREQ(arg, "--stacks=no"))
310 clo_stacks = False;
311
312 else if (VG_CLO_STREQN(8, arg, "--depth=")) {
313 clo_depth = (Int)VG_(atoll)(&arg[8]);
314 if (clo_depth < 1) clo_depth = 1;
315 if (clo_depth >= MAX_DEPTH) clo_depth = MAX_DEPTH;
316 }
317
318 else if (VG_CLO_STREQN(11, arg, "--alloc-fn=")) {
319 alloc_fns[n_alloc_fns] = & arg[11];
320 n_alloc_fns++;
321 if (n_alloc_fns >= MAX_ALLOC_FNS) {
322 VG_(printf)("Too many alloc functions specified, sorry");
323 VG_(bad_option)(arg);
324 }
325 }
326
327 else if (VG_CLO_STREQ(arg, "--format=text"))
328 clo_format = XText;
329 else if (VG_CLO_STREQ(arg, "--format=html"))
330 clo_format = XHTML;
331
332 else
333 return VG_(replacement_malloc_process_cmd_line_option)(arg);
334
335 return True;
336}
337
338void SK_(print_usage)(void)
339{
340 VG_(printf)(
341" --heap=no|yes profile heap blocks [yes]\n"
342" --heap-admin=<number> average admin bytes per heap block [8]\n"
343" --stacks=no|yes profile stack(s) [yes]\n"
344" --depth=<number> depth of contexts [3]\n"
345" --alloc-fn=<name> specify <fn> as an alloc function [empty]\n"
346" --format=text|html format of textual output [text]\n"
347 );
348 VG_(replacement_malloc_print_usage)();
349}
350
351void SK_(print_debug_usage)(void)
352{
353 VG_(replacement_malloc_print_debug_usage)();
354}
355
356/*------------------------------------------------------------*/
357/*--- Execution contexts ---*/
358/*------------------------------------------------------------*/
359
360// Fake XPt representing all allocation functions like malloc(). Acts as
361// parent node to all top-XPts.
362static XPt* alloc_xpt;
363
364// Cheap allocation for blocks that never need to be freed. Saves about 10%
365// for Konqueror startup with --depth=40.
366static void* perm_malloc(UInt n_bytes)
367{
368 static Addr hp = 0; // current heap pointer
369 static Addr hp_lim = 0; // maximum usable byte in current block
370
371 #define SUPERBLOCK_SIZE (1 << 20) // 1 MB
372
373 if (hp + n_bytes > hp_lim) {
374 hp = (Addr)VG_(get_memory_from_mmap)(SUPERBLOCK_SIZE, "perm_malloc");
375 hp_lim = hp + SUPERBLOCK_SIZE - 1;
376 }
377
378 hp += n_bytes;
379
380 return (void*)(hp - n_bytes);
381}
382
383
384
385static XPt* new_XPt(Addr eip, XPt* parent, Bool is_bottom)
386{
387 XPt* xpt = perm_malloc(sizeof(XPt));
388 xpt->eip = eip;
389
390 xpt->curr_space = 0;
391 xpt->spacetime = 0;
392 xpt->spacetime2 = 0;
393
394 xpt->parent = parent;
nethercotefc016352004-04-27 09:51:51 +0000395
396 // Check parent is not a bottom-XPt
397 sk_assert(parent == NULL || 0 != parent->max_children);
nethercotec9f36922004-02-14 16:40:02 +0000398
399 xpt->n_children = 0;
400
401 // If a bottom-XPt, don't allocate space for children. This can be 50%
402 // or more, although it tends to drop as --depth increases (eg. 10% for
403 // konqueror with --depth=20).
404 if ( is_bottom ) {
405 xpt->max_children = 0;
406 xpt->children = NULL;
407 n_bot_xpts++;
408 } else {
409 xpt->max_children = 4;
410 xpt->children = VG_(malloc)( xpt->max_children * sizeof(XPt*) );
411 }
412
413 // Update statistics
414 n_xpts++;
415
416 return xpt;
417}
418
419static Bool is_alloc_fn(Addr eip)
420{
421 Int i;
422
423 if ( VG_(get_fnname)(eip, buf, BUF_LEN) ) {
424 for (i = 0; i < n_alloc_fns; i++) {
425 if (VG_STREQ(buf, alloc_fns[i]))
426 return True;
427 }
428 }
429 return False;
430}
431
432// Returns an XCon, from the bottom-XPt. Nb: the XPt returned must be a
433// bottom-XPt now and must always remain a bottom-XPt. We go to some effort
434// to ensure this in certain cases. See comments below.
435static XPt* get_XCon( ThreadId tid, Bool custom_malloc )
436{
437 // Static to minimise stack size. +1 for added 0xffffffff %eip.
438 static Addr eips[MAX_DEPTH + MAX_ALLOC_FNS + 1];
439
440 XPt* xpt = alloc_xpt;
441 UInt n_eips, L, A, B, nC;
442 UInt overestimate;
443 Bool reached_bottom;
444
445 VGP_PUSHCC(VgpGetXPt);
446
447 // Want at least clo_depth non-alloc-fn entries in the snapshot.
448 // However, because we have 1 or more (an unknown number, at this point)
449 // alloc-fns ignored, we overestimate the size needed for the stack
450 // snapshot. Then, if necessary, we repeatedly increase the size until
451 // it is enough.
452 overestimate = 2;
453 while (True) {
454 n_eips = VG_(stack_snapshot)( tid, eips, clo_depth + overestimate );
455
456 // Now we add a dummy "unknown" %eip at the end. This is only used if we
457 // run out of %eips before hitting clo_depth. It's done to ensure the
458 // XPt we return is (now and forever) a bottom-XPt. If the returned XPt
459 // wasn't a bottom-XPt (now or later) it would cause problems later (eg.
460 // the parent's spacetime wouldn't be equal to the total of the
461 // childrens' spacetimes).
462 eips[ n_eips++ ] = 0xffffffff;
463
464 // Skip over alloc functions in eips[].
465 for (L = 0; is_alloc_fn(eips[L]) && L < n_eips; L++) { }
466
467 // Must be at least one alloc function, unless client used
468 // MALLOCLIKE_BLOCK
469 if (!custom_malloc) sk_assert(L > 0);
470
471 // Should be at least one non-alloc function. If not, try again.
472 if (L == n_eips) {
473 overestimate += 2;
474 if (overestimate > MAX_ALLOC_FNS)
475 VG_(skin_panic)("No stk snapshot big enough to find non-alloc fns");
476 } else {
477 break;
478 }
479 }
480 A = L;
481 B = n_eips - 1;
482 reached_bottom = False;
483
484 // By this point, the eips we care about are in eips[A]..eips[B]
485
486 // Now do the search/insertion of the XCon. 'L' is the loop counter,
487 // being the index into eips[].
488 while (True) {
489 // Look for %eip in xpt's children.
490 // XXX: linear search, ugh -- about 10% of time for konqueror startup
491 // XXX: tried cacheing last result, only hit about 4% for konqueror
492 // Nb: this search hits about 98% of the time for konqueror
493 VGP_PUSHCC(VgpGetXPtSearch);
494
495 // If we've searched/added deep enough, or run out of EIPs, this is
496 // the bottom XPt.
497 if (L - A + 1 == clo_depth || L == B)
498 reached_bottom = True;
499
500 nC = 0;
501 while (True) {
502 if (nC == xpt->n_children) {
503 // not found, insert new XPt
504 sk_assert(xpt->max_children != 0);
505 sk_assert(xpt->n_children <= xpt->max_children);
506 // Expand 'children' if necessary
507 if (xpt->n_children == xpt->max_children) {
508 xpt->max_children *= 2;
509 xpt->children = VG_(realloc)( xpt->children,
510 xpt->max_children * sizeof(XPt*) );
511 n_children_reallocs++;
512 }
513 // Make new XPt for %eip, insert in list
514 xpt->children[ xpt->n_children++ ] =
515 new_XPt(eips[L], xpt, reached_bottom);
516 break;
517 }
518 if (eips[L] == xpt->children[nC]->eip) break; // found the %eip
519 nC++; // keep looking
520 }
521 VGP_POPCC(VgpGetXPtSearch);
522
523 // Return found/built bottom-XPt.
524 if (reached_bottom) {
525 sk_assert(0 == xpt->children[nC]->n_children); // Must be bottom-XPt
526 VGP_POPCC(VgpGetXPt);
527 return xpt->children[nC];
528 }
529
530 // Descend to next level in XTree, the newly found/built non-bottom-XPt
531 xpt = xpt->children[nC];
532 L++;
533 }
534}
535
536// Update 'curr_space' of every XPt in the XCon, by percolating upwards.
537static void update_XCon(XPt* xpt, Int space_delta)
538{
539 VGP_PUSHCC(VgpUpdateXCon);
540
541 sk_assert(True == clo_heap);
542 sk_assert(0 != space_delta);
543 sk_assert(NULL != xpt);
544 sk_assert(0 == xpt->n_children); // must be bottom-XPt
545
546 while (xpt != alloc_xpt) {
547 if (space_delta < 0) sk_assert(xpt->curr_space >= -space_delta);
548 xpt->curr_space += space_delta;
549 xpt = xpt->parent;
550 }
551 if (space_delta < 0) sk_assert(alloc_xpt->curr_space >= -space_delta);
552 alloc_xpt->curr_space += space_delta;
553
554 VGP_POPCC(VgpUpdateXCon);
555}
556
557// Actually want a reverse sort, biggest to smallest
558static Int XPt_cmp_spacetime(void* n1, void* n2)
559{
560 XPt* xpt1 = *(XPt**)n1;
561 XPt* xpt2 = *(XPt**)n2;
562 return (xpt1->spacetime < xpt2->spacetime ? 1 : -1);
563}
564
565static Int XPt_cmp_spacetime2(void* n1, void* n2)
566{
567 XPt* xpt1 = *(XPt**)n1;
568 XPt* xpt2 = *(XPt**)n2;
569 return (xpt1->spacetime2 < xpt2->spacetime2 ? 1 : -1);
570}
571
572
573/*------------------------------------------------------------*/
574/*--- A generic Queue ---*/
575/*------------------------------------------------------------*/
576
577typedef
578 struct {
579 UInt head; // Index of first entry
580 UInt tail; // Index of final+1 entry, ie. next free slot
581 UInt max_elems;
582 void** elems;
583 }
584 Queue;
585
586static Queue* construct_queue(UInt size)
587{
588 UInt i;
589 Queue* q = VG_(malloc)(sizeof(Queue));
590 q->head = 0;
591 q->tail = 0;
592 q->max_elems = size;
593 q->elems = VG_(malloc)(size * sizeof(void*));
594 for (i = 0; i < size; i++)
595 q->elems[i] = NULL;
596
597 return q;
598}
599
600static void destruct_queue(Queue* q)
601{
602 VG_(free)(q->elems);
603 VG_(free)(q);
604}
605
606static void shuffle(Queue* dest_q, void** old_elems)
607{
608 UInt i, j;
609 for (i = 0, j = dest_q->head; j < dest_q->tail; i++, j++)
610 dest_q->elems[i] = old_elems[j];
611 dest_q->head = 0;
612 dest_q->tail = i;
613 for ( ; i < dest_q->max_elems; i++)
614 dest_q->elems[i] = NULL; // paranoia
615}
616
617// Shuffles elements down. If not enough slots free, increase size. (We
618// don't wait until we've completely run out of space, because there could
619// be lots of shuffling just before that point which would be slow.)
620static void adjust(Queue* q)
621{
622 void** old_elems;
623
624 sk_assert(q->tail == q->max_elems);
625 if (q->head < 10) {
626 old_elems = q->elems;
627 q->max_elems *= 2;
628 q->elems = VG_(malloc)(q->max_elems * sizeof(void*));
629 shuffle(q, old_elems);
630 VG_(free)(old_elems);
631 } else {
632 shuffle(q, q->elems);
633 }
634}
635
636static void enqueue(Queue* q, void* elem)
637{
638 if (q->tail == q->max_elems)
639 adjust(q);
640 q->elems[q->tail++] = elem;
641}
642
643static Bool is_empty_queue(Queue* q)
644{
645 return (q->head == q->tail);
646}
647
648static void* dequeue(Queue* q)
649{
650 if (is_empty_queue(q))
651 return NULL; // Queue empty
652 else
653 return q->elems[q->head++];
654}
655
656/*------------------------------------------------------------*/
657/*--- malloc() et al replacement wrappers ---*/
658/*------------------------------------------------------------*/
659
660static __inline__
661void add_HP_Chunk(HP_Chunk* hc)
662{
663 n_heap_blocks++;
664 VG_(HT_add_node) ( malloc_list, (VgHashNode*)hc );
665}
666
667static __inline__
668HP_Chunk* get_HP_Chunk(void* p, HP_Chunk*** prev_chunks_next_ptr)
669{
670 return (HP_Chunk*)VG_(HT_get_node) ( malloc_list, (UInt)p,
671 (VgHashNode***)prev_chunks_next_ptr );
672}
673
674static __inline__
675void remove_HP_Chunk(HP_Chunk* hc, HP_Chunk** prev_chunks_next_ptr)
676{
677 sk_assert(n_heap_blocks > 0);
678 n_heap_blocks--;
679 *prev_chunks_next_ptr = hc->next;
680}
681
682// Forward declaration
683static void hp_census(void);
684
685static __inline__
686void new_block_meta ( void* p, Int size, Bool custom_malloc )
687{
688 HP_Chunk* hc;
689
690 VGP_PUSHCC(VgpCliMalloc);
691
692 if (0 == size) n_zero_allocs++;
693
694 // Make new HP_Chunk node, add to malloclist
695 hc = VG_(malloc)(sizeof(HP_Chunk));
696 hc->size = size;
697 hc->data = (Addr)p;
698
699 if (clo_heap) {
700 hc->where = get_XCon( VG_(get_current_or_recent_tid)(), custom_malloc );
701 if (size != 0)
702 update_XCon(hc->where, size);
703 } else {
704 hc->where = NULL; // paranoia
705 }
706
707 add_HP_Chunk( hc );
708
709 hp_census(); // do a census!
710
711 VGP_POPCC(VgpCliMalloc);
712}
713
714static __inline__
715void* new_block ( Int size, UInt align, Bool is_zeroed )
716{
717 void* p;
718
719 if (size < 0) return NULL;
720
721 VGP_PUSHCC(VgpCliMalloc);
722
723 // Update statistics
724 n_allocs++;
725
726 p = VG_(cli_malloc)( align, size );
727 if (is_zeroed) VG_(memset)(p, 0, size);
728 new_block_meta(p, size, /*custom_malloc*/False);
729
730 VGP_POPCC(VgpCliMalloc);
731 return p;
732}
733
734static __inline__
735void die_block ( void* p, Bool custom_free )
736{
737 HP_Chunk* hc;
738 HP_Chunk** remove_handle;
739
740 VGP_PUSHCC(VgpCliMalloc);
741
742 // Update statistics
743 n_frees++;
744
745 hc = get_HP_Chunk ( p, &remove_handle );
746 if (hc == NULL)
747 return; // must have been a bogus free(), or p==NULL
748
749 sk_assert(hc->data == (Addr)p);
750
751 if (clo_heap && hc->size != 0)
752 update_XCon(hc->where, -hc->size);
753
754 // Actually free the heap block
755 if (!custom_free)
756 VG_(cli_free)( p );
757
758 // Remove HP_Chunk from malloclist, destroy
759 remove_HP_Chunk(hc, remove_handle);
760
761 hp_census(); // do a census!
762
763 VG_(free)( hc );
764 VGP_POPCC(VgpCliMalloc);
765}
766
767
768void* SK_(malloc) ( Int n )
769{
770 return new_block( n, VG_(clo_alignment), /*is_zeroed*/False );
771}
772
773void* SK_(__builtin_new) ( Int n )
774{
775 return new_block( n, VG_(clo_alignment), /*is_zeroed*/False );
776}
777
778void* SK_(__builtin_vec_new) ( Int n )
779{
780 return new_block( n, VG_(clo_alignment), /*is_zeroed*/False );
781}
782
783void* SK_(calloc) ( Int m, Int size )
784{
785 return new_block( m*size, VG_(clo_alignment), /*is_zeroed*/True );
786}
787
fitzhardinge51f3ff12004-03-04 22:42:03 +0000788void *SK_(memalign)( Int align, Int n )
789{
790 return new_block( n, align, False );
791}
792
nethercotec9f36922004-02-14 16:40:02 +0000793void SK_(free) ( void* p )
794{
795 die_block( p, /*custom_free*/False );
796}
797
798void SK_(__builtin_delete) ( void* p )
799{
800 die_block( p, /*custom_free*/False);
801}
802
803void SK_(__builtin_vec_delete) ( void* p )
804{
805 die_block( p, /*custom_free*/False );
806}
807
808void* SK_(realloc) ( void* p_old, Int new_size )
809{
810 HP_Chunk* hc;
811 HP_Chunk** remove_handle;
812 Int i;
813 void* p_new;
814 UInt old_size;
815 XPt *old_where, *new_where;
816
817 VGP_PUSHCC(VgpCliMalloc);
818
819 // First try and find the block.
820 hc = get_HP_Chunk ( p_old, &remove_handle );
821 if (hc == NULL) {
822 VGP_POPCC(VgpCliMalloc);
823 return NULL; // must have been a bogus free()
824 }
825
826 sk_assert(hc->data == (Addr)p_old);
827 old_size = hc->size;
828
829 if (new_size <= old_size) {
830 // new size is smaller or same; block not moved
831 p_new = p_old;
832
833 } else {
834 // new size is bigger; make new block, copy shared contents, free old
835 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_size);
836
837 for (i = 0; i < old_size; i++)
838 ((UChar*)p_new)[i] = ((UChar*)p_old)[i];
839
840 VG_(cli_free)(p_old);
841 }
842
843 old_where = hc->where;
844 new_where = get_XCon( VG_(get_current_or_recent_tid)(),
845 /*custom_malloc*/False);
846
847 // Update HP_Chunk
848 hc->data = (Addr)p_new;
849 hc->size = new_size;
850 hc->where = new_where;
851
852 // Update XPt curr_space fields
853 if (clo_heap) {
854 if (0 != old_size) update_XCon(old_where, -old_size);
855 if (0 != new_size) update_XCon(new_where, new_size);
856 }
857
858 // If block has moved, have to remove and reinsert in the malloclist
859 // (since the updated 'data' field is the hash lookup key).
860 if (p_new != p_old) {
861 remove_HP_Chunk(hc, remove_handle);
862 add_HP_Chunk(hc);
863 }
864
865 VGP_POPCC(VgpCliMalloc);
866 return p_new;
867}
868
869
870/*------------------------------------------------------------*/
871/*--- Taking a census ---*/
872/*------------------------------------------------------------*/
873
874static Census censi[MAX_N_CENSI];
875static UInt curr_census = 0;
876
877// Must return False so that all stacks are traversed
878static UInt count_stack_size_counter;
879static Bool count_stack_size( Addr stack_min, Addr stack_max )
880{
881 count_stack_size_counter += (stack_max - stack_min);
882 return False;
883}
884
885static UInt get_xtree_size(XPt* xpt, UInt ix)
886{
887 UInt i;
888
889// VG_(printf)("%4d ", xpt->curr_space);
890
891 // If this one has size zero, all the children will be size zero too, so
892 // nothing interesting to record.
893// if (0 != xpt->curr_space || 0 == ix) {
894 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002 || 0 == ix) {
895 ix++;
896
897 // Count all (non-zero) descendent XPts
898 for (i = 0; i < xpt->n_children; i++)
899 ix = get_xtree_size(xpt->children[i], ix);
900 }
901 return ix;
902}
903
904static
905UInt do_space_snapshot(XPt xpt[], XTreeSnapshot xtree_snapshot, UInt ix)
906{
907 UInt i;
908
909 // Snapshot this XPt, if non-zero space, or the first one
910// if (0 != xpt->curr_space || 0 == ix) {
911 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002 || 0 == ix) {
912 xtree_snapshot[ix].xpt = xpt;
913 xtree_snapshot[ix].space = xpt->curr_space;
914 ix++;
915
916 // Snapshot all (non-zero) descendent XPts
917 for (i = 0; i < xpt->n_children; i++)
918 ix = do_space_snapshot(xpt->children[i], xtree_snapshot, ix);
919 }
920 return ix;
921}
922
923static UInt ms_interval;
924static UInt do_every_nth_census = 30;
925
926// Weed out half the censi; we choose those that represent the smallest
927// time-spans, because that loses the least information.
928//
929// Algorithm for N censi: We find the census representing the smallest
930// timeframe, and remove it. We repeat this until (N/2)-1 censi are gone.
931// (It's (N/2)-1 because we never remove the first and last censi.)
932// We have to do this one census at a time, rather than finding the (N/2)-1
933// smallest censi in one hit, because when a census is removed, it's
934// neighbours immediately cover greater timespans. So it's N^2, but N only
935// equals 200, and this is only done every 100 censi, which is not too often.
936static void halve_censi(void)
937{
938 Int i, jp, j, jn, k;
939 Census* min_census;
940
941 n_halvings++;
942 if (VG_(clo_verbosity) > 1)
943 VG_(message)(Vg_UserMsg, "Halving censi...");
944
945 // Sets j to the index of the first not-yet-removed census at or after i
946 #define FIND_CENSUS(i, j) \
947 for (j = i; -1 == censi[j].ms_time; j++) { }
948
949 for (i = 2; i < MAX_N_CENSI; i += 2) {
950 // Find the censi representing the smallest timespan. The timespan
951 // for census n = d(N-1,N)+d(N,N+1), where d(A,B) is the time between
952 // censi A and B. We don't consider the first and last censi for
953 // removal.
954 Int min_span = 0x7fffffff;
955 Int min_j = 0;
956
957 // Initial triple: (prev, curr, next) == (jp, j, jn)
958 jp = 0;
959 FIND_CENSUS(1, j);
960 FIND_CENSUS(j+1, jn);
961 while (jn < MAX_N_CENSI) {
962 Int timespan = censi[jn].ms_time - censi[jp].ms_time;
963 sk_assert(timespan >= 0);
964 if (timespan < min_span) {
965 min_span = timespan;
966 min_j = j;
967 }
968 // Move on to next triple
969 jp = j;
970 j = jn;
971 FIND_CENSUS(jn+1, jn);
972 }
973 // We've found the least important census, now remove it
974 min_census = & censi[ min_j ];
975 for (k = 0; NULL != min_census->xtree_snapshots[k]; k++) {
976 n_snapshot_frees++;
977 VG_(free)(min_census->xtree_snapshots[k]);
978 min_census->xtree_snapshots[k] = NULL;
979 }
980 min_census->ms_time = -1;
981 }
982
983 // Slide down the remaining censi over the removed ones. The '<=' is
984 // because we are removing on (N/2)-1, rather than N/2.
985 for (i = 0, j = 0; i <= MAX_N_CENSI / 2; i++, j++) {
986 FIND_CENSUS(j, j);
987 if (i != j) {
988 censi[i] = censi[j];
989 }
990 }
991 curr_census = i;
992
993 // Double intervals
994 ms_interval *= 2;
995 do_every_nth_census *= 2;
996
997 if (VG_(clo_verbosity) > 1)
998 VG_(message)(Vg_UserMsg, "...done");
999}
1000
1001// Take a census. Census time seems to be insignificant (usually <= 0 ms,
1002// almost always <= 1ms) so don't have to worry about subtracting it from
1003// running time in any way.
1004//
1005// XXX: NOT TRUE! with bigger depths, konqueror censuses can easily take
1006// 50ms!
1007static void hp_census(void)
1008{
1009 static UInt ms_prev_census = 0;
1010 static UInt ms_next_census = 0; // zero allows startup census
1011
1012 Int ms_time, ms_time_since_prev;
1013 Int i, K;
1014 Census* census;
1015
1016 VGP_PUSHCC(VgpCensus);
1017
1018 // Only do a census if it's time
1019 ms_time = VG_(read_millisecond_timer)();
1020 ms_time_since_prev = ms_time - ms_prev_census;
1021 if (ms_time < ms_next_census) {
1022 n_fake_censi++;
1023 VGP_POPCC(VgpCensus);
1024 return;
1025 }
1026 n_real_censi++;
1027
1028 census = & censi[curr_census];
1029
1030 census->ms_time = ms_time;
1031
1032 // Heap: snapshot the K most significant XTrees -------------------
1033 if (clo_heap) {
1034 K = ( alloc_xpt->n_children < MAX_SNAPSHOTS
1035 ? alloc_xpt->n_children
1036 : MAX_SNAPSHOTS); // max out
1037
1038 // Update .spacetime field (approximatively) for all top-XPts.
1039 // We *do not* do it for any non-top-XPTs.
1040 for (i = 0; i < alloc_xpt->n_children; i++) {
1041 XPt* top_XPt = alloc_xpt->children[i];
1042 top_XPt->spacetime += top_XPt->curr_space * ms_time_since_prev;
1043 }
1044 // Sort top-XPts by spacetime2 field.
1045 VG_(ssort)(alloc_xpt->children, alloc_xpt->n_children, sizeof(XPt*),
1046 XPt_cmp_spacetime);
1047
1048 VGP_PUSHCC(VgpCensusHeap);
1049
1050 // For each significant top-level XPt, record space info about its
1051 // entire XTree, in a single census entry.
1052 // Nb: the xtree_size count/snapshot buffer allocation, and the actual
1053 // snapshot, take similar amounts of time (measured with the
1054 // millesecond counter).
1055 for (i = 0; i < K; i++) {
1056 UInt xtree_size, xtree_size2;
1057// VG_(printf)("%7u ", alloc_xpt->children[i]->spacetime);
1058 // Count how many XPts are in the XTree; make array of that size
1059 // (+1 for zero termination, which calloc() does for us).
1060 VGP_PUSHCC(VgpCensusTreeSize);
1061 xtree_size = get_xtree_size( alloc_xpt->children[i], 0 );
1062 VGP_POPCC(VgpCensusTreeSize);
1063 census->xtree_snapshots[i] =
1064 VG_(calloc)(xtree_size+1, sizeof(XPtSnapshot));
jseward612e8362004-03-07 10:23:20 +00001065 if (0 && VG_(clo_verbosity) > 1)
nethercotec9f36922004-02-14 16:40:02 +00001066 VG_(printf)("calloc: %d (%d B)\n", xtree_size+1,
1067 (xtree_size+1) * sizeof(XPtSnapshot));
1068
1069 // Take space-snapshot: copy 'curr_space' for every XPt in the
1070 // XTree into the snapshot array, along with pointers to the XPts.
1071 // (Except for ones with curr_space==0, which wouldn't contribute
1072 // to the final spacetime2 calculation anyway; excluding them
1073 // saves a lot of memory and up to 40% time with big --depth valus.
1074 VGP_PUSHCC(VgpCensusSnapshot);
1075 xtree_size2 = do_space_snapshot(alloc_xpt->children[i],
1076 census->xtree_snapshots[i], 0);
1077 sk_assert(xtree_size == xtree_size2);
1078 VGP_POPCC(VgpCensusSnapshot);
1079 }
1080// VG_(printf)("\n\n");
1081 // Zero-terminate 'xtree_snapshot' array
1082 census->xtree_snapshots[i] = NULL;
1083
1084 VGP_POPCC(VgpCensusHeap);
1085
1086 //VG_(printf)("printed %d censi\n", K);
1087
1088 // Lump the rest into a single "others" entry.
1089 census->others_space = 0;
1090 for (i = K; i < alloc_xpt->n_children; i++) {
1091 census->others_space += alloc_xpt->children[i]->curr_space;
1092 }
1093 }
1094
1095 // Heap admin -------------------------------------------------------
1096 if (clo_heap_admin > 0)
1097 census->heap_admin_space = clo_heap_admin * n_heap_blocks;
1098
1099 // Stack(s) ---------------------------------------------------------
1100 if (clo_stacks) {
1101 count_stack_size_counter = sigstacks_space;
1102 // slightly abusing this function
1103 VG_(first_matching_thread_stack)( count_stack_size );
1104 census->stacks_space = count_stack_size_counter;
1105 i++;
1106 }
1107
1108 // Finish, update interval if necessary -----------------------------
1109 curr_census++;
1110 census = NULL; // don't use again now that curr_census changed
1111
1112 // Halve the entries, if our census table is full
1113 if (MAX_N_CENSI == curr_census) {
1114 halve_censi();
1115 }
1116
1117 // Take time for next census from now, rather than when this census
1118 // should have happened. Because, if there's a big gap due to a kernel
1119 // operation, there's no point doing catch-up censi every BB for a while
1120 // -- that would just give N censi at almost the same time.
1121 if (VG_(clo_verbosity) > 1) {
1122 VG_(message)(Vg_UserMsg, "census: %d ms (took %d ms)", ms_time,
1123 VG_(read_millisecond_timer)() - ms_time );
1124 }
1125 ms_prev_census = ms_time;
1126 ms_next_census = ms_time + ms_interval;
1127 //ms_next_census += ms_interval;
1128
1129 //VG_(printf)("Next: %d ms\n", ms_next_census);
1130
1131 VGP_POPCC(VgpCensus);
1132}
1133
1134/*------------------------------------------------------------*/
1135/*--- Tracked events ---*/
1136/*------------------------------------------------------------*/
1137
1138static void new_mem_stack_signal(Addr a, UInt len)
1139{
1140 sigstacks_space += len;
1141}
1142
1143static void die_mem_stack_signal(Addr a, UInt len)
1144{
1145 sk_assert(sigstacks_space >= len);
1146 sigstacks_space -= len;
1147}
1148
1149/*------------------------------------------------------------*/
1150/*--- Client Requests ---*/
1151/*------------------------------------------------------------*/
1152
1153Bool SK_(handle_client_request) ( ThreadId tid, UInt* argv, UInt* ret )
1154{
1155 switch (argv[0]) {
1156 case VG_USERREQ__MALLOCLIKE_BLOCK: {
1157 void* p = (void*)argv[1];
1158 UInt sizeB = argv[2];
1159 *ret = 0;
1160 new_block_meta( p, sizeB, /*custom_malloc*/True );
1161 return True;
1162 }
1163 case VG_USERREQ__FREELIKE_BLOCK: {
1164 void* p = (void*)argv[1];
1165 *ret = 0;
1166 die_block( p, /*custom_free*/True );
1167 return True;
1168 }
1169 default:
1170 *ret = 0;
1171 return False;
1172 }
1173}
1174
1175/*------------------------------------------------------------*/
1176/*--- Initialisation ---*/
1177/*------------------------------------------------------------*/
1178
1179// Current directory at startup.
1180static Char* base_dir;
1181
1182UInt VG_(vg_malloc_redzone_szB) = 0;
1183
1184void SK_(pre_clo_init)()
1185{
1186 VG_(details_name) ("Massif");
nethercote29b02612004-03-16 19:41:14 +00001187 VG_(details_version) (NULL);
nethercotec9f36922004-02-14 16:40:02 +00001188 VG_(details_description) ("a space profiler");
1189 VG_(details_copyright_author)("Copyright (C) 2003, Nicholas Nethercote");
nethercote27645c72004-02-23 15:33:33 +00001190 VG_(details_bug_reports_to) (VG_BUGS_TO);
nethercotec9f36922004-02-14 16:40:02 +00001191
1192 // Needs
1193 VG_(needs_libc_freeres)();
1194 VG_(needs_command_line_options)();
1195 VG_(needs_client_requests) ();
1196
1197 // Events to track
1198 VG_(init_new_mem_stack_signal) ( new_mem_stack_signal );
1199 VG_(init_die_mem_stack_signal) ( die_mem_stack_signal );
1200
1201 // Profiling events
1202 VGP_(register_profile_event)(VgpGetXPt, "get-XPt");
1203 VGP_(register_profile_event)(VgpGetXPtSearch, "get-XPt-search");
1204 VGP_(register_profile_event)(VgpCensus, "census");
1205 VGP_(register_profile_event)(VgpCensusHeap, "census-heap");
1206 VGP_(register_profile_event)(VgpCensusSnapshot, "census-snapshot");
1207 VGP_(register_profile_event)(VgpCensusTreeSize, "census-treesize");
1208 VGP_(register_profile_event)(VgpUpdateXCon, "update-XCon");
1209 VGP_(register_profile_event)(VgpCalcSpacetime2, "calc-spacetime2");
1210 VGP_(register_profile_event)(VgpPrintHp, "print-hp");
1211 VGP_(register_profile_event)(VgpPrintXPts, "print-XPts");
1212
1213 // HP_Chunks
1214 malloc_list = VG_(HT_construct)();
1215
1216 // Dummy node at top of the context structure.
1217 alloc_xpt = new_XPt(0, NULL, /*is_bottom*/False);
1218
1219 sk_assert( VG_(getcwd_alloc)(&base_dir) );
1220}
1221
1222void SK_(post_clo_init)(void)
1223{
1224 ms_interval = 1;
1225
1226 // Do an initial sample for t = 0
1227 hp_census();
1228}
1229
1230/*------------------------------------------------------------*/
1231/*--- Instrumentation ---*/
1232/*------------------------------------------------------------*/
1233
1234UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
1235{
1236 return cb_in;
1237}
1238
1239/*------------------------------------------------------------*/
1240/*--- Spacetime recomputation ---*/
1241/*------------------------------------------------------------*/
1242
1243// Although we've been calculating spacetime along the way, because the
1244// earlier calculations were done at a finer timescale, the .spacetime field
1245// might not agree with what hp2ps sees, because we've thrown away some of
1246// the information. So recompute it at the scale that hp2ps sees, so we can
1247// confidently determine which contexts hp2ps will choose for displaying as
1248// distinct bands. This recomputation only happens to the significant ones
1249// that get printed in the .hp file, so it's cheap.
1250//
1251// The spacetime calculation:
1252// ( a[0]*d(0,1) + a[1]*(d(0,1) + d(1,2)) + ... + a[N-1]*d(N-2,N-1) ) / 2
1253// where
1254// a[N] is the space at census N
1255// d(A,B) is the time interval between censi A and B
1256// and
1257// d(A,B) + d(B,C) == d(A,C)
1258//
1259// Key point: we can calculate the area for a census without knowing the
1260// previous or subsequent censi's space; because any over/underestimates
1261// for this census will be reversed in the next, balancing out. This is
1262// important, as getting the previous/next census entry for a particular
1263// AP is a pain with this data structure, but getting the prev/next
1264// census time is easy.
1265//
1266// Each heap calculation gets added to its context's spacetime2 field.
1267// The ULong* values are all running totals, hence the use of "+=" everywhere.
1268
1269// This does the calculations for a single census.
1270static void calc_spacetime2b(Census* census, UInt d_t1_t2,
1271 ULong* twice_heap_ST,
1272 ULong* twice_heap_admin_ST,
1273 ULong* twice_stack_ST)
1274{
1275 UInt i, j;
1276 XPtSnapshot* xpt_snapshot;
1277
1278 // Heap --------------------------------------------------------
1279 if (clo_heap) {
1280 for (i = 0; NULL != census->xtree_snapshots[i]; i++) {
1281 // Compute total heap spacetime2 for the entire XTree using only the
1282 // top-XPt (the first XPt in xtree_snapshot).
1283 *twice_heap_ST += d_t1_t2 * census->xtree_snapshots[i][0].space;
1284
1285 // Increment spacetime2 for every XPt in xtree_snapshot (inc. top one)
1286 for (j = 0; NULL != census->xtree_snapshots[i][j].xpt; j++) {
1287 xpt_snapshot = & census->xtree_snapshots[i][j];
1288 xpt_snapshot->xpt->spacetime2 += d_t1_t2 * xpt_snapshot->space;
1289 }
1290 }
1291 *twice_heap_ST += d_t1_t2 * census->others_space;
1292 }
1293
1294 // Heap admin --------------------------------------------------
1295 if (clo_heap_admin > 0)
1296 *twice_heap_admin_ST += d_t1_t2 * census->heap_admin_space;
1297
1298 // Stack(s) ----------------------------------------------------
1299 if (clo_stacks)
1300 *twice_stack_ST += d_t1_t2 * census->stacks_space;
1301}
1302
1303// This does the calculations for all censi.
1304static void calc_spacetime2(ULong* heap2, ULong* heap_admin2, ULong* stack2)
1305{
1306 UInt i, N = curr_census;
1307
1308 VGP_PUSHCC(VgpCalcSpacetime2);
1309
1310 *heap2 = 0;
1311 *heap_admin2 = 0;
1312 *stack2 = 0;
1313
1314 if (N <= 1)
1315 return;
1316
1317 calc_spacetime2b( &censi[0], censi[1].ms_time - censi[0].ms_time,
1318 heap2, heap_admin2, stack2 );
1319
1320 for (i = 1; i <= N-2; i++) {
1321 calc_spacetime2b( & censi[i], censi[i+1].ms_time - censi[i-1].ms_time,
1322 heap2, heap_admin2, stack2 );
1323 }
1324
1325 calc_spacetime2b( & censi[N-1], censi[N-1].ms_time - censi[N-2].ms_time,
1326 heap2, heap_admin2, stack2 );
1327 // Now get rid of the halves. May lose a 0.5 on each, doesn't matter.
1328 *heap2 /= 2;
1329 *heap_admin2 /= 2;
1330 *stack2 /= 2;
1331
1332 VGP_POPCC(VgpCalcSpacetime2);
1333}
1334
1335/*------------------------------------------------------------*/
1336/*--- Writing the graph file ---*/
1337/*------------------------------------------------------------*/
1338
1339static Char* make_filename(Char* dir, Char* suffix)
1340{
1341 Char* filename;
1342
1343 /* Block is big enough for dir name + massif.<pid>.<suffix> */
1344 filename = VG_(malloc)((VG_(strlen)(dir) + 32)*sizeof(Char));
1345 VG_(sprintf)(filename, "%s/massif.%d%s", dir, VG_(getpid)(), suffix);
1346
1347 return filename;
1348}
1349
1350// Make string acceptable to hp2ps (sigh): remove spaces, escape parentheses.
1351static Char* clean_fnname(Char *d, Char* s)
1352{
1353 Char* dorig = d;
1354 while (*s) {
1355 if (' ' == *s) { *d = '%'; }
1356 else if ('(' == *s) { *d++ = '\\'; *d = '('; }
1357 else if (')' == *s) { *d++ = '\\'; *d = ')'; }
1358 else { *d = *s; };
1359 s++;
1360 d++;
1361 }
1362 *d = '\0';
1363 return dorig;
1364}
1365
1366static void file_err ( Char* file )
1367{
1368 VG_(message)(Vg_UserMsg, "error: can't open output file `%s'", file );
1369 VG_(message)(Vg_UserMsg, " ... so profile results will be missing.");
1370}
1371
1372/* Format, by example:
1373
1374 JOB "a.out -p"
1375 DATE "Fri Apr 17 11:43:45 1992"
1376 SAMPLE_UNIT "seconds"
1377 VALUE_UNIT "bytes"
1378 BEGIN_SAMPLE 0.00
1379 SYSTEM 24
1380 END_SAMPLE 0.00
1381 BEGIN_SAMPLE 1.00
1382 elim 180
1383 insert 24
1384 intersect 12
1385 disin 60
1386 main 12
1387 reduce 20
1388 SYSTEM 12
1389 END_SAMPLE 1.00
1390 MARK 1.50
1391 MARK 1.75
1392 MARK 1.80
1393 BEGIN_SAMPLE 2.00
1394 elim 192
1395 insert 24
1396 intersect 12
1397 disin 84
1398 main 12
1399 SYSTEM 24
1400 END_SAMPLE 2.00
1401 BEGIN_SAMPLE 2.82
1402 END_SAMPLE 2.82
1403 */
1404static void write_hp_file(void)
1405{
1406 Int i, j;
1407 Int fd, res;
1408 Char *hp_file, *ps_file, *aux_file;
1409 Char* cmdfmt;
1410 Char* cmdbuf;
1411 Int cmdlen;
1412
1413 VGP_PUSHCC(VgpPrintHp);
1414
1415 // Open file
1416 hp_file = make_filename( base_dir, ".hp" );
1417 ps_file = make_filename( base_dir, ".ps" );
1418 aux_file = make_filename( base_dir, ".aux" );
1419 fd = VG_(open)(hp_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1420 VKI_S_IRUSR|VKI_S_IWUSR);
1421 if (fd < 0) {
1422 file_err( hp_file );
1423 VGP_POPCC(VgpPrintHp);
1424 return;
1425 }
1426
1427 // File header, including command line
1428 SPRINTF(buf, "JOB \"");
1429 for (i = 0; i < VG_(client_argc); i++)
1430 SPRINTF(buf, "%s ", VG_(client_argv)[i]);
1431 SPRINTF(buf, /*" (%d ms/sample)\"\n"*/ "\"\n"
1432 "DATE \"\"\n"
1433 "SAMPLE_UNIT \"ms\"\n"
1434 "VALUE_UNIT \"bytes\"\n", ms_interval);
1435
1436 // Censi
1437 for (i = 0; i < curr_census; i++) {
1438 Census* census = & censi[i];
1439
1440 // Census start
1441 SPRINTF(buf, "MARK %d.0\n"
1442 "BEGIN_SAMPLE %d.0\n",
1443 census->ms_time, census->ms_time);
1444
1445 // Heap -----------------------------------------------------------
1446 if (clo_heap) {
1447 // Print all the significant XPts from that census
1448 for (j = 0; NULL != census->xtree_snapshots[j]; j++) {
1449 // Grab the jth top-XPt
1450 XTreeSnapshot xtree_snapshot = & census->xtree_snapshots[j][0];
1451 if ( ! VG_(get_fnname)(xtree_snapshot->xpt->eip, buf2, 16)) {
1452 VG_(sprintf)(buf2, "???");
1453 }
1454 SPRINTF(buf, "x%x:%s %d\n", xtree_snapshot->xpt->eip,
1455 clean_fnname(buf3, buf2), xtree_snapshot->space);
1456 }
1457
1458 // Remaining heap block alloc points, combined
1459 if (census->others_space > 0)
1460 SPRINTF(buf, "other %d\n", census->others_space);
1461 }
1462
1463 // Heap admin -----------------------------------------------------
1464 if (clo_heap_admin > 0 && census->heap_admin_space)
1465 SPRINTF(buf, "heap-admin %d\n", census->heap_admin_space);
1466
1467 // Stack(s) -------------------------------------------------------
1468 if (clo_stacks)
1469 SPRINTF(buf, "stack(s) %d\n", census->stacks_space);
1470
1471 // Census end
1472 SPRINTF(buf, "END_SAMPLE %d.0\n", census->ms_time);
1473 }
1474
1475 // Close file
1476 sk_assert(fd >= 0);
1477 VG_(close)(fd);
1478
1479 // Attempt to convert file using hp2ps
1480 cmdfmt = "%s/hp2ps -c -t1 %s";
1481 cmdlen = VG_(strlen)(VG_(libdir)) + VG_(strlen)(hp_file)
1482 + VG_(strlen)(cmdfmt);
1483 cmdbuf = VG_(malloc)( sizeof(Char) * cmdlen );
1484 VG_(sprintf)(cmdbuf, cmdfmt, VG_(libdir), hp_file);
1485 res = VG_(system)(cmdbuf);
1486 VG_(free)(cmdbuf);
1487 if (res != 0) {
1488 VG_(message)(Vg_UserMsg,
1489 "Conversion to PostScript failed. Try converting manually.");
1490 } else {
1491 // remove the .hp and .aux file
1492 VG_(unlink)(hp_file);
1493 VG_(unlink)(aux_file);
1494 }
1495
1496 VG_(free)(hp_file);
1497 VG_(free)(ps_file);
1498 VG_(free)(aux_file);
1499
1500 VGP_POPCC(VgpPrintHp);
1501}
1502
1503/*------------------------------------------------------------*/
1504/*--- Writing the XPt text/HTML file ---*/
1505/*------------------------------------------------------------*/
1506
1507static void percentify(Int n, Int pow, Int field_width, char xbuf[])
1508{
1509 int i, len, space;
1510
1511 VG_(sprintf)(xbuf, "%d.%d%%", n / pow, n % pow);
1512 len = VG_(strlen)(xbuf);
1513 space = field_width - len;
1514 if (space < 0) space = 0; /* Allow for v. small field_width */
1515 i = len;
1516
1517 /* Right justify in field */
1518 for ( ; i >= 0; i--) xbuf[i + space] = xbuf[i];
1519 for (i = 0; i < space; i++) xbuf[i] = ' ';
1520}
1521
1522// Nb: uses a static buffer, each call trashes the last string returned.
1523static Char* make_perc(ULong spacetime, ULong total_spacetime)
1524{
1525 static Char mbuf[32];
1526
1527 UInt p = 10;
1528 percentify(spacetime * 100 * p / total_spacetime, p, 5, mbuf);
1529 return mbuf;
1530}
1531
1532// Nb: passed in XPt is a lower-level XPt; %eips are grabbed from
1533// bottom-to-top of XCon, and then printed in the reverse order.
1534static UInt pp_XCon(Int fd, XPt* xpt)
1535{
1536 Addr rev_eips[clo_depth+1];
1537 Int i = 0;
1538 Int n = 0;
1539 Bool is_HTML = ( XHTML == clo_format );
1540 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1541 Char* maybe_indent = ( is_HTML ? "&nbsp;&nbsp;" : "" );
1542
1543 sk_assert(NULL != xpt);
1544
1545 while (True) {
1546 rev_eips[i] = xpt->eip;
1547 n++;
1548 if (alloc_xpt == xpt->parent) break;
1549 i++;
1550 xpt = xpt->parent;
1551 }
1552
1553 for (i = n-1; i >= 0; i--) {
1554 // -1 means point to calling line
1555 VG_(describe_eip)(rev_eips[i]-1, buf2, BUF_LEN);
1556 SPRINTF(buf, " %s%s%s\n", maybe_indent, buf2, maybe_br);
1557 }
1558
1559 return n;
1560}
1561
1562// Important point: for HTML, each XPt must be identified uniquely for the
1563// HTML links to all match up correctly. Using xpt->eip is not
1564// sufficient, because function pointers mean that you can call more than
1565// one other function from a single code location. So instead we use the
1566// address of the xpt struct itself, which is guaranteed to be unique.
1567
1568static void pp_all_XPts2(Int fd, Queue* q, ULong heap_spacetime,
1569 ULong total_spacetime)
1570{
1571 UInt i;
1572 XPt *xpt, *child;
1573 UInt L = 0;
1574 UInt c1 = 1;
1575 UInt c2 = 0;
1576 ULong sum = 0;
1577 UInt n;
1578 Char *eip_desc, *perc;
1579 Bool is_HTML = ( XHTML == clo_format );
1580 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1581 Char* maybe_p = ( is_HTML ? "<p>" : "" );
1582 Char* maybe_ul = ( is_HTML ? "<ul>" : "" );
1583 Char* maybe_li = ( is_HTML ? "<li>" : "" );
1584 Char* maybe_fli = ( is_HTML ? "</li>" : "" );
1585 Char* maybe_ful = ( is_HTML ? "</ul>" : "" );
1586 Char* end_hr = ( is_HTML ? "<hr>" :
1587 "=================================" );
1588 Char* depth = ( is_HTML ? "<code>--depth</code>" : "--depth" );
1589
1590 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1591
1592 while (NULL != (xpt = (XPt*)dequeue(q))) {
1593 // Check that non-top-level XPts have a zero .spacetime field.
1594 if (xpt->parent != alloc_xpt) sk_assert( 0 == xpt->spacetime );
1595
1596 // Check that the sum of all children .spacetime2s equals parent's
1597 // (unless alloc_xpt, when it should == 0).
1598 if (alloc_xpt == xpt) {
1599 sk_assert(0 == xpt->spacetime2);
1600 } else {
1601 sum = 0;
1602 for (i = 0; i < xpt->n_children; i++) {
1603 sum += xpt->children[i]->spacetime2;
1604 }
1605 //sk_assert(sum == xpt->spacetime2);
1606 // It's possible that not all the children were included in the
1607 // spacetime2 calculations. Hopefully almost all of them were, and
1608 // all the important ones.
1609// sk_assert(sum <= xpt->spacetime2);
1610// sk_assert(sum * 1.05 > xpt->spacetime2 );
1611// if (sum != xpt->spacetime2) {
1612// VG_(printf)("%ld, %ld\n", sum, xpt->spacetime2);
1613// }
1614 }
1615
1616 if (xpt == alloc_xpt) {
1617 SPRINTF(buf, "Heap allocation functions accounted for "
1618 "%s of measured spacetime%s\n",
1619 make_perc(heap_spacetime, total_spacetime), maybe_br);
1620 } else {
1621 // Remember: spacetime2 is space.time *doubled*
1622 perc = make_perc(xpt->spacetime2 / 2, total_spacetime);
1623 if (is_HTML) {
1624 SPRINTF(buf, "<a name=\"b%x\"></a>"
1625 "Context accounted for "
1626 "<a href=\"#a%x\">%s</a> of measured spacetime<br>\n",
1627 xpt, xpt, perc);
1628 } else {
1629 SPRINTF(buf, "Context accounted for %s of measured spacetime\n",
1630 perc);
1631 }
1632 n = pp_XCon(fd, xpt);
1633 sk_assert(n == L);
1634 }
1635
1636 // Sort children by spacetime2
1637 VG_(ssort)(xpt->children, xpt->n_children, sizeof(XPt*),
1638 XPt_cmp_spacetime2);
1639
1640 SPRINTF(buf, "%s\nCalled from:%s\n", maybe_p, maybe_ul);
1641 for (i = 0; i < xpt->n_children; i++) {
1642 child = xpt->children[i];
1643
1644 // Stop when <1% of total spacetime
1645 if (child->spacetime2 * 1000 / (total_spacetime * 2) < 5) {
1646 UInt n_insig = xpt->n_children - i;
1647 Char* s = ( n_insig == 1 ? "" : "s" );
1648 Char* and = ( 0 == i ? "" : "and " );
1649 Char* other = ( 0 == i ? "" : "other " );
1650 SPRINTF(buf, " %s%s%d %sinsignificant place%s%s\n\n",
1651 maybe_li, and, n_insig, other, s, maybe_fli);
1652 break;
1653 }
1654
1655 // Remember: spacetime2 is space.time *doubled*
1656 perc = make_perc(child->spacetime2 / 2, total_spacetime);
1657 eip_desc = VG_(describe_eip)(child->eip-1, buf2, BUF_LEN);
1658 if (is_HTML) {
1659 SPRINTF(buf, "<li><a name=\"a%x\"></a>", child );
1660
1661 if (child->n_children > 0) {
1662 SPRINTF(buf, "<a href=\"#b%x\">%s</a>", child, perc);
1663 } else {
1664 SPRINTF(buf, "%s", perc);
1665 }
1666 SPRINTF(buf, ": %s\n", eip_desc);
1667 } else {
1668 SPRINTF(buf, " %6s: %s\n\n", perc, eip_desc);
1669 }
1670
1671 if (child->n_children > 0) {
1672 enqueue(q, (void*)child);
1673 c2++;
1674 }
1675 }
1676 SPRINTF(buf, "%s%s", maybe_ful, maybe_p);
1677 c1--;
1678
1679 // Putting markers between levels of the structure:
1680 // c1 tracks how many to go on this level, c2 tracks how many we've
1681 // queued up for the next level while finishing off this level.
1682 // When c1 gets to zero, we've changed levels, so print a marker,
1683 // move c2 into c1, and zero c2.
1684 if (0 == c1) {
1685 L++;
1686 c1 = c2;
1687 c2 = 0;
1688 if (! is_empty_queue(q) ) { // avoid empty one at end
1689 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1690 }
1691 } else {
1692 SPRINTF(buf, "---------------------------------%s\n", maybe_br);
1693 }
1694 }
1695 SPRINTF(buf, "%s\n\nEnd of information. Rerun with a bigger "
1696 "%s value for more.\n", end_hr, depth);
1697}
1698
1699static void pp_all_XPts(Int fd, XPt* xpt, ULong heap_spacetime,
1700 ULong total_spacetime)
1701{
1702 Queue* q = construct_queue(100);
1703 enqueue(q, xpt);
1704 pp_all_XPts2(fd, q, heap_spacetime, total_spacetime);
1705 destruct_queue(q);
1706}
1707
1708static void
1709write_text_file(ULong total_ST, ULong heap_ST)
1710{
1711 Int fd, i;
1712 Char* text_file;
1713 Char* maybe_p = ( XHTML == clo_format ? "<p>" : "" );
1714
1715 VGP_PUSHCC(VgpPrintXPts);
1716
1717 // Open file
1718 text_file = make_filename( base_dir,
1719 ( XText == clo_format ? ".txt" : ".html" ) );
1720
1721 fd = VG_(open)(text_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1722 VKI_S_IRUSR|VKI_S_IWUSR);
1723 if (fd < 0) {
1724 file_err( text_file );
1725 VGP_POPCC(VgpPrintXPts);
1726 return;
1727 }
1728
1729 // Header
1730 if (XHTML == clo_format) {
1731 SPRINTF(buf, "<html>\n"
1732 "<head>\n"
1733 "<title>%s</title>\n"
1734 "</head>\n"
1735 "<body>\n",
1736 text_file);
1737 }
1738
1739 // Command line
1740 SPRINTF(buf, "Command: ");
1741 for (i = 0; i < VG_(client_argc); i++)
1742 SPRINTF(buf, "%s ", VG_(client_argv)[i]);
1743 SPRINTF(buf, "\n%s\n", maybe_p);
1744
1745 if (clo_heap)
1746 pp_all_XPts(fd, alloc_xpt, heap_ST, total_ST);
1747
1748 sk_assert(fd >= 0);
1749 VG_(close)(fd);
1750
1751 VGP_POPCC(VgpPrintXPts);
1752}
1753
1754/*------------------------------------------------------------*/
1755/*--- Finalisation ---*/
1756/*------------------------------------------------------------*/
1757
1758static void
1759print_summary(ULong total_ST, ULong heap_ST, ULong heap_admin_ST,
1760 ULong stack_ST)
1761{
1762 VG_(message)(Vg_UserMsg, "Total spacetime: %,ld ms.B", total_ST);
1763
1764 // Heap --------------------------------------------------------------
1765 if (clo_heap)
1766 VG_(message)(Vg_UserMsg, "heap: %s",
1767 make_perc(heap_ST, total_ST) );
1768
1769 // Heap admin --------------------------------------------------------
1770 if (clo_heap_admin)
1771 VG_(message)(Vg_UserMsg, "heap admin: %s",
1772 make_perc(heap_admin_ST, total_ST));
1773
1774 sk_assert( VG_(HT_count_nodes)(malloc_list) == n_heap_blocks );
1775
1776 // Stack(s) ----------------------------------------------------------
1777 if (clo_stacks)
1778 VG_(message)(Vg_UserMsg, "stack(s): %s",
1779 make_perc(stack_ST, total_ST));
1780
1781 if (VG_(clo_verbosity) > 1) {
1782 sk_assert(n_xpts > 0); // always have alloc_xpt
1783 VG_(message)(Vg_DebugMsg, " allocs: %u", n_allocs);
1784 VG_(message)(Vg_DebugMsg, "zeroallocs: %u (%d%%)", n_zero_allocs,
1785 n_zero_allocs * 100 / n_allocs );
1786 VG_(message)(Vg_DebugMsg, " frees: %u", n_frees);
1787 VG_(message)(Vg_DebugMsg, " XPts: %u (%d B)", n_xpts,
1788 n_xpts*sizeof(XPt));
1789 VG_(message)(Vg_DebugMsg, " bot-XPts: %u (%d%%)", n_bot_xpts,
1790 n_bot_xpts * 100 / n_xpts);
1791 VG_(message)(Vg_DebugMsg, " top-XPts: %u (%d%%)", alloc_xpt->n_children,
1792 alloc_xpt->n_children * 100 / n_xpts);
1793 VG_(message)(Vg_DebugMsg, "c-reallocs: %u", n_children_reallocs);
1794 VG_(message)(Vg_DebugMsg, "snap-frees: %u", n_snapshot_frees);
1795 VG_(message)(Vg_DebugMsg, "atmp censi: %u", n_attempted_censi);
1796 VG_(message)(Vg_DebugMsg, "fake censi: %u", n_fake_censi);
1797 VG_(message)(Vg_DebugMsg, "real censi: %u", n_real_censi);
1798 VG_(message)(Vg_DebugMsg, " halvings: %u", n_halvings);
1799 }
1800}
1801
1802void SK_(fini)(Int exit_status)
1803{
1804 ULong total_ST = 0;
1805 ULong heap_ST = 0;
1806 ULong heap_admin_ST = 0;
1807 ULong stack_ST = 0;
1808
1809 // Do a final (empty) sample to show program's end
1810 hp_census();
1811
1812 // Redo spacetimes of significant contexts to match the .hp file.
1813 calc_spacetime2(&heap_ST, &heap_admin_ST, &stack_ST);
1814 total_ST = heap_ST + heap_admin_ST + stack_ST;
1815 write_hp_file ( );
1816 write_text_file( total_ST, heap_ST );
1817 print_summary ( total_ST, heap_ST, heap_admin_ST, stack_ST );
1818}
1819
1820VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
1821
1822/*--------------------------------------------------------------------*/
1823/*--- end ms_main.c ---*/
1824/*--------------------------------------------------------------------*/
1825