blob: f695b66074a46853737a3e01131514813157768c [file] [log] [blame]
nethercotec9f36922004-02-14 16:40:02 +00001
2/*--------------------------------------------------------------------*/
nethercote996901a2004-08-03 13:29:09 +00003/*--- Massif: a heap profiling tool. ms_main.c ---*/
nethercotec9f36922004-02-14 16:40:02 +00004/*--------------------------------------------------------------------*/
5
6/*
nethercote996901a2004-08-03 13:29:09 +00007 This file is part of Massif, a Valgrind tool for profiling memory
nethercotec9f36922004-02-14 16:40:02 +00008 usage of programs.
9
njn53612422005-03-12 16:22:54 +000010 Copyright (C) 2003-2005 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000011 njn@valgrind.org
nethercotec9f36922004-02-14 16:40:02 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31// Memory profiler. Produces a graph, gives lots of information about
32// allocation contexts, in terms of space.time values (ie. area under the
33// graph). Allocation context information is hierarchical, and can thus
34// be inspected step-wise to an appropriate depth. See comments on data
35// structures below for more info on how things work.
36
nethercote46063202004-09-02 08:51:43 +000037#include "tool.h"
njn81c00df2005-05-14 21:28:43 +000038#include "pub_tool_hashtable.h"
njn717cde52005-05-10 02:47:21 +000039#include "pub_tool_mallocfree.h"
njn20242342005-05-16 23:31:24 +000040#include "pub_tool_options.h"
njn717cde52005-05-10 02:47:21 +000041#include "pub_tool_replacemalloc.h"
njnd01fef72005-03-25 23:35:48 +000042#include "pub_tool_stacktrace.h"
njn43b9a8a2005-05-10 04:37:01 +000043#include "pub_tool_tooliface.h"
nethercotec9f36922004-02-14 16:40:02 +000044
45#include "valgrind.h" // For {MALLOC,FREE}LIKE_BLOCK
46
47/*------------------------------------------------------------*/
48/*--- Overview of operation ---*/
49/*------------------------------------------------------------*/
50
51// Heap blocks are tracked, and the amount of space allocated by various
52// contexts (ie. lines of code, more or less) is also tracked.
53// Periodically, a census is taken, and the amount of space used, at that
54// point, by the most significant (highly allocating) contexts is recorded.
55// Census start off frequently, but are scaled back as the program goes on,
56// so that there are always a good number of them. At the end, overall
57// spacetimes for different contexts (of differing levels of precision) is
58// calculated, the graph is printed, and the text giving spacetimes for the
59// increasingly precise contexts is given.
60//
61// Measures the following:
62// - heap blocks
63// - heap admin bytes
64// - stack(s)
65// - code (code segments loaded at startup, and loaded with mmap)
66// - data (data segments loaded at startup, and loaded/created with mmap,
67// and brk()d segments)
68
69/*------------------------------------------------------------*/
70/*--- Main types ---*/
71/*------------------------------------------------------------*/
72
73// An XPt represents an "execution point", ie. a code address. Each XPt is
74// part of a tree of XPts (an "execution tree", or "XTree"). Each
75// top-to-bottom path through an XTree gives an execution context ("XCon"),
76// and is equivalent to a traditional Valgrind ExeContext.
77//
78// The XPt at the top of an XTree (but below "alloc_xpt") is called a
79// "top-XPt". The XPts are the bottom of an XTree (leaf nodes) are
80// "bottom-XPTs". The number of XCons in an XTree is equal to the number of
81// bottom-XPTs in that XTree.
82//
83// All XCons have the same top-XPt, "alloc_xpt", which represents all
84// allocation functions like malloc(). It's a bit of a fake XPt, though,
85// and is only used because it makes some of the code simpler.
86//
87// XTrees are bi-directional.
88//
89// > parent < Example: if child1() calls parent() and child2()
90// / | \ also calls parent(), and parent() calls malloc(),
91// | / \ | the XTree will look like this.
92// | v v |
93// child1 child2
94
95typedef struct _XPt XPt;
96
97struct _XPt {
njnd01fef72005-03-25 23:35:48 +000098 Addr ip; // code address
nethercotec9f36922004-02-14 16:40:02 +000099
100 // Bottom-XPts: space for the precise context.
101 // Other XPts: space of all the descendent bottom-XPts.
102 // Nb: this value goes up and down as the program executes.
103 UInt curr_space;
104
105 // An approximate space.time calculation used along the way for selecting
106 // which contexts to include at each census point.
107 // !!! top-XPTs only !!!
nethercote43a15ce2004-08-30 19:15:12 +0000108 ULong approx_ST;
nethercotec9f36922004-02-14 16:40:02 +0000109
nethercote43a15ce2004-08-30 19:15:12 +0000110 // exact_ST_dbld is an exact space.time calculation done at the end, and
nethercotec9f36922004-02-14 16:40:02 +0000111 // used in the results.
112 // Note that it is *doubled*, to avoid rounding errors.
113 // !!! not used for 'alloc_xpt' !!!
nethercote43a15ce2004-08-30 19:15:12 +0000114 ULong exact_ST_dbld;
nethercotec9f36922004-02-14 16:40:02 +0000115
116 // n_children and max_children are integers; a very big program might
117 // have more than 65536 allocation points (Konqueror startup has 1800).
118 XPt* parent; // pointer to parent XPt
119 UInt n_children; // number of children
120 UInt max_children; // capacity of children array
121 XPt** children; // pointers to children XPts
122};
123
124// Each census snapshots the most significant XTrees, each XTree having a
125// top-XPt as its root. The 'curr_space' element for each XPt is recorded
126// in the snapshot. The snapshot contains all the XTree's XPts, not in a
127// tree structure, but flattened into an array. This flat snapshot is used
nethercote43a15ce2004-08-30 19:15:12 +0000128// at the end for computing exact_ST_dbld for each XPt.
nethercotec9f36922004-02-14 16:40:02 +0000129//
130// Graph resolution, x-axis: no point having more than about 200 census
131// x-points; you can't see them on the graph. Therefore:
132//
133// - do a census every 1 ms for first 200 --> 200, all (200 ms)
134// - halve (drop half of them) --> 100, every 2nd (200 ms)
135// - do a census every 2 ms for next 200 --> 200, every 2nd (400 ms)
136// - halve --> 100, every 4th (400 ms)
137// - do a census every 4 ms for next 400 --> 200, every 4th (800 ms)
138// - etc.
139//
140// This isn't exactly right, because we actually drop (N/2)-1 when halving,
141// but it shows the basic idea.
142
143#define MAX_N_CENSI 200 // Keep it even, for simplicity
144
145// Graph resolution, y-axis: hp2ps only draws the 19 biggest (in space-time)
146// bands, rest get lumped into OTHERS. I only print the top N
147// (cumulative-so-far space-time) at each point. N should be a bit bigger
148// than 19 in case the cumulative space-time doesn't fit with the eventual
149// space-time computed by hp2ps (but it should be close if the samples are
150// evenly spread, since hp2ps does an approximate per-band space-time
151// calculation that just sums the totals; ie. it assumes all samples are
152// the same distance apart).
153
154#define MAX_SNAPSHOTS 32
155
156typedef
157 struct {
158 XPt* xpt;
159 UInt space;
160 }
161 XPtSnapshot;
162
163// An XTree snapshot is stored as an array of of XPt snapshots.
164typedef XPtSnapshot* XTreeSnapshot;
165
166typedef
167 struct {
168 Int ms_time; // Int: must allow -1
169 XTreeSnapshot xtree_snapshots[MAX_SNAPSHOTS+1]; // +1 for zero-termination
170 UInt others_space;
171 UInt heap_admin_space;
172 UInt stacks_space;
173 }
174 Census;
175
176// Metadata for heap blocks. Each one contains a pointer to a bottom-XPt,
177// which is a foothold into the XCon at which it was allocated. From
178// HP_Chunks, XPt 'space' fields are incremented (at allocation) and
179// decremented (at deallocation).
180//
181// Nb: first two fields must match core's VgHashNode.
182typedef
183 struct _HP_Chunk {
184 struct _HP_Chunk* next;
185 Addr data; // Ptr to actual block
nethercote7ac7f7b2004-11-02 12:36:02 +0000186 SizeT size; // Size requested
nethercotec9f36922004-02-14 16:40:02 +0000187 XPt* where; // Where allocated; bottom-XPt
188 }
189 HP_Chunk;
190
191/*------------------------------------------------------------*/
192/*--- Profiling events ---*/
193/*------------------------------------------------------------*/
194
195typedef
196 enum {
197 VgpGetXPt = VgpFini+1,
198 VgpGetXPtSearch,
199 VgpCensus,
200 VgpCensusHeap,
201 VgpCensusSnapshot,
202 VgpCensusTreeSize,
203 VgpUpdateXCon,
204 VgpCalcSpacetime2,
205 VgpPrintHp,
206 VgpPrintXPts,
207 }
njn4be0a692004-11-22 18:10:36 +0000208 VgpToolCC;
nethercotec9f36922004-02-14 16:40:02 +0000209
210/*------------------------------------------------------------*/
211/*--- Statistics ---*/
212/*------------------------------------------------------------*/
213
214// Konqueror startup, to give an idea of the numbers involved with a biggish
215// program, with default depth:
216//
217// depth=3 depth=40
218// - 310,000 allocations
219// - 300,000 frees
220// - 15,000 XPts 800,000 XPts
221// - 1,800 top-XPts
222
223static UInt n_xpts = 0;
224static UInt n_bot_xpts = 0;
225static UInt n_allocs = 0;
226static UInt n_zero_allocs = 0;
227static UInt n_frees = 0;
228static UInt n_children_reallocs = 0;
229static UInt n_snapshot_frees = 0;
230
231static UInt n_halvings = 0;
232static UInt n_real_censi = 0;
233static UInt n_fake_censi = 0;
234static UInt n_attempted_censi = 0;
235
236/*------------------------------------------------------------*/
237/*--- Globals ---*/
238/*------------------------------------------------------------*/
239
240#define FILENAME_LEN 256
241
242#define SPRINTF(zz_buf, fmt, args...) \
243 do { Int len = VG_(sprintf)(zz_buf, fmt, ## args); \
244 VG_(write)(fd, (void*)zz_buf, len); \
245 } while (0)
246
247#define BUF_LEN 1024 // general purpose
248static Char buf [BUF_LEN];
249static Char buf2[BUF_LEN];
250static Char buf3[BUF_LEN];
251
nethercote8b5f40c2004-11-02 13:29:50 +0000252static SizeT sigstacks_space = 0; // Current signal stacks space sum
nethercotec9f36922004-02-14 16:40:02 +0000253
254static VgHashTable malloc_list = NULL; // HP_Chunks
255
256static UInt n_heap_blocks = 0;
257
njn51d827b2005-05-09 01:02:08 +0000258// Current directory at startup.
259static Char* base_dir;
nethercotec9f36922004-02-14 16:40:02 +0000260
261#define MAX_ALLOC_FNS 32 // includes the builtin ones
262
nethercotec7469182004-05-11 09:21:08 +0000263// First few filled in, rest should be zeroed. Zero-terminated vector.
264static UInt n_alloc_fns = 11;
nethercotec9f36922004-02-14 16:40:02 +0000265static Char* alloc_fns[MAX_ALLOC_FNS] = {
266 "malloc",
267 "operator new(unsigned)",
268 "operator new[](unsigned)",
nethercoteeb479cb2004-05-11 16:37:17 +0000269 "operator new(unsigned, std::nothrow_t const&)",
270 "operator new[](unsigned, std::nothrow_t const&)",
nethercotec9f36922004-02-14 16:40:02 +0000271 "__builtin_new",
272 "__builtin_vec_new",
273 "calloc",
274 "realloc",
fitzhardinge51f3ff12004-03-04 22:42:03 +0000275 "memalign",
nethercotec9f36922004-02-14 16:40:02 +0000276};
277
278
279/*------------------------------------------------------------*/
280/*--- Command line args ---*/
281/*------------------------------------------------------------*/
282
283#define MAX_DEPTH 50
284
285typedef
286 enum {
287 XText, XHTML,
288 }
289 XFormat;
290
291static Bool clo_heap = True;
292static UInt clo_heap_admin = 8;
293static Bool clo_stacks = True;
294static Bool clo_depth = 3;
295static XFormat clo_format = XText;
296
njn51d827b2005-05-09 01:02:08 +0000297static Bool ms_process_cmd_line_option(Char* arg)
nethercotec9f36922004-02-14 16:40:02 +0000298{
njn45270a22005-03-27 01:00:11 +0000299 VG_BOOL_CLO(arg, "--heap", clo_heap)
300 else VG_BOOL_CLO(arg, "--stacks", clo_stacks)
nethercotec9f36922004-02-14 16:40:02 +0000301
njn45270a22005-03-27 01:00:11 +0000302 else VG_NUM_CLO (arg, "--heap-admin", clo_heap_admin)
303 else VG_BNUM_CLO(arg, "--depth", clo_depth, 1, MAX_DEPTH)
nethercotec9f36922004-02-14 16:40:02 +0000304
305 else if (VG_CLO_STREQN(11, arg, "--alloc-fn=")) {
306 alloc_fns[n_alloc_fns] = & arg[11];
307 n_alloc_fns++;
308 if (n_alloc_fns >= MAX_ALLOC_FNS) {
309 VG_(printf)("Too many alloc functions specified, sorry");
310 VG_(bad_option)(arg);
311 }
312 }
313
314 else if (VG_CLO_STREQ(arg, "--format=text"))
315 clo_format = XText;
316 else if (VG_CLO_STREQ(arg, "--format=html"))
317 clo_format = XHTML;
318
319 else
320 return VG_(replacement_malloc_process_cmd_line_option)(arg);
nethercote27fec902004-06-16 21:26:32 +0000321
nethercotec9f36922004-02-14 16:40:02 +0000322 return True;
323}
324
njn51d827b2005-05-09 01:02:08 +0000325static void ms_print_usage(void)
nethercotec9f36922004-02-14 16:40:02 +0000326{
327 VG_(printf)(
328" --heap=no|yes profile heap blocks [yes]\n"
329" --heap-admin=<number> average admin bytes per heap block [8]\n"
330" --stacks=no|yes profile stack(s) [yes]\n"
331" --depth=<number> depth of contexts [3]\n"
332" --alloc-fn=<name> specify <fn> as an alloc function [empty]\n"
333" --format=text|html format of textual output [text]\n"
334 );
335 VG_(replacement_malloc_print_usage)();
336}
337
njn51d827b2005-05-09 01:02:08 +0000338static void ms_print_debug_usage(void)
nethercotec9f36922004-02-14 16:40:02 +0000339{
340 VG_(replacement_malloc_print_debug_usage)();
341}
342
343/*------------------------------------------------------------*/
344/*--- Execution contexts ---*/
345/*------------------------------------------------------------*/
346
347// Fake XPt representing all allocation functions like malloc(). Acts as
348// parent node to all top-XPts.
349static XPt* alloc_xpt;
350
351// Cheap allocation for blocks that never need to be freed. Saves about 10%
352// for Konqueror startup with --depth=40.
nethercote7ac7f7b2004-11-02 12:36:02 +0000353static void* perm_malloc(SizeT n_bytes)
nethercotec9f36922004-02-14 16:40:02 +0000354{
355 static Addr hp = 0; // current heap pointer
356 static Addr hp_lim = 0; // maximum usable byte in current block
357
358 #define SUPERBLOCK_SIZE (1 << 20) // 1 MB
359
360 if (hp + n_bytes > hp_lim) {
361 hp = (Addr)VG_(get_memory_from_mmap)(SUPERBLOCK_SIZE, "perm_malloc");
362 hp_lim = hp + SUPERBLOCK_SIZE - 1;
363 }
364
365 hp += n_bytes;
366
367 return (void*)(hp - n_bytes);
368}
369
370
371
njnd01fef72005-03-25 23:35:48 +0000372static XPt* new_XPt(Addr ip, XPt* parent, Bool is_bottom)
nethercotec9f36922004-02-14 16:40:02 +0000373{
374 XPt* xpt = perm_malloc(sizeof(XPt));
njnd01fef72005-03-25 23:35:48 +0000375 xpt->ip = ip;
nethercotec9f36922004-02-14 16:40:02 +0000376
nethercote43a15ce2004-08-30 19:15:12 +0000377 xpt->curr_space = 0;
378 xpt->approx_ST = 0;
379 xpt->exact_ST_dbld = 0;
nethercotec9f36922004-02-14 16:40:02 +0000380
381 xpt->parent = parent;
nethercotefc016352004-04-27 09:51:51 +0000382
383 // Check parent is not a bottom-XPt
njnca82cc02004-11-22 17:18:48 +0000384 tl_assert(parent == NULL || 0 != parent->max_children);
nethercotec9f36922004-02-14 16:40:02 +0000385
386 xpt->n_children = 0;
387
388 // If a bottom-XPt, don't allocate space for children. This can be 50%
389 // or more, although it tends to drop as --depth increases (eg. 10% for
390 // konqueror with --depth=20).
391 if ( is_bottom ) {
392 xpt->max_children = 0;
393 xpt->children = NULL;
394 n_bot_xpts++;
395 } else {
396 xpt->max_children = 4;
397 xpt->children = VG_(malloc)( xpt->max_children * sizeof(XPt*) );
398 }
399
400 // Update statistics
401 n_xpts++;
402
403 return xpt;
404}
405
njnd01fef72005-03-25 23:35:48 +0000406static Bool is_alloc_fn(Addr ip)
nethercotec9f36922004-02-14 16:40:02 +0000407{
408 Int i;
409
njnd01fef72005-03-25 23:35:48 +0000410 if ( VG_(get_fnname)(ip, buf, BUF_LEN) ) {
nethercotec9f36922004-02-14 16:40:02 +0000411 for (i = 0; i < n_alloc_fns; i++) {
412 if (VG_STREQ(buf, alloc_fns[i]))
413 return True;
414 }
415 }
416 return False;
417}
418
419// Returns an XCon, from the bottom-XPt. Nb: the XPt returned must be a
420// bottom-XPt now and must always remain a bottom-XPt. We go to some effort
421// to ensure this in certain cases. See comments below.
422static XPt* get_XCon( ThreadId tid, Bool custom_malloc )
423{
njnd01fef72005-03-25 23:35:48 +0000424 // Static to minimise stack size. +1 for added ~0 IP
425 static Addr ips[MAX_DEPTH + MAX_ALLOC_FNS + 1];
nethercotec9f36922004-02-14 16:40:02 +0000426
427 XPt* xpt = alloc_xpt;
njnd01fef72005-03-25 23:35:48 +0000428 UInt n_ips, L, A, B, nC;
nethercotec9f36922004-02-14 16:40:02 +0000429 UInt overestimate;
430 Bool reached_bottom;
431
432 VGP_PUSHCC(VgpGetXPt);
433
434 // Want at least clo_depth non-alloc-fn entries in the snapshot.
435 // However, because we have 1 or more (an unknown number, at this point)
436 // alloc-fns ignored, we overestimate the size needed for the stack
437 // snapshot. Then, if necessary, we repeatedly increase the size until
438 // it is enough.
439 overestimate = 2;
440 while (True) {
njnd01fef72005-03-25 23:35:48 +0000441 n_ips = VG_(get_StackTrace)( tid, ips, clo_depth + overestimate );
nethercotec9f36922004-02-14 16:40:02 +0000442
njnd01fef72005-03-25 23:35:48 +0000443 // Now we add a dummy "unknown" IP at the end. This is only used if we
444 // run out of IPs before hitting clo_depth. It's done to ensure the
nethercotec9f36922004-02-14 16:40:02 +0000445 // XPt we return is (now and forever) a bottom-XPt. If the returned XPt
446 // wasn't a bottom-XPt (now or later) it would cause problems later (eg.
nethercote43a15ce2004-08-30 19:15:12 +0000447 // the parent's approx_ST wouldn't be equal [or almost equal] to the
448 // total of the childrens' approx_STs).
njnd01fef72005-03-25 23:35:48 +0000449 ips[ n_ips++ ] = ~((Addr)0);
nethercotec9f36922004-02-14 16:40:02 +0000450
njnd01fef72005-03-25 23:35:48 +0000451 // Skip over alloc functions in ips[].
452 for (L = 0; is_alloc_fn(ips[L]) && L < n_ips; L++) { }
nethercotec9f36922004-02-14 16:40:02 +0000453
454 // Must be at least one alloc function, unless client used
455 // MALLOCLIKE_BLOCK
njnca82cc02004-11-22 17:18:48 +0000456 if (!custom_malloc) tl_assert(L > 0);
nethercotec9f36922004-02-14 16:40:02 +0000457
458 // Should be at least one non-alloc function. If not, try again.
njnd01fef72005-03-25 23:35:48 +0000459 if (L == n_ips) {
nethercotec9f36922004-02-14 16:40:02 +0000460 overestimate += 2;
461 if (overestimate > MAX_ALLOC_FNS)
njn67993252004-11-22 18:02:32 +0000462 VG_(tool_panic)("No stk snapshot big enough to find non-alloc fns");
nethercotec9f36922004-02-14 16:40:02 +0000463 } else {
464 break;
465 }
466 }
467 A = L;
njnd01fef72005-03-25 23:35:48 +0000468 B = n_ips - 1;
nethercotec9f36922004-02-14 16:40:02 +0000469 reached_bottom = False;
470
njnd01fef72005-03-25 23:35:48 +0000471 // By this point, the IPs we care about are in ips[A]..ips[B]
nethercotec9f36922004-02-14 16:40:02 +0000472
473 // Now do the search/insertion of the XCon. 'L' is the loop counter,
njnd01fef72005-03-25 23:35:48 +0000474 // being the index into ips[].
nethercotec9f36922004-02-14 16:40:02 +0000475 while (True) {
njnd01fef72005-03-25 23:35:48 +0000476 // Look for IP in xpt's children.
nethercotec9f36922004-02-14 16:40:02 +0000477 // XXX: linear search, ugh -- about 10% of time for konqueror startup
478 // XXX: tried cacheing last result, only hit about 4% for konqueror
479 // Nb: this search hits about 98% of the time for konqueror
480 VGP_PUSHCC(VgpGetXPtSearch);
481
482 // If we've searched/added deep enough, or run out of EIPs, this is
483 // the bottom XPt.
484 if (L - A + 1 == clo_depth || L == B)
485 reached_bottom = True;
486
487 nC = 0;
488 while (True) {
489 if (nC == xpt->n_children) {
490 // not found, insert new XPt
njnca82cc02004-11-22 17:18:48 +0000491 tl_assert(xpt->max_children != 0);
492 tl_assert(xpt->n_children <= xpt->max_children);
nethercotec9f36922004-02-14 16:40:02 +0000493 // Expand 'children' if necessary
494 if (xpt->n_children == xpt->max_children) {
495 xpt->max_children *= 2;
496 xpt->children = VG_(realloc)( xpt->children,
497 xpt->max_children * sizeof(XPt*) );
498 n_children_reallocs++;
499 }
njnd01fef72005-03-25 23:35:48 +0000500 // Make new XPt for IP, insert in list
nethercotec9f36922004-02-14 16:40:02 +0000501 xpt->children[ xpt->n_children++ ] =
njnd01fef72005-03-25 23:35:48 +0000502 new_XPt(ips[L], xpt, reached_bottom);
nethercotec9f36922004-02-14 16:40:02 +0000503 break;
504 }
njnd01fef72005-03-25 23:35:48 +0000505 if (ips[L] == xpt->children[nC]->ip) break; // found the IP
nethercotec9f36922004-02-14 16:40:02 +0000506 nC++; // keep looking
507 }
508 VGP_POPCC(VgpGetXPtSearch);
509
510 // Return found/built bottom-XPt.
511 if (reached_bottom) {
njnca82cc02004-11-22 17:18:48 +0000512 tl_assert(0 == xpt->children[nC]->n_children); // Must be bottom-XPt
nethercotec9f36922004-02-14 16:40:02 +0000513 VGP_POPCC(VgpGetXPt);
514 return xpt->children[nC];
515 }
516
517 // Descend to next level in XTree, the newly found/built non-bottom-XPt
518 xpt = xpt->children[nC];
519 L++;
520 }
521}
522
523// Update 'curr_space' of every XPt in the XCon, by percolating upwards.
524static void update_XCon(XPt* xpt, Int space_delta)
525{
526 VGP_PUSHCC(VgpUpdateXCon);
527
njnca82cc02004-11-22 17:18:48 +0000528 tl_assert(True == clo_heap);
529 tl_assert(0 != space_delta);
530 tl_assert(NULL != xpt);
531 tl_assert(0 == xpt->n_children); // must be bottom-XPt
nethercotec9f36922004-02-14 16:40:02 +0000532
533 while (xpt != alloc_xpt) {
njnca82cc02004-11-22 17:18:48 +0000534 if (space_delta < 0) tl_assert(xpt->curr_space >= -space_delta);
nethercotec9f36922004-02-14 16:40:02 +0000535 xpt->curr_space += space_delta;
536 xpt = xpt->parent;
537 }
njnca82cc02004-11-22 17:18:48 +0000538 if (space_delta < 0) tl_assert(alloc_xpt->curr_space >= -space_delta);
nethercotec9f36922004-02-14 16:40:02 +0000539 alloc_xpt->curr_space += space_delta;
540
541 VGP_POPCC(VgpUpdateXCon);
542}
543
544// Actually want a reverse sort, biggest to smallest
nethercote43a15ce2004-08-30 19:15:12 +0000545static Int XPt_cmp_approx_ST(void* n1, void* n2)
nethercotec9f36922004-02-14 16:40:02 +0000546{
547 XPt* xpt1 = *(XPt**)n1;
548 XPt* xpt2 = *(XPt**)n2;
nethercote43a15ce2004-08-30 19:15:12 +0000549 return (xpt1->approx_ST < xpt2->approx_ST ? 1 : -1);
nethercotec9f36922004-02-14 16:40:02 +0000550}
551
nethercote43a15ce2004-08-30 19:15:12 +0000552static Int XPt_cmp_exact_ST_dbld(void* n1, void* n2)
nethercotec9f36922004-02-14 16:40:02 +0000553{
554 XPt* xpt1 = *(XPt**)n1;
555 XPt* xpt2 = *(XPt**)n2;
nethercote43a15ce2004-08-30 19:15:12 +0000556 return (xpt1->exact_ST_dbld < xpt2->exact_ST_dbld ? 1 : -1);
nethercotec9f36922004-02-14 16:40:02 +0000557}
558
559
560/*------------------------------------------------------------*/
561/*--- A generic Queue ---*/
562/*------------------------------------------------------------*/
563
564typedef
565 struct {
566 UInt head; // Index of first entry
567 UInt tail; // Index of final+1 entry, ie. next free slot
568 UInt max_elems;
569 void** elems;
570 }
571 Queue;
572
573static Queue* construct_queue(UInt size)
574{
575 UInt i;
576 Queue* q = VG_(malloc)(sizeof(Queue));
577 q->head = 0;
578 q->tail = 0;
579 q->max_elems = size;
580 q->elems = VG_(malloc)(size * sizeof(void*));
581 for (i = 0; i < size; i++)
582 q->elems[i] = NULL;
583
584 return q;
585}
586
587static void destruct_queue(Queue* q)
588{
589 VG_(free)(q->elems);
590 VG_(free)(q);
591}
592
593static void shuffle(Queue* dest_q, void** old_elems)
594{
595 UInt i, j;
596 for (i = 0, j = dest_q->head; j < dest_q->tail; i++, j++)
597 dest_q->elems[i] = old_elems[j];
598 dest_q->head = 0;
599 dest_q->tail = i;
600 for ( ; i < dest_q->max_elems; i++)
601 dest_q->elems[i] = NULL; // paranoia
602}
603
604// Shuffles elements down. If not enough slots free, increase size. (We
605// don't wait until we've completely run out of space, because there could
606// be lots of shuffling just before that point which would be slow.)
607static void adjust(Queue* q)
608{
609 void** old_elems;
610
njnca82cc02004-11-22 17:18:48 +0000611 tl_assert(q->tail == q->max_elems);
nethercotec9f36922004-02-14 16:40:02 +0000612 if (q->head < 10) {
613 old_elems = q->elems;
614 q->max_elems *= 2;
615 q->elems = VG_(malloc)(q->max_elems * sizeof(void*));
616 shuffle(q, old_elems);
617 VG_(free)(old_elems);
618 } else {
619 shuffle(q, q->elems);
620 }
621}
622
623static void enqueue(Queue* q, void* elem)
624{
625 if (q->tail == q->max_elems)
626 adjust(q);
627 q->elems[q->tail++] = elem;
628}
629
630static Bool is_empty_queue(Queue* q)
631{
632 return (q->head == q->tail);
633}
634
635static void* dequeue(Queue* q)
636{
637 if (is_empty_queue(q))
638 return NULL; // Queue empty
639 else
640 return q->elems[q->head++];
641}
642
643/*------------------------------------------------------------*/
644/*--- malloc() et al replacement wrappers ---*/
645/*------------------------------------------------------------*/
646
647static __inline__
648void add_HP_Chunk(HP_Chunk* hc)
649{
650 n_heap_blocks++;
651 VG_(HT_add_node) ( malloc_list, (VgHashNode*)hc );
652}
653
654static __inline__
655HP_Chunk* get_HP_Chunk(void* p, HP_Chunk*** prev_chunks_next_ptr)
656{
nethercote3d6b6112004-11-04 16:39:43 +0000657 return (HP_Chunk*)VG_(HT_get_node) ( malloc_list, (UWord)p,
nethercotec9f36922004-02-14 16:40:02 +0000658 (VgHashNode***)prev_chunks_next_ptr );
659}
660
661static __inline__
662void remove_HP_Chunk(HP_Chunk* hc, HP_Chunk** prev_chunks_next_ptr)
663{
njnca82cc02004-11-22 17:18:48 +0000664 tl_assert(n_heap_blocks > 0);
nethercotec9f36922004-02-14 16:40:02 +0000665 n_heap_blocks--;
666 *prev_chunks_next_ptr = hc->next;
667}
668
669// Forward declaration
670static void hp_census(void);
671
nethercote159dfef2004-09-13 13:27:30 +0000672static
njn57735902004-11-25 18:04:54 +0000673void* new_block ( ThreadId tid, void* p, SizeT size, SizeT align,
674 Bool is_zeroed )
nethercotec9f36922004-02-14 16:40:02 +0000675{
676 HP_Chunk* hc;
nethercote57e36b32004-07-10 14:56:28 +0000677 Bool custom_alloc = (NULL == p);
nethercotec9f36922004-02-14 16:40:02 +0000678 if (size < 0) return NULL;
679
680 VGP_PUSHCC(VgpCliMalloc);
681
682 // Update statistics
683 n_allocs++;
nethercote57e36b32004-07-10 14:56:28 +0000684 if (0 == size) n_zero_allocs++;
nethercotec9f36922004-02-14 16:40:02 +0000685
nethercote57e36b32004-07-10 14:56:28 +0000686 // Allocate and zero if necessary
687 if (!p) {
688 p = VG_(cli_malloc)( align, size );
689 if (!p) {
690 VGP_POPCC(VgpCliMalloc);
691 return NULL;
692 }
693 if (is_zeroed) VG_(memset)(p, 0, size);
694 }
695
696 // Make new HP_Chunk node, add to malloclist
697 hc = VG_(malloc)(sizeof(HP_Chunk));
698 hc->size = size;
699 hc->data = (Addr)p;
700 hc->where = NULL; // paranoia
701 if (clo_heap) {
njn57735902004-11-25 18:04:54 +0000702 hc->where = get_XCon( tid, custom_alloc );
nethercote57e36b32004-07-10 14:56:28 +0000703 if (0 != size)
704 update_XCon(hc->where, size);
705 }
706 add_HP_Chunk( hc );
707
708 // do a census!
709 hp_census();
nethercotec9f36922004-02-14 16:40:02 +0000710
711 VGP_POPCC(VgpCliMalloc);
712 return p;
713}
714
715static __inline__
716void die_block ( void* p, Bool custom_free )
717{
nethercote57e36b32004-07-10 14:56:28 +0000718 HP_Chunk *hc, **remove_handle;
nethercotec9f36922004-02-14 16:40:02 +0000719
720 VGP_PUSHCC(VgpCliMalloc);
721
722 // Update statistics
723 n_frees++;
724
nethercote57e36b32004-07-10 14:56:28 +0000725 // Remove HP_Chunk from malloclist
726 hc = get_HP_Chunk( p, &remove_handle );
nethercotec9f36922004-02-14 16:40:02 +0000727 if (hc == NULL)
728 return; // must have been a bogus free(), or p==NULL
njnca82cc02004-11-22 17:18:48 +0000729 tl_assert(hc->data == (Addr)p);
nethercote57e36b32004-07-10 14:56:28 +0000730 remove_HP_Chunk(hc, remove_handle);
nethercotec9f36922004-02-14 16:40:02 +0000731
732 if (clo_heap && hc->size != 0)
733 update_XCon(hc->where, -hc->size);
734
nethercote57e36b32004-07-10 14:56:28 +0000735 VG_(free)( hc );
736
737 // Actually free the heap block, if necessary
nethercotec9f36922004-02-14 16:40:02 +0000738 if (!custom_free)
739 VG_(cli_free)( p );
740
nethercote57e36b32004-07-10 14:56:28 +0000741 // do a census!
742 hp_census();
nethercotec9f36922004-02-14 16:40:02 +0000743
nethercotec9f36922004-02-14 16:40:02 +0000744 VGP_POPCC(VgpCliMalloc);
745}
746
747
njn51d827b2005-05-09 01:02:08 +0000748static void* ms_malloc ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000749{
njn57735902004-11-25 18:04:54 +0000750 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000751}
752
njn51d827b2005-05-09 01:02:08 +0000753static void* ms___builtin_new ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000754{
njn57735902004-11-25 18:04:54 +0000755 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000756}
757
njn51d827b2005-05-09 01:02:08 +0000758static void* ms___builtin_vec_new ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000759{
njn57735902004-11-25 18:04:54 +0000760 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000761}
762
njn51d827b2005-05-09 01:02:08 +0000763static void* ms_calloc ( ThreadId tid, SizeT m, SizeT size )
nethercotec9f36922004-02-14 16:40:02 +0000764{
njn57735902004-11-25 18:04:54 +0000765 return new_block( tid, NULL, m*size, VG_(clo_alignment), /*is_zeroed*/True );
nethercotec9f36922004-02-14 16:40:02 +0000766}
767
njn51d827b2005-05-09 01:02:08 +0000768static void *ms_memalign ( ThreadId tid, SizeT align, SizeT n )
fitzhardinge51f3ff12004-03-04 22:42:03 +0000769{
njn57735902004-11-25 18:04:54 +0000770 return new_block( tid, NULL, n, align, False );
fitzhardinge51f3ff12004-03-04 22:42:03 +0000771}
772
njn51d827b2005-05-09 01:02:08 +0000773static void ms_free ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000774{
775 die_block( p, /*custom_free*/False );
776}
777
njn51d827b2005-05-09 01:02:08 +0000778static void ms___builtin_delete ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000779{
780 die_block( p, /*custom_free*/False);
781}
782
njn51d827b2005-05-09 01:02:08 +0000783static void ms___builtin_vec_delete ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000784{
785 die_block( p, /*custom_free*/False );
786}
787
njn51d827b2005-05-09 01:02:08 +0000788static void* ms_realloc ( ThreadId tid, void* p_old, SizeT new_size )
nethercotec9f36922004-02-14 16:40:02 +0000789{
790 HP_Chunk* hc;
791 HP_Chunk** remove_handle;
792 Int i;
793 void* p_new;
nethercote7ac7f7b2004-11-02 12:36:02 +0000794 SizeT old_size;
nethercotec9f36922004-02-14 16:40:02 +0000795 XPt *old_where, *new_where;
796
797 VGP_PUSHCC(VgpCliMalloc);
798
799 // First try and find the block.
800 hc = get_HP_Chunk ( p_old, &remove_handle );
801 if (hc == NULL) {
802 VGP_POPCC(VgpCliMalloc);
803 return NULL; // must have been a bogus free()
804 }
805
njnca82cc02004-11-22 17:18:48 +0000806 tl_assert(hc->data == (Addr)p_old);
nethercotec9f36922004-02-14 16:40:02 +0000807 old_size = hc->size;
808
809 if (new_size <= old_size) {
810 // new size is smaller or same; block not moved
811 p_new = p_old;
812
813 } else {
814 // new size is bigger; make new block, copy shared contents, free old
815 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_size);
816
817 for (i = 0; i < old_size; i++)
818 ((UChar*)p_new)[i] = ((UChar*)p_old)[i];
819
820 VG_(cli_free)(p_old);
821 }
822
823 old_where = hc->where;
njn57735902004-11-25 18:04:54 +0000824 new_where = get_XCon( tid, /*custom_malloc*/False);
nethercotec9f36922004-02-14 16:40:02 +0000825
826 // Update HP_Chunk
827 hc->data = (Addr)p_new;
828 hc->size = new_size;
829 hc->where = new_where;
830
831 // Update XPt curr_space fields
832 if (clo_heap) {
833 if (0 != old_size) update_XCon(old_where, -old_size);
834 if (0 != new_size) update_XCon(new_where, new_size);
835 }
836
837 // If block has moved, have to remove and reinsert in the malloclist
838 // (since the updated 'data' field is the hash lookup key).
839 if (p_new != p_old) {
840 remove_HP_Chunk(hc, remove_handle);
841 add_HP_Chunk(hc);
842 }
843
844 VGP_POPCC(VgpCliMalloc);
845 return p_new;
846}
847
848
849/*------------------------------------------------------------*/
850/*--- Taking a census ---*/
851/*------------------------------------------------------------*/
852
853static Census censi[MAX_N_CENSI];
854static UInt curr_census = 0;
855
856// Must return False so that all stacks are traversed
thughes4ad52d02004-06-27 17:37:21 +0000857static Bool count_stack_size( Addr stack_min, Addr stack_max, void *cp )
nethercotec9f36922004-02-14 16:40:02 +0000858{
thughes4ad52d02004-06-27 17:37:21 +0000859 *(UInt *)cp += (stack_max - stack_min);
nethercotec9f36922004-02-14 16:40:02 +0000860 return False;
861}
862
863static UInt get_xtree_size(XPt* xpt, UInt ix)
864{
865 UInt i;
866
nethercote43a15ce2004-08-30 19:15:12 +0000867 // If no memory allocated at all, nothing interesting to record.
868 if (alloc_xpt->curr_space == 0) return 0;
869
870 // Ignore sub-XTrees that account for a miniscule fraction of current
871 // allocated space.
872 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002) {
nethercotec9f36922004-02-14 16:40:02 +0000873 ix++;
874
875 // Count all (non-zero) descendent XPts
876 for (i = 0; i < xpt->n_children; i++)
877 ix = get_xtree_size(xpt->children[i], ix);
878 }
879 return ix;
880}
881
882static
883UInt do_space_snapshot(XPt xpt[], XTreeSnapshot xtree_snapshot, UInt ix)
884{
885 UInt i;
886
nethercote43a15ce2004-08-30 19:15:12 +0000887 // Structure of this function mirrors that of get_xtree_size().
888
889 if (alloc_xpt->curr_space == 0) return 0;
890
891 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002) {
nethercotec9f36922004-02-14 16:40:02 +0000892 xtree_snapshot[ix].xpt = xpt;
893 xtree_snapshot[ix].space = xpt->curr_space;
894 ix++;
895
nethercotec9f36922004-02-14 16:40:02 +0000896 for (i = 0; i < xpt->n_children; i++)
897 ix = do_space_snapshot(xpt->children[i], xtree_snapshot, ix);
898 }
899 return ix;
900}
901
902static UInt ms_interval;
903static UInt do_every_nth_census = 30;
904
905// Weed out half the censi; we choose those that represent the smallest
906// time-spans, because that loses the least information.
907//
908// Algorithm for N censi: We find the census representing the smallest
909// timeframe, and remove it. We repeat this until (N/2)-1 censi are gone.
910// (It's (N/2)-1 because we never remove the first and last censi.)
911// We have to do this one census at a time, rather than finding the (N/2)-1
912// smallest censi in one hit, because when a census is removed, it's
913// neighbours immediately cover greater timespans. So it's N^2, but N only
914// equals 200, and this is only done every 100 censi, which is not too often.
915static void halve_censi(void)
916{
917 Int i, jp, j, jn, k;
918 Census* min_census;
919
920 n_halvings++;
921 if (VG_(clo_verbosity) > 1)
922 VG_(message)(Vg_UserMsg, "Halving censi...");
923
924 // Sets j to the index of the first not-yet-removed census at or after i
925 #define FIND_CENSUS(i, j) \
njn6f1f76d2005-05-24 21:28:54 +0000926 for (j = i; j < MAX_N_CENSI && -1 == censi[j].ms_time; j++) { }
nethercotec9f36922004-02-14 16:40:02 +0000927
928 for (i = 2; i < MAX_N_CENSI; i += 2) {
929 // Find the censi representing the smallest timespan. The timespan
930 // for census n = d(N-1,N)+d(N,N+1), where d(A,B) is the time between
931 // censi A and B. We don't consider the first and last censi for
932 // removal.
933 Int min_span = 0x7fffffff;
934 Int min_j = 0;
935
936 // Initial triple: (prev, curr, next) == (jp, j, jn)
937 jp = 0;
938 FIND_CENSUS(1, j);
939 FIND_CENSUS(j+1, jn);
940 while (jn < MAX_N_CENSI) {
941 Int timespan = censi[jn].ms_time - censi[jp].ms_time;
njnca82cc02004-11-22 17:18:48 +0000942 tl_assert(timespan >= 0);
nethercotec9f36922004-02-14 16:40:02 +0000943 if (timespan < min_span) {
944 min_span = timespan;
945 min_j = j;
946 }
947 // Move on to next triple
948 jp = j;
949 j = jn;
950 FIND_CENSUS(jn+1, jn);
951 }
952 // We've found the least important census, now remove it
953 min_census = & censi[ min_j ];
954 for (k = 0; NULL != min_census->xtree_snapshots[k]; k++) {
955 n_snapshot_frees++;
956 VG_(free)(min_census->xtree_snapshots[k]);
957 min_census->xtree_snapshots[k] = NULL;
958 }
959 min_census->ms_time = -1;
960 }
961
962 // Slide down the remaining censi over the removed ones. The '<=' is
963 // because we are removing on (N/2)-1, rather than N/2.
964 for (i = 0, j = 0; i <= MAX_N_CENSI / 2; i++, j++) {
965 FIND_CENSUS(j, j);
966 if (i != j) {
967 censi[i] = censi[j];
968 }
969 }
970 curr_census = i;
971
972 // Double intervals
973 ms_interval *= 2;
974 do_every_nth_census *= 2;
975
976 if (VG_(clo_verbosity) > 1)
977 VG_(message)(Vg_UserMsg, "...done");
978}
979
980// Take a census. Census time seems to be insignificant (usually <= 0 ms,
981// almost always <= 1ms) so don't have to worry about subtracting it from
982// running time in any way.
983//
984// XXX: NOT TRUE! with bigger depths, konqueror censuses can easily take
985// 50ms!
986static void hp_census(void)
987{
988 static UInt ms_prev_census = 0;
989 static UInt ms_next_census = 0; // zero allows startup census
990
991 Int ms_time, ms_time_since_prev;
nethercotec9f36922004-02-14 16:40:02 +0000992 Census* census;
993
994 VGP_PUSHCC(VgpCensus);
995
996 // Only do a census if it's time
997 ms_time = VG_(read_millisecond_timer)();
998 ms_time_since_prev = ms_time - ms_prev_census;
999 if (ms_time < ms_next_census) {
1000 n_fake_censi++;
1001 VGP_POPCC(VgpCensus);
1002 return;
1003 }
1004 n_real_censi++;
1005
1006 census = & censi[curr_census];
1007
1008 census->ms_time = ms_time;
1009
1010 // Heap: snapshot the K most significant XTrees -------------------
1011 if (clo_heap) {
njn6f1f76d2005-05-24 21:28:54 +00001012 Int i, K;
nethercotec9f36922004-02-14 16:40:02 +00001013 K = ( alloc_xpt->n_children < MAX_SNAPSHOTS
1014 ? alloc_xpt->n_children
1015 : MAX_SNAPSHOTS); // max out
1016
nethercote43a15ce2004-08-30 19:15:12 +00001017 // Update .approx_ST field (approximatively) for all top-XPts.
nethercotec9f36922004-02-14 16:40:02 +00001018 // We *do not* do it for any non-top-XPTs.
1019 for (i = 0; i < alloc_xpt->n_children; i++) {
1020 XPt* top_XPt = alloc_xpt->children[i];
nethercote43a15ce2004-08-30 19:15:12 +00001021 top_XPt->approx_ST += top_XPt->curr_space * ms_time_since_prev;
nethercotec9f36922004-02-14 16:40:02 +00001022 }
nethercote43a15ce2004-08-30 19:15:12 +00001023 // Sort top-XPts by approx_ST field.
nethercotec9f36922004-02-14 16:40:02 +00001024 VG_(ssort)(alloc_xpt->children, alloc_xpt->n_children, sizeof(XPt*),
nethercote43a15ce2004-08-30 19:15:12 +00001025 XPt_cmp_approx_ST);
nethercotec9f36922004-02-14 16:40:02 +00001026
1027 VGP_PUSHCC(VgpCensusHeap);
1028
1029 // For each significant top-level XPt, record space info about its
1030 // entire XTree, in a single census entry.
1031 // Nb: the xtree_size count/snapshot buffer allocation, and the actual
1032 // snapshot, take similar amounts of time (measured with the
nethercote43a15ce2004-08-30 19:15:12 +00001033 // millisecond counter).
nethercotec9f36922004-02-14 16:40:02 +00001034 for (i = 0; i < K; i++) {
1035 UInt xtree_size, xtree_size2;
nethercote43a15ce2004-08-30 19:15:12 +00001036// VG_(printf)("%7u ", alloc_xpt->children[i]->approx_ST);
1037 // Count how many XPts are in the XTree
nethercotec9f36922004-02-14 16:40:02 +00001038 VGP_PUSHCC(VgpCensusTreeSize);
1039 xtree_size = get_xtree_size( alloc_xpt->children[i], 0 );
1040 VGP_POPCC(VgpCensusTreeSize);
nethercote43a15ce2004-08-30 19:15:12 +00001041
1042 // If no XPts counted (ie. alloc_xpt.curr_space==0 or XTree
1043 // insignificant) then don't take any more snapshots.
1044 if (0 == xtree_size) break;
1045
1046 // Make array of the appropriate size (+1 for zero termination,
1047 // which calloc() does for us).
nethercotec9f36922004-02-14 16:40:02 +00001048 census->xtree_snapshots[i] =
1049 VG_(calloc)(xtree_size+1, sizeof(XPtSnapshot));
jseward612e8362004-03-07 10:23:20 +00001050 if (0 && VG_(clo_verbosity) > 1)
nethercotec9f36922004-02-14 16:40:02 +00001051 VG_(printf)("calloc: %d (%d B)\n", xtree_size+1,
1052 (xtree_size+1) * sizeof(XPtSnapshot));
1053
1054 // Take space-snapshot: copy 'curr_space' for every XPt in the
1055 // XTree into the snapshot array, along with pointers to the XPts.
1056 // (Except for ones with curr_space==0, which wouldn't contribute
nethercote43a15ce2004-08-30 19:15:12 +00001057 // to the final exact_ST_dbld calculation anyway; excluding them
nethercotec9f36922004-02-14 16:40:02 +00001058 // saves a lot of memory and up to 40% time with big --depth valus.
1059 VGP_PUSHCC(VgpCensusSnapshot);
1060 xtree_size2 = do_space_snapshot(alloc_xpt->children[i],
1061 census->xtree_snapshots[i], 0);
njnca82cc02004-11-22 17:18:48 +00001062 tl_assert(xtree_size == xtree_size2);
nethercotec9f36922004-02-14 16:40:02 +00001063 VGP_POPCC(VgpCensusSnapshot);
1064 }
1065// VG_(printf)("\n\n");
1066 // Zero-terminate 'xtree_snapshot' array
1067 census->xtree_snapshots[i] = NULL;
1068
1069 VGP_POPCC(VgpCensusHeap);
1070
1071 //VG_(printf)("printed %d censi\n", K);
1072
1073 // Lump the rest into a single "others" entry.
1074 census->others_space = 0;
1075 for (i = K; i < alloc_xpt->n_children; i++) {
1076 census->others_space += alloc_xpt->children[i]->curr_space;
1077 }
1078 }
1079
1080 // Heap admin -------------------------------------------------------
1081 if (clo_heap_admin > 0)
1082 census->heap_admin_space = clo_heap_admin * n_heap_blocks;
1083
1084 // Stack(s) ---------------------------------------------------------
1085 if (clo_stacks) {
thughes4ad52d02004-06-27 17:37:21 +00001086 census->stacks_space = sigstacks_space;
nethercotec9f36922004-02-14 16:40:02 +00001087 // slightly abusing this function
thughes4ad52d02004-06-27 17:37:21 +00001088 VG_(first_matching_thread_stack)( count_stack_size, &census->stacks_space );
nethercotec9f36922004-02-14 16:40:02 +00001089 }
1090
1091 // Finish, update interval if necessary -----------------------------
1092 curr_census++;
1093 census = NULL; // don't use again now that curr_census changed
1094
1095 // Halve the entries, if our census table is full
1096 if (MAX_N_CENSI == curr_census) {
1097 halve_censi();
1098 }
1099
1100 // Take time for next census from now, rather than when this census
1101 // should have happened. Because, if there's a big gap due to a kernel
1102 // operation, there's no point doing catch-up censi every BB for a while
1103 // -- that would just give N censi at almost the same time.
1104 if (VG_(clo_verbosity) > 1) {
1105 VG_(message)(Vg_UserMsg, "census: %d ms (took %d ms)", ms_time,
1106 VG_(read_millisecond_timer)() - ms_time );
1107 }
1108 ms_prev_census = ms_time;
1109 ms_next_census = ms_time + ms_interval;
1110 //ms_next_census += ms_interval;
1111
1112 //VG_(printf)("Next: %d ms\n", ms_next_census);
1113
1114 VGP_POPCC(VgpCensus);
1115}
1116
1117/*------------------------------------------------------------*/
1118/*--- Tracked events ---*/
1119/*------------------------------------------------------------*/
1120
nethercote8b5f40c2004-11-02 13:29:50 +00001121static void new_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001122{
1123 sigstacks_space += len;
1124}
1125
nethercote8b5f40c2004-11-02 13:29:50 +00001126static void die_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001127{
njnca82cc02004-11-22 17:18:48 +00001128 tl_assert(sigstacks_space >= len);
nethercotec9f36922004-02-14 16:40:02 +00001129 sigstacks_space -= len;
1130}
1131
1132/*------------------------------------------------------------*/
1133/*--- Client Requests ---*/
1134/*------------------------------------------------------------*/
1135
njn51d827b2005-05-09 01:02:08 +00001136static Bool ms_handle_client_request ( ThreadId tid, UWord* argv, UWord* ret )
nethercotec9f36922004-02-14 16:40:02 +00001137{
1138 switch (argv[0]) {
1139 case VG_USERREQ__MALLOCLIKE_BLOCK: {
nethercote57e36b32004-07-10 14:56:28 +00001140 void* res;
nethercotec9f36922004-02-14 16:40:02 +00001141 void* p = (void*)argv[1];
nethercoted1b64b22004-11-04 18:22:28 +00001142 SizeT sizeB = argv[2];
nethercotec9f36922004-02-14 16:40:02 +00001143 *ret = 0;
njn57735902004-11-25 18:04:54 +00001144 res = new_block( tid, p, sizeB, /*align--ignored*/0, /*is_zeroed*/False );
njnca82cc02004-11-22 17:18:48 +00001145 tl_assert(res == p);
nethercotec9f36922004-02-14 16:40:02 +00001146 return True;
1147 }
1148 case VG_USERREQ__FREELIKE_BLOCK: {
1149 void* p = (void*)argv[1];
1150 *ret = 0;
1151 die_block( p, /*custom_free*/True );
1152 return True;
1153 }
1154 default:
1155 *ret = 0;
1156 return False;
1157 }
1158}
1159
1160/*------------------------------------------------------------*/
nethercotec9f36922004-02-14 16:40:02 +00001161/*--- Instrumentation ---*/
1162/*------------------------------------------------------------*/
1163
njn51d827b2005-05-09 01:02:08 +00001164static IRBB* ms_instrument ( IRBB* bb_in, VexGuestLayout* layout,
1165 IRType gWordTy, IRType hWordTy )
nethercotec9f36922004-02-14 16:40:02 +00001166{
sewardjd54babf2005-03-21 00:55:49 +00001167 /* XXX Will Massif work when gWordTy != hWordTy ? */
njnee8a5862004-11-22 21:08:46 +00001168 return bb_in;
nethercotec9f36922004-02-14 16:40:02 +00001169}
1170
1171/*------------------------------------------------------------*/
1172/*--- Spacetime recomputation ---*/
1173/*------------------------------------------------------------*/
1174
nethercote43a15ce2004-08-30 19:15:12 +00001175// Although we've been calculating space-time along the way, because the
1176// earlier calculations were done at a finer timescale, the .approx_ST field
nethercotec9f36922004-02-14 16:40:02 +00001177// might not agree with what hp2ps sees, because we've thrown away some of
1178// the information. So recompute it at the scale that hp2ps sees, so we can
1179// confidently determine which contexts hp2ps will choose for displaying as
1180// distinct bands. This recomputation only happens to the significant ones
1181// that get printed in the .hp file, so it's cheap.
1182//
nethercote43a15ce2004-08-30 19:15:12 +00001183// The approx_ST calculation:
nethercotec9f36922004-02-14 16:40:02 +00001184// ( a[0]*d(0,1) + a[1]*(d(0,1) + d(1,2)) + ... + a[N-1]*d(N-2,N-1) ) / 2
1185// where
1186// a[N] is the space at census N
1187// d(A,B) is the time interval between censi A and B
1188// and
1189// d(A,B) + d(B,C) == d(A,C)
1190//
1191// Key point: we can calculate the area for a census without knowing the
1192// previous or subsequent censi's space; because any over/underestimates
1193// for this census will be reversed in the next, balancing out. This is
1194// important, as getting the previous/next census entry for a particular
1195// AP is a pain with this data structure, but getting the prev/next
1196// census time is easy.
1197//
nethercote43a15ce2004-08-30 19:15:12 +00001198// Each heap calculation gets added to its context's exact_ST_dbld field.
nethercotec9f36922004-02-14 16:40:02 +00001199// The ULong* values are all running totals, hence the use of "+=" everywhere.
1200
1201// This does the calculations for a single census.
nethercote43a15ce2004-08-30 19:15:12 +00001202static void calc_exact_ST_dbld2(Census* census, UInt d_t1_t2,
nethercotec9f36922004-02-14 16:40:02 +00001203 ULong* twice_heap_ST,
1204 ULong* twice_heap_admin_ST,
1205 ULong* twice_stack_ST)
1206{
1207 UInt i, j;
1208 XPtSnapshot* xpt_snapshot;
1209
1210 // Heap --------------------------------------------------------
1211 if (clo_heap) {
1212 for (i = 0; NULL != census->xtree_snapshots[i]; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001213 // Compute total heap exact_ST_dbld for the entire XTree using only
1214 // the top-XPt (the first XPt in xtree_snapshot).
nethercotec9f36922004-02-14 16:40:02 +00001215 *twice_heap_ST += d_t1_t2 * census->xtree_snapshots[i][0].space;
1216
nethercote43a15ce2004-08-30 19:15:12 +00001217 // Increment exact_ST_dbld for every XPt in xtree_snapshot (inc.
1218 // top one)
nethercotec9f36922004-02-14 16:40:02 +00001219 for (j = 0; NULL != census->xtree_snapshots[i][j].xpt; j++) {
1220 xpt_snapshot = & census->xtree_snapshots[i][j];
nethercote43a15ce2004-08-30 19:15:12 +00001221 xpt_snapshot->xpt->exact_ST_dbld += d_t1_t2 * xpt_snapshot->space;
nethercotec9f36922004-02-14 16:40:02 +00001222 }
1223 }
1224 *twice_heap_ST += d_t1_t2 * census->others_space;
1225 }
1226
1227 // Heap admin --------------------------------------------------
1228 if (clo_heap_admin > 0)
1229 *twice_heap_admin_ST += d_t1_t2 * census->heap_admin_space;
1230
1231 // Stack(s) ----------------------------------------------------
1232 if (clo_stacks)
1233 *twice_stack_ST += d_t1_t2 * census->stacks_space;
1234}
1235
1236// This does the calculations for all censi.
nethercote43a15ce2004-08-30 19:15:12 +00001237static void calc_exact_ST_dbld(ULong* heap2, ULong* heap_admin2, ULong* stack2)
nethercotec9f36922004-02-14 16:40:02 +00001238{
1239 UInt i, N = curr_census;
1240
1241 VGP_PUSHCC(VgpCalcSpacetime2);
1242
1243 *heap2 = 0;
1244 *heap_admin2 = 0;
1245 *stack2 = 0;
1246
1247 if (N <= 1)
1248 return;
1249
nethercote43a15ce2004-08-30 19:15:12 +00001250 calc_exact_ST_dbld2( &censi[0], censi[1].ms_time - censi[0].ms_time,
1251 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001252
1253 for (i = 1; i <= N-2; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001254 calc_exact_ST_dbld2( & censi[i], censi[i+1].ms_time - censi[i-1].ms_time,
1255 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001256 }
1257
nethercote43a15ce2004-08-30 19:15:12 +00001258 calc_exact_ST_dbld2( & censi[N-1], censi[N-1].ms_time - censi[N-2].ms_time,
1259 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001260 // Now get rid of the halves. May lose a 0.5 on each, doesn't matter.
1261 *heap2 /= 2;
1262 *heap_admin2 /= 2;
1263 *stack2 /= 2;
1264
1265 VGP_POPCC(VgpCalcSpacetime2);
1266}
1267
1268/*------------------------------------------------------------*/
1269/*--- Writing the graph file ---*/
1270/*------------------------------------------------------------*/
1271
1272static Char* make_filename(Char* dir, Char* suffix)
1273{
1274 Char* filename;
1275
1276 /* Block is big enough for dir name + massif.<pid>.<suffix> */
1277 filename = VG_(malloc)((VG_(strlen)(dir) + 32)*sizeof(Char));
1278 VG_(sprintf)(filename, "%s/massif.%d%s", dir, VG_(getpid)(), suffix);
1279
1280 return filename;
1281}
1282
1283// Make string acceptable to hp2ps (sigh): remove spaces, escape parentheses.
1284static Char* clean_fnname(Char *d, Char* s)
1285{
1286 Char* dorig = d;
1287 while (*s) {
1288 if (' ' == *s) { *d = '%'; }
1289 else if ('(' == *s) { *d++ = '\\'; *d = '('; }
1290 else if (')' == *s) { *d++ = '\\'; *d = ')'; }
1291 else { *d = *s; };
1292 s++;
1293 d++;
1294 }
1295 *d = '\0';
1296 return dorig;
1297}
1298
1299static void file_err ( Char* file )
1300{
njn02bc4b82005-05-15 17:28:26 +00001301 VG_(message)(Vg_UserMsg, "error: can't open output file '%s'", file );
nethercotec9f36922004-02-14 16:40:02 +00001302 VG_(message)(Vg_UserMsg, " ... so profile results will be missing.");
1303}
1304
1305/* Format, by example:
1306
1307 JOB "a.out -p"
1308 DATE "Fri Apr 17 11:43:45 1992"
1309 SAMPLE_UNIT "seconds"
1310 VALUE_UNIT "bytes"
1311 BEGIN_SAMPLE 0.00
1312 SYSTEM 24
1313 END_SAMPLE 0.00
1314 BEGIN_SAMPLE 1.00
1315 elim 180
1316 insert 24
1317 intersect 12
1318 disin 60
1319 main 12
1320 reduce 20
1321 SYSTEM 12
1322 END_SAMPLE 1.00
1323 MARK 1.50
1324 MARK 1.75
1325 MARK 1.80
1326 BEGIN_SAMPLE 2.00
1327 elim 192
1328 insert 24
1329 intersect 12
1330 disin 84
1331 main 12
1332 SYSTEM 24
1333 END_SAMPLE 2.00
1334 BEGIN_SAMPLE 2.82
1335 END_SAMPLE 2.82
1336 */
1337static void write_hp_file(void)
1338{
1339 Int i, j;
1340 Int fd, res;
1341 Char *hp_file, *ps_file, *aux_file;
1342 Char* cmdfmt;
1343 Char* cmdbuf;
1344 Int cmdlen;
1345
1346 VGP_PUSHCC(VgpPrintHp);
1347
1348 // Open file
1349 hp_file = make_filename( base_dir, ".hp" );
1350 ps_file = make_filename( base_dir, ".ps" );
1351 aux_file = make_filename( base_dir, ".aux" );
1352 fd = VG_(open)(hp_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1353 VKI_S_IRUSR|VKI_S_IWUSR);
1354 if (fd < 0) {
1355 file_err( hp_file );
1356 VGP_POPCC(VgpPrintHp);
1357 return;
1358 }
1359
1360 // File header, including command line
1361 SPRINTF(buf, "JOB \"");
1362 for (i = 0; i < VG_(client_argc); i++)
1363 SPRINTF(buf, "%s ", VG_(client_argv)[i]);
1364 SPRINTF(buf, /*" (%d ms/sample)\"\n"*/ "\"\n"
1365 "DATE \"\"\n"
1366 "SAMPLE_UNIT \"ms\"\n"
1367 "VALUE_UNIT \"bytes\"\n", ms_interval);
1368
1369 // Censi
1370 for (i = 0; i < curr_census; i++) {
1371 Census* census = & censi[i];
1372
1373 // Census start
1374 SPRINTF(buf, "MARK %d.0\n"
1375 "BEGIN_SAMPLE %d.0\n",
1376 census->ms_time, census->ms_time);
1377
1378 // Heap -----------------------------------------------------------
1379 if (clo_heap) {
1380 // Print all the significant XPts from that census
1381 for (j = 0; NULL != census->xtree_snapshots[j]; j++) {
1382 // Grab the jth top-XPt
1383 XTreeSnapshot xtree_snapshot = & census->xtree_snapshots[j][0];
njnd01fef72005-03-25 23:35:48 +00001384 if ( ! VG_(get_fnname)(xtree_snapshot->xpt->ip, buf2, 16)) {
nethercotec9f36922004-02-14 16:40:02 +00001385 VG_(sprintf)(buf2, "???");
1386 }
njnd01fef72005-03-25 23:35:48 +00001387 SPRINTF(buf, "x%x:%s %d\n", xtree_snapshot->xpt->ip,
nethercotec9f36922004-02-14 16:40:02 +00001388 clean_fnname(buf3, buf2), xtree_snapshot->space);
1389 }
1390
1391 // Remaining heap block alloc points, combined
1392 if (census->others_space > 0)
1393 SPRINTF(buf, "other %d\n", census->others_space);
1394 }
1395
1396 // Heap admin -----------------------------------------------------
1397 if (clo_heap_admin > 0 && census->heap_admin_space)
1398 SPRINTF(buf, "heap-admin %d\n", census->heap_admin_space);
1399
1400 // Stack(s) -------------------------------------------------------
1401 if (clo_stacks)
1402 SPRINTF(buf, "stack(s) %d\n", census->stacks_space);
1403
1404 // Census end
1405 SPRINTF(buf, "END_SAMPLE %d.0\n", census->ms_time);
1406 }
1407
1408 // Close file
njnca82cc02004-11-22 17:18:48 +00001409 tl_assert(fd >= 0);
nethercotec9f36922004-02-14 16:40:02 +00001410 VG_(close)(fd);
1411
1412 // Attempt to convert file using hp2ps
1413 cmdfmt = "%s/hp2ps -c -t1 %s";
1414 cmdlen = VG_(strlen)(VG_(libdir)) + VG_(strlen)(hp_file)
1415 + VG_(strlen)(cmdfmt);
1416 cmdbuf = VG_(malloc)( sizeof(Char) * cmdlen );
1417 VG_(sprintf)(cmdbuf, cmdfmt, VG_(libdir), hp_file);
1418 res = VG_(system)(cmdbuf);
1419 VG_(free)(cmdbuf);
1420 if (res != 0) {
1421 VG_(message)(Vg_UserMsg,
1422 "Conversion to PostScript failed. Try converting manually.");
1423 } else {
1424 // remove the .hp and .aux file
1425 VG_(unlink)(hp_file);
1426 VG_(unlink)(aux_file);
1427 }
1428
1429 VG_(free)(hp_file);
1430 VG_(free)(ps_file);
1431 VG_(free)(aux_file);
1432
1433 VGP_POPCC(VgpPrintHp);
1434}
1435
1436/*------------------------------------------------------------*/
1437/*--- Writing the XPt text/HTML file ---*/
1438/*------------------------------------------------------------*/
1439
1440static void percentify(Int n, Int pow, Int field_width, char xbuf[])
1441{
1442 int i, len, space;
1443
1444 VG_(sprintf)(xbuf, "%d.%d%%", n / pow, n % pow);
1445 len = VG_(strlen)(xbuf);
1446 space = field_width - len;
1447 if (space < 0) space = 0; /* Allow for v. small field_width */
1448 i = len;
1449
1450 /* Right justify in field */
1451 for ( ; i >= 0; i--) xbuf[i + space] = xbuf[i];
1452 for (i = 0; i < space; i++) xbuf[i] = ' ';
1453}
1454
1455// Nb: uses a static buffer, each call trashes the last string returned.
1456static Char* make_perc(ULong spacetime, ULong total_spacetime)
1457{
1458 static Char mbuf[32];
1459
1460 UInt p = 10;
njnca82cc02004-11-22 17:18:48 +00001461 tl_assert(0 != total_spacetime);
nethercotec9f36922004-02-14 16:40:02 +00001462 percentify(spacetime * 100 * p / total_spacetime, p, 5, mbuf);
1463 return mbuf;
1464}
1465
njnd01fef72005-03-25 23:35:48 +00001466// Nb: passed in XPt is a lower-level XPt; IPs are grabbed from
nethercotec9f36922004-02-14 16:40:02 +00001467// bottom-to-top of XCon, and then printed in the reverse order.
1468static UInt pp_XCon(Int fd, XPt* xpt)
1469{
njnd01fef72005-03-25 23:35:48 +00001470 Addr rev_ips[clo_depth+1];
nethercotec9f36922004-02-14 16:40:02 +00001471 Int i = 0;
1472 Int n = 0;
1473 Bool is_HTML = ( XHTML == clo_format );
1474 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1475 Char* maybe_indent = ( is_HTML ? "&nbsp;&nbsp;" : "" );
1476
njnca82cc02004-11-22 17:18:48 +00001477 tl_assert(NULL != xpt);
nethercotec9f36922004-02-14 16:40:02 +00001478
1479 while (True) {
njnd01fef72005-03-25 23:35:48 +00001480 rev_ips[i] = xpt->ip;
nethercotec9f36922004-02-14 16:40:02 +00001481 n++;
1482 if (alloc_xpt == xpt->parent) break;
1483 i++;
1484 xpt = xpt->parent;
1485 }
1486
1487 for (i = n-1; i >= 0; i--) {
1488 // -1 means point to calling line
njnd01fef72005-03-25 23:35:48 +00001489 VG_(describe_IP)(rev_ips[i]-1, buf2, BUF_LEN);
nethercotec9f36922004-02-14 16:40:02 +00001490 SPRINTF(buf, " %s%s%s\n", maybe_indent, buf2, maybe_br);
1491 }
1492
1493 return n;
1494}
1495
1496// Important point: for HTML, each XPt must be identified uniquely for the
njnd01fef72005-03-25 23:35:48 +00001497// HTML links to all match up correctly. Using xpt->ip is not
nethercotec9f36922004-02-14 16:40:02 +00001498// sufficient, because function pointers mean that you can call more than
1499// one other function from a single code location. So instead we use the
1500// address of the xpt struct itself, which is guaranteed to be unique.
1501
1502static void pp_all_XPts2(Int fd, Queue* q, ULong heap_spacetime,
1503 ULong total_spacetime)
1504{
1505 UInt i;
1506 XPt *xpt, *child;
1507 UInt L = 0;
1508 UInt c1 = 1;
1509 UInt c2 = 0;
1510 ULong sum = 0;
1511 UInt n;
njnd01fef72005-03-25 23:35:48 +00001512 Char *ip_desc, *perc;
nethercotec9f36922004-02-14 16:40:02 +00001513 Bool is_HTML = ( XHTML == clo_format );
1514 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1515 Char* maybe_p = ( is_HTML ? "<p>" : "" );
1516 Char* maybe_ul = ( is_HTML ? "<ul>" : "" );
1517 Char* maybe_li = ( is_HTML ? "<li>" : "" );
1518 Char* maybe_fli = ( is_HTML ? "</li>" : "" );
1519 Char* maybe_ful = ( is_HTML ? "</ul>" : "" );
1520 Char* end_hr = ( is_HTML ? "<hr>" :
1521 "=================================" );
1522 Char* depth = ( is_HTML ? "<code>--depth</code>" : "--depth" );
1523
nethercote43a15ce2004-08-30 19:15:12 +00001524 if (total_spacetime == 0) {
1525 SPRINTF(buf, "(No heap memory allocated)\n");
1526 return;
1527 }
1528
1529
nethercotec9f36922004-02-14 16:40:02 +00001530 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1531
1532 while (NULL != (xpt = (XPt*)dequeue(q))) {
nethercote43a15ce2004-08-30 19:15:12 +00001533 // Check that non-top-level XPts have a zero .approx_ST field.
njnca82cc02004-11-22 17:18:48 +00001534 if (xpt->parent != alloc_xpt) tl_assert( 0 == xpt->approx_ST );
nethercotec9f36922004-02-14 16:40:02 +00001535
nethercote43a15ce2004-08-30 19:15:12 +00001536 // Check that the sum of all children .exact_ST_dbld fields equals
1537 // parent's (unless alloc_xpt, when it should == 0).
nethercotec9f36922004-02-14 16:40:02 +00001538 if (alloc_xpt == xpt) {
njnca82cc02004-11-22 17:18:48 +00001539 tl_assert(0 == xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001540 } else {
1541 sum = 0;
1542 for (i = 0; i < xpt->n_children; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001543 sum += xpt->children[i]->exact_ST_dbld;
nethercotec9f36922004-02-14 16:40:02 +00001544 }
njnca82cc02004-11-22 17:18:48 +00001545 //tl_assert(sum == xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001546 // It's possible that not all the children were included in the
nethercote43a15ce2004-08-30 19:15:12 +00001547 // exact_ST_dbld calculations. Hopefully almost all of them were, and
nethercotec9f36922004-02-14 16:40:02 +00001548 // all the important ones.
njnca82cc02004-11-22 17:18:48 +00001549// tl_assert(sum <= xpt->exact_ST_dbld);
1550// tl_assert(sum * 1.05 > xpt->exact_ST_dbld );
nethercote43a15ce2004-08-30 19:15:12 +00001551// if (sum != xpt->exact_ST_dbld) {
1552// VG_(printf)("%ld, %ld\n", sum, xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001553// }
1554 }
1555
1556 if (xpt == alloc_xpt) {
1557 SPRINTF(buf, "Heap allocation functions accounted for "
1558 "%s of measured spacetime%s\n",
1559 make_perc(heap_spacetime, total_spacetime), maybe_br);
1560 } else {
nethercote43a15ce2004-08-30 19:15:12 +00001561 // Remember: exact_ST_dbld is space.time *doubled*
1562 perc = make_perc(xpt->exact_ST_dbld / 2, total_spacetime);
nethercotec9f36922004-02-14 16:40:02 +00001563 if (is_HTML) {
1564 SPRINTF(buf, "<a name=\"b%x\"></a>"
1565 "Context accounted for "
1566 "<a href=\"#a%x\">%s</a> of measured spacetime<br>\n",
1567 xpt, xpt, perc);
1568 } else {
1569 SPRINTF(buf, "Context accounted for %s of measured spacetime\n",
1570 perc);
1571 }
1572 n = pp_XCon(fd, xpt);
njnca82cc02004-11-22 17:18:48 +00001573 tl_assert(n == L);
nethercotec9f36922004-02-14 16:40:02 +00001574 }
1575
nethercote43a15ce2004-08-30 19:15:12 +00001576 // Sort children by exact_ST_dbld
nethercotec9f36922004-02-14 16:40:02 +00001577 VG_(ssort)(xpt->children, xpt->n_children, sizeof(XPt*),
nethercote43a15ce2004-08-30 19:15:12 +00001578 XPt_cmp_exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001579
1580 SPRINTF(buf, "%s\nCalled from:%s\n", maybe_p, maybe_ul);
1581 for (i = 0; i < xpt->n_children; i++) {
1582 child = xpt->children[i];
1583
1584 // Stop when <1% of total spacetime
nethercote43a15ce2004-08-30 19:15:12 +00001585 if (child->exact_ST_dbld * 1000 / (total_spacetime * 2) < 5) {
nethercotec9f36922004-02-14 16:40:02 +00001586 UInt n_insig = xpt->n_children - i;
1587 Char* s = ( n_insig == 1 ? "" : "s" );
1588 Char* and = ( 0 == i ? "" : "and " );
1589 Char* other = ( 0 == i ? "" : "other " );
1590 SPRINTF(buf, " %s%s%d %sinsignificant place%s%s\n\n",
1591 maybe_li, and, n_insig, other, s, maybe_fli);
1592 break;
1593 }
1594
nethercote43a15ce2004-08-30 19:15:12 +00001595 // Remember: exact_ST_dbld is space.time *doubled*
njnd01fef72005-03-25 23:35:48 +00001596 perc = make_perc(child->exact_ST_dbld / 2, total_spacetime);
1597 ip_desc = VG_(describe_IP)(child->ip-1, buf2, BUF_LEN);
nethercotec9f36922004-02-14 16:40:02 +00001598 if (is_HTML) {
1599 SPRINTF(buf, "<li><a name=\"a%x\"></a>", child );
1600
1601 if (child->n_children > 0) {
1602 SPRINTF(buf, "<a href=\"#b%x\">%s</a>", child, perc);
1603 } else {
1604 SPRINTF(buf, "%s", perc);
1605 }
njnd01fef72005-03-25 23:35:48 +00001606 SPRINTF(buf, ": %s\n", ip_desc);
nethercotec9f36922004-02-14 16:40:02 +00001607 } else {
njnd01fef72005-03-25 23:35:48 +00001608 SPRINTF(buf, " %6s: %s\n\n", perc, ip_desc);
nethercotec9f36922004-02-14 16:40:02 +00001609 }
1610
1611 if (child->n_children > 0) {
1612 enqueue(q, (void*)child);
1613 c2++;
1614 }
1615 }
1616 SPRINTF(buf, "%s%s", maybe_ful, maybe_p);
1617 c1--;
1618
1619 // Putting markers between levels of the structure:
1620 // c1 tracks how many to go on this level, c2 tracks how many we've
1621 // queued up for the next level while finishing off this level.
1622 // When c1 gets to zero, we've changed levels, so print a marker,
1623 // move c2 into c1, and zero c2.
1624 if (0 == c1) {
1625 L++;
1626 c1 = c2;
1627 c2 = 0;
1628 if (! is_empty_queue(q) ) { // avoid empty one at end
1629 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1630 }
1631 } else {
1632 SPRINTF(buf, "---------------------------------%s\n", maybe_br);
1633 }
1634 }
1635 SPRINTF(buf, "%s\n\nEnd of information. Rerun with a bigger "
1636 "%s value for more.\n", end_hr, depth);
1637}
1638
1639static void pp_all_XPts(Int fd, XPt* xpt, ULong heap_spacetime,
1640 ULong total_spacetime)
1641{
1642 Queue* q = construct_queue(100);
nethercote43a15ce2004-08-30 19:15:12 +00001643
nethercotec9f36922004-02-14 16:40:02 +00001644 enqueue(q, xpt);
1645 pp_all_XPts2(fd, q, heap_spacetime, total_spacetime);
1646 destruct_queue(q);
1647}
1648
1649static void
1650write_text_file(ULong total_ST, ULong heap_ST)
1651{
1652 Int fd, i;
1653 Char* text_file;
1654 Char* maybe_p = ( XHTML == clo_format ? "<p>" : "" );
1655
1656 VGP_PUSHCC(VgpPrintXPts);
1657
1658 // Open file
1659 text_file = make_filename( base_dir,
1660 ( XText == clo_format ? ".txt" : ".html" ) );
1661
1662 fd = VG_(open)(text_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1663 VKI_S_IRUSR|VKI_S_IWUSR);
1664 if (fd < 0) {
1665 file_err( text_file );
1666 VGP_POPCC(VgpPrintXPts);
1667 return;
1668 }
1669
1670 // Header
1671 if (XHTML == clo_format) {
1672 SPRINTF(buf, "<html>\n"
1673 "<head>\n"
1674 "<title>%s</title>\n"
1675 "</head>\n"
1676 "<body>\n",
1677 text_file);
1678 }
1679
1680 // Command line
1681 SPRINTF(buf, "Command: ");
1682 for (i = 0; i < VG_(client_argc); i++)
1683 SPRINTF(buf, "%s ", VG_(client_argv)[i]);
1684 SPRINTF(buf, "\n%s\n", maybe_p);
1685
1686 if (clo_heap)
1687 pp_all_XPts(fd, alloc_xpt, heap_ST, total_ST);
1688
njnca82cc02004-11-22 17:18:48 +00001689 tl_assert(fd >= 0);
nethercotec9f36922004-02-14 16:40:02 +00001690 VG_(close)(fd);
1691
1692 VGP_POPCC(VgpPrintXPts);
1693}
1694
1695/*------------------------------------------------------------*/
1696/*--- Finalisation ---*/
1697/*------------------------------------------------------------*/
1698
1699static void
1700print_summary(ULong total_ST, ULong heap_ST, ULong heap_admin_ST,
1701 ULong stack_ST)
1702{
1703 VG_(message)(Vg_UserMsg, "Total spacetime: %,ld ms.B", total_ST);
1704
1705 // Heap --------------------------------------------------------------
1706 if (clo_heap)
1707 VG_(message)(Vg_UserMsg, "heap: %s",
nethercote43a15ce2004-08-30 19:15:12 +00001708 ( 0 == total_ST ? (Char*)"(n/a)"
1709 : make_perc(heap_ST, total_ST) ) );
nethercotec9f36922004-02-14 16:40:02 +00001710
1711 // Heap admin --------------------------------------------------------
1712 if (clo_heap_admin)
1713 VG_(message)(Vg_UserMsg, "heap admin: %s",
nethercote43a15ce2004-08-30 19:15:12 +00001714 ( 0 == total_ST ? (Char*)"(n/a)"
1715 : make_perc(heap_admin_ST, total_ST) ) );
nethercotec9f36922004-02-14 16:40:02 +00001716
njnca82cc02004-11-22 17:18:48 +00001717 tl_assert( VG_(HT_count_nodes)(malloc_list) == n_heap_blocks );
nethercotec9f36922004-02-14 16:40:02 +00001718
1719 // Stack(s) ----------------------------------------------------------
nethercote43a15ce2004-08-30 19:15:12 +00001720 if (clo_stacks) {
nethercotec9f36922004-02-14 16:40:02 +00001721 VG_(message)(Vg_UserMsg, "stack(s): %s",
sewardjb5f6f512005-03-10 23:59:00 +00001722 ( 0 == stack_ST ? (Char*)"0%"
1723 : make_perc(stack_ST, total_ST) ) );
nethercote43a15ce2004-08-30 19:15:12 +00001724 }
nethercotec9f36922004-02-14 16:40:02 +00001725
1726 if (VG_(clo_verbosity) > 1) {
njnca82cc02004-11-22 17:18:48 +00001727 tl_assert(n_xpts > 0); // always have alloc_xpt
nethercotec9f36922004-02-14 16:40:02 +00001728 VG_(message)(Vg_DebugMsg, " allocs: %u", n_allocs);
1729 VG_(message)(Vg_DebugMsg, "zeroallocs: %u (%d%%)", n_zero_allocs,
1730 n_zero_allocs * 100 / n_allocs );
1731 VG_(message)(Vg_DebugMsg, " frees: %u", n_frees);
1732 VG_(message)(Vg_DebugMsg, " XPts: %u (%d B)", n_xpts,
1733 n_xpts*sizeof(XPt));
1734 VG_(message)(Vg_DebugMsg, " bot-XPts: %u (%d%%)", n_bot_xpts,
1735 n_bot_xpts * 100 / n_xpts);
1736 VG_(message)(Vg_DebugMsg, " top-XPts: %u (%d%%)", alloc_xpt->n_children,
1737 alloc_xpt->n_children * 100 / n_xpts);
1738 VG_(message)(Vg_DebugMsg, "c-reallocs: %u", n_children_reallocs);
1739 VG_(message)(Vg_DebugMsg, "snap-frees: %u", n_snapshot_frees);
1740 VG_(message)(Vg_DebugMsg, "atmp censi: %u", n_attempted_censi);
1741 VG_(message)(Vg_DebugMsg, "fake censi: %u", n_fake_censi);
1742 VG_(message)(Vg_DebugMsg, "real censi: %u", n_real_censi);
1743 VG_(message)(Vg_DebugMsg, " halvings: %u", n_halvings);
1744 }
1745}
1746
njn51d827b2005-05-09 01:02:08 +00001747static void ms_fini(Int exit_status)
nethercotec9f36922004-02-14 16:40:02 +00001748{
1749 ULong total_ST = 0;
1750 ULong heap_ST = 0;
1751 ULong heap_admin_ST = 0;
1752 ULong stack_ST = 0;
1753
1754 // Do a final (empty) sample to show program's end
1755 hp_census();
1756
1757 // Redo spacetimes of significant contexts to match the .hp file.
nethercote43a15ce2004-08-30 19:15:12 +00001758 calc_exact_ST_dbld(&heap_ST, &heap_admin_ST, &stack_ST);
nethercotec9f36922004-02-14 16:40:02 +00001759 total_ST = heap_ST + heap_admin_ST + stack_ST;
1760 write_hp_file ( );
1761 write_text_file( total_ST, heap_ST );
1762 print_summary ( total_ST, heap_ST, heap_admin_ST, stack_ST );
1763}
1764
njn51d827b2005-05-09 01:02:08 +00001765/*------------------------------------------------------------*/
1766/*--- Initialisation ---*/
1767/*------------------------------------------------------------*/
1768
1769static void ms_post_clo_init(void)
1770{
1771 ms_interval = 1;
1772
1773 // Do an initial sample for t = 0
1774 hp_census();
1775}
1776
1777static void ms_pre_clo_init()
1778{
1779 VG_(details_name) ("Massif");
1780 VG_(details_version) (NULL);
1781 VG_(details_description) ("a space profiler");
1782 VG_(details_copyright_author)("Copyright (C) 2003, Nicholas Nethercote");
1783 VG_(details_bug_reports_to) (VG_BUGS_TO);
1784
1785 // Basic functions
1786 VG_(basic_tool_funcs) (ms_post_clo_init,
1787 ms_instrument,
1788 ms_fini);
1789
1790 // Needs
1791 VG_(needs_libc_freeres)();
1792 VG_(needs_command_line_options)(ms_process_cmd_line_option,
1793 ms_print_usage,
1794 ms_print_debug_usage);
1795 VG_(needs_client_requests) (ms_handle_client_request);
1796
1797 // Malloc replacement
1798 VG_(malloc_funcs) (ms_malloc,
1799 ms___builtin_new,
1800 ms___builtin_vec_new,
1801 ms_memalign,
1802 ms_calloc,
1803 ms_free,
1804 ms___builtin_delete,
1805 ms___builtin_vec_delete,
1806 ms_realloc,
1807 0 );
1808
1809 // Events to track
1810 VG_(track_new_mem_stack_signal)( new_mem_stack_signal );
1811 VG_(track_die_mem_stack_signal)( die_mem_stack_signal );
1812
1813 // Profiling events
1814 VG_(register_profile_event)(VgpGetXPt, "get-XPt");
1815 VG_(register_profile_event)(VgpGetXPtSearch, "get-XPt-search");
1816 VG_(register_profile_event)(VgpCensus, "census");
1817 VG_(register_profile_event)(VgpCensusHeap, "census-heap");
1818 VG_(register_profile_event)(VgpCensusSnapshot, "census-snapshot");
1819 VG_(register_profile_event)(VgpCensusTreeSize, "census-treesize");
1820 VG_(register_profile_event)(VgpUpdateXCon, "update-XCon");
1821 VG_(register_profile_event)(VgpCalcSpacetime2, "calc-exact_ST_dbld");
1822 VG_(register_profile_event)(VgpPrintHp, "print-hp");
1823 VG_(register_profile_event)(VgpPrintXPts, "print-XPts");
1824
1825 // HP_Chunks
1826 malloc_list = VG_(HT_construct)();
1827
1828 // Dummy node at top of the context structure.
1829 alloc_xpt = new_XPt(0, NULL, /*is_bottom*/False);
1830
1831 tl_assert( VG_(getcwd_alloc)(&base_dir) );
1832}
1833
1834VG_DETERMINE_INTERFACE_VERSION(ms_pre_clo_init, 0)
nethercotec9f36922004-02-14 16:40:02 +00001835
1836/*--------------------------------------------------------------------*/
1837/*--- end ms_main.c ---*/
1838/*--------------------------------------------------------------------*/
1839