blob: bff4de53980369e683811b23950abd7c05a065b4 [file] [log] [blame]
nethercotec9f36922004-02-14 16:40:02 +00001
2/*--------------------------------------------------------------------*/
nethercote996901a2004-08-03 13:29:09 +00003/*--- Massif: a heap profiling tool. ms_main.c ---*/
nethercotec9f36922004-02-14 16:40:02 +00004/*--------------------------------------------------------------------*/
5
6/*
nethercote996901a2004-08-03 13:29:09 +00007 This file is part of Massif, a Valgrind tool for profiling memory
nethercotec9f36922004-02-14 16:40:02 +00008 usage of programs.
9
njn53612422005-03-12 16:22:54 +000010 Copyright (C) 2003-2005 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000011 njn@valgrind.org
nethercotec9f36922004-02-14 16:40:02 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31// Memory profiler. Produces a graph, gives lots of information about
32// allocation contexts, in terms of space.time values (ie. area under the
33// graph). Allocation context information is hierarchical, and can thus
34// be inspected step-wise to an appropriate depth. See comments on data
35// structures below for more info on how things work.
36
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_basics.h"
njnea27e462005-05-31 02:38:09 +000038#include "pub_tool_debuginfo.h"
njn81c00df2005-05-14 21:28:43 +000039#include "pub_tool_hashtable.h"
njn97405b22005-06-02 03:39:33 +000040#include "pub_tool_libcbase.h"
njn132bfcc2005-06-04 19:16:06 +000041#include "pub_tool_libcassert.h"
njneb8896b2005-06-04 20:03:55 +000042#include "pub_tool_libcfile.h"
njne9befc62005-06-11 15:51:30 +000043#include "pub_tool_libcmman.h"
njn36a20fa2005-06-03 03:08:39 +000044#include "pub_tool_libcprint.h"
njnf39e9a32005-06-12 02:43:17 +000045#include "pub_tool_libcproc.h"
njnb506bd82005-06-21 04:01:51 +000046#include "pub_tool_machine.h"
njn717cde52005-05-10 02:47:21 +000047#include "pub_tool_mallocfree.h"
njn20242342005-05-16 23:31:24 +000048#include "pub_tool_options.h"
njn31513b42005-06-01 03:09:59 +000049#include "pub_tool_profile.h"
njn717cde52005-05-10 02:47:21 +000050#include "pub_tool_replacemalloc.h"
njnd01fef72005-03-25 23:35:48 +000051#include "pub_tool_stacktrace.h"
njn43b9a8a2005-05-10 04:37:01 +000052#include "pub_tool_tooliface.h"
nethercotec9f36922004-02-14 16:40:02 +000053
54#include "valgrind.h" // For {MALLOC,FREE}LIKE_BLOCK
55
56/*------------------------------------------------------------*/
57/*--- Overview of operation ---*/
58/*------------------------------------------------------------*/
59
60// Heap blocks are tracked, and the amount of space allocated by various
61// contexts (ie. lines of code, more or less) is also tracked.
62// Periodically, a census is taken, and the amount of space used, at that
63// point, by the most significant (highly allocating) contexts is recorded.
64// Census start off frequently, but are scaled back as the program goes on,
65// so that there are always a good number of them. At the end, overall
66// spacetimes for different contexts (of differing levels of precision) is
67// calculated, the graph is printed, and the text giving spacetimes for the
68// increasingly precise contexts is given.
69//
70// Measures the following:
71// - heap blocks
72// - heap admin bytes
73// - stack(s)
74// - code (code segments loaded at startup, and loaded with mmap)
75// - data (data segments loaded at startup, and loaded/created with mmap,
76// and brk()d segments)
77
78/*------------------------------------------------------------*/
79/*--- Main types ---*/
80/*------------------------------------------------------------*/
81
82// An XPt represents an "execution point", ie. a code address. Each XPt is
83// part of a tree of XPts (an "execution tree", or "XTree"). Each
84// top-to-bottom path through an XTree gives an execution context ("XCon"),
85// and is equivalent to a traditional Valgrind ExeContext.
86//
87// The XPt at the top of an XTree (but below "alloc_xpt") is called a
88// "top-XPt". The XPts are the bottom of an XTree (leaf nodes) are
89// "bottom-XPTs". The number of XCons in an XTree is equal to the number of
90// bottom-XPTs in that XTree.
91//
92// All XCons have the same top-XPt, "alloc_xpt", which represents all
93// allocation functions like malloc(). It's a bit of a fake XPt, though,
94// and is only used because it makes some of the code simpler.
95//
96// XTrees are bi-directional.
97//
98// > parent < Example: if child1() calls parent() and child2()
99// / | \ also calls parent(), and parent() calls malloc(),
100// | / \ | the XTree will look like this.
101// | v v |
102// child1 child2
103
104typedef struct _XPt XPt;
105
106struct _XPt {
njnd01fef72005-03-25 23:35:48 +0000107 Addr ip; // code address
nethercotec9f36922004-02-14 16:40:02 +0000108
109 // Bottom-XPts: space for the precise context.
110 // Other XPts: space of all the descendent bottom-XPts.
111 // Nb: this value goes up and down as the program executes.
112 UInt curr_space;
113
114 // An approximate space.time calculation used along the way for selecting
115 // which contexts to include at each census point.
116 // !!! top-XPTs only !!!
nethercote43a15ce2004-08-30 19:15:12 +0000117 ULong approx_ST;
nethercotec9f36922004-02-14 16:40:02 +0000118
nethercote43a15ce2004-08-30 19:15:12 +0000119 // exact_ST_dbld is an exact space.time calculation done at the end, and
nethercotec9f36922004-02-14 16:40:02 +0000120 // used in the results.
121 // Note that it is *doubled*, to avoid rounding errors.
122 // !!! not used for 'alloc_xpt' !!!
nethercote43a15ce2004-08-30 19:15:12 +0000123 ULong exact_ST_dbld;
nethercotec9f36922004-02-14 16:40:02 +0000124
125 // n_children and max_children are integers; a very big program might
126 // have more than 65536 allocation points (Konqueror startup has 1800).
127 XPt* parent; // pointer to parent XPt
128 UInt n_children; // number of children
129 UInt max_children; // capacity of children array
130 XPt** children; // pointers to children XPts
131};
132
133// Each census snapshots the most significant XTrees, each XTree having a
134// top-XPt as its root. The 'curr_space' element for each XPt is recorded
135// in the snapshot. The snapshot contains all the XTree's XPts, not in a
136// tree structure, but flattened into an array. This flat snapshot is used
nethercote43a15ce2004-08-30 19:15:12 +0000137// at the end for computing exact_ST_dbld for each XPt.
nethercotec9f36922004-02-14 16:40:02 +0000138//
139// Graph resolution, x-axis: no point having more than about 200 census
140// x-points; you can't see them on the graph. Therefore:
141//
142// - do a census every 1 ms for first 200 --> 200, all (200 ms)
143// - halve (drop half of them) --> 100, every 2nd (200 ms)
144// - do a census every 2 ms for next 200 --> 200, every 2nd (400 ms)
145// - halve --> 100, every 4th (400 ms)
146// - do a census every 4 ms for next 400 --> 200, every 4th (800 ms)
147// - etc.
148//
149// This isn't exactly right, because we actually drop (N/2)-1 when halving,
150// but it shows the basic idea.
151
152#define MAX_N_CENSI 200 // Keep it even, for simplicity
153
154// Graph resolution, y-axis: hp2ps only draws the 19 biggest (in space-time)
155// bands, rest get lumped into OTHERS. I only print the top N
156// (cumulative-so-far space-time) at each point. N should be a bit bigger
157// than 19 in case the cumulative space-time doesn't fit with the eventual
158// space-time computed by hp2ps (but it should be close if the samples are
159// evenly spread, since hp2ps does an approximate per-band space-time
160// calculation that just sums the totals; ie. it assumes all samples are
161// the same distance apart).
162
163#define MAX_SNAPSHOTS 32
164
165typedef
166 struct {
167 XPt* xpt;
168 UInt space;
169 }
170 XPtSnapshot;
171
172// An XTree snapshot is stored as an array of of XPt snapshots.
173typedef XPtSnapshot* XTreeSnapshot;
174
175typedef
176 struct {
177 Int ms_time; // Int: must allow -1
178 XTreeSnapshot xtree_snapshots[MAX_SNAPSHOTS+1]; // +1 for zero-termination
179 UInt others_space;
180 UInt heap_admin_space;
181 UInt stacks_space;
182 }
183 Census;
184
185// Metadata for heap blocks. Each one contains a pointer to a bottom-XPt,
186// which is a foothold into the XCon at which it was allocated. From
187// HP_Chunks, XPt 'space' fields are incremented (at allocation) and
188// decremented (at deallocation).
189//
190// Nb: first two fields must match core's VgHashNode.
191typedef
192 struct _HP_Chunk {
193 struct _HP_Chunk* next;
194 Addr data; // Ptr to actual block
nethercote7ac7f7b2004-11-02 12:36:02 +0000195 SizeT size; // Size requested
nethercotec9f36922004-02-14 16:40:02 +0000196 XPt* where; // Where allocated; bottom-XPt
197 }
198 HP_Chunk;
199
200/*------------------------------------------------------------*/
201/*--- Profiling events ---*/
202/*------------------------------------------------------------*/
203
204typedef
205 enum {
206 VgpGetXPt = VgpFini+1,
207 VgpGetXPtSearch,
208 VgpCensus,
209 VgpCensusHeap,
210 VgpCensusSnapshot,
211 VgpCensusTreeSize,
212 VgpUpdateXCon,
213 VgpCalcSpacetime2,
214 VgpPrintHp,
215 VgpPrintXPts,
216 }
njn4be0a692004-11-22 18:10:36 +0000217 VgpToolCC;
nethercotec9f36922004-02-14 16:40:02 +0000218
219/*------------------------------------------------------------*/
220/*--- Statistics ---*/
221/*------------------------------------------------------------*/
222
223// Konqueror startup, to give an idea of the numbers involved with a biggish
224// program, with default depth:
225//
226// depth=3 depth=40
227// - 310,000 allocations
228// - 300,000 frees
229// - 15,000 XPts 800,000 XPts
230// - 1,800 top-XPts
231
232static UInt n_xpts = 0;
233static UInt n_bot_xpts = 0;
234static UInt n_allocs = 0;
235static UInt n_zero_allocs = 0;
236static UInt n_frees = 0;
237static UInt n_children_reallocs = 0;
238static UInt n_snapshot_frees = 0;
239
240static UInt n_halvings = 0;
241static UInt n_real_censi = 0;
242static UInt n_fake_censi = 0;
243static UInt n_attempted_censi = 0;
244
245/*------------------------------------------------------------*/
246/*--- Globals ---*/
247/*------------------------------------------------------------*/
248
249#define FILENAME_LEN 256
250
251#define SPRINTF(zz_buf, fmt, args...) \
252 do { Int len = VG_(sprintf)(zz_buf, fmt, ## args); \
253 VG_(write)(fd, (void*)zz_buf, len); \
254 } while (0)
255
256#define BUF_LEN 1024 // general purpose
257static Char buf [BUF_LEN];
258static Char buf2[BUF_LEN];
259static Char buf3[BUF_LEN];
260
nethercote8b5f40c2004-11-02 13:29:50 +0000261static SizeT sigstacks_space = 0; // Current signal stacks space sum
nethercotec9f36922004-02-14 16:40:02 +0000262
263static VgHashTable malloc_list = NULL; // HP_Chunks
264
265static UInt n_heap_blocks = 0;
266
njn51d827b2005-05-09 01:02:08 +0000267// Current directory at startup.
njn57ca7ab2005-06-21 23:44:58 +0000268static Char base_dir[VKI_PATH_MAX];
nethercotec9f36922004-02-14 16:40:02 +0000269
270#define MAX_ALLOC_FNS 32 // includes the builtin ones
271
nethercotec7469182004-05-11 09:21:08 +0000272// First few filled in, rest should be zeroed. Zero-terminated vector.
273static UInt n_alloc_fns = 11;
nethercotec9f36922004-02-14 16:40:02 +0000274static Char* alloc_fns[MAX_ALLOC_FNS] = {
275 "malloc",
276 "operator new(unsigned)",
277 "operator new[](unsigned)",
nethercoteeb479cb2004-05-11 16:37:17 +0000278 "operator new(unsigned, std::nothrow_t const&)",
279 "operator new[](unsigned, std::nothrow_t const&)",
nethercotec9f36922004-02-14 16:40:02 +0000280 "__builtin_new",
281 "__builtin_vec_new",
282 "calloc",
283 "realloc",
fitzhardinge51f3ff12004-03-04 22:42:03 +0000284 "memalign",
nethercotec9f36922004-02-14 16:40:02 +0000285};
286
287
288/*------------------------------------------------------------*/
289/*--- Command line args ---*/
290/*------------------------------------------------------------*/
291
292#define MAX_DEPTH 50
293
294typedef
295 enum {
296 XText, XHTML,
297 }
298 XFormat;
299
300static Bool clo_heap = True;
301static UInt clo_heap_admin = 8;
302static Bool clo_stacks = True;
303static Bool clo_depth = 3;
304static XFormat clo_format = XText;
305
njn51d827b2005-05-09 01:02:08 +0000306static Bool ms_process_cmd_line_option(Char* arg)
nethercotec9f36922004-02-14 16:40:02 +0000307{
njn45270a22005-03-27 01:00:11 +0000308 VG_BOOL_CLO(arg, "--heap", clo_heap)
309 else VG_BOOL_CLO(arg, "--stacks", clo_stacks)
nethercotec9f36922004-02-14 16:40:02 +0000310
njn45270a22005-03-27 01:00:11 +0000311 else VG_NUM_CLO (arg, "--heap-admin", clo_heap_admin)
312 else VG_BNUM_CLO(arg, "--depth", clo_depth, 1, MAX_DEPTH)
nethercotec9f36922004-02-14 16:40:02 +0000313
314 else if (VG_CLO_STREQN(11, arg, "--alloc-fn=")) {
315 alloc_fns[n_alloc_fns] = & arg[11];
316 n_alloc_fns++;
317 if (n_alloc_fns >= MAX_ALLOC_FNS) {
318 VG_(printf)("Too many alloc functions specified, sorry");
319 VG_(bad_option)(arg);
320 }
321 }
322
323 else if (VG_CLO_STREQ(arg, "--format=text"))
324 clo_format = XText;
325 else if (VG_CLO_STREQ(arg, "--format=html"))
326 clo_format = XHTML;
327
328 else
329 return VG_(replacement_malloc_process_cmd_line_option)(arg);
nethercote27fec902004-06-16 21:26:32 +0000330
nethercotec9f36922004-02-14 16:40:02 +0000331 return True;
332}
333
njn51d827b2005-05-09 01:02:08 +0000334static void ms_print_usage(void)
nethercotec9f36922004-02-14 16:40:02 +0000335{
336 VG_(printf)(
337" --heap=no|yes profile heap blocks [yes]\n"
338" --heap-admin=<number> average admin bytes per heap block [8]\n"
339" --stacks=no|yes profile stack(s) [yes]\n"
340" --depth=<number> depth of contexts [3]\n"
341" --alloc-fn=<name> specify <fn> as an alloc function [empty]\n"
342" --format=text|html format of textual output [text]\n"
343 );
344 VG_(replacement_malloc_print_usage)();
345}
346
njn51d827b2005-05-09 01:02:08 +0000347static void ms_print_debug_usage(void)
nethercotec9f36922004-02-14 16:40:02 +0000348{
349 VG_(replacement_malloc_print_debug_usage)();
350}
351
352/*------------------------------------------------------------*/
353/*--- Execution contexts ---*/
354/*------------------------------------------------------------*/
355
356// Fake XPt representing all allocation functions like malloc(). Acts as
357// parent node to all top-XPts.
358static XPt* alloc_xpt;
359
360// Cheap allocation for blocks that never need to be freed. Saves about 10%
361// for Konqueror startup with --depth=40.
nethercote7ac7f7b2004-11-02 12:36:02 +0000362static void* perm_malloc(SizeT n_bytes)
nethercotec9f36922004-02-14 16:40:02 +0000363{
364 static Addr hp = 0; // current heap pointer
365 static Addr hp_lim = 0; // maximum usable byte in current block
366
367 #define SUPERBLOCK_SIZE (1 << 20) // 1 MB
368
369 if (hp + n_bytes > hp_lim) {
370 hp = (Addr)VG_(get_memory_from_mmap)(SUPERBLOCK_SIZE, "perm_malloc");
371 hp_lim = hp + SUPERBLOCK_SIZE - 1;
372 }
373
374 hp += n_bytes;
375
376 return (void*)(hp - n_bytes);
377}
378
379
380
njnd01fef72005-03-25 23:35:48 +0000381static XPt* new_XPt(Addr ip, XPt* parent, Bool is_bottom)
nethercotec9f36922004-02-14 16:40:02 +0000382{
383 XPt* xpt = perm_malloc(sizeof(XPt));
njnd01fef72005-03-25 23:35:48 +0000384 xpt->ip = ip;
nethercotec9f36922004-02-14 16:40:02 +0000385
nethercote43a15ce2004-08-30 19:15:12 +0000386 xpt->curr_space = 0;
387 xpt->approx_ST = 0;
388 xpt->exact_ST_dbld = 0;
nethercotec9f36922004-02-14 16:40:02 +0000389
390 xpt->parent = parent;
nethercotefc016352004-04-27 09:51:51 +0000391
392 // Check parent is not a bottom-XPt
njnca82cc02004-11-22 17:18:48 +0000393 tl_assert(parent == NULL || 0 != parent->max_children);
nethercotec9f36922004-02-14 16:40:02 +0000394
395 xpt->n_children = 0;
396
397 // If a bottom-XPt, don't allocate space for children. This can be 50%
398 // or more, although it tends to drop as --depth increases (eg. 10% for
399 // konqueror with --depth=20).
400 if ( is_bottom ) {
401 xpt->max_children = 0;
402 xpt->children = NULL;
403 n_bot_xpts++;
404 } else {
405 xpt->max_children = 4;
406 xpt->children = VG_(malloc)( xpt->max_children * sizeof(XPt*) );
407 }
408
409 // Update statistics
410 n_xpts++;
411
412 return xpt;
413}
414
njnd01fef72005-03-25 23:35:48 +0000415static Bool is_alloc_fn(Addr ip)
nethercotec9f36922004-02-14 16:40:02 +0000416{
417 Int i;
418
njnd01fef72005-03-25 23:35:48 +0000419 if ( VG_(get_fnname)(ip, buf, BUF_LEN) ) {
nethercotec9f36922004-02-14 16:40:02 +0000420 for (i = 0; i < n_alloc_fns; i++) {
421 if (VG_STREQ(buf, alloc_fns[i]))
422 return True;
423 }
424 }
425 return False;
426}
427
428// Returns an XCon, from the bottom-XPt. Nb: the XPt returned must be a
429// bottom-XPt now and must always remain a bottom-XPt. We go to some effort
430// to ensure this in certain cases. See comments below.
431static XPt* get_XCon( ThreadId tid, Bool custom_malloc )
432{
njnd01fef72005-03-25 23:35:48 +0000433 // Static to minimise stack size. +1 for added ~0 IP
434 static Addr ips[MAX_DEPTH + MAX_ALLOC_FNS + 1];
nethercotec9f36922004-02-14 16:40:02 +0000435
436 XPt* xpt = alloc_xpt;
njnd01fef72005-03-25 23:35:48 +0000437 UInt n_ips, L, A, B, nC;
nethercotec9f36922004-02-14 16:40:02 +0000438 UInt overestimate;
439 Bool reached_bottom;
440
441 VGP_PUSHCC(VgpGetXPt);
442
443 // Want at least clo_depth non-alloc-fn entries in the snapshot.
444 // However, because we have 1 or more (an unknown number, at this point)
445 // alloc-fns ignored, we overestimate the size needed for the stack
446 // snapshot. Then, if necessary, we repeatedly increase the size until
447 // it is enough.
448 overestimate = 2;
449 while (True) {
njnd01fef72005-03-25 23:35:48 +0000450 n_ips = VG_(get_StackTrace)( tid, ips, clo_depth + overestimate );
nethercotec9f36922004-02-14 16:40:02 +0000451
njnd01fef72005-03-25 23:35:48 +0000452 // Now we add a dummy "unknown" IP at the end. This is only used if we
453 // run out of IPs before hitting clo_depth. It's done to ensure the
nethercotec9f36922004-02-14 16:40:02 +0000454 // XPt we return is (now and forever) a bottom-XPt. If the returned XPt
455 // wasn't a bottom-XPt (now or later) it would cause problems later (eg.
nethercote43a15ce2004-08-30 19:15:12 +0000456 // the parent's approx_ST wouldn't be equal [or almost equal] to the
457 // total of the childrens' approx_STs).
njnd01fef72005-03-25 23:35:48 +0000458 ips[ n_ips++ ] = ~((Addr)0);
nethercotec9f36922004-02-14 16:40:02 +0000459
njnd01fef72005-03-25 23:35:48 +0000460 // Skip over alloc functions in ips[].
461 for (L = 0; is_alloc_fn(ips[L]) && L < n_ips; L++) { }
nethercotec9f36922004-02-14 16:40:02 +0000462
463 // Must be at least one alloc function, unless client used
464 // MALLOCLIKE_BLOCK
njnca82cc02004-11-22 17:18:48 +0000465 if (!custom_malloc) tl_assert(L > 0);
nethercotec9f36922004-02-14 16:40:02 +0000466
467 // Should be at least one non-alloc function. If not, try again.
njnd01fef72005-03-25 23:35:48 +0000468 if (L == n_ips) {
nethercotec9f36922004-02-14 16:40:02 +0000469 overestimate += 2;
470 if (overestimate > MAX_ALLOC_FNS)
njn67993252004-11-22 18:02:32 +0000471 VG_(tool_panic)("No stk snapshot big enough to find non-alloc fns");
nethercotec9f36922004-02-14 16:40:02 +0000472 } else {
473 break;
474 }
475 }
476 A = L;
njnd01fef72005-03-25 23:35:48 +0000477 B = n_ips - 1;
nethercotec9f36922004-02-14 16:40:02 +0000478 reached_bottom = False;
479
njnd01fef72005-03-25 23:35:48 +0000480 // By this point, the IPs we care about are in ips[A]..ips[B]
nethercotec9f36922004-02-14 16:40:02 +0000481
482 // Now do the search/insertion of the XCon. 'L' is the loop counter,
njnd01fef72005-03-25 23:35:48 +0000483 // being the index into ips[].
nethercotec9f36922004-02-14 16:40:02 +0000484 while (True) {
njnd01fef72005-03-25 23:35:48 +0000485 // Look for IP in xpt's children.
nethercotec9f36922004-02-14 16:40:02 +0000486 // XXX: linear search, ugh -- about 10% of time for konqueror startup
487 // XXX: tried cacheing last result, only hit about 4% for konqueror
488 // Nb: this search hits about 98% of the time for konqueror
489 VGP_PUSHCC(VgpGetXPtSearch);
490
491 // If we've searched/added deep enough, or run out of EIPs, this is
492 // the bottom XPt.
493 if (L - A + 1 == clo_depth || L == B)
494 reached_bottom = True;
495
496 nC = 0;
497 while (True) {
498 if (nC == xpt->n_children) {
499 // not found, insert new XPt
njnca82cc02004-11-22 17:18:48 +0000500 tl_assert(xpt->max_children != 0);
501 tl_assert(xpt->n_children <= xpt->max_children);
nethercotec9f36922004-02-14 16:40:02 +0000502 // Expand 'children' if necessary
503 if (xpt->n_children == xpt->max_children) {
504 xpt->max_children *= 2;
505 xpt->children = VG_(realloc)( xpt->children,
506 xpt->max_children * sizeof(XPt*) );
507 n_children_reallocs++;
508 }
njnd01fef72005-03-25 23:35:48 +0000509 // Make new XPt for IP, insert in list
nethercotec9f36922004-02-14 16:40:02 +0000510 xpt->children[ xpt->n_children++ ] =
njnd01fef72005-03-25 23:35:48 +0000511 new_XPt(ips[L], xpt, reached_bottom);
nethercotec9f36922004-02-14 16:40:02 +0000512 break;
513 }
njnd01fef72005-03-25 23:35:48 +0000514 if (ips[L] == xpt->children[nC]->ip) break; // found the IP
nethercotec9f36922004-02-14 16:40:02 +0000515 nC++; // keep looking
516 }
517 VGP_POPCC(VgpGetXPtSearch);
518
519 // Return found/built bottom-XPt.
520 if (reached_bottom) {
njnca82cc02004-11-22 17:18:48 +0000521 tl_assert(0 == xpt->children[nC]->n_children); // Must be bottom-XPt
nethercotec9f36922004-02-14 16:40:02 +0000522 VGP_POPCC(VgpGetXPt);
523 return xpt->children[nC];
524 }
525
526 // Descend to next level in XTree, the newly found/built non-bottom-XPt
527 xpt = xpt->children[nC];
528 L++;
529 }
530}
531
532// Update 'curr_space' of every XPt in the XCon, by percolating upwards.
533static void update_XCon(XPt* xpt, Int space_delta)
534{
535 VGP_PUSHCC(VgpUpdateXCon);
536
njnca82cc02004-11-22 17:18:48 +0000537 tl_assert(True == clo_heap);
538 tl_assert(0 != space_delta);
539 tl_assert(NULL != xpt);
540 tl_assert(0 == xpt->n_children); // must be bottom-XPt
nethercotec9f36922004-02-14 16:40:02 +0000541
542 while (xpt != alloc_xpt) {
njnca82cc02004-11-22 17:18:48 +0000543 if (space_delta < 0) tl_assert(xpt->curr_space >= -space_delta);
nethercotec9f36922004-02-14 16:40:02 +0000544 xpt->curr_space += space_delta;
545 xpt = xpt->parent;
546 }
njnca82cc02004-11-22 17:18:48 +0000547 if (space_delta < 0) tl_assert(alloc_xpt->curr_space >= -space_delta);
nethercotec9f36922004-02-14 16:40:02 +0000548 alloc_xpt->curr_space += space_delta;
549
550 VGP_POPCC(VgpUpdateXCon);
551}
552
553// Actually want a reverse sort, biggest to smallest
nethercote43a15ce2004-08-30 19:15:12 +0000554static Int XPt_cmp_approx_ST(void* n1, void* n2)
nethercotec9f36922004-02-14 16:40:02 +0000555{
556 XPt* xpt1 = *(XPt**)n1;
557 XPt* xpt2 = *(XPt**)n2;
nethercote43a15ce2004-08-30 19:15:12 +0000558 return (xpt1->approx_ST < xpt2->approx_ST ? 1 : -1);
nethercotec9f36922004-02-14 16:40:02 +0000559}
560
nethercote43a15ce2004-08-30 19:15:12 +0000561static Int XPt_cmp_exact_ST_dbld(void* n1, void* n2)
nethercotec9f36922004-02-14 16:40:02 +0000562{
563 XPt* xpt1 = *(XPt**)n1;
564 XPt* xpt2 = *(XPt**)n2;
nethercote43a15ce2004-08-30 19:15:12 +0000565 return (xpt1->exact_ST_dbld < xpt2->exact_ST_dbld ? 1 : -1);
nethercotec9f36922004-02-14 16:40:02 +0000566}
567
568
569/*------------------------------------------------------------*/
570/*--- A generic Queue ---*/
571/*------------------------------------------------------------*/
572
573typedef
574 struct {
575 UInt head; // Index of first entry
576 UInt tail; // Index of final+1 entry, ie. next free slot
577 UInt max_elems;
578 void** elems;
579 }
580 Queue;
581
582static Queue* construct_queue(UInt size)
583{
584 UInt i;
585 Queue* q = VG_(malloc)(sizeof(Queue));
586 q->head = 0;
587 q->tail = 0;
588 q->max_elems = size;
589 q->elems = VG_(malloc)(size * sizeof(void*));
590 for (i = 0; i < size; i++)
591 q->elems[i] = NULL;
592
593 return q;
594}
595
596static void destruct_queue(Queue* q)
597{
598 VG_(free)(q->elems);
599 VG_(free)(q);
600}
601
602static void shuffle(Queue* dest_q, void** old_elems)
603{
604 UInt i, j;
605 for (i = 0, j = dest_q->head; j < dest_q->tail; i++, j++)
606 dest_q->elems[i] = old_elems[j];
607 dest_q->head = 0;
608 dest_q->tail = i;
609 for ( ; i < dest_q->max_elems; i++)
610 dest_q->elems[i] = NULL; // paranoia
611}
612
613// Shuffles elements down. If not enough slots free, increase size. (We
614// don't wait until we've completely run out of space, because there could
615// be lots of shuffling just before that point which would be slow.)
616static void adjust(Queue* q)
617{
618 void** old_elems;
619
njnca82cc02004-11-22 17:18:48 +0000620 tl_assert(q->tail == q->max_elems);
nethercotec9f36922004-02-14 16:40:02 +0000621 if (q->head < 10) {
622 old_elems = q->elems;
623 q->max_elems *= 2;
624 q->elems = VG_(malloc)(q->max_elems * sizeof(void*));
625 shuffle(q, old_elems);
626 VG_(free)(old_elems);
627 } else {
628 shuffle(q, q->elems);
629 }
630}
631
632static void enqueue(Queue* q, void* elem)
633{
634 if (q->tail == q->max_elems)
635 adjust(q);
636 q->elems[q->tail++] = elem;
637}
638
639static Bool is_empty_queue(Queue* q)
640{
641 return (q->head == q->tail);
642}
643
644static void* dequeue(Queue* q)
645{
646 if (is_empty_queue(q))
647 return NULL; // Queue empty
648 else
649 return q->elems[q->head++];
650}
651
652/*------------------------------------------------------------*/
653/*--- malloc() et al replacement wrappers ---*/
654/*------------------------------------------------------------*/
655
nethercotec9f36922004-02-14 16:40:02 +0000656// Forward declaration
657static void hp_census(void);
658
nethercote159dfef2004-09-13 13:27:30 +0000659static
njn57735902004-11-25 18:04:54 +0000660void* new_block ( ThreadId tid, void* p, SizeT size, SizeT align,
661 Bool is_zeroed )
nethercotec9f36922004-02-14 16:40:02 +0000662{
663 HP_Chunk* hc;
nethercote57e36b32004-07-10 14:56:28 +0000664 Bool custom_alloc = (NULL == p);
nethercotec9f36922004-02-14 16:40:02 +0000665 if (size < 0) return NULL;
666
667 VGP_PUSHCC(VgpCliMalloc);
668
669 // Update statistics
670 n_allocs++;
nethercote57e36b32004-07-10 14:56:28 +0000671 if (0 == size) n_zero_allocs++;
nethercotec9f36922004-02-14 16:40:02 +0000672
nethercote57e36b32004-07-10 14:56:28 +0000673 // Allocate and zero if necessary
674 if (!p) {
675 p = VG_(cli_malloc)( align, size );
676 if (!p) {
677 VGP_POPCC(VgpCliMalloc);
678 return NULL;
679 }
680 if (is_zeroed) VG_(memset)(p, 0, size);
681 }
682
njnf1c5def2005-08-11 02:17:07 +0000683 // Make new HP_Chunk node, add to malloc_list
nethercote57e36b32004-07-10 14:56:28 +0000684 hc = VG_(malloc)(sizeof(HP_Chunk));
685 hc->size = size;
686 hc->data = (Addr)p;
687 hc->where = NULL; // paranoia
688 if (clo_heap) {
njn57735902004-11-25 18:04:54 +0000689 hc->where = get_XCon( tid, custom_alloc );
nethercote57e36b32004-07-10 14:56:28 +0000690 if (0 != size)
691 update_XCon(hc->where, size);
692 }
njnf1c5def2005-08-11 02:17:07 +0000693 VG_(HT_add_node)(malloc_list, (VgHashNode*)hc);
694 n_heap_blocks++;
nethercote57e36b32004-07-10 14:56:28 +0000695
696 // do a census!
697 hp_census();
nethercotec9f36922004-02-14 16:40:02 +0000698
699 VGP_POPCC(VgpCliMalloc);
700 return p;
701}
702
703static __inline__
704void die_block ( void* p, Bool custom_free )
705{
njnf1c5def2005-08-11 02:17:07 +0000706 HP_Chunk* hc;
nethercotec9f36922004-02-14 16:40:02 +0000707
708 VGP_PUSHCC(VgpCliMalloc);
709
710 // Update statistics
711 n_frees++;
712
njnf1c5def2005-08-11 02:17:07 +0000713 // Remove HP_Chunk from malloc_list
njn5cc5d7e2005-08-11 02:09:25 +0000714 hc = (HP_Chunk*)VG_(HT_remove)(malloc_list, (UWord)p);
715 if (NULL == hc)
716 return; // must have been a bogus free()
717 tl_assert(n_heap_blocks > 0);
718 n_heap_blocks--;
nethercotec9f36922004-02-14 16:40:02 +0000719
720 if (clo_heap && hc->size != 0)
721 update_XCon(hc->where, -hc->size);
722
nethercote57e36b32004-07-10 14:56:28 +0000723 VG_(free)( hc );
724
725 // Actually free the heap block, if necessary
nethercotec9f36922004-02-14 16:40:02 +0000726 if (!custom_free)
727 VG_(cli_free)( p );
728
nethercote57e36b32004-07-10 14:56:28 +0000729 // do a census!
730 hp_census();
nethercotec9f36922004-02-14 16:40:02 +0000731
nethercotec9f36922004-02-14 16:40:02 +0000732 VGP_POPCC(VgpCliMalloc);
733}
734
735
njn51d827b2005-05-09 01:02:08 +0000736static void* ms_malloc ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000737{
njn57735902004-11-25 18:04:54 +0000738 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000739}
740
njn51d827b2005-05-09 01:02:08 +0000741static void* ms___builtin_new ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000742{
njn57735902004-11-25 18:04:54 +0000743 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000744}
745
njn51d827b2005-05-09 01:02:08 +0000746static void* ms___builtin_vec_new ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000747{
njn57735902004-11-25 18:04:54 +0000748 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000749}
750
njn51d827b2005-05-09 01:02:08 +0000751static void* ms_calloc ( ThreadId tid, SizeT m, SizeT size )
nethercotec9f36922004-02-14 16:40:02 +0000752{
njn57735902004-11-25 18:04:54 +0000753 return new_block( tid, NULL, m*size, VG_(clo_alignment), /*is_zeroed*/True );
nethercotec9f36922004-02-14 16:40:02 +0000754}
755
njn51d827b2005-05-09 01:02:08 +0000756static void *ms_memalign ( ThreadId tid, SizeT align, SizeT n )
fitzhardinge51f3ff12004-03-04 22:42:03 +0000757{
njn57735902004-11-25 18:04:54 +0000758 return new_block( tid, NULL, n, align, False );
fitzhardinge51f3ff12004-03-04 22:42:03 +0000759}
760
njn51d827b2005-05-09 01:02:08 +0000761static void ms_free ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000762{
763 die_block( p, /*custom_free*/False );
764}
765
njn51d827b2005-05-09 01:02:08 +0000766static void ms___builtin_delete ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000767{
768 die_block( p, /*custom_free*/False);
769}
770
njn51d827b2005-05-09 01:02:08 +0000771static void ms___builtin_vec_delete ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000772{
773 die_block( p, /*custom_free*/False );
774}
775
njn51d827b2005-05-09 01:02:08 +0000776static void* ms_realloc ( ThreadId tid, void* p_old, SizeT new_size )
nethercotec9f36922004-02-14 16:40:02 +0000777{
njn5cc5d7e2005-08-11 02:09:25 +0000778 HP_Chunk* hc;
779 void* p_new;
780 SizeT old_size;
781 XPt *old_where, *new_where;
nethercotec9f36922004-02-14 16:40:02 +0000782
783 VGP_PUSHCC(VgpCliMalloc);
784
785 // First try and find the block.
njn5cc5d7e2005-08-11 02:09:25 +0000786 hc = (HP_Chunk*)VG_(HT_remove)(malloc_list, (UWord)p_old);
nethercotec9f36922004-02-14 16:40:02 +0000787 if (hc == NULL) {
788 VGP_POPCC(VgpCliMalloc);
njn5cc5d7e2005-08-11 02:09:25 +0000789 return NULL; // must have been a bogus realloc()
nethercotec9f36922004-02-14 16:40:02 +0000790 }
791
nethercotec9f36922004-02-14 16:40:02 +0000792 old_size = hc->size;
793
794 if (new_size <= old_size) {
795 // new size is smaller or same; block not moved
796 p_new = p_old;
797
798 } else {
799 // new size is bigger; make new block, copy shared contents, free old
800 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_size);
njn9e7ce212005-08-10 21:25:36 +0000801 VG_(memcpy)(p_new, p_old, old_size);
nethercotec9f36922004-02-14 16:40:02 +0000802 VG_(cli_free)(p_old);
803 }
804
805 old_where = hc->where;
njn57735902004-11-25 18:04:54 +0000806 new_where = get_XCon( tid, /*custom_malloc*/False);
nethercotec9f36922004-02-14 16:40:02 +0000807
808 // Update HP_Chunk
809 hc->data = (Addr)p_new;
810 hc->size = new_size;
811 hc->where = new_where;
812
813 // Update XPt curr_space fields
814 if (clo_heap) {
815 if (0 != old_size) update_XCon(old_where, -old_size);
816 if (0 != new_size) update_XCon(new_where, new_size);
817 }
818
njn5cc5d7e2005-08-11 02:09:25 +0000819 // Now insert the new hc (with a possibly new 'data' field) into
820 // malloc_list. If this realloc() did not increase the memory size, we
821 // will have removed and then re-added mc unnecessarily. But that's ok
822 // because shrinking a block with realloc() is (presumably) much rarer
823 // than growing it, and this way simplifies the growing case.
824 VG_(HT_add_node)(malloc_list, (VgHashNode*)hc);
nethercotec9f36922004-02-14 16:40:02 +0000825
826 VGP_POPCC(VgpCliMalloc);
827 return p_new;
828}
829
830
831/*------------------------------------------------------------*/
832/*--- Taking a census ---*/
833/*------------------------------------------------------------*/
834
835static Census censi[MAX_N_CENSI];
836static UInt curr_census = 0;
837
838// Must return False so that all stacks are traversed
thughes4ad52d02004-06-27 17:37:21 +0000839static Bool count_stack_size( Addr stack_min, Addr stack_max, void *cp )
nethercotec9f36922004-02-14 16:40:02 +0000840{
thughes4ad52d02004-06-27 17:37:21 +0000841 *(UInt *)cp += (stack_max - stack_min);
nethercotec9f36922004-02-14 16:40:02 +0000842 return False;
843}
844
845static UInt get_xtree_size(XPt* xpt, UInt ix)
846{
847 UInt i;
848
nethercote43a15ce2004-08-30 19:15:12 +0000849 // If no memory allocated at all, nothing interesting to record.
850 if (alloc_xpt->curr_space == 0) return 0;
851
852 // Ignore sub-XTrees that account for a miniscule fraction of current
853 // allocated space.
854 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002) {
nethercotec9f36922004-02-14 16:40:02 +0000855 ix++;
856
857 // Count all (non-zero) descendent XPts
858 for (i = 0; i < xpt->n_children; i++)
859 ix = get_xtree_size(xpt->children[i], ix);
860 }
861 return ix;
862}
863
864static
865UInt do_space_snapshot(XPt xpt[], XTreeSnapshot xtree_snapshot, UInt ix)
866{
867 UInt i;
868
nethercote43a15ce2004-08-30 19:15:12 +0000869 // Structure of this function mirrors that of get_xtree_size().
870
871 if (alloc_xpt->curr_space == 0) return 0;
872
873 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002) {
nethercotec9f36922004-02-14 16:40:02 +0000874 xtree_snapshot[ix].xpt = xpt;
875 xtree_snapshot[ix].space = xpt->curr_space;
876 ix++;
877
nethercotec9f36922004-02-14 16:40:02 +0000878 for (i = 0; i < xpt->n_children; i++)
879 ix = do_space_snapshot(xpt->children[i], xtree_snapshot, ix);
880 }
881 return ix;
882}
883
884static UInt ms_interval;
885static UInt do_every_nth_census = 30;
886
887// Weed out half the censi; we choose those that represent the smallest
888// time-spans, because that loses the least information.
889//
890// Algorithm for N censi: We find the census representing the smallest
891// timeframe, and remove it. We repeat this until (N/2)-1 censi are gone.
892// (It's (N/2)-1 because we never remove the first and last censi.)
893// We have to do this one census at a time, rather than finding the (N/2)-1
894// smallest censi in one hit, because when a census is removed, it's
895// neighbours immediately cover greater timespans. So it's N^2, but N only
896// equals 200, and this is only done every 100 censi, which is not too often.
897static void halve_censi(void)
898{
899 Int i, jp, j, jn, k;
900 Census* min_census;
901
902 n_halvings++;
903 if (VG_(clo_verbosity) > 1)
904 VG_(message)(Vg_UserMsg, "Halving censi...");
905
906 // Sets j to the index of the first not-yet-removed census at or after i
907 #define FIND_CENSUS(i, j) \
njn6f1f76d2005-05-24 21:28:54 +0000908 for (j = i; j < MAX_N_CENSI && -1 == censi[j].ms_time; j++) { }
nethercotec9f36922004-02-14 16:40:02 +0000909
910 for (i = 2; i < MAX_N_CENSI; i += 2) {
911 // Find the censi representing the smallest timespan. The timespan
912 // for census n = d(N-1,N)+d(N,N+1), where d(A,B) is the time between
913 // censi A and B. We don't consider the first and last censi for
914 // removal.
915 Int min_span = 0x7fffffff;
916 Int min_j = 0;
917
918 // Initial triple: (prev, curr, next) == (jp, j, jn)
919 jp = 0;
920 FIND_CENSUS(1, j);
921 FIND_CENSUS(j+1, jn);
922 while (jn < MAX_N_CENSI) {
923 Int timespan = censi[jn].ms_time - censi[jp].ms_time;
njnca82cc02004-11-22 17:18:48 +0000924 tl_assert(timespan >= 0);
nethercotec9f36922004-02-14 16:40:02 +0000925 if (timespan < min_span) {
926 min_span = timespan;
927 min_j = j;
928 }
929 // Move on to next triple
930 jp = j;
931 j = jn;
932 FIND_CENSUS(jn+1, jn);
933 }
934 // We've found the least important census, now remove it
935 min_census = & censi[ min_j ];
936 for (k = 0; NULL != min_census->xtree_snapshots[k]; k++) {
937 n_snapshot_frees++;
938 VG_(free)(min_census->xtree_snapshots[k]);
939 min_census->xtree_snapshots[k] = NULL;
940 }
941 min_census->ms_time = -1;
942 }
943
944 // Slide down the remaining censi over the removed ones. The '<=' is
945 // because we are removing on (N/2)-1, rather than N/2.
946 for (i = 0, j = 0; i <= MAX_N_CENSI / 2; i++, j++) {
947 FIND_CENSUS(j, j);
948 if (i != j) {
949 censi[i] = censi[j];
950 }
951 }
952 curr_census = i;
953
954 // Double intervals
955 ms_interval *= 2;
956 do_every_nth_census *= 2;
957
958 if (VG_(clo_verbosity) > 1)
959 VG_(message)(Vg_UserMsg, "...done");
960}
961
962// Take a census. Census time seems to be insignificant (usually <= 0 ms,
963// almost always <= 1ms) so don't have to worry about subtracting it from
964// running time in any way.
965//
966// XXX: NOT TRUE! with bigger depths, konqueror censuses can easily take
967// 50ms!
968static void hp_census(void)
969{
970 static UInt ms_prev_census = 0;
971 static UInt ms_next_census = 0; // zero allows startup census
972
973 Int ms_time, ms_time_since_prev;
nethercotec9f36922004-02-14 16:40:02 +0000974 Census* census;
975
976 VGP_PUSHCC(VgpCensus);
977
978 // Only do a census if it's time
979 ms_time = VG_(read_millisecond_timer)();
980 ms_time_since_prev = ms_time - ms_prev_census;
981 if (ms_time < ms_next_census) {
982 n_fake_censi++;
983 VGP_POPCC(VgpCensus);
984 return;
985 }
986 n_real_censi++;
987
988 census = & censi[curr_census];
989
990 census->ms_time = ms_time;
991
992 // Heap: snapshot the K most significant XTrees -------------------
993 if (clo_heap) {
njn6f1f76d2005-05-24 21:28:54 +0000994 Int i, K;
nethercotec9f36922004-02-14 16:40:02 +0000995 K = ( alloc_xpt->n_children < MAX_SNAPSHOTS
996 ? alloc_xpt->n_children
997 : MAX_SNAPSHOTS); // max out
998
nethercote43a15ce2004-08-30 19:15:12 +0000999 // Update .approx_ST field (approximatively) for all top-XPts.
nethercotec9f36922004-02-14 16:40:02 +00001000 // We *do not* do it for any non-top-XPTs.
1001 for (i = 0; i < alloc_xpt->n_children; i++) {
1002 XPt* top_XPt = alloc_xpt->children[i];
nethercote43a15ce2004-08-30 19:15:12 +00001003 top_XPt->approx_ST += top_XPt->curr_space * ms_time_since_prev;
nethercotec9f36922004-02-14 16:40:02 +00001004 }
nethercote43a15ce2004-08-30 19:15:12 +00001005 // Sort top-XPts by approx_ST field.
nethercotec9f36922004-02-14 16:40:02 +00001006 VG_(ssort)(alloc_xpt->children, alloc_xpt->n_children, sizeof(XPt*),
nethercote43a15ce2004-08-30 19:15:12 +00001007 XPt_cmp_approx_ST);
nethercotec9f36922004-02-14 16:40:02 +00001008
1009 VGP_PUSHCC(VgpCensusHeap);
1010
1011 // For each significant top-level XPt, record space info about its
1012 // entire XTree, in a single census entry.
1013 // Nb: the xtree_size count/snapshot buffer allocation, and the actual
1014 // snapshot, take similar amounts of time (measured with the
nethercote43a15ce2004-08-30 19:15:12 +00001015 // millisecond counter).
nethercotec9f36922004-02-14 16:40:02 +00001016 for (i = 0; i < K; i++) {
1017 UInt xtree_size, xtree_size2;
nethercote43a15ce2004-08-30 19:15:12 +00001018// VG_(printf)("%7u ", alloc_xpt->children[i]->approx_ST);
1019 // Count how many XPts are in the XTree
nethercotec9f36922004-02-14 16:40:02 +00001020 VGP_PUSHCC(VgpCensusTreeSize);
1021 xtree_size = get_xtree_size( alloc_xpt->children[i], 0 );
1022 VGP_POPCC(VgpCensusTreeSize);
nethercote43a15ce2004-08-30 19:15:12 +00001023
1024 // If no XPts counted (ie. alloc_xpt.curr_space==0 or XTree
1025 // insignificant) then don't take any more snapshots.
1026 if (0 == xtree_size) break;
1027
1028 // Make array of the appropriate size (+1 for zero termination,
1029 // which calloc() does for us).
nethercotec9f36922004-02-14 16:40:02 +00001030 census->xtree_snapshots[i] =
1031 VG_(calloc)(xtree_size+1, sizeof(XPtSnapshot));
jseward612e8362004-03-07 10:23:20 +00001032 if (0 && VG_(clo_verbosity) > 1)
nethercotec9f36922004-02-14 16:40:02 +00001033 VG_(printf)("calloc: %d (%d B)\n", xtree_size+1,
1034 (xtree_size+1) * sizeof(XPtSnapshot));
1035
1036 // Take space-snapshot: copy 'curr_space' for every XPt in the
1037 // XTree into the snapshot array, along with pointers to the XPts.
1038 // (Except for ones with curr_space==0, which wouldn't contribute
nethercote43a15ce2004-08-30 19:15:12 +00001039 // to the final exact_ST_dbld calculation anyway; excluding them
nethercotec9f36922004-02-14 16:40:02 +00001040 // saves a lot of memory and up to 40% time with big --depth valus.
1041 VGP_PUSHCC(VgpCensusSnapshot);
1042 xtree_size2 = do_space_snapshot(alloc_xpt->children[i],
1043 census->xtree_snapshots[i], 0);
njnca82cc02004-11-22 17:18:48 +00001044 tl_assert(xtree_size == xtree_size2);
nethercotec9f36922004-02-14 16:40:02 +00001045 VGP_POPCC(VgpCensusSnapshot);
1046 }
1047// VG_(printf)("\n\n");
1048 // Zero-terminate 'xtree_snapshot' array
1049 census->xtree_snapshots[i] = NULL;
1050
1051 VGP_POPCC(VgpCensusHeap);
1052
1053 //VG_(printf)("printed %d censi\n", K);
1054
1055 // Lump the rest into a single "others" entry.
1056 census->others_space = 0;
1057 for (i = K; i < alloc_xpt->n_children; i++) {
1058 census->others_space += alloc_xpt->children[i]->curr_space;
1059 }
1060 }
1061
1062 // Heap admin -------------------------------------------------------
1063 if (clo_heap_admin > 0)
1064 census->heap_admin_space = clo_heap_admin * n_heap_blocks;
1065
1066 // Stack(s) ---------------------------------------------------------
1067 if (clo_stacks) {
thughes4ad52d02004-06-27 17:37:21 +00001068 census->stacks_space = sigstacks_space;
nethercotec9f36922004-02-14 16:40:02 +00001069 // slightly abusing this function
thughes4ad52d02004-06-27 17:37:21 +00001070 VG_(first_matching_thread_stack)( count_stack_size, &census->stacks_space );
nethercotec9f36922004-02-14 16:40:02 +00001071 }
1072
1073 // Finish, update interval if necessary -----------------------------
1074 curr_census++;
1075 census = NULL; // don't use again now that curr_census changed
1076
1077 // Halve the entries, if our census table is full
1078 if (MAX_N_CENSI == curr_census) {
1079 halve_censi();
1080 }
1081
1082 // Take time for next census from now, rather than when this census
1083 // should have happened. Because, if there's a big gap due to a kernel
1084 // operation, there's no point doing catch-up censi every BB for a while
1085 // -- that would just give N censi at almost the same time.
1086 if (VG_(clo_verbosity) > 1) {
1087 VG_(message)(Vg_UserMsg, "census: %d ms (took %d ms)", ms_time,
1088 VG_(read_millisecond_timer)() - ms_time );
1089 }
1090 ms_prev_census = ms_time;
1091 ms_next_census = ms_time + ms_interval;
1092 //ms_next_census += ms_interval;
1093
1094 //VG_(printf)("Next: %d ms\n", ms_next_census);
1095
1096 VGP_POPCC(VgpCensus);
1097}
1098
1099/*------------------------------------------------------------*/
1100/*--- Tracked events ---*/
1101/*------------------------------------------------------------*/
1102
nethercote8b5f40c2004-11-02 13:29:50 +00001103static void new_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001104{
1105 sigstacks_space += len;
1106}
1107
nethercote8b5f40c2004-11-02 13:29:50 +00001108static void die_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001109{
njnca82cc02004-11-22 17:18:48 +00001110 tl_assert(sigstacks_space >= len);
nethercotec9f36922004-02-14 16:40:02 +00001111 sigstacks_space -= len;
1112}
1113
1114/*------------------------------------------------------------*/
1115/*--- Client Requests ---*/
1116/*------------------------------------------------------------*/
1117
njn51d827b2005-05-09 01:02:08 +00001118static Bool ms_handle_client_request ( ThreadId tid, UWord* argv, UWord* ret )
nethercotec9f36922004-02-14 16:40:02 +00001119{
1120 switch (argv[0]) {
1121 case VG_USERREQ__MALLOCLIKE_BLOCK: {
nethercote57e36b32004-07-10 14:56:28 +00001122 void* res;
nethercotec9f36922004-02-14 16:40:02 +00001123 void* p = (void*)argv[1];
nethercoted1b64b22004-11-04 18:22:28 +00001124 SizeT sizeB = argv[2];
nethercotec9f36922004-02-14 16:40:02 +00001125 *ret = 0;
njn57735902004-11-25 18:04:54 +00001126 res = new_block( tid, p, sizeB, /*align--ignored*/0, /*is_zeroed*/False );
njnca82cc02004-11-22 17:18:48 +00001127 tl_assert(res == p);
nethercotec9f36922004-02-14 16:40:02 +00001128 return True;
1129 }
1130 case VG_USERREQ__FREELIKE_BLOCK: {
1131 void* p = (void*)argv[1];
1132 *ret = 0;
1133 die_block( p, /*custom_free*/True );
1134 return True;
1135 }
1136 default:
1137 *ret = 0;
1138 return False;
1139 }
1140}
1141
1142/*------------------------------------------------------------*/
nethercotec9f36922004-02-14 16:40:02 +00001143/*--- Instrumentation ---*/
1144/*------------------------------------------------------------*/
1145
njn51d827b2005-05-09 01:02:08 +00001146static IRBB* ms_instrument ( IRBB* bb_in, VexGuestLayout* layout,
1147 IRType gWordTy, IRType hWordTy )
nethercotec9f36922004-02-14 16:40:02 +00001148{
sewardjd54babf2005-03-21 00:55:49 +00001149 /* XXX Will Massif work when gWordTy != hWordTy ? */
njnee8a5862004-11-22 21:08:46 +00001150 return bb_in;
nethercotec9f36922004-02-14 16:40:02 +00001151}
1152
1153/*------------------------------------------------------------*/
1154/*--- Spacetime recomputation ---*/
1155/*------------------------------------------------------------*/
1156
nethercote43a15ce2004-08-30 19:15:12 +00001157// Although we've been calculating space-time along the way, because the
1158// earlier calculations were done at a finer timescale, the .approx_ST field
nethercotec9f36922004-02-14 16:40:02 +00001159// might not agree with what hp2ps sees, because we've thrown away some of
1160// the information. So recompute it at the scale that hp2ps sees, so we can
1161// confidently determine which contexts hp2ps will choose for displaying as
1162// distinct bands. This recomputation only happens to the significant ones
1163// that get printed in the .hp file, so it's cheap.
1164//
nethercote43a15ce2004-08-30 19:15:12 +00001165// The approx_ST calculation:
nethercotec9f36922004-02-14 16:40:02 +00001166// ( a[0]*d(0,1) + a[1]*(d(0,1) + d(1,2)) + ... + a[N-1]*d(N-2,N-1) ) / 2
1167// where
1168// a[N] is the space at census N
1169// d(A,B) is the time interval between censi A and B
1170// and
1171// d(A,B) + d(B,C) == d(A,C)
1172//
1173// Key point: we can calculate the area for a census without knowing the
1174// previous or subsequent censi's space; because any over/underestimates
1175// for this census will be reversed in the next, balancing out. This is
1176// important, as getting the previous/next census entry for a particular
1177// AP is a pain with this data structure, but getting the prev/next
1178// census time is easy.
1179//
nethercote43a15ce2004-08-30 19:15:12 +00001180// Each heap calculation gets added to its context's exact_ST_dbld field.
nethercotec9f36922004-02-14 16:40:02 +00001181// The ULong* values are all running totals, hence the use of "+=" everywhere.
1182
1183// This does the calculations for a single census.
nethercote43a15ce2004-08-30 19:15:12 +00001184static void calc_exact_ST_dbld2(Census* census, UInt d_t1_t2,
nethercotec9f36922004-02-14 16:40:02 +00001185 ULong* twice_heap_ST,
1186 ULong* twice_heap_admin_ST,
1187 ULong* twice_stack_ST)
1188{
1189 UInt i, j;
1190 XPtSnapshot* xpt_snapshot;
1191
1192 // Heap --------------------------------------------------------
1193 if (clo_heap) {
1194 for (i = 0; NULL != census->xtree_snapshots[i]; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001195 // Compute total heap exact_ST_dbld for the entire XTree using only
1196 // the top-XPt (the first XPt in xtree_snapshot).
nethercotec9f36922004-02-14 16:40:02 +00001197 *twice_heap_ST += d_t1_t2 * census->xtree_snapshots[i][0].space;
1198
nethercote43a15ce2004-08-30 19:15:12 +00001199 // Increment exact_ST_dbld for every XPt in xtree_snapshot (inc.
1200 // top one)
nethercotec9f36922004-02-14 16:40:02 +00001201 for (j = 0; NULL != census->xtree_snapshots[i][j].xpt; j++) {
1202 xpt_snapshot = & census->xtree_snapshots[i][j];
nethercote43a15ce2004-08-30 19:15:12 +00001203 xpt_snapshot->xpt->exact_ST_dbld += d_t1_t2 * xpt_snapshot->space;
nethercotec9f36922004-02-14 16:40:02 +00001204 }
1205 }
1206 *twice_heap_ST += d_t1_t2 * census->others_space;
1207 }
1208
1209 // Heap admin --------------------------------------------------
1210 if (clo_heap_admin > 0)
1211 *twice_heap_admin_ST += d_t1_t2 * census->heap_admin_space;
1212
1213 // Stack(s) ----------------------------------------------------
1214 if (clo_stacks)
1215 *twice_stack_ST += d_t1_t2 * census->stacks_space;
1216}
1217
1218// This does the calculations for all censi.
nethercote43a15ce2004-08-30 19:15:12 +00001219static void calc_exact_ST_dbld(ULong* heap2, ULong* heap_admin2, ULong* stack2)
nethercotec9f36922004-02-14 16:40:02 +00001220{
1221 UInt i, N = curr_census;
1222
1223 VGP_PUSHCC(VgpCalcSpacetime2);
1224
1225 *heap2 = 0;
1226 *heap_admin2 = 0;
1227 *stack2 = 0;
1228
1229 if (N <= 1)
1230 return;
1231
nethercote43a15ce2004-08-30 19:15:12 +00001232 calc_exact_ST_dbld2( &censi[0], censi[1].ms_time - censi[0].ms_time,
1233 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001234
1235 for (i = 1; i <= N-2; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001236 calc_exact_ST_dbld2( & censi[i], censi[i+1].ms_time - censi[i-1].ms_time,
1237 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001238 }
1239
nethercote43a15ce2004-08-30 19:15:12 +00001240 calc_exact_ST_dbld2( & censi[N-1], censi[N-1].ms_time - censi[N-2].ms_time,
1241 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001242 // Now get rid of the halves. May lose a 0.5 on each, doesn't matter.
1243 *heap2 /= 2;
1244 *heap_admin2 /= 2;
1245 *stack2 /= 2;
1246
1247 VGP_POPCC(VgpCalcSpacetime2);
1248}
1249
1250/*------------------------------------------------------------*/
1251/*--- Writing the graph file ---*/
1252/*------------------------------------------------------------*/
1253
1254static Char* make_filename(Char* dir, Char* suffix)
1255{
1256 Char* filename;
1257
1258 /* Block is big enough for dir name + massif.<pid>.<suffix> */
1259 filename = VG_(malloc)((VG_(strlen)(dir) + 32)*sizeof(Char));
1260 VG_(sprintf)(filename, "%s/massif.%d%s", dir, VG_(getpid)(), suffix);
1261
1262 return filename;
1263}
1264
1265// Make string acceptable to hp2ps (sigh): remove spaces, escape parentheses.
1266static Char* clean_fnname(Char *d, Char* s)
1267{
1268 Char* dorig = d;
1269 while (*s) {
1270 if (' ' == *s) { *d = '%'; }
1271 else if ('(' == *s) { *d++ = '\\'; *d = '('; }
1272 else if (')' == *s) { *d++ = '\\'; *d = ')'; }
1273 else { *d = *s; };
1274 s++;
1275 d++;
1276 }
1277 *d = '\0';
1278 return dorig;
1279}
1280
1281static void file_err ( Char* file )
1282{
njn02bc4b82005-05-15 17:28:26 +00001283 VG_(message)(Vg_UserMsg, "error: can't open output file '%s'", file );
nethercotec9f36922004-02-14 16:40:02 +00001284 VG_(message)(Vg_UserMsg, " ... so profile results will be missing.");
1285}
1286
1287/* Format, by example:
1288
1289 JOB "a.out -p"
1290 DATE "Fri Apr 17 11:43:45 1992"
1291 SAMPLE_UNIT "seconds"
1292 VALUE_UNIT "bytes"
1293 BEGIN_SAMPLE 0.00
1294 SYSTEM 24
1295 END_SAMPLE 0.00
1296 BEGIN_SAMPLE 1.00
1297 elim 180
1298 insert 24
1299 intersect 12
1300 disin 60
1301 main 12
1302 reduce 20
1303 SYSTEM 12
1304 END_SAMPLE 1.00
1305 MARK 1.50
1306 MARK 1.75
1307 MARK 1.80
1308 BEGIN_SAMPLE 2.00
1309 elim 192
1310 insert 24
1311 intersect 12
1312 disin 84
1313 main 12
1314 SYSTEM 24
1315 END_SAMPLE 2.00
1316 BEGIN_SAMPLE 2.82
1317 END_SAMPLE 2.82
1318 */
1319static void write_hp_file(void)
1320{
sewardj92645592005-07-23 09:18:34 +00001321 Int i, j;
1322 Int fd, res;
1323 SysRes sres;
1324 Char *hp_file, *ps_file, *aux_file;
1325 Char* cmdfmt;
1326 Char* cmdbuf;
1327 Int cmdlen;
nethercotec9f36922004-02-14 16:40:02 +00001328
1329 VGP_PUSHCC(VgpPrintHp);
1330
1331 // Open file
1332 hp_file = make_filename( base_dir, ".hp" );
1333 ps_file = make_filename( base_dir, ".ps" );
1334 aux_file = make_filename( base_dir, ".aux" );
sewardj92645592005-07-23 09:18:34 +00001335 sres = VG_(open)(hp_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1336 VKI_S_IRUSR|VKI_S_IWUSR);
1337 if (sres.isError) {
nethercotec9f36922004-02-14 16:40:02 +00001338 file_err( hp_file );
1339 VGP_POPCC(VgpPrintHp);
1340 return;
sewardj92645592005-07-23 09:18:34 +00001341 } else {
1342 fd = sres.val;
nethercotec9f36922004-02-14 16:40:02 +00001343 }
1344
1345 // File header, including command line
1346 SPRINTF(buf, "JOB \"");
1347 for (i = 0; i < VG_(client_argc); i++)
1348 SPRINTF(buf, "%s ", VG_(client_argv)[i]);
1349 SPRINTF(buf, /*" (%d ms/sample)\"\n"*/ "\"\n"
1350 "DATE \"\"\n"
1351 "SAMPLE_UNIT \"ms\"\n"
1352 "VALUE_UNIT \"bytes\"\n", ms_interval);
1353
1354 // Censi
1355 for (i = 0; i < curr_census; i++) {
1356 Census* census = & censi[i];
1357
1358 // Census start
1359 SPRINTF(buf, "MARK %d.0\n"
1360 "BEGIN_SAMPLE %d.0\n",
1361 census->ms_time, census->ms_time);
1362
1363 // Heap -----------------------------------------------------------
1364 if (clo_heap) {
1365 // Print all the significant XPts from that census
1366 for (j = 0; NULL != census->xtree_snapshots[j]; j++) {
1367 // Grab the jth top-XPt
1368 XTreeSnapshot xtree_snapshot = & census->xtree_snapshots[j][0];
njnd01fef72005-03-25 23:35:48 +00001369 if ( ! VG_(get_fnname)(xtree_snapshot->xpt->ip, buf2, 16)) {
nethercotec9f36922004-02-14 16:40:02 +00001370 VG_(sprintf)(buf2, "???");
1371 }
njnd01fef72005-03-25 23:35:48 +00001372 SPRINTF(buf, "x%x:%s %d\n", xtree_snapshot->xpt->ip,
nethercotec9f36922004-02-14 16:40:02 +00001373 clean_fnname(buf3, buf2), xtree_snapshot->space);
1374 }
1375
1376 // Remaining heap block alloc points, combined
1377 if (census->others_space > 0)
1378 SPRINTF(buf, "other %d\n", census->others_space);
1379 }
1380
1381 // Heap admin -----------------------------------------------------
1382 if (clo_heap_admin > 0 && census->heap_admin_space)
1383 SPRINTF(buf, "heap-admin %d\n", census->heap_admin_space);
1384
1385 // Stack(s) -------------------------------------------------------
1386 if (clo_stacks)
1387 SPRINTF(buf, "stack(s) %d\n", census->stacks_space);
1388
1389 // Census end
1390 SPRINTF(buf, "END_SAMPLE %d.0\n", census->ms_time);
1391 }
1392
1393 // Close file
njnca82cc02004-11-22 17:18:48 +00001394 tl_assert(fd >= 0);
nethercotec9f36922004-02-14 16:40:02 +00001395 VG_(close)(fd);
1396
1397 // Attempt to convert file using hp2ps
1398 cmdfmt = "%s/hp2ps -c -t1 %s";
1399 cmdlen = VG_(strlen)(VG_(libdir)) + VG_(strlen)(hp_file)
1400 + VG_(strlen)(cmdfmt);
1401 cmdbuf = VG_(malloc)( sizeof(Char) * cmdlen );
1402 VG_(sprintf)(cmdbuf, cmdfmt, VG_(libdir), hp_file);
1403 res = VG_(system)(cmdbuf);
1404 VG_(free)(cmdbuf);
1405 if (res != 0) {
1406 VG_(message)(Vg_UserMsg,
1407 "Conversion to PostScript failed. Try converting manually.");
1408 } else {
1409 // remove the .hp and .aux file
1410 VG_(unlink)(hp_file);
1411 VG_(unlink)(aux_file);
1412 }
1413
1414 VG_(free)(hp_file);
1415 VG_(free)(ps_file);
1416 VG_(free)(aux_file);
1417
1418 VGP_POPCC(VgpPrintHp);
1419}
1420
1421/*------------------------------------------------------------*/
1422/*--- Writing the XPt text/HTML file ---*/
1423/*------------------------------------------------------------*/
1424
1425static void percentify(Int n, Int pow, Int field_width, char xbuf[])
1426{
1427 int i, len, space;
1428
1429 VG_(sprintf)(xbuf, "%d.%d%%", n / pow, n % pow);
1430 len = VG_(strlen)(xbuf);
1431 space = field_width - len;
1432 if (space < 0) space = 0; /* Allow for v. small field_width */
1433 i = len;
1434
1435 /* Right justify in field */
1436 for ( ; i >= 0; i--) xbuf[i + space] = xbuf[i];
1437 for (i = 0; i < space; i++) xbuf[i] = ' ';
1438}
1439
1440// Nb: uses a static buffer, each call trashes the last string returned.
1441static Char* make_perc(ULong spacetime, ULong total_spacetime)
1442{
1443 static Char mbuf[32];
1444
1445 UInt p = 10;
njnca82cc02004-11-22 17:18:48 +00001446 tl_assert(0 != total_spacetime);
nethercotec9f36922004-02-14 16:40:02 +00001447 percentify(spacetime * 100 * p / total_spacetime, p, 5, mbuf);
1448 return mbuf;
1449}
1450
njnd01fef72005-03-25 23:35:48 +00001451// Nb: passed in XPt is a lower-level XPt; IPs are grabbed from
nethercotec9f36922004-02-14 16:40:02 +00001452// bottom-to-top of XCon, and then printed in the reverse order.
1453static UInt pp_XCon(Int fd, XPt* xpt)
1454{
njnd01fef72005-03-25 23:35:48 +00001455 Addr rev_ips[clo_depth+1];
nethercotec9f36922004-02-14 16:40:02 +00001456 Int i = 0;
1457 Int n = 0;
1458 Bool is_HTML = ( XHTML == clo_format );
1459 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1460 Char* maybe_indent = ( is_HTML ? "&nbsp;&nbsp;" : "" );
1461
njnca82cc02004-11-22 17:18:48 +00001462 tl_assert(NULL != xpt);
nethercotec9f36922004-02-14 16:40:02 +00001463
1464 while (True) {
njnd01fef72005-03-25 23:35:48 +00001465 rev_ips[i] = xpt->ip;
nethercotec9f36922004-02-14 16:40:02 +00001466 n++;
1467 if (alloc_xpt == xpt->parent) break;
1468 i++;
1469 xpt = xpt->parent;
1470 }
1471
1472 for (i = n-1; i >= 0; i--) {
1473 // -1 means point to calling line
njnd01fef72005-03-25 23:35:48 +00001474 VG_(describe_IP)(rev_ips[i]-1, buf2, BUF_LEN);
nethercotec9f36922004-02-14 16:40:02 +00001475 SPRINTF(buf, " %s%s%s\n", maybe_indent, buf2, maybe_br);
1476 }
1477
1478 return n;
1479}
1480
1481// Important point: for HTML, each XPt must be identified uniquely for the
njnd01fef72005-03-25 23:35:48 +00001482// HTML links to all match up correctly. Using xpt->ip is not
nethercotec9f36922004-02-14 16:40:02 +00001483// sufficient, because function pointers mean that you can call more than
1484// one other function from a single code location. So instead we use the
1485// address of the xpt struct itself, which is guaranteed to be unique.
1486
1487static void pp_all_XPts2(Int fd, Queue* q, ULong heap_spacetime,
1488 ULong total_spacetime)
1489{
1490 UInt i;
1491 XPt *xpt, *child;
1492 UInt L = 0;
1493 UInt c1 = 1;
1494 UInt c2 = 0;
1495 ULong sum = 0;
1496 UInt n;
njnd01fef72005-03-25 23:35:48 +00001497 Char *ip_desc, *perc;
nethercotec9f36922004-02-14 16:40:02 +00001498 Bool is_HTML = ( XHTML == clo_format );
1499 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1500 Char* maybe_p = ( is_HTML ? "<p>" : "" );
1501 Char* maybe_ul = ( is_HTML ? "<ul>" : "" );
1502 Char* maybe_li = ( is_HTML ? "<li>" : "" );
1503 Char* maybe_fli = ( is_HTML ? "</li>" : "" );
1504 Char* maybe_ful = ( is_HTML ? "</ul>" : "" );
1505 Char* end_hr = ( is_HTML ? "<hr>" :
1506 "=================================" );
1507 Char* depth = ( is_HTML ? "<code>--depth</code>" : "--depth" );
1508
nethercote43a15ce2004-08-30 19:15:12 +00001509 if (total_spacetime == 0) {
1510 SPRINTF(buf, "(No heap memory allocated)\n");
1511 return;
1512 }
1513
1514
nethercotec9f36922004-02-14 16:40:02 +00001515 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1516
1517 while (NULL != (xpt = (XPt*)dequeue(q))) {
nethercote43a15ce2004-08-30 19:15:12 +00001518 // Check that non-top-level XPts have a zero .approx_ST field.
njnca82cc02004-11-22 17:18:48 +00001519 if (xpt->parent != alloc_xpt) tl_assert( 0 == xpt->approx_ST );
nethercotec9f36922004-02-14 16:40:02 +00001520
nethercote43a15ce2004-08-30 19:15:12 +00001521 // Check that the sum of all children .exact_ST_dbld fields equals
1522 // parent's (unless alloc_xpt, when it should == 0).
nethercotec9f36922004-02-14 16:40:02 +00001523 if (alloc_xpt == xpt) {
njnca82cc02004-11-22 17:18:48 +00001524 tl_assert(0 == xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001525 } else {
1526 sum = 0;
1527 for (i = 0; i < xpt->n_children; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001528 sum += xpt->children[i]->exact_ST_dbld;
nethercotec9f36922004-02-14 16:40:02 +00001529 }
njnca82cc02004-11-22 17:18:48 +00001530 //tl_assert(sum == xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001531 // It's possible that not all the children were included in the
nethercote43a15ce2004-08-30 19:15:12 +00001532 // exact_ST_dbld calculations. Hopefully almost all of them were, and
nethercotec9f36922004-02-14 16:40:02 +00001533 // all the important ones.
njnca82cc02004-11-22 17:18:48 +00001534// tl_assert(sum <= xpt->exact_ST_dbld);
1535// tl_assert(sum * 1.05 > xpt->exact_ST_dbld );
nethercote43a15ce2004-08-30 19:15:12 +00001536// if (sum != xpt->exact_ST_dbld) {
1537// VG_(printf)("%ld, %ld\n", sum, xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001538// }
1539 }
1540
1541 if (xpt == alloc_xpt) {
1542 SPRINTF(buf, "Heap allocation functions accounted for "
1543 "%s of measured spacetime%s\n",
1544 make_perc(heap_spacetime, total_spacetime), maybe_br);
1545 } else {
nethercote43a15ce2004-08-30 19:15:12 +00001546 // Remember: exact_ST_dbld is space.time *doubled*
1547 perc = make_perc(xpt->exact_ST_dbld / 2, total_spacetime);
nethercotec9f36922004-02-14 16:40:02 +00001548 if (is_HTML) {
1549 SPRINTF(buf, "<a name=\"b%x\"></a>"
1550 "Context accounted for "
1551 "<a href=\"#a%x\">%s</a> of measured spacetime<br>\n",
1552 xpt, xpt, perc);
1553 } else {
1554 SPRINTF(buf, "Context accounted for %s of measured spacetime\n",
1555 perc);
1556 }
1557 n = pp_XCon(fd, xpt);
njnca82cc02004-11-22 17:18:48 +00001558 tl_assert(n == L);
nethercotec9f36922004-02-14 16:40:02 +00001559 }
1560
nethercote43a15ce2004-08-30 19:15:12 +00001561 // Sort children by exact_ST_dbld
nethercotec9f36922004-02-14 16:40:02 +00001562 VG_(ssort)(xpt->children, xpt->n_children, sizeof(XPt*),
nethercote43a15ce2004-08-30 19:15:12 +00001563 XPt_cmp_exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001564
1565 SPRINTF(buf, "%s\nCalled from:%s\n", maybe_p, maybe_ul);
1566 for (i = 0; i < xpt->n_children; i++) {
1567 child = xpt->children[i];
1568
1569 // Stop when <1% of total spacetime
nethercote43a15ce2004-08-30 19:15:12 +00001570 if (child->exact_ST_dbld * 1000 / (total_spacetime * 2) < 5) {
nethercotec9f36922004-02-14 16:40:02 +00001571 UInt n_insig = xpt->n_children - i;
1572 Char* s = ( n_insig == 1 ? "" : "s" );
1573 Char* and = ( 0 == i ? "" : "and " );
1574 Char* other = ( 0 == i ? "" : "other " );
1575 SPRINTF(buf, " %s%s%d %sinsignificant place%s%s\n\n",
1576 maybe_li, and, n_insig, other, s, maybe_fli);
1577 break;
1578 }
1579
nethercote43a15ce2004-08-30 19:15:12 +00001580 // Remember: exact_ST_dbld is space.time *doubled*
njnd01fef72005-03-25 23:35:48 +00001581 perc = make_perc(child->exact_ST_dbld / 2, total_spacetime);
1582 ip_desc = VG_(describe_IP)(child->ip-1, buf2, BUF_LEN);
nethercotec9f36922004-02-14 16:40:02 +00001583 if (is_HTML) {
1584 SPRINTF(buf, "<li><a name=\"a%x\"></a>", child );
1585
1586 if (child->n_children > 0) {
1587 SPRINTF(buf, "<a href=\"#b%x\">%s</a>", child, perc);
1588 } else {
1589 SPRINTF(buf, "%s", perc);
1590 }
njnd01fef72005-03-25 23:35:48 +00001591 SPRINTF(buf, ": %s\n", ip_desc);
nethercotec9f36922004-02-14 16:40:02 +00001592 } else {
njnd01fef72005-03-25 23:35:48 +00001593 SPRINTF(buf, " %6s: %s\n\n", perc, ip_desc);
nethercotec9f36922004-02-14 16:40:02 +00001594 }
1595
1596 if (child->n_children > 0) {
1597 enqueue(q, (void*)child);
1598 c2++;
1599 }
1600 }
1601 SPRINTF(buf, "%s%s", maybe_ful, maybe_p);
1602 c1--;
1603
1604 // Putting markers between levels of the structure:
1605 // c1 tracks how many to go on this level, c2 tracks how many we've
1606 // queued up for the next level while finishing off this level.
1607 // When c1 gets to zero, we've changed levels, so print a marker,
1608 // move c2 into c1, and zero c2.
1609 if (0 == c1) {
1610 L++;
1611 c1 = c2;
1612 c2 = 0;
1613 if (! is_empty_queue(q) ) { // avoid empty one at end
1614 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1615 }
1616 } else {
1617 SPRINTF(buf, "---------------------------------%s\n", maybe_br);
1618 }
1619 }
1620 SPRINTF(buf, "%s\n\nEnd of information. Rerun with a bigger "
1621 "%s value for more.\n", end_hr, depth);
1622}
1623
1624static void pp_all_XPts(Int fd, XPt* xpt, ULong heap_spacetime,
1625 ULong total_spacetime)
1626{
1627 Queue* q = construct_queue(100);
nethercote43a15ce2004-08-30 19:15:12 +00001628
nethercotec9f36922004-02-14 16:40:02 +00001629 enqueue(q, xpt);
1630 pp_all_XPts2(fd, q, heap_spacetime, total_spacetime);
1631 destruct_queue(q);
1632}
1633
1634static void
1635write_text_file(ULong total_ST, ULong heap_ST)
1636{
sewardj92645592005-07-23 09:18:34 +00001637 SysRes sres;
1638 Int fd, i;
1639 Char* text_file;
1640 Char* maybe_p = ( XHTML == clo_format ? "<p>" : "" );
nethercotec9f36922004-02-14 16:40:02 +00001641
1642 VGP_PUSHCC(VgpPrintXPts);
1643
1644 // Open file
1645 text_file = make_filename( base_dir,
1646 ( XText == clo_format ? ".txt" : ".html" ) );
1647
sewardj92645592005-07-23 09:18:34 +00001648 sres = VG_(open)(text_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
nethercotec9f36922004-02-14 16:40:02 +00001649 VKI_S_IRUSR|VKI_S_IWUSR);
sewardj92645592005-07-23 09:18:34 +00001650 if (sres.isError) {
nethercotec9f36922004-02-14 16:40:02 +00001651 file_err( text_file );
1652 VGP_POPCC(VgpPrintXPts);
1653 return;
sewardj92645592005-07-23 09:18:34 +00001654 } else {
1655 fd = sres.val;
nethercotec9f36922004-02-14 16:40:02 +00001656 }
1657
1658 // Header
1659 if (XHTML == clo_format) {
1660 SPRINTF(buf, "<html>\n"
1661 "<head>\n"
1662 "<title>%s</title>\n"
1663 "</head>\n"
1664 "<body>\n",
1665 text_file);
1666 }
1667
1668 // Command line
1669 SPRINTF(buf, "Command: ");
1670 for (i = 0; i < VG_(client_argc); i++)
1671 SPRINTF(buf, "%s ", VG_(client_argv)[i]);
1672 SPRINTF(buf, "\n%s\n", maybe_p);
1673
1674 if (clo_heap)
1675 pp_all_XPts(fd, alloc_xpt, heap_ST, total_ST);
1676
njnca82cc02004-11-22 17:18:48 +00001677 tl_assert(fd >= 0);
nethercotec9f36922004-02-14 16:40:02 +00001678 VG_(close)(fd);
1679
1680 VGP_POPCC(VgpPrintXPts);
1681}
1682
1683/*------------------------------------------------------------*/
1684/*--- Finalisation ---*/
1685/*------------------------------------------------------------*/
1686
1687static void
1688print_summary(ULong total_ST, ULong heap_ST, ULong heap_admin_ST,
1689 ULong stack_ST)
1690{
1691 VG_(message)(Vg_UserMsg, "Total spacetime: %,ld ms.B", total_ST);
1692
1693 // Heap --------------------------------------------------------------
1694 if (clo_heap)
1695 VG_(message)(Vg_UserMsg, "heap: %s",
nethercote43a15ce2004-08-30 19:15:12 +00001696 ( 0 == total_ST ? (Char*)"(n/a)"
1697 : make_perc(heap_ST, total_ST) ) );
nethercotec9f36922004-02-14 16:40:02 +00001698
1699 // Heap admin --------------------------------------------------------
1700 if (clo_heap_admin)
1701 VG_(message)(Vg_UserMsg, "heap admin: %s",
nethercote43a15ce2004-08-30 19:15:12 +00001702 ( 0 == total_ST ? (Char*)"(n/a)"
1703 : make_perc(heap_admin_ST, total_ST) ) );
nethercotec9f36922004-02-14 16:40:02 +00001704
njnca82cc02004-11-22 17:18:48 +00001705 tl_assert( VG_(HT_count_nodes)(malloc_list) == n_heap_blocks );
nethercotec9f36922004-02-14 16:40:02 +00001706
1707 // Stack(s) ----------------------------------------------------------
nethercote43a15ce2004-08-30 19:15:12 +00001708 if (clo_stacks) {
nethercotec9f36922004-02-14 16:40:02 +00001709 VG_(message)(Vg_UserMsg, "stack(s): %s",
sewardjb5f6f512005-03-10 23:59:00 +00001710 ( 0 == stack_ST ? (Char*)"0%"
1711 : make_perc(stack_ST, total_ST) ) );
nethercote43a15ce2004-08-30 19:15:12 +00001712 }
nethercotec9f36922004-02-14 16:40:02 +00001713
1714 if (VG_(clo_verbosity) > 1) {
njnca82cc02004-11-22 17:18:48 +00001715 tl_assert(n_xpts > 0); // always have alloc_xpt
nethercotec9f36922004-02-14 16:40:02 +00001716 VG_(message)(Vg_DebugMsg, " allocs: %u", n_allocs);
1717 VG_(message)(Vg_DebugMsg, "zeroallocs: %u (%d%%)", n_zero_allocs,
1718 n_zero_allocs * 100 / n_allocs );
1719 VG_(message)(Vg_DebugMsg, " frees: %u", n_frees);
1720 VG_(message)(Vg_DebugMsg, " XPts: %u (%d B)", n_xpts,
1721 n_xpts*sizeof(XPt));
1722 VG_(message)(Vg_DebugMsg, " bot-XPts: %u (%d%%)", n_bot_xpts,
1723 n_bot_xpts * 100 / n_xpts);
1724 VG_(message)(Vg_DebugMsg, " top-XPts: %u (%d%%)", alloc_xpt->n_children,
1725 alloc_xpt->n_children * 100 / n_xpts);
1726 VG_(message)(Vg_DebugMsg, "c-reallocs: %u", n_children_reallocs);
1727 VG_(message)(Vg_DebugMsg, "snap-frees: %u", n_snapshot_frees);
1728 VG_(message)(Vg_DebugMsg, "atmp censi: %u", n_attempted_censi);
1729 VG_(message)(Vg_DebugMsg, "fake censi: %u", n_fake_censi);
1730 VG_(message)(Vg_DebugMsg, "real censi: %u", n_real_censi);
1731 VG_(message)(Vg_DebugMsg, " halvings: %u", n_halvings);
1732 }
1733}
1734
njn51d827b2005-05-09 01:02:08 +00001735static void ms_fini(Int exit_status)
nethercotec9f36922004-02-14 16:40:02 +00001736{
1737 ULong total_ST = 0;
1738 ULong heap_ST = 0;
1739 ULong heap_admin_ST = 0;
1740 ULong stack_ST = 0;
1741
1742 // Do a final (empty) sample to show program's end
1743 hp_census();
1744
1745 // Redo spacetimes of significant contexts to match the .hp file.
nethercote43a15ce2004-08-30 19:15:12 +00001746 calc_exact_ST_dbld(&heap_ST, &heap_admin_ST, &stack_ST);
nethercotec9f36922004-02-14 16:40:02 +00001747 total_ST = heap_ST + heap_admin_ST + stack_ST;
1748 write_hp_file ( );
1749 write_text_file( total_ST, heap_ST );
1750 print_summary ( total_ST, heap_ST, heap_admin_ST, stack_ST );
1751}
1752
njn51d827b2005-05-09 01:02:08 +00001753/*------------------------------------------------------------*/
1754/*--- Initialisation ---*/
1755/*------------------------------------------------------------*/
1756
1757static void ms_post_clo_init(void)
1758{
1759 ms_interval = 1;
1760
1761 // Do an initial sample for t = 0
1762 hp_census();
1763}
1764
1765static void ms_pre_clo_init()
1766{
1767 VG_(details_name) ("Massif");
1768 VG_(details_version) (NULL);
1769 VG_(details_description) ("a space profiler");
1770 VG_(details_copyright_author)("Copyright (C) 2003, Nicholas Nethercote");
1771 VG_(details_bug_reports_to) (VG_BUGS_TO);
1772
1773 // Basic functions
1774 VG_(basic_tool_funcs) (ms_post_clo_init,
1775 ms_instrument,
1776 ms_fini);
1777
1778 // Needs
1779 VG_(needs_libc_freeres)();
1780 VG_(needs_command_line_options)(ms_process_cmd_line_option,
1781 ms_print_usage,
1782 ms_print_debug_usage);
1783 VG_(needs_client_requests) (ms_handle_client_request);
njnfc51f8d2005-06-21 03:20:17 +00001784 VG_(needs_malloc_replacement) (ms_malloc,
njn51d827b2005-05-09 01:02:08 +00001785 ms___builtin_new,
1786 ms___builtin_vec_new,
1787 ms_memalign,
1788 ms_calloc,
1789 ms_free,
1790 ms___builtin_delete,
1791 ms___builtin_vec_delete,
1792 ms_realloc,
1793 0 );
1794
1795 // Events to track
1796 VG_(track_new_mem_stack_signal)( new_mem_stack_signal );
1797 VG_(track_die_mem_stack_signal)( die_mem_stack_signal );
1798
1799 // Profiling events
1800 VG_(register_profile_event)(VgpGetXPt, "get-XPt");
1801 VG_(register_profile_event)(VgpGetXPtSearch, "get-XPt-search");
1802 VG_(register_profile_event)(VgpCensus, "census");
1803 VG_(register_profile_event)(VgpCensusHeap, "census-heap");
1804 VG_(register_profile_event)(VgpCensusSnapshot, "census-snapshot");
1805 VG_(register_profile_event)(VgpCensusTreeSize, "census-treesize");
1806 VG_(register_profile_event)(VgpUpdateXCon, "update-XCon");
1807 VG_(register_profile_event)(VgpCalcSpacetime2, "calc-exact_ST_dbld");
1808 VG_(register_profile_event)(VgpPrintHp, "print-hp");
1809 VG_(register_profile_event)(VgpPrintXPts, "print-XPts");
1810
1811 // HP_Chunks
njnf69f9452005-07-03 17:53:11 +00001812 malloc_list = VG_(HT_construct)( 80021 ); // prime, big
njn51d827b2005-05-09 01:02:08 +00001813
1814 // Dummy node at top of the context structure.
1815 alloc_xpt = new_XPt(0, NULL, /*is_bottom*/False);
1816
njn57ca7ab2005-06-21 23:44:58 +00001817 tl_assert( VG_(getcwd)(base_dir, VKI_PATH_MAX) );
njn51d827b2005-05-09 01:02:08 +00001818}
1819
1820VG_DETERMINE_INTERFACE_VERSION(ms_pre_clo_init, 0)
nethercotec9f36922004-02-14 16:40:02 +00001821
1822/*--------------------------------------------------------------------*/
njnf1c5def2005-08-11 02:17:07 +00001823/*--- end ---*/
nethercotec9f36922004-02-14 16:40:02 +00001824/*--------------------------------------------------------------------*/
1825