blob: 6fa430b1f83fa0cfffe614b48f554a56d3062d48 [file] [log] [blame]
nethercotec9f36922004-02-14 16:40:02 +00001
2/*--------------------------------------------------------------------*/
nethercote996901a2004-08-03 13:29:09 +00003/*--- Massif: a heap profiling tool. ms_main.c ---*/
nethercotec9f36922004-02-14 16:40:02 +00004/*--------------------------------------------------------------------*/
5
6/*
nethercote996901a2004-08-03 13:29:09 +00007 This file is part of Massif, a Valgrind tool for profiling memory
nethercotec9f36922004-02-14 16:40:02 +00008 usage of programs.
9
sewardj9ebd6e02007-01-08 06:01:59 +000010 Copyright (C) 2003-2007 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000011 njn@valgrind.org
nethercotec9f36922004-02-14 16:40:02 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31// Memory profiler. Produces a graph, gives lots of information about
32// allocation contexts, in terms of space.time values (ie. area under the
33// graph). Allocation context information is hierarchical, and can thus
34// be inspected step-wise to an appropriate depth. See comments on data
35// structures below for more info on how things work.
36
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_basics.h"
sewardj4cfea4f2006-10-14 19:26:10 +000038#include "pub_tool_vki.h"
sewardj45f4e7c2005-09-27 19:20:21 +000039#include "pub_tool_aspacemgr.h"
njnea27e462005-05-31 02:38:09 +000040#include "pub_tool_debuginfo.h"
njn81c00df2005-05-14 21:28:43 +000041#include "pub_tool_hashtable.h"
njn97405b22005-06-02 03:39:33 +000042#include "pub_tool_libcbase.h"
njn132bfcc2005-06-04 19:16:06 +000043#include "pub_tool_libcassert.h"
njneb8896b2005-06-04 20:03:55 +000044#include "pub_tool_libcfile.h"
njn36a20fa2005-06-03 03:08:39 +000045#include "pub_tool_libcprint.h"
njnf39e9a32005-06-12 02:43:17 +000046#include "pub_tool_libcproc.h"
njnb506bd82005-06-21 04:01:51 +000047#include "pub_tool_machine.h"
njn717cde52005-05-10 02:47:21 +000048#include "pub_tool_mallocfree.h"
njn20242342005-05-16 23:31:24 +000049#include "pub_tool_options.h"
njn717cde52005-05-10 02:47:21 +000050#include "pub_tool_replacemalloc.h"
njnd01fef72005-03-25 23:35:48 +000051#include "pub_tool_stacktrace.h"
njn43b9a8a2005-05-10 04:37:01 +000052#include "pub_tool_tooliface.h"
sewardj14c7cc52007-02-25 15:08:24 +000053#include "pub_tool_xarray.h"
sewardj45f4e7c2005-09-27 19:20:21 +000054#include "pub_tool_clientstate.h"
nethercotec9f36922004-02-14 16:40:02 +000055
56#include "valgrind.h" // For {MALLOC,FREE}LIKE_BLOCK
57
58/*------------------------------------------------------------*/
59/*--- Overview of operation ---*/
60/*------------------------------------------------------------*/
61
62// Heap blocks are tracked, and the amount of space allocated by various
63// contexts (ie. lines of code, more or less) is also tracked.
64// Periodically, a census is taken, and the amount of space used, at that
65// point, by the most significant (highly allocating) contexts is recorded.
66// Census start off frequently, but are scaled back as the program goes on,
67// so that there are always a good number of them. At the end, overall
68// spacetimes for different contexts (of differing levels of precision) is
69// calculated, the graph is printed, and the text giving spacetimes for the
70// increasingly precise contexts is given.
71//
72// Measures the following:
73// - heap blocks
74// - heap admin bytes
75// - stack(s)
76// - code (code segments loaded at startup, and loaded with mmap)
77// - data (data segments loaded at startup, and loaded/created with mmap,
78// and brk()d segments)
79
80/*------------------------------------------------------------*/
81/*--- Main types ---*/
82/*------------------------------------------------------------*/
83
84// An XPt represents an "execution point", ie. a code address. Each XPt is
85// part of a tree of XPts (an "execution tree", or "XTree"). Each
86// top-to-bottom path through an XTree gives an execution context ("XCon"),
87// and is equivalent to a traditional Valgrind ExeContext.
88//
89// The XPt at the top of an XTree (but below "alloc_xpt") is called a
90// "top-XPt". The XPts are the bottom of an XTree (leaf nodes) are
91// "bottom-XPTs". The number of XCons in an XTree is equal to the number of
92// bottom-XPTs in that XTree.
93//
94// All XCons have the same top-XPt, "alloc_xpt", which represents all
95// allocation functions like malloc(). It's a bit of a fake XPt, though,
96// and is only used because it makes some of the code simpler.
97//
98// XTrees are bi-directional.
99//
100// > parent < Example: if child1() calls parent() and child2()
101// / | \ also calls parent(), and parent() calls malloc(),
102// | / \ | the XTree will look like this.
103// | v v |
104// child1 child2
105
106typedef struct _XPt XPt;
107
108struct _XPt {
njnd01fef72005-03-25 23:35:48 +0000109 Addr ip; // code address
nethercotec9f36922004-02-14 16:40:02 +0000110
111 // Bottom-XPts: space for the precise context.
112 // Other XPts: space of all the descendent bottom-XPts.
113 // Nb: this value goes up and down as the program executes.
114 UInt curr_space;
115
116 // An approximate space.time calculation used along the way for selecting
117 // which contexts to include at each census point.
118 // !!! top-XPTs only !!!
nethercote43a15ce2004-08-30 19:15:12 +0000119 ULong approx_ST;
nethercotec9f36922004-02-14 16:40:02 +0000120
nethercote43a15ce2004-08-30 19:15:12 +0000121 // exact_ST_dbld is an exact space.time calculation done at the end, and
nethercotec9f36922004-02-14 16:40:02 +0000122 // used in the results.
123 // Note that it is *doubled*, to avoid rounding errors.
124 // !!! not used for 'alloc_xpt' !!!
nethercote43a15ce2004-08-30 19:15:12 +0000125 ULong exact_ST_dbld;
nethercotec9f36922004-02-14 16:40:02 +0000126
127 // n_children and max_children are integers; a very big program might
128 // have more than 65536 allocation points (Konqueror startup has 1800).
129 XPt* parent; // pointer to parent XPt
130 UInt n_children; // number of children
131 UInt max_children; // capacity of children array
132 XPt** children; // pointers to children XPts
133};
134
135// Each census snapshots the most significant XTrees, each XTree having a
136// top-XPt as its root. The 'curr_space' element for each XPt is recorded
137// in the snapshot. The snapshot contains all the XTree's XPts, not in a
138// tree structure, but flattened into an array. This flat snapshot is used
nethercote43a15ce2004-08-30 19:15:12 +0000139// at the end for computing exact_ST_dbld for each XPt.
nethercotec9f36922004-02-14 16:40:02 +0000140//
141// Graph resolution, x-axis: no point having more than about 200 census
142// x-points; you can't see them on the graph. Therefore:
143//
144// - do a census every 1 ms for first 200 --> 200, all (200 ms)
145// - halve (drop half of them) --> 100, every 2nd (200 ms)
146// - do a census every 2 ms for next 200 --> 200, every 2nd (400 ms)
147// - halve --> 100, every 4th (400 ms)
148// - do a census every 4 ms for next 400 --> 200, every 4th (800 ms)
149// - etc.
150//
151// This isn't exactly right, because we actually drop (N/2)-1 when halving,
152// but it shows the basic idea.
153
154#define MAX_N_CENSI 200 // Keep it even, for simplicity
155
156// Graph resolution, y-axis: hp2ps only draws the 19 biggest (in space-time)
157// bands, rest get lumped into OTHERS. I only print the top N
158// (cumulative-so-far space-time) at each point. N should be a bit bigger
159// than 19 in case the cumulative space-time doesn't fit with the eventual
160// space-time computed by hp2ps (but it should be close if the samples are
161// evenly spread, since hp2ps does an approximate per-band space-time
162// calculation that just sums the totals; ie. it assumes all samples are
163// the same distance apart).
164
165#define MAX_SNAPSHOTS 32
166
167typedef
168 struct {
169 XPt* xpt;
170 UInt space;
171 }
172 XPtSnapshot;
173
174// An XTree snapshot is stored as an array of of XPt snapshots.
175typedef XPtSnapshot* XTreeSnapshot;
176
177typedef
178 struct {
179 Int ms_time; // Int: must allow -1
180 XTreeSnapshot xtree_snapshots[MAX_SNAPSHOTS+1]; // +1 for zero-termination
181 UInt others_space;
182 UInt heap_admin_space;
183 UInt stacks_space;
184 }
185 Census;
186
187// Metadata for heap blocks. Each one contains a pointer to a bottom-XPt,
188// which is a foothold into the XCon at which it was allocated. From
189// HP_Chunks, XPt 'space' fields are incremented (at allocation) and
190// decremented (at deallocation).
191//
192// Nb: first two fields must match core's VgHashNode.
193typedef
194 struct _HP_Chunk {
195 struct _HP_Chunk* next;
196 Addr data; // Ptr to actual block
nethercote7ac7f7b2004-11-02 12:36:02 +0000197 SizeT size; // Size requested
nethercotec9f36922004-02-14 16:40:02 +0000198 XPt* where; // Where allocated; bottom-XPt
199 }
200 HP_Chunk;
201
202/*------------------------------------------------------------*/
nethercotec9f36922004-02-14 16:40:02 +0000203/*--- Statistics ---*/
204/*------------------------------------------------------------*/
205
206// Konqueror startup, to give an idea of the numbers involved with a biggish
207// program, with default depth:
208//
209// depth=3 depth=40
210// - 310,000 allocations
211// - 300,000 frees
212// - 15,000 XPts 800,000 XPts
213// - 1,800 top-XPts
214
215static UInt n_xpts = 0;
216static UInt n_bot_xpts = 0;
217static UInt n_allocs = 0;
218static UInt n_zero_allocs = 0;
219static UInt n_frees = 0;
220static UInt n_children_reallocs = 0;
221static UInt n_snapshot_frees = 0;
222
223static UInt n_halvings = 0;
224static UInt n_real_censi = 0;
225static UInt n_fake_censi = 0;
226static UInt n_attempted_censi = 0;
227
228/*------------------------------------------------------------*/
229/*--- Globals ---*/
230/*------------------------------------------------------------*/
231
232#define FILENAME_LEN 256
233
234#define SPRINTF(zz_buf, fmt, args...) \
235 do { Int len = VG_(sprintf)(zz_buf, fmt, ## args); \
236 VG_(write)(fd, (void*)zz_buf, len); \
237 } while (0)
238
239#define BUF_LEN 1024 // general purpose
240static Char buf [BUF_LEN];
241static Char buf2[BUF_LEN];
242static Char buf3[BUF_LEN];
243
nethercote8b5f40c2004-11-02 13:29:50 +0000244static SizeT sigstacks_space = 0; // Current signal stacks space sum
nethercotec9f36922004-02-14 16:40:02 +0000245
246static VgHashTable malloc_list = NULL; // HP_Chunks
247
248static UInt n_heap_blocks = 0;
249
njn51d827b2005-05-09 01:02:08 +0000250// Current directory at startup.
njn57ca7ab2005-06-21 23:44:58 +0000251static Char base_dir[VKI_PATH_MAX];
nethercotec9f36922004-02-14 16:40:02 +0000252
njna3e991b2007-03-26 23:51:29 +0000253#define MAX_ALLOC_FNS 128 // includes the builtin ones
nethercotec9f36922004-02-14 16:40:02 +0000254
nethercotec7469182004-05-11 09:21:08 +0000255// First few filled in, rest should be zeroed. Zero-terminated vector.
njna3e991b2007-03-26 23:51:29 +0000256static UInt n_alloc_fns = 10;
nethercotec9f36922004-02-14 16:40:02 +0000257static Char* alloc_fns[MAX_ALLOC_FNS] = {
258 "malloc",
259 "operator new(unsigned)",
260 "operator new[](unsigned)",
nethercoteeb479cb2004-05-11 16:37:17 +0000261 "operator new(unsigned, std::nothrow_t const&)",
262 "operator new[](unsigned, std::nothrow_t const&)",
nethercotec9f36922004-02-14 16:40:02 +0000263 "__builtin_new",
264 "__builtin_vec_new",
265 "calloc",
266 "realloc",
fitzhardinge51f3ff12004-03-04 22:42:03 +0000267 "memalign",
nethercotec9f36922004-02-14 16:40:02 +0000268};
269
270
271/*------------------------------------------------------------*/
272/*--- Command line args ---*/
273/*------------------------------------------------------------*/
274
275#define MAX_DEPTH 50
276
277typedef
278 enum {
279 XText, XHTML,
280 }
281 XFormat;
282
283static Bool clo_heap = True;
284static UInt clo_heap_admin = 8;
285static Bool clo_stacks = True;
286static Bool clo_depth = 3;
287static XFormat clo_format = XText;
288
njn51d827b2005-05-09 01:02:08 +0000289static Bool ms_process_cmd_line_option(Char* arg)
nethercotec9f36922004-02-14 16:40:02 +0000290{
njn45270a22005-03-27 01:00:11 +0000291 VG_BOOL_CLO(arg, "--heap", clo_heap)
292 else VG_BOOL_CLO(arg, "--stacks", clo_stacks)
nethercotec9f36922004-02-14 16:40:02 +0000293
njn45270a22005-03-27 01:00:11 +0000294 else VG_NUM_CLO (arg, "--heap-admin", clo_heap_admin)
295 else VG_BNUM_CLO(arg, "--depth", clo_depth, 1, MAX_DEPTH)
nethercotec9f36922004-02-14 16:40:02 +0000296
297 else if (VG_CLO_STREQN(11, arg, "--alloc-fn=")) {
298 alloc_fns[n_alloc_fns] = & arg[11];
299 n_alloc_fns++;
300 if (n_alloc_fns >= MAX_ALLOC_FNS) {
301 VG_(printf)("Too many alloc functions specified, sorry");
sewardj6893d652006-10-15 01:25:13 +0000302 VG_(err_bad_option)(arg);
nethercotec9f36922004-02-14 16:40:02 +0000303 }
304 }
305
306 else if (VG_CLO_STREQ(arg, "--format=text"))
307 clo_format = XText;
308 else if (VG_CLO_STREQ(arg, "--format=html"))
309 clo_format = XHTML;
310
311 else
312 return VG_(replacement_malloc_process_cmd_line_option)(arg);
nethercote27fec902004-06-16 21:26:32 +0000313
nethercotec9f36922004-02-14 16:40:02 +0000314 return True;
315}
316
njn51d827b2005-05-09 01:02:08 +0000317static void ms_print_usage(void)
nethercotec9f36922004-02-14 16:40:02 +0000318{
319 VG_(printf)(
320" --heap=no|yes profile heap blocks [yes]\n"
321" --heap-admin=<number> average admin bytes per heap block [8]\n"
322" --stacks=no|yes profile stack(s) [yes]\n"
323" --depth=<number> depth of contexts [3]\n"
324" --alloc-fn=<name> specify <fn> as an alloc function [empty]\n"
325" --format=text|html format of textual output [text]\n"
326 );
327 VG_(replacement_malloc_print_usage)();
328}
329
njn51d827b2005-05-09 01:02:08 +0000330static void ms_print_debug_usage(void)
nethercotec9f36922004-02-14 16:40:02 +0000331{
332 VG_(replacement_malloc_print_debug_usage)();
333}
334
335/*------------------------------------------------------------*/
336/*--- Execution contexts ---*/
337/*------------------------------------------------------------*/
338
339// Fake XPt representing all allocation functions like malloc(). Acts as
340// parent node to all top-XPts.
341static XPt* alloc_xpt;
342
343// Cheap allocation for blocks that never need to be freed. Saves about 10%
344// for Konqueror startup with --depth=40.
nethercote7ac7f7b2004-11-02 12:36:02 +0000345static void* perm_malloc(SizeT n_bytes)
nethercotec9f36922004-02-14 16:40:02 +0000346{
347 static Addr hp = 0; // current heap pointer
348 static Addr hp_lim = 0; // maximum usable byte in current block
349
350 #define SUPERBLOCK_SIZE (1 << 20) // 1 MB
351
352 if (hp + n_bytes > hp_lim) {
sewardj45f4e7c2005-09-27 19:20:21 +0000353 hp = (Addr)VG_(am_shadow_alloc)(SUPERBLOCK_SIZE);
354 if (hp == 0)
355 VG_(out_of_memory_NORETURN)( "massif:perm_malloc",
356 SUPERBLOCK_SIZE);
nethercotec9f36922004-02-14 16:40:02 +0000357 hp_lim = hp + SUPERBLOCK_SIZE - 1;
358 }
359
360 hp += n_bytes;
361
362 return (void*)(hp - n_bytes);
363}
364
365
366
njnd01fef72005-03-25 23:35:48 +0000367static XPt* new_XPt(Addr ip, XPt* parent, Bool is_bottom)
nethercotec9f36922004-02-14 16:40:02 +0000368{
369 XPt* xpt = perm_malloc(sizeof(XPt));
njnd01fef72005-03-25 23:35:48 +0000370 xpt->ip = ip;
nethercotec9f36922004-02-14 16:40:02 +0000371
nethercote43a15ce2004-08-30 19:15:12 +0000372 xpt->curr_space = 0;
373 xpt->approx_ST = 0;
374 xpt->exact_ST_dbld = 0;
nethercotec9f36922004-02-14 16:40:02 +0000375
376 xpt->parent = parent;
nethercotefc016352004-04-27 09:51:51 +0000377
378 // Check parent is not a bottom-XPt
njnca82cc02004-11-22 17:18:48 +0000379 tl_assert(parent == NULL || 0 != parent->max_children);
nethercotec9f36922004-02-14 16:40:02 +0000380
381 xpt->n_children = 0;
382
383 // If a bottom-XPt, don't allocate space for children. This can be 50%
384 // or more, although it tends to drop as --depth increases (eg. 10% for
385 // konqueror with --depth=20).
386 if ( is_bottom ) {
387 xpt->max_children = 0;
388 xpt->children = NULL;
389 n_bot_xpts++;
390 } else {
391 xpt->max_children = 4;
392 xpt->children = VG_(malloc)( xpt->max_children * sizeof(XPt*) );
393 }
394
395 // Update statistics
396 n_xpts++;
397
398 return xpt;
399}
400
njnd01fef72005-03-25 23:35:48 +0000401static Bool is_alloc_fn(Addr ip)
nethercotec9f36922004-02-14 16:40:02 +0000402{
403 Int i;
404
njnd01fef72005-03-25 23:35:48 +0000405 if ( VG_(get_fnname)(ip, buf, BUF_LEN) ) {
nethercotec9f36922004-02-14 16:40:02 +0000406 for (i = 0; i < n_alloc_fns; i++) {
407 if (VG_STREQ(buf, alloc_fns[i]))
408 return True;
409 }
410 }
411 return False;
412}
413
414// Returns an XCon, from the bottom-XPt. Nb: the XPt returned must be a
415// bottom-XPt now and must always remain a bottom-XPt. We go to some effort
416// to ensure this in certain cases. See comments below.
417static XPt* get_XCon( ThreadId tid, Bool custom_malloc )
418{
njnd01fef72005-03-25 23:35:48 +0000419 // Static to minimise stack size. +1 for added ~0 IP
420 static Addr ips[MAX_DEPTH + MAX_ALLOC_FNS + 1];
nethercotec9f36922004-02-14 16:40:02 +0000421
422 XPt* xpt = alloc_xpt;
njnd01fef72005-03-25 23:35:48 +0000423 UInt n_ips, L, A, B, nC;
nethercotec9f36922004-02-14 16:40:02 +0000424 UInt overestimate;
425 Bool reached_bottom;
426
nethercotec9f36922004-02-14 16:40:02 +0000427 // Want at least clo_depth non-alloc-fn entries in the snapshot.
428 // However, because we have 1 or more (an unknown number, at this point)
429 // alloc-fns ignored, we overestimate the size needed for the stack
430 // snapshot. Then, if necessary, we repeatedly increase the size until
431 // it is enough.
432 overestimate = 2;
433 while (True) {
njnd01fef72005-03-25 23:35:48 +0000434 n_ips = VG_(get_StackTrace)( tid, ips, clo_depth + overestimate );
nethercotec9f36922004-02-14 16:40:02 +0000435
njnd01fef72005-03-25 23:35:48 +0000436 // Now we add a dummy "unknown" IP at the end. This is only used if we
437 // run out of IPs before hitting clo_depth. It's done to ensure the
nethercotec9f36922004-02-14 16:40:02 +0000438 // XPt we return is (now and forever) a bottom-XPt. If the returned XPt
439 // wasn't a bottom-XPt (now or later) it would cause problems later (eg.
nethercote43a15ce2004-08-30 19:15:12 +0000440 // the parent's approx_ST wouldn't be equal [or almost equal] to the
441 // total of the childrens' approx_STs).
njnd01fef72005-03-25 23:35:48 +0000442 ips[ n_ips++ ] = ~((Addr)0);
nethercotec9f36922004-02-14 16:40:02 +0000443
njnd01fef72005-03-25 23:35:48 +0000444 // Skip over alloc functions in ips[].
445 for (L = 0; is_alloc_fn(ips[L]) && L < n_ips; L++) { }
nethercotec9f36922004-02-14 16:40:02 +0000446
447 // Must be at least one alloc function, unless client used
448 // MALLOCLIKE_BLOCK
njnca82cc02004-11-22 17:18:48 +0000449 if (!custom_malloc) tl_assert(L > 0);
nethercotec9f36922004-02-14 16:40:02 +0000450
451 // Should be at least one non-alloc function. If not, try again.
njnd01fef72005-03-25 23:35:48 +0000452 if (L == n_ips) {
nethercotec9f36922004-02-14 16:40:02 +0000453 overestimate += 2;
454 if (overestimate > MAX_ALLOC_FNS)
njn67993252004-11-22 18:02:32 +0000455 VG_(tool_panic)("No stk snapshot big enough to find non-alloc fns");
nethercotec9f36922004-02-14 16:40:02 +0000456 } else {
457 break;
458 }
459 }
460 A = L;
njnd01fef72005-03-25 23:35:48 +0000461 B = n_ips - 1;
nethercotec9f36922004-02-14 16:40:02 +0000462 reached_bottom = False;
463
njnd01fef72005-03-25 23:35:48 +0000464 // By this point, the IPs we care about are in ips[A]..ips[B]
nethercotec9f36922004-02-14 16:40:02 +0000465
466 // Now do the search/insertion of the XCon. 'L' is the loop counter,
njnd01fef72005-03-25 23:35:48 +0000467 // being the index into ips[].
nethercotec9f36922004-02-14 16:40:02 +0000468 while (True) {
njnd01fef72005-03-25 23:35:48 +0000469 // Look for IP in xpt's children.
nethercotec9f36922004-02-14 16:40:02 +0000470 // XXX: linear search, ugh -- about 10% of time for konqueror startup
471 // XXX: tried cacheing last result, only hit about 4% for konqueror
472 // Nb: this search hits about 98% of the time for konqueror
nethercotec9f36922004-02-14 16:40:02 +0000473
474 // If we've searched/added deep enough, or run out of EIPs, this is
475 // the bottom XPt.
476 if (L - A + 1 == clo_depth || L == B)
477 reached_bottom = True;
478
479 nC = 0;
480 while (True) {
481 if (nC == xpt->n_children) {
482 // not found, insert new XPt
njnca82cc02004-11-22 17:18:48 +0000483 tl_assert(xpt->max_children != 0);
484 tl_assert(xpt->n_children <= xpt->max_children);
nethercotec9f36922004-02-14 16:40:02 +0000485 // Expand 'children' if necessary
486 if (xpt->n_children == xpt->max_children) {
487 xpt->max_children *= 2;
488 xpt->children = VG_(realloc)( xpt->children,
489 xpt->max_children * sizeof(XPt*) );
490 n_children_reallocs++;
491 }
njnd01fef72005-03-25 23:35:48 +0000492 // Make new XPt for IP, insert in list
nethercotec9f36922004-02-14 16:40:02 +0000493 xpt->children[ xpt->n_children++ ] =
njnd01fef72005-03-25 23:35:48 +0000494 new_XPt(ips[L], xpt, reached_bottom);
nethercotec9f36922004-02-14 16:40:02 +0000495 break;
496 }
njnd01fef72005-03-25 23:35:48 +0000497 if (ips[L] == xpt->children[nC]->ip) break; // found the IP
nethercotec9f36922004-02-14 16:40:02 +0000498 nC++; // keep looking
499 }
nethercotec9f36922004-02-14 16:40:02 +0000500
501 // Return found/built bottom-XPt.
502 if (reached_bottom) {
njnca82cc02004-11-22 17:18:48 +0000503 tl_assert(0 == xpt->children[nC]->n_children); // Must be bottom-XPt
nethercotec9f36922004-02-14 16:40:02 +0000504 return xpt->children[nC];
505 }
506
507 // Descend to next level in XTree, the newly found/built non-bottom-XPt
508 xpt = xpt->children[nC];
509 L++;
510 }
511}
512
513// Update 'curr_space' of every XPt in the XCon, by percolating upwards.
514static void update_XCon(XPt* xpt, Int space_delta)
515{
njnca82cc02004-11-22 17:18:48 +0000516 tl_assert(True == clo_heap);
517 tl_assert(0 != space_delta);
518 tl_assert(NULL != xpt);
519 tl_assert(0 == xpt->n_children); // must be bottom-XPt
nethercotec9f36922004-02-14 16:40:02 +0000520
521 while (xpt != alloc_xpt) {
njnca82cc02004-11-22 17:18:48 +0000522 if (space_delta < 0) tl_assert(xpt->curr_space >= -space_delta);
nethercotec9f36922004-02-14 16:40:02 +0000523 xpt->curr_space += space_delta;
524 xpt = xpt->parent;
525 }
njnca82cc02004-11-22 17:18:48 +0000526 if (space_delta < 0) tl_assert(alloc_xpt->curr_space >= -space_delta);
nethercotec9f36922004-02-14 16:40:02 +0000527 alloc_xpt->curr_space += space_delta;
nethercotec9f36922004-02-14 16:40:02 +0000528}
529
530// Actually want a reverse sort, biggest to smallest
nethercote43a15ce2004-08-30 19:15:12 +0000531static Int XPt_cmp_approx_ST(void* n1, void* n2)
nethercotec9f36922004-02-14 16:40:02 +0000532{
533 XPt* xpt1 = *(XPt**)n1;
534 XPt* xpt2 = *(XPt**)n2;
nethercote43a15ce2004-08-30 19:15:12 +0000535 return (xpt1->approx_ST < xpt2->approx_ST ? 1 : -1);
nethercotec9f36922004-02-14 16:40:02 +0000536}
537
nethercote43a15ce2004-08-30 19:15:12 +0000538static Int XPt_cmp_exact_ST_dbld(void* n1, void* n2)
nethercotec9f36922004-02-14 16:40:02 +0000539{
540 XPt* xpt1 = *(XPt**)n1;
541 XPt* xpt2 = *(XPt**)n2;
nethercote43a15ce2004-08-30 19:15:12 +0000542 return (xpt1->exact_ST_dbld < xpt2->exact_ST_dbld ? 1 : -1);
nethercotec9f36922004-02-14 16:40:02 +0000543}
544
545
546/*------------------------------------------------------------*/
547/*--- A generic Queue ---*/
548/*------------------------------------------------------------*/
549
550typedef
551 struct {
552 UInt head; // Index of first entry
553 UInt tail; // Index of final+1 entry, ie. next free slot
554 UInt max_elems;
555 void** elems;
556 }
557 Queue;
558
559static Queue* construct_queue(UInt size)
560{
561 UInt i;
562 Queue* q = VG_(malloc)(sizeof(Queue));
563 q->head = 0;
564 q->tail = 0;
565 q->max_elems = size;
566 q->elems = VG_(malloc)(size * sizeof(void*));
567 for (i = 0; i < size; i++)
568 q->elems[i] = NULL;
569
570 return q;
571}
572
573static void destruct_queue(Queue* q)
574{
575 VG_(free)(q->elems);
576 VG_(free)(q);
577}
578
579static void shuffle(Queue* dest_q, void** old_elems)
580{
581 UInt i, j;
582 for (i = 0, j = dest_q->head; j < dest_q->tail; i++, j++)
583 dest_q->elems[i] = old_elems[j];
584 dest_q->head = 0;
585 dest_q->tail = i;
586 for ( ; i < dest_q->max_elems; i++)
587 dest_q->elems[i] = NULL; // paranoia
588}
589
590// Shuffles elements down. If not enough slots free, increase size. (We
591// don't wait until we've completely run out of space, because there could
592// be lots of shuffling just before that point which would be slow.)
593static void adjust(Queue* q)
594{
595 void** old_elems;
596
njnca82cc02004-11-22 17:18:48 +0000597 tl_assert(q->tail == q->max_elems);
nethercotec9f36922004-02-14 16:40:02 +0000598 if (q->head < 10) {
599 old_elems = q->elems;
600 q->max_elems *= 2;
601 q->elems = VG_(malloc)(q->max_elems * sizeof(void*));
602 shuffle(q, old_elems);
603 VG_(free)(old_elems);
604 } else {
605 shuffle(q, q->elems);
606 }
607}
608
609static void enqueue(Queue* q, void* elem)
610{
611 if (q->tail == q->max_elems)
612 adjust(q);
613 q->elems[q->tail++] = elem;
614}
615
616static Bool is_empty_queue(Queue* q)
617{
618 return (q->head == q->tail);
619}
620
621static void* dequeue(Queue* q)
622{
623 if (is_empty_queue(q))
624 return NULL; // Queue empty
625 else
626 return q->elems[q->head++];
627}
628
629/*------------------------------------------------------------*/
630/*--- malloc() et al replacement wrappers ---*/
631/*------------------------------------------------------------*/
632
nethercotec9f36922004-02-14 16:40:02 +0000633// Forward declaration
634static void hp_census(void);
635
nethercote159dfef2004-09-13 13:27:30 +0000636static
njn57735902004-11-25 18:04:54 +0000637void* new_block ( ThreadId tid, void* p, SizeT size, SizeT align,
638 Bool is_zeroed )
nethercotec9f36922004-02-14 16:40:02 +0000639{
640 HP_Chunk* hc;
nethercote57e36b32004-07-10 14:56:28 +0000641 Bool custom_alloc = (NULL == p);
nethercotec9f36922004-02-14 16:40:02 +0000642 if (size < 0) return NULL;
643
nethercotec9f36922004-02-14 16:40:02 +0000644 // Update statistics
645 n_allocs++;
nethercote57e36b32004-07-10 14:56:28 +0000646 if (0 == size) n_zero_allocs++;
nethercotec9f36922004-02-14 16:40:02 +0000647
nethercote57e36b32004-07-10 14:56:28 +0000648 // Allocate and zero if necessary
649 if (!p) {
650 p = VG_(cli_malloc)( align, size );
651 if (!p) {
nethercote57e36b32004-07-10 14:56:28 +0000652 return NULL;
653 }
654 if (is_zeroed) VG_(memset)(p, 0, size);
655 }
656
njnf1c5def2005-08-11 02:17:07 +0000657 // Make new HP_Chunk node, add to malloc_list
nethercote57e36b32004-07-10 14:56:28 +0000658 hc = VG_(malloc)(sizeof(HP_Chunk));
659 hc->size = size;
660 hc->data = (Addr)p;
661 hc->where = NULL; // paranoia
662 if (clo_heap) {
njn57735902004-11-25 18:04:54 +0000663 hc->where = get_XCon( tid, custom_alloc );
nethercote57e36b32004-07-10 14:56:28 +0000664 if (0 != size)
665 update_XCon(hc->where, size);
666 }
njn246a9d22005-08-14 06:24:20 +0000667 VG_(HT_add_node)(malloc_list, hc);
njnf1c5def2005-08-11 02:17:07 +0000668 n_heap_blocks++;
nethercote57e36b32004-07-10 14:56:28 +0000669
670 // do a census!
671 hp_census();
nethercotec9f36922004-02-14 16:40:02 +0000672
nethercotec9f36922004-02-14 16:40:02 +0000673 return p;
674}
675
676static __inline__
677void die_block ( void* p, Bool custom_free )
678{
njnf1c5def2005-08-11 02:17:07 +0000679 HP_Chunk* hc;
nethercotec9f36922004-02-14 16:40:02 +0000680
nethercotec9f36922004-02-14 16:40:02 +0000681 // Update statistics
682 n_frees++;
683
njnf1c5def2005-08-11 02:17:07 +0000684 // Remove HP_Chunk from malloc_list
njn9a463242005-08-16 03:29:50 +0000685 hc = VG_(HT_remove)(malloc_list, (UWord)p);
njn5cc5d7e2005-08-11 02:09:25 +0000686 if (NULL == hc)
687 return; // must have been a bogus free()
688 tl_assert(n_heap_blocks > 0);
689 n_heap_blocks--;
nethercotec9f36922004-02-14 16:40:02 +0000690
691 if (clo_heap && hc->size != 0)
692 update_XCon(hc->where, -hc->size);
693
nethercote57e36b32004-07-10 14:56:28 +0000694 VG_(free)( hc );
695
696 // Actually free the heap block, if necessary
nethercotec9f36922004-02-14 16:40:02 +0000697 if (!custom_free)
698 VG_(cli_free)( p );
699
nethercote57e36b32004-07-10 14:56:28 +0000700 // do a census!
701 hp_census();
nethercotec9f36922004-02-14 16:40:02 +0000702}
703
704
njn51d827b2005-05-09 01:02:08 +0000705static void* ms_malloc ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000706{
njn57735902004-11-25 18:04:54 +0000707 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000708}
709
njn51d827b2005-05-09 01:02:08 +0000710static void* ms___builtin_new ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000711{
njn57735902004-11-25 18:04:54 +0000712 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000713}
714
njn51d827b2005-05-09 01:02:08 +0000715static void* ms___builtin_vec_new ( ThreadId tid, SizeT n )
nethercotec9f36922004-02-14 16:40:02 +0000716{
njn57735902004-11-25 18:04:54 +0000717 return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +0000718}
719
njn51d827b2005-05-09 01:02:08 +0000720static void* ms_calloc ( ThreadId tid, SizeT m, SizeT size )
nethercotec9f36922004-02-14 16:40:02 +0000721{
njn57735902004-11-25 18:04:54 +0000722 return new_block( tid, NULL, m*size, VG_(clo_alignment), /*is_zeroed*/True );
nethercotec9f36922004-02-14 16:40:02 +0000723}
724
njn51d827b2005-05-09 01:02:08 +0000725static void *ms_memalign ( ThreadId tid, SizeT align, SizeT n )
fitzhardinge51f3ff12004-03-04 22:42:03 +0000726{
njn57735902004-11-25 18:04:54 +0000727 return new_block( tid, NULL, n, align, False );
fitzhardinge51f3ff12004-03-04 22:42:03 +0000728}
729
njn51d827b2005-05-09 01:02:08 +0000730static void ms_free ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000731{
732 die_block( p, /*custom_free*/False );
733}
734
njn51d827b2005-05-09 01:02:08 +0000735static void ms___builtin_delete ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000736{
737 die_block( p, /*custom_free*/False);
738}
739
njn51d827b2005-05-09 01:02:08 +0000740static void ms___builtin_vec_delete ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +0000741{
742 die_block( p, /*custom_free*/False );
743}
744
njn51d827b2005-05-09 01:02:08 +0000745static void* ms_realloc ( ThreadId tid, void* p_old, SizeT new_size )
nethercotec9f36922004-02-14 16:40:02 +0000746{
njn5cc5d7e2005-08-11 02:09:25 +0000747 HP_Chunk* hc;
748 void* p_new;
749 SizeT old_size;
750 XPt *old_where, *new_where;
nethercotec9f36922004-02-14 16:40:02 +0000751
njna0793652005-08-16 03:34:56 +0000752 // Remove the old block
njn9a463242005-08-16 03:29:50 +0000753 hc = VG_(HT_remove)(malloc_list, (UWord)p_old);
nethercotec9f36922004-02-14 16:40:02 +0000754 if (hc == NULL) {
njn5cc5d7e2005-08-11 02:09:25 +0000755 return NULL; // must have been a bogus realloc()
nethercotec9f36922004-02-14 16:40:02 +0000756 }
757
nethercotec9f36922004-02-14 16:40:02 +0000758 old_size = hc->size;
759
760 if (new_size <= old_size) {
761 // new size is smaller or same; block not moved
762 p_new = p_old;
763
764 } else {
765 // new size is bigger; make new block, copy shared contents, free old
766 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_size);
tom0cd42f02005-10-06 09:00:17 +0000767 if (p_new) {
768 VG_(memcpy)(p_new, p_old, old_size);
769 VG_(cli_free)(p_old);
770 }
nethercotec9f36922004-02-14 16:40:02 +0000771 }
nethercotec9f36922004-02-14 16:40:02 +0000772
tom0cd42f02005-10-06 09:00:17 +0000773 if (p_new) {
774 old_where = hc->where;
775 new_where = get_XCon( tid, /*custom_malloc*/False);
nethercotec9f36922004-02-14 16:40:02 +0000776
tom0cd42f02005-10-06 09:00:17 +0000777 // Update HP_Chunk
778 hc->data = (Addr)p_new;
779 hc->size = new_size;
780 hc->where = new_where;
781
782 // Update XPt curr_space fields
783 if (clo_heap) {
784 if (0 != old_size) update_XCon(old_where, -old_size);
785 if (0 != new_size) update_XCon(new_where, new_size);
786 }
nethercotec9f36922004-02-14 16:40:02 +0000787 }
788
njn5cc5d7e2005-08-11 02:09:25 +0000789 // Now insert the new hc (with a possibly new 'data' field) into
790 // malloc_list. If this realloc() did not increase the memory size, we
791 // will have removed and then re-added mc unnecessarily. But that's ok
792 // because shrinking a block with realloc() is (presumably) much rarer
793 // than growing it, and this way simplifies the growing case.
njn246a9d22005-08-14 06:24:20 +0000794 VG_(HT_add_node)(malloc_list, hc);
nethercotec9f36922004-02-14 16:40:02 +0000795
nethercotec9f36922004-02-14 16:40:02 +0000796 return p_new;
797}
798
799
800/*------------------------------------------------------------*/
801/*--- Taking a census ---*/
802/*------------------------------------------------------------*/
803
804static Census censi[MAX_N_CENSI];
805static UInt curr_census = 0;
806
nethercotec9f36922004-02-14 16:40:02 +0000807static UInt get_xtree_size(XPt* xpt, UInt ix)
808{
809 UInt i;
810
nethercote43a15ce2004-08-30 19:15:12 +0000811 // If no memory allocated at all, nothing interesting to record.
812 if (alloc_xpt->curr_space == 0) return 0;
813
814 // Ignore sub-XTrees that account for a miniscule fraction of current
815 // allocated space.
816 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002) {
nethercotec9f36922004-02-14 16:40:02 +0000817 ix++;
818
819 // Count all (non-zero) descendent XPts
820 for (i = 0; i < xpt->n_children; i++)
821 ix = get_xtree_size(xpt->children[i], ix);
822 }
823 return ix;
824}
825
826static
827UInt do_space_snapshot(XPt xpt[], XTreeSnapshot xtree_snapshot, UInt ix)
828{
829 UInt i;
830
nethercote43a15ce2004-08-30 19:15:12 +0000831 // Structure of this function mirrors that of get_xtree_size().
832
833 if (alloc_xpt->curr_space == 0) return 0;
834
835 if (xpt->curr_space / (double)alloc_xpt->curr_space > 0.002) {
nethercotec9f36922004-02-14 16:40:02 +0000836 xtree_snapshot[ix].xpt = xpt;
837 xtree_snapshot[ix].space = xpt->curr_space;
838 ix++;
839
nethercotec9f36922004-02-14 16:40:02 +0000840 for (i = 0; i < xpt->n_children; i++)
841 ix = do_space_snapshot(xpt->children[i], xtree_snapshot, ix);
842 }
843 return ix;
844}
845
846static UInt ms_interval;
847static UInt do_every_nth_census = 30;
848
849// Weed out half the censi; we choose those that represent the smallest
850// time-spans, because that loses the least information.
851//
852// Algorithm for N censi: We find the census representing the smallest
853// timeframe, and remove it. We repeat this until (N/2)-1 censi are gone.
854// (It's (N/2)-1 because we never remove the first and last censi.)
855// We have to do this one census at a time, rather than finding the (N/2)-1
856// smallest censi in one hit, because when a census is removed, it's
857// neighbours immediately cover greater timespans. So it's N^2, but N only
858// equals 200, and this is only done every 100 censi, which is not too often.
859static void halve_censi(void)
860{
861 Int i, jp, j, jn, k;
862 Census* min_census;
863
864 n_halvings++;
865 if (VG_(clo_verbosity) > 1)
866 VG_(message)(Vg_UserMsg, "Halving censi...");
867
868 // Sets j to the index of the first not-yet-removed census at or after i
869 #define FIND_CENSUS(i, j) \
njn6f1f76d2005-05-24 21:28:54 +0000870 for (j = i; j < MAX_N_CENSI && -1 == censi[j].ms_time; j++) { }
nethercotec9f36922004-02-14 16:40:02 +0000871
872 for (i = 2; i < MAX_N_CENSI; i += 2) {
873 // Find the censi representing the smallest timespan. The timespan
874 // for census n = d(N-1,N)+d(N,N+1), where d(A,B) is the time between
875 // censi A and B. We don't consider the first and last censi for
876 // removal.
877 Int min_span = 0x7fffffff;
878 Int min_j = 0;
879
880 // Initial triple: (prev, curr, next) == (jp, j, jn)
881 jp = 0;
882 FIND_CENSUS(1, j);
883 FIND_CENSUS(j+1, jn);
884 while (jn < MAX_N_CENSI) {
885 Int timespan = censi[jn].ms_time - censi[jp].ms_time;
njnca82cc02004-11-22 17:18:48 +0000886 tl_assert(timespan >= 0);
nethercotec9f36922004-02-14 16:40:02 +0000887 if (timespan < min_span) {
888 min_span = timespan;
889 min_j = j;
890 }
891 // Move on to next triple
892 jp = j;
893 j = jn;
894 FIND_CENSUS(jn+1, jn);
895 }
896 // We've found the least important census, now remove it
897 min_census = & censi[ min_j ];
898 for (k = 0; NULL != min_census->xtree_snapshots[k]; k++) {
899 n_snapshot_frees++;
900 VG_(free)(min_census->xtree_snapshots[k]);
901 min_census->xtree_snapshots[k] = NULL;
902 }
903 min_census->ms_time = -1;
904 }
905
906 // Slide down the remaining censi over the removed ones. The '<=' is
907 // because we are removing on (N/2)-1, rather than N/2.
908 for (i = 0, j = 0; i <= MAX_N_CENSI / 2; i++, j++) {
909 FIND_CENSUS(j, j);
910 if (i != j) {
911 censi[i] = censi[j];
912 }
913 }
914 curr_census = i;
915
916 // Double intervals
917 ms_interval *= 2;
918 do_every_nth_census *= 2;
919
920 if (VG_(clo_verbosity) > 1)
921 VG_(message)(Vg_UserMsg, "...done");
922}
923
924// Take a census. Census time seems to be insignificant (usually <= 0 ms,
925// almost always <= 1ms) so don't have to worry about subtracting it from
926// running time in any way.
927//
928// XXX: NOT TRUE! with bigger depths, konqueror censuses can easily take
929// 50ms!
930static void hp_census(void)
931{
932 static UInt ms_prev_census = 0;
933 static UInt ms_next_census = 0; // zero allows startup census
934
935 Int ms_time, ms_time_since_prev;
nethercotec9f36922004-02-14 16:40:02 +0000936 Census* census;
937
nethercotec9f36922004-02-14 16:40:02 +0000938 // Only do a census if it's time
939 ms_time = VG_(read_millisecond_timer)();
940 ms_time_since_prev = ms_time - ms_prev_census;
941 if (ms_time < ms_next_census) {
942 n_fake_censi++;
nethercotec9f36922004-02-14 16:40:02 +0000943 return;
944 }
945 n_real_censi++;
946
947 census = & censi[curr_census];
948
949 census->ms_time = ms_time;
950
951 // Heap: snapshot the K most significant XTrees -------------------
952 if (clo_heap) {
njn6f1f76d2005-05-24 21:28:54 +0000953 Int i, K;
nethercotec9f36922004-02-14 16:40:02 +0000954 K = ( alloc_xpt->n_children < MAX_SNAPSHOTS
955 ? alloc_xpt->n_children
956 : MAX_SNAPSHOTS); // max out
957
nethercote43a15ce2004-08-30 19:15:12 +0000958 // Update .approx_ST field (approximatively) for all top-XPts.
nethercotec9f36922004-02-14 16:40:02 +0000959 // We *do not* do it for any non-top-XPTs.
960 for (i = 0; i < alloc_xpt->n_children; i++) {
961 XPt* top_XPt = alloc_xpt->children[i];
nethercote43a15ce2004-08-30 19:15:12 +0000962 top_XPt->approx_ST += top_XPt->curr_space * ms_time_since_prev;
nethercotec9f36922004-02-14 16:40:02 +0000963 }
nethercote43a15ce2004-08-30 19:15:12 +0000964 // Sort top-XPts by approx_ST field.
nethercotec9f36922004-02-14 16:40:02 +0000965 VG_(ssort)(alloc_xpt->children, alloc_xpt->n_children, sizeof(XPt*),
nethercote43a15ce2004-08-30 19:15:12 +0000966 XPt_cmp_approx_ST);
nethercotec9f36922004-02-14 16:40:02 +0000967
nethercotec9f36922004-02-14 16:40:02 +0000968 // For each significant top-level XPt, record space info about its
969 // entire XTree, in a single census entry.
970 // Nb: the xtree_size count/snapshot buffer allocation, and the actual
971 // snapshot, take similar amounts of time (measured with the
nethercote43a15ce2004-08-30 19:15:12 +0000972 // millisecond counter).
nethercotec9f36922004-02-14 16:40:02 +0000973 for (i = 0; i < K; i++) {
974 UInt xtree_size, xtree_size2;
nethercote43a15ce2004-08-30 19:15:12 +0000975// VG_(printf)("%7u ", alloc_xpt->children[i]->approx_ST);
976 // Count how many XPts are in the XTree
nethercotec9f36922004-02-14 16:40:02 +0000977 xtree_size = get_xtree_size( alloc_xpt->children[i], 0 );
nethercote43a15ce2004-08-30 19:15:12 +0000978
979 // If no XPts counted (ie. alloc_xpt.curr_space==0 or XTree
980 // insignificant) then don't take any more snapshots.
981 if (0 == xtree_size) break;
982
983 // Make array of the appropriate size (+1 for zero termination,
984 // which calloc() does for us).
nethercotec9f36922004-02-14 16:40:02 +0000985 census->xtree_snapshots[i] =
986 VG_(calloc)(xtree_size+1, sizeof(XPtSnapshot));
jseward612e8362004-03-07 10:23:20 +0000987 if (0 && VG_(clo_verbosity) > 1)
nethercotec9f36922004-02-14 16:40:02 +0000988 VG_(printf)("calloc: %d (%d B)\n", xtree_size+1,
989 (xtree_size+1) * sizeof(XPtSnapshot));
990
991 // Take space-snapshot: copy 'curr_space' for every XPt in the
992 // XTree into the snapshot array, along with pointers to the XPts.
993 // (Except for ones with curr_space==0, which wouldn't contribute
nethercote43a15ce2004-08-30 19:15:12 +0000994 // to the final exact_ST_dbld calculation anyway; excluding them
nethercotec9f36922004-02-14 16:40:02 +0000995 // saves a lot of memory and up to 40% time with big --depth valus.
nethercotec9f36922004-02-14 16:40:02 +0000996 xtree_size2 = do_space_snapshot(alloc_xpt->children[i],
997 census->xtree_snapshots[i], 0);
njnca82cc02004-11-22 17:18:48 +0000998 tl_assert(xtree_size == xtree_size2);
nethercotec9f36922004-02-14 16:40:02 +0000999 }
1000// VG_(printf)("\n\n");
1001 // Zero-terminate 'xtree_snapshot' array
1002 census->xtree_snapshots[i] = NULL;
1003
nethercotec9f36922004-02-14 16:40:02 +00001004 //VG_(printf)("printed %d censi\n", K);
1005
1006 // Lump the rest into a single "others" entry.
1007 census->others_space = 0;
1008 for (i = K; i < alloc_xpt->n_children; i++) {
1009 census->others_space += alloc_xpt->children[i]->curr_space;
1010 }
1011 }
1012
1013 // Heap admin -------------------------------------------------------
1014 if (clo_heap_admin > 0)
1015 census->heap_admin_space = clo_heap_admin * n_heap_blocks;
1016
1017 // Stack(s) ---------------------------------------------------------
1018 if (clo_stacks) {
njn1d0cb0d2005-08-15 01:52:02 +00001019 ThreadId tid;
1020 Addr stack_min, stack_max;
thughes4ad52d02004-06-27 17:37:21 +00001021 census->stacks_space = sigstacks_space;
njn1d0cb0d2005-08-15 01:52:02 +00001022 VG_(thread_stack_reset_iter)();
1023 while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
1024 census->stacks_space += (stack_max - stack_min);
1025 }
nethercotec9f36922004-02-14 16:40:02 +00001026 }
1027
1028 // Finish, update interval if necessary -----------------------------
1029 curr_census++;
1030 census = NULL; // don't use again now that curr_census changed
1031
1032 // Halve the entries, if our census table is full
1033 if (MAX_N_CENSI == curr_census) {
1034 halve_censi();
1035 }
1036
1037 // Take time for next census from now, rather than when this census
1038 // should have happened. Because, if there's a big gap due to a kernel
1039 // operation, there's no point doing catch-up censi every BB for a while
1040 // -- that would just give N censi at almost the same time.
1041 if (VG_(clo_verbosity) > 1) {
1042 VG_(message)(Vg_UserMsg, "census: %d ms (took %d ms)", ms_time,
1043 VG_(read_millisecond_timer)() - ms_time );
1044 }
1045 ms_prev_census = ms_time;
1046 ms_next_census = ms_time + ms_interval;
1047 //ms_next_census += ms_interval;
1048
1049 //VG_(printf)("Next: %d ms\n", ms_next_census);
nethercotec9f36922004-02-14 16:40:02 +00001050}
1051
1052/*------------------------------------------------------------*/
1053/*--- Tracked events ---*/
1054/*------------------------------------------------------------*/
1055
nethercote8b5f40c2004-11-02 13:29:50 +00001056static void new_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001057{
1058 sigstacks_space += len;
1059}
1060
nethercote8b5f40c2004-11-02 13:29:50 +00001061static void die_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001062{
njnca82cc02004-11-22 17:18:48 +00001063 tl_assert(sigstacks_space >= len);
nethercotec9f36922004-02-14 16:40:02 +00001064 sigstacks_space -= len;
1065}
1066
1067/*------------------------------------------------------------*/
1068/*--- Client Requests ---*/
1069/*------------------------------------------------------------*/
1070
njn51d827b2005-05-09 01:02:08 +00001071static Bool ms_handle_client_request ( ThreadId tid, UWord* argv, UWord* ret )
nethercotec9f36922004-02-14 16:40:02 +00001072{
1073 switch (argv[0]) {
1074 case VG_USERREQ__MALLOCLIKE_BLOCK: {
nethercote57e36b32004-07-10 14:56:28 +00001075 void* res;
nethercotec9f36922004-02-14 16:40:02 +00001076 void* p = (void*)argv[1];
nethercoted1b64b22004-11-04 18:22:28 +00001077 SizeT sizeB = argv[2];
nethercotec9f36922004-02-14 16:40:02 +00001078 *ret = 0;
njn57735902004-11-25 18:04:54 +00001079 res = new_block( tid, p, sizeB, /*align--ignored*/0, /*is_zeroed*/False );
njnca82cc02004-11-22 17:18:48 +00001080 tl_assert(res == p);
nethercotec9f36922004-02-14 16:40:02 +00001081 return True;
1082 }
1083 case VG_USERREQ__FREELIKE_BLOCK: {
1084 void* p = (void*)argv[1];
1085 *ret = 0;
1086 die_block( p, /*custom_free*/True );
1087 return True;
1088 }
1089 default:
1090 *ret = 0;
1091 return False;
1092 }
1093}
1094
1095/*------------------------------------------------------------*/
nethercotec9f36922004-02-14 16:40:02 +00001096/*--- Instrumentation ---*/
1097/*------------------------------------------------------------*/
1098
sewardj4ba057c2005-10-18 12:04:18 +00001099static
sewardj0b9d74a2006-12-24 02:24:11 +00001100IRSB* ms_instrument ( VgCallbackClosure* closure,
1101 IRSB* bb_in,
sewardj461df9c2006-01-17 02:06:39 +00001102 VexGuestLayout* layout,
1103 VexGuestExtents* vge,
sewardj4ba057c2005-10-18 12:04:18 +00001104 IRType gWordTy, IRType hWordTy )
nethercotec9f36922004-02-14 16:40:02 +00001105{
sewardjd54babf2005-03-21 00:55:49 +00001106 /* XXX Will Massif work when gWordTy != hWordTy ? */
njnee8a5862004-11-22 21:08:46 +00001107 return bb_in;
nethercotec9f36922004-02-14 16:40:02 +00001108}
1109
1110/*------------------------------------------------------------*/
1111/*--- Spacetime recomputation ---*/
1112/*------------------------------------------------------------*/
1113
nethercote43a15ce2004-08-30 19:15:12 +00001114// Although we've been calculating space-time along the way, because the
1115// earlier calculations were done at a finer timescale, the .approx_ST field
nethercotec9f36922004-02-14 16:40:02 +00001116// might not agree with what hp2ps sees, because we've thrown away some of
1117// the information. So recompute it at the scale that hp2ps sees, so we can
1118// confidently determine which contexts hp2ps will choose for displaying as
1119// distinct bands. This recomputation only happens to the significant ones
1120// that get printed in the .hp file, so it's cheap.
1121//
nethercote43a15ce2004-08-30 19:15:12 +00001122// The approx_ST calculation:
nethercotec9f36922004-02-14 16:40:02 +00001123// ( a[0]*d(0,1) + a[1]*(d(0,1) + d(1,2)) + ... + a[N-1]*d(N-2,N-1) ) / 2
1124// where
1125// a[N] is the space at census N
1126// d(A,B) is the time interval between censi A and B
1127// and
1128// d(A,B) + d(B,C) == d(A,C)
1129//
1130// Key point: we can calculate the area for a census without knowing the
1131// previous or subsequent censi's space; because any over/underestimates
1132// for this census will be reversed in the next, balancing out. This is
1133// important, as getting the previous/next census entry for a particular
1134// AP is a pain with this data structure, but getting the prev/next
1135// census time is easy.
1136//
nethercote43a15ce2004-08-30 19:15:12 +00001137// Each heap calculation gets added to its context's exact_ST_dbld field.
nethercotec9f36922004-02-14 16:40:02 +00001138// The ULong* values are all running totals, hence the use of "+=" everywhere.
1139
1140// This does the calculations for a single census.
nethercote43a15ce2004-08-30 19:15:12 +00001141static void calc_exact_ST_dbld2(Census* census, UInt d_t1_t2,
nethercotec9f36922004-02-14 16:40:02 +00001142 ULong* twice_heap_ST,
1143 ULong* twice_heap_admin_ST,
1144 ULong* twice_stack_ST)
1145{
1146 UInt i, j;
1147 XPtSnapshot* xpt_snapshot;
1148
1149 // Heap --------------------------------------------------------
1150 if (clo_heap) {
1151 for (i = 0; NULL != census->xtree_snapshots[i]; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001152 // Compute total heap exact_ST_dbld for the entire XTree using only
1153 // the top-XPt (the first XPt in xtree_snapshot).
nethercotec9f36922004-02-14 16:40:02 +00001154 *twice_heap_ST += d_t1_t2 * census->xtree_snapshots[i][0].space;
1155
nethercote43a15ce2004-08-30 19:15:12 +00001156 // Increment exact_ST_dbld for every XPt in xtree_snapshot (inc.
1157 // top one)
nethercotec9f36922004-02-14 16:40:02 +00001158 for (j = 0; NULL != census->xtree_snapshots[i][j].xpt; j++) {
1159 xpt_snapshot = & census->xtree_snapshots[i][j];
nethercote43a15ce2004-08-30 19:15:12 +00001160 xpt_snapshot->xpt->exact_ST_dbld += d_t1_t2 * xpt_snapshot->space;
nethercotec9f36922004-02-14 16:40:02 +00001161 }
1162 }
1163 *twice_heap_ST += d_t1_t2 * census->others_space;
1164 }
1165
1166 // Heap admin --------------------------------------------------
1167 if (clo_heap_admin > 0)
1168 *twice_heap_admin_ST += d_t1_t2 * census->heap_admin_space;
1169
1170 // Stack(s) ----------------------------------------------------
1171 if (clo_stacks)
1172 *twice_stack_ST += d_t1_t2 * census->stacks_space;
1173}
1174
1175// This does the calculations for all censi.
nethercote43a15ce2004-08-30 19:15:12 +00001176static void calc_exact_ST_dbld(ULong* heap2, ULong* heap_admin2, ULong* stack2)
nethercotec9f36922004-02-14 16:40:02 +00001177{
1178 UInt i, N = curr_census;
1179
nethercotec9f36922004-02-14 16:40:02 +00001180 *heap2 = 0;
1181 *heap_admin2 = 0;
1182 *stack2 = 0;
1183
1184 if (N <= 1)
1185 return;
1186
nethercote43a15ce2004-08-30 19:15:12 +00001187 calc_exact_ST_dbld2( &censi[0], censi[1].ms_time - censi[0].ms_time,
1188 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001189
1190 for (i = 1; i <= N-2; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001191 calc_exact_ST_dbld2( & censi[i], censi[i+1].ms_time - censi[i-1].ms_time,
1192 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001193 }
1194
nethercote43a15ce2004-08-30 19:15:12 +00001195 calc_exact_ST_dbld2( & censi[N-1], censi[N-1].ms_time - censi[N-2].ms_time,
1196 heap2, heap_admin2, stack2 );
nethercotec9f36922004-02-14 16:40:02 +00001197 // Now get rid of the halves. May lose a 0.5 on each, doesn't matter.
1198 *heap2 /= 2;
1199 *heap_admin2 /= 2;
1200 *stack2 /= 2;
nethercotec9f36922004-02-14 16:40:02 +00001201}
1202
1203/*------------------------------------------------------------*/
1204/*--- Writing the graph file ---*/
1205/*------------------------------------------------------------*/
1206
1207static Char* make_filename(Char* dir, Char* suffix)
1208{
1209 Char* filename;
1210
1211 /* Block is big enough for dir name + massif.<pid>.<suffix> */
1212 filename = VG_(malloc)((VG_(strlen)(dir) + 32)*sizeof(Char));
1213 VG_(sprintf)(filename, "%s/massif.%d%s", dir, VG_(getpid)(), suffix);
1214
1215 return filename;
1216}
1217
1218// Make string acceptable to hp2ps (sigh): remove spaces, escape parentheses.
1219static Char* clean_fnname(Char *d, Char* s)
1220{
1221 Char* dorig = d;
1222 while (*s) {
1223 if (' ' == *s) { *d = '%'; }
1224 else if ('(' == *s) { *d++ = '\\'; *d = '('; }
1225 else if (')' == *s) { *d++ = '\\'; *d = ')'; }
1226 else { *d = *s; };
1227 s++;
1228 d++;
1229 }
1230 *d = '\0';
1231 return dorig;
1232}
1233
1234static void file_err ( Char* file )
1235{
njn02bc4b82005-05-15 17:28:26 +00001236 VG_(message)(Vg_UserMsg, "error: can't open output file '%s'", file );
nethercotec9f36922004-02-14 16:40:02 +00001237 VG_(message)(Vg_UserMsg, " ... so profile results will be missing.");
1238}
1239
1240/* Format, by example:
1241
1242 JOB "a.out -p"
1243 DATE "Fri Apr 17 11:43:45 1992"
1244 SAMPLE_UNIT "seconds"
1245 VALUE_UNIT "bytes"
1246 BEGIN_SAMPLE 0.00
1247 SYSTEM 24
1248 END_SAMPLE 0.00
1249 BEGIN_SAMPLE 1.00
1250 elim 180
1251 insert 24
1252 intersect 12
1253 disin 60
1254 main 12
1255 reduce 20
1256 SYSTEM 12
1257 END_SAMPLE 1.00
1258 MARK 1.50
1259 MARK 1.75
1260 MARK 1.80
1261 BEGIN_SAMPLE 2.00
1262 elim 192
1263 insert 24
1264 intersect 12
1265 disin 84
1266 main 12
1267 SYSTEM 24
1268 END_SAMPLE 2.00
1269 BEGIN_SAMPLE 2.82
1270 END_SAMPLE 2.82
1271 */
1272static void write_hp_file(void)
1273{
sewardj92645592005-07-23 09:18:34 +00001274 Int i, j;
1275 Int fd, res;
1276 SysRes sres;
1277 Char *hp_file, *ps_file, *aux_file;
1278 Char* cmdfmt;
1279 Char* cmdbuf;
1280 Int cmdlen;
nethercotec9f36922004-02-14 16:40:02 +00001281
nethercotec9f36922004-02-14 16:40:02 +00001282 // Open file
1283 hp_file = make_filename( base_dir, ".hp" );
1284 ps_file = make_filename( base_dir, ".ps" );
1285 aux_file = make_filename( base_dir, ".aux" );
sewardj92645592005-07-23 09:18:34 +00001286 sres = VG_(open)(hp_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1287 VKI_S_IRUSR|VKI_S_IWUSR);
1288 if (sres.isError) {
nethercotec9f36922004-02-14 16:40:02 +00001289 file_err( hp_file );
nethercotec9f36922004-02-14 16:40:02 +00001290 return;
sewardj92645592005-07-23 09:18:34 +00001291 } else {
sewardje8089302006-10-17 02:15:17 +00001292 fd = sres.res;
nethercotec9f36922004-02-14 16:40:02 +00001293 }
1294
1295 // File header, including command line
1296 SPRINTF(buf, "JOB \"");
sewardj45f4e7c2005-09-27 19:20:21 +00001297 if (VG_(args_the_exename)) {
1298 SPRINTF(buf, "%s", VG_(args_the_exename));
1299 }
sewardj14c7cc52007-02-25 15:08:24 +00001300 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1301 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1302 if (arg)
1303 SPRINTF(buf, " %s", arg);
njnd111d102005-09-13 00:46:27 +00001304 }
nethercotec9f36922004-02-14 16:40:02 +00001305 SPRINTF(buf, /*" (%d ms/sample)\"\n"*/ "\"\n"
1306 "DATE \"\"\n"
1307 "SAMPLE_UNIT \"ms\"\n"
1308 "VALUE_UNIT \"bytes\"\n", ms_interval);
1309
1310 // Censi
1311 for (i = 0; i < curr_census; i++) {
1312 Census* census = & censi[i];
1313
1314 // Census start
1315 SPRINTF(buf, "MARK %d.0\n"
1316 "BEGIN_SAMPLE %d.0\n",
1317 census->ms_time, census->ms_time);
1318
1319 // Heap -----------------------------------------------------------
1320 if (clo_heap) {
1321 // Print all the significant XPts from that census
1322 for (j = 0; NULL != census->xtree_snapshots[j]; j++) {
1323 // Grab the jth top-XPt
1324 XTreeSnapshot xtree_snapshot = & census->xtree_snapshots[j][0];
njnd01fef72005-03-25 23:35:48 +00001325 if ( ! VG_(get_fnname)(xtree_snapshot->xpt->ip, buf2, 16)) {
nethercotec9f36922004-02-14 16:40:02 +00001326 VG_(sprintf)(buf2, "???");
1327 }
njnd01fef72005-03-25 23:35:48 +00001328 SPRINTF(buf, "x%x:%s %d\n", xtree_snapshot->xpt->ip,
nethercotec9f36922004-02-14 16:40:02 +00001329 clean_fnname(buf3, buf2), xtree_snapshot->space);
1330 }
1331
1332 // Remaining heap block alloc points, combined
1333 if (census->others_space > 0)
1334 SPRINTF(buf, "other %d\n", census->others_space);
1335 }
1336
1337 // Heap admin -----------------------------------------------------
1338 if (clo_heap_admin > 0 && census->heap_admin_space)
1339 SPRINTF(buf, "heap-admin %d\n", census->heap_admin_space);
1340
1341 // Stack(s) -------------------------------------------------------
1342 if (clo_stacks)
1343 SPRINTF(buf, "stack(s) %d\n", census->stacks_space);
1344
1345 // Census end
1346 SPRINTF(buf, "END_SAMPLE %d.0\n", census->ms_time);
1347 }
1348
1349 // Close file
njnca82cc02004-11-22 17:18:48 +00001350 tl_assert(fd >= 0);
nethercotec9f36922004-02-14 16:40:02 +00001351 VG_(close)(fd);
1352
1353 // Attempt to convert file using hp2ps
1354 cmdfmt = "%s/hp2ps -c -t1 %s";
1355 cmdlen = VG_(strlen)(VG_(libdir)) + VG_(strlen)(hp_file)
1356 + VG_(strlen)(cmdfmt);
1357 cmdbuf = VG_(malloc)( sizeof(Char) * cmdlen );
1358 VG_(sprintf)(cmdbuf, cmdfmt, VG_(libdir), hp_file);
1359 res = VG_(system)(cmdbuf);
1360 VG_(free)(cmdbuf);
1361 if (res != 0) {
1362 VG_(message)(Vg_UserMsg,
1363 "Conversion to PostScript failed. Try converting manually.");
1364 } else {
1365 // remove the .hp and .aux file
1366 VG_(unlink)(hp_file);
1367 VG_(unlink)(aux_file);
1368 }
1369
1370 VG_(free)(hp_file);
1371 VG_(free)(ps_file);
1372 VG_(free)(aux_file);
nethercotec9f36922004-02-14 16:40:02 +00001373}
1374
1375/*------------------------------------------------------------*/
1376/*--- Writing the XPt text/HTML file ---*/
1377/*------------------------------------------------------------*/
1378
1379static void percentify(Int n, Int pow, Int field_width, char xbuf[])
1380{
1381 int i, len, space;
1382
1383 VG_(sprintf)(xbuf, "%d.%d%%", n / pow, n % pow);
1384 len = VG_(strlen)(xbuf);
1385 space = field_width - len;
1386 if (space < 0) space = 0; /* Allow for v. small field_width */
1387 i = len;
1388
1389 /* Right justify in field */
1390 for ( ; i >= 0; i--) xbuf[i + space] = xbuf[i];
1391 for (i = 0; i < space; i++) xbuf[i] = ' ';
1392}
1393
1394// Nb: uses a static buffer, each call trashes the last string returned.
1395static Char* make_perc(ULong spacetime, ULong total_spacetime)
1396{
1397 static Char mbuf[32];
1398
1399 UInt p = 10;
njnca82cc02004-11-22 17:18:48 +00001400 tl_assert(0 != total_spacetime);
nethercotec9f36922004-02-14 16:40:02 +00001401 percentify(spacetime * 100 * p / total_spacetime, p, 5, mbuf);
1402 return mbuf;
1403}
1404
njnd01fef72005-03-25 23:35:48 +00001405// Nb: passed in XPt is a lower-level XPt; IPs are grabbed from
nethercotec9f36922004-02-14 16:40:02 +00001406// bottom-to-top of XCon, and then printed in the reverse order.
1407static UInt pp_XCon(Int fd, XPt* xpt)
1408{
njnd01fef72005-03-25 23:35:48 +00001409 Addr rev_ips[clo_depth+1];
nethercotec9f36922004-02-14 16:40:02 +00001410 Int i = 0;
1411 Int n = 0;
1412 Bool is_HTML = ( XHTML == clo_format );
1413 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1414 Char* maybe_indent = ( is_HTML ? "&nbsp;&nbsp;" : "" );
1415
njnca82cc02004-11-22 17:18:48 +00001416 tl_assert(NULL != xpt);
nethercotec9f36922004-02-14 16:40:02 +00001417
1418 while (True) {
njnd01fef72005-03-25 23:35:48 +00001419 rev_ips[i] = xpt->ip;
nethercotec9f36922004-02-14 16:40:02 +00001420 n++;
1421 if (alloc_xpt == xpt->parent) break;
1422 i++;
1423 xpt = xpt->parent;
1424 }
1425
1426 for (i = n-1; i >= 0; i--) {
1427 // -1 means point to calling line
njnd01fef72005-03-25 23:35:48 +00001428 VG_(describe_IP)(rev_ips[i]-1, buf2, BUF_LEN);
nethercotec9f36922004-02-14 16:40:02 +00001429 SPRINTF(buf, " %s%s%s\n", maybe_indent, buf2, maybe_br);
1430 }
1431
1432 return n;
1433}
1434
1435// Important point: for HTML, each XPt must be identified uniquely for the
njnd01fef72005-03-25 23:35:48 +00001436// HTML links to all match up correctly. Using xpt->ip is not
nethercotec9f36922004-02-14 16:40:02 +00001437// sufficient, because function pointers mean that you can call more than
1438// one other function from a single code location. So instead we use the
1439// address of the xpt struct itself, which is guaranteed to be unique.
1440
1441static void pp_all_XPts2(Int fd, Queue* q, ULong heap_spacetime,
1442 ULong total_spacetime)
1443{
1444 UInt i;
1445 XPt *xpt, *child;
1446 UInt L = 0;
1447 UInt c1 = 1;
1448 UInt c2 = 0;
1449 ULong sum = 0;
1450 UInt n;
njnd01fef72005-03-25 23:35:48 +00001451 Char *ip_desc, *perc;
nethercotec9f36922004-02-14 16:40:02 +00001452 Bool is_HTML = ( XHTML == clo_format );
1453 Char* maybe_br = ( is_HTML ? "<br>" : "" );
1454 Char* maybe_p = ( is_HTML ? "<p>" : "" );
1455 Char* maybe_ul = ( is_HTML ? "<ul>" : "" );
1456 Char* maybe_li = ( is_HTML ? "<li>" : "" );
1457 Char* maybe_fli = ( is_HTML ? "</li>" : "" );
1458 Char* maybe_ful = ( is_HTML ? "</ul>" : "" );
1459 Char* end_hr = ( is_HTML ? "<hr>" :
1460 "=================================" );
1461 Char* depth = ( is_HTML ? "<code>--depth</code>" : "--depth" );
1462
nethercote43a15ce2004-08-30 19:15:12 +00001463 if (total_spacetime == 0) {
1464 SPRINTF(buf, "(No heap memory allocated)\n");
1465 return;
1466 }
1467
1468
nethercotec9f36922004-02-14 16:40:02 +00001469 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1470
1471 while (NULL != (xpt = (XPt*)dequeue(q))) {
nethercote43a15ce2004-08-30 19:15:12 +00001472 // Check that non-top-level XPts have a zero .approx_ST field.
njnca82cc02004-11-22 17:18:48 +00001473 if (xpt->parent != alloc_xpt) tl_assert( 0 == xpt->approx_ST );
nethercotec9f36922004-02-14 16:40:02 +00001474
nethercote43a15ce2004-08-30 19:15:12 +00001475 // Check that the sum of all children .exact_ST_dbld fields equals
1476 // parent's (unless alloc_xpt, when it should == 0).
nethercotec9f36922004-02-14 16:40:02 +00001477 if (alloc_xpt == xpt) {
njnca82cc02004-11-22 17:18:48 +00001478 tl_assert(0 == xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001479 } else {
1480 sum = 0;
1481 for (i = 0; i < xpt->n_children; i++) {
nethercote43a15ce2004-08-30 19:15:12 +00001482 sum += xpt->children[i]->exact_ST_dbld;
nethercotec9f36922004-02-14 16:40:02 +00001483 }
njnca82cc02004-11-22 17:18:48 +00001484 //tl_assert(sum == xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001485 // It's possible that not all the children were included in the
nethercote43a15ce2004-08-30 19:15:12 +00001486 // exact_ST_dbld calculations. Hopefully almost all of them were, and
nethercotec9f36922004-02-14 16:40:02 +00001487 // all the important ones.
njnca82cc02004-11-22 17:18:48 +00001488// tl_assert(sum <= xpt->exact_ST_dbld);
1489// tl_assert(sum * 1.05 > xpt->exact_ST_dbld );
nethercote43a15ce2004-08-30 19:15:12 +00001490// if (sum != xpt->exact_ST_dbld) {
njn68e46592005-08-26 19:42:27 +00001491// VG_(printf)("%lld, %lld\n", sum, xpt->exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001492// }
1493 }
1494
1495 if (xpt == alloc_xpt) {
1496 SPRINTF(buf, "Heap allocation functions accounted for "
1497 "%s of measured spacetime%s\n",
1498 make_perc(heap_spacetime, total_spacetime), maybe_br);
1499 } else {
nethercote43a15ce2004-08-30 19:15:12 +00001500 // Remember: exact_ST_dbld is space.time *doubled*
1501 perc = make_perc(xpt->exact_ST_dbld / 2, total_spacetime);
nethercotec9f36922004-02-14 16:40:02 +00001502 if (is_HTML) {
1503 SPRINTF(buf, "<a name=\"b%x\"></a>"
1504 "Context accounted for "
1505 "<a href=\"#a%x\">%s</a> of measured spacetime<br>\n",
1506 xpt, xpt, perc);
1507 } else {
1508 SPRINTF(buf, "Context accounted for %s of measured spacetime\n",
1509 perc);
1510 }
1511 n = pp_XCon(fd, xpt);
njnca82cc02004-11-22 17:18:48 +00001512 tl_assert(n == L);
nethercotec9f36922004-02-14 16:40:02 +00001513 }
1514
nethercote43a15ce2004-08-30 19:15:12 +00001515 // Sort children by exact_ST_dbld
nethercotec9f36922004-02-14 16:40:02 +00001516 VG_(ssort)(xpt->children, xpt->n_children, sizeof(XPt*),
nethercote43a15ce2004-08-30 19:15:12 +00001517 XPt_cmp_exact_ST_dbld);
nethercotec9f36922004-02-14 16:40:02 +00001518
1519 SPRINTF(buf, "%s\nCalled from:%s\n", maybe_p, maybe_ul);
1520 for (i = 0; i < xpt->n_children; i++) {
1521 child = xpt->children[i];
1522
1523 // Stop when <1% of total spacetime
nethercote43a15ce2004-08-30 19:15:12 +00001524 if (child->exact_ST_dbld * 1000 / (total_spacetime * 2) < 5) {
nethercotec9f36922004-02-14 16:40:02 +00001525 UInt n_insig = xpt->n_children - i;
1526 Char* s = ( n_insig == 1 ? "" : "s" );
1527 Char* and = ( 0 == i ? "" : "and " );
1528 Char* other = ( 0 == i ? "" : "other " );
1529 SPRINTF(buf, " %s%s%d %sinsignificant place%s%s\n\n",
1530 maybe_li, and, n_insig, other, s, maybe_fli);
1531 break;
1532 }
1533
nethercote43a15ce2004-08-30 19:15:12 +00001534 // Remember: exact_ST_dbld is space.time *doubled*
njnd01fef72005-03-25 23:35:48 +00001535 perc = make_perc(child->exact_ST_dbld / 2, total_spacetime);
1536 ip_desc = VG_(describe_IP)(child->ip-1, buf2, BUF_LEN);
nethercotec9f36922004-02-14 16:40:02 +00001537 if (is_HTML) {
1538 SPRINTF(buf, "<li><a name=\"a%x\"></a>", child );
1539
1540 if (child->n_children > 0) {
1541 SPRINTF(buf, "<a href=\"#b%x\">%s</a>", child, perc);
1542 } else {
1543 SPRINTF(buf, "%s", perc);
1544 }
njnd01fef72005-03-25 23:35:48 +00001545 SPRINTF(buf, ": %s\n", ip_desc);
nethercotec9f36922004-02-14 16:40:02 +00001546 } else {
njnd01fef72005-03-25 23:35:48 +00001547 SPRINTF(buf, " %6s: %s\n\n", perc, ip_desc);
nethercotec9f36922004-02-14 16:40:02 +00001548 }
1549
1550 if (child->n_children > 0) {
1551 enqueue(q, (void*)child);
1552 c2++;
1553 }
1554 }
1555 SPRINTF(buf, "%s%s", maybe_ful, maybe_p);
1556 c1--;
1557
1558 // Putting markers between levels of the structure:
1559 // c1 tracks how many to go on this level, c2 tracks how many we've
1560 // queued up for the next level while finishing off this level.
1561 // When c1 gets to zero, we've changed levels, so print a marker,
1562 // move c2 into c1, and zero c2.
1563 if (0 == c1) {
1564 L++;
1565 c1 = c2;
1566 c2 = 0;
1567 if (! is_empty_queue(q) ) { // avoid empty one at end
1568 SPRINTF(buf, "== %d ===========================%s\n", L, maybe_br);
1569 }
1570 } else {
1571 SPRINTF(buf, "---------------------------------%s\n", maybe_br);
1572 }
1573 }
1574 SPRINTF(buf, "%s\n\nEnd of information. Rerun with a bigger "
1575 "%s value for more.\n", end_hr, depth);
1576}
1577
1578static void pp_all_XPts(Int fd, XPt* xpt, ULong heap_spacetime,
1579 ULong total_spacetime)
1580{
1581 Queue* q = construct_queue(100);
nethercote43a15ce2004-08-30 19:15:12 +00001582
nethercotec9f36922004-02-14 16:40:02 +00001583 enqueue(q, xpt);
1584 pp_all_XPts2(fd, q, heap_spacetime, total_spacetime);
1585 destruct_queue(q);
1586}
1587
1588static void
1589write_text_file(ULong total_ST, ULong heap_ST)
1590{
sewardj92645592005-07-23 09:18:34 +00001591 SysRes sres;
1592 Int fd, i;
1593 Char* text_file;
1594 Char* maybe_p = ( XHTML == clo_format ? "<p>" : "" );
nethercotec9f36922004-02-14 16:40:02 +00001595
nethercotec9f36922004-02-14 16:40:02 +00001596 // Open file
1597 text_file = make_filename( base_dir,
1598 ( XText == clo_format ? ".txt" : ".html" ) );
1599
sewardj92645592005-07-23 09:18:34 +00001600 sres = VG_(open)(text_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
nethercotec9f36922004-02-14 16:40:02 +00001601 VKI_S_IRUSR|VKI_S_IWUSR);
sewardj92645592005-07-23 09:18:34 +00001602 if (sres.isError) {
nethercotec9f36922004-02-14 16:40:02 +00001603 file_err( text_file );
nethercotec9f36922004-02-14 16:40:02 +00001604 return;
sewardj92645592005-07-23 09:18:34 +00001605 } else {
sewardje8089302006-10-17 02:15:17 +00001606 fd = sres.res;
nethercotec9f36922004-02-14 16:40:02 +00001607 }
1608
1609 // Header
1610 if (XHTML == clo_format) {
1611 SPRINTF(buf, "<html>\n"
1612 "<head>\n"
1613 "<title>%s</title>\n"
1614 "</head>\n"
1615 "<body>\n",
1616 text_file);
1617 }
1618
1619 // Command line
sewardj45f4e7c2005-09-27 19:20:21 +00001620 SPRINTF(buf, "Command:");
1621 if (VG_(args_the_exename)) {
1622 SPRINTF(buf, " %s", VG_(args_the_exename));
1623 }
sewardj14c7cc52007-02-25 15:08:24 +00001624 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1625 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1626 if (arg)
1627 SPRINTF(buf, " %s", arg);
njnd111d102005-09-13 00:46:27 +00001628 }
nethercotec9f36922004-02-14 16:40:02 +00001629 SPRINTF(buf, "\n%s\n", maybe_p);
1630
1631 if (clo_heap)
1632 pp_all_XPts(fd, alloc_xpt, heap_ST, total_ST);
1633
njnca82cc02004-11-22 17:18:48 +00001634 tl_assert(fd >= 0);
nethercotec9f36922004-02-14 16:40:02 +00001635 VG_(close)(fd);
nethercotec9f36922004-02-14 16:40:02 +00001636}
1637
1638/*------------------------------------------------------------*/
1639/*--- Finalisation ---*/
1640/*------------------------------------------------------------*/
1641
1642static void
1643print_summary(ULong total_ST, ULong heap_ST, ULong heap_admin_ST,
1644 ULong stack_ST)
1645{
njn99cb9e32005-09-25 17:59:16 +00001646 VG_(message)(Vg_UserMsg, "Total spacetime: %,llu ms.B", total_ST);
nethercotec9f36922004-02-14 16:40:02 +00001647
1648 // Heap --------------------------------------------------------------
1649 if (clo_heap)
1650 VG_(message)(Vg_UserMsg, "heap: %s",
nethercote43a15ce2004-08-30 19:15:12 +00001651 ( 0 == total_ST ? (Char*)"(n/a)"
1652 : make_perc(heap_ST, total_ST) ) );
nethercotec9f36922004-02-14 16:40:02 +00001653
1654 // Heap admin --------------------------------------------------------
1655 if (clo_heap_admin)
1656 VG_(message)(Vg_UserMsg, "heap admin: %s",
nethercote43a15ce2004-08-30 19:15:12 +00001657 ( 0 == total_ST ? (Char*)"(n/a)"
1658 : make_perc(heap_admin_ST, total_ST) ) );
nethercotec9f36922004-02-14 16:40:02 +00001659
njnca82cc02004-11-22 17:18:48 +00001660 tl_assert( VG_(HT_count_nodes)(malloc_list) == n_heap_blocks );
nethercotec9f36922004-02-14 16:40:02 +00001661
1662 // Stack(s) ----------------------------------------------------------
nethercote43a15ce2004-08-30 19:15:12 +00001663 if (clo_stacks) {
nethercotec9f36922004-02-14 16:40:02 +00001664 VG_(message)(Vg_UserMsg, "stack(s): %s",
sewardjb5f6f512005-03-10 23:59:00 +00001665 ( 0 == stack_ST ? (Char*)"0%"
1666 : make_perc(stack_ST, total_ST) ) );
nethercote43a15ce2004-08-30 19:15:12 +00001667 }
nethercotec9f36922004-02-14 16:40:02 +00001668
1669 if (VG_(clo_verbosity) > 1) {
njnca82cc02004-11-22 17:18:48 +00001670 tl_assert(n_xpts > 0); // always have alloc_xpt
nethercotec9f36922004-02-14 16:40:02 +00001671 VG_(message)(Vg_DebugMsg, " allocs: %u", n_allocs);
1672 VG_(message)(Vg_DebugMsg, "zeroallocs: %u (%d%%)", n_zero_allocs,
1673 n_zero_allocs * 100 / n_allocs );
1674 VG_(message)(Vg_DebugMsg, " frees: %u", n_frees);
1675 VG_(message)(Vg_DebugMsg, " XPts: %u (%d B)", n_xpts,
1676 n_xpts*sizeof(XPt));
1677 VG_(message)(Vg_DebugMsg, " bot-XPts: %u (%d%%)", n_bot_xpts,
1678 n_bot_xpts * 100 / n_xpts);
1679 VG_(message)(Vg_DebugMsg, " top-XPts: %u (%d%%)", alloc_xpt->n_children,
1680 alloc_xpt->n_children * 100 / n_xpts);
1681 VG_(message)(Vg_DebugMsg, "c-reallocs: %u", n_children_reallocs);
1682 VG_(message)(Vg_DebugMsg, "snap-frees: %u", n_snapshot_frees);
1683 VG_(message)(Vg_DebugMsg, "atmp censi: %u", n_attempted_censi);
1684 VG_(message)(Vg_DebugMsg, "fake censi: %u", n_fake_censi);
1685 VG_(message)(Vg_DebugMsg, "real censi: %u", n_real_censi);
1686 VG_(message)(Vg_DebugMsg, " halvings: %u", n_halvings);
1687 }
1688}
1689
njn51d827b2005-05-09 01:02:08 +00001690static void ms_fini(Int exit_status)
nethercotec9f36922004-02-14 16:40:02 +00001691{
1692 ULong total_ST = 0;
1693 ULong heap_ST = 0;
1694 ULong heap_admin_ST = 0;
1695 ULong stack_ST = 0;
1696
1697 // Do a final (empty) sample to show program's end
1698 hp_census();
1699
1700 // Redo spacetimes of significant contexts to match the .hp file.
nethercote43a15ce2004-08-30 19:15:12 +00001701 calc_exact_ST_dbld(&heap_ST, &heap_admin_ST, &stack_ST);
nethercotec9f36922004-02-14 16:40:02 +00001702 total_ST = heap_ST + heap_admin_ST + stack_ST;
1703 write_hp_file ( );
1704 write_text_file( total_ST, heap_ST );
1705 print_summary ( total_ST, heap_ST, heap_admin_ST, stack_ST );
1706}
1707
njn51d827b2005-05-09 01:02:08 +00001708/*------------------------------------------------------------*/
1709/*--- Initialisation ---*/
1710/*------------------------------------------------------------*/
1711
1712static void ms_post_clo_init(void)
1713{
1714 ms_interval = 1;
1715
1716 // Do an initial sample for t = 0
1717 hp_census();
1718}
1719
tom151a6392005-11-11 12:30:36 +00001720static void ms_pre_clo_init(void)
njn51d827b2005-05-09 01:02:08 +00001721{
1722 VG_(details_name) ("Massif");
1723 VG_(details_version) (NULL);
1724 VG_(details_description) ("a space profiler");
njn9a0cba42007-04-15 22:15:57 +00001725 VG_(details_copyright_author)(
1726 "Copyright (C) 2003-2007, Nicholas Nethercote");
njn51d827b2005-05-09 01:02:08 +00001727 VG_(details_bug_reports_to) (VG_BUGS_TO);
1728
1729 // Basic functions
1730 VG_(basic_tool_funcs) (ms_post_clo_init,
1731 ms_instrument,
1732 ms_fini);
1733
1734 // Needs
1735 VG_(needs_libc_freeres)();
1736 VG_(needs_command_line_options)(ms_process_cmd_line_option,
1737 ms_print_usage,
1738 ms_print_debug_usage);
1739 VG_(needs_client_requests) (ms_handle_client_request);
njnfc51f8d2005-06-21 03:20:17 +00001740 VG_(needs_malloc_replacement) (ms_malloc,
njn51d827b2005-05-09 01:02:08 +00001741 ms___builtin_new,
1742 ms___builtin_vec_new,
1743 ms_memalign,
1744 ms_calloc,
1745 ms_free,
1746 ms___builtin_delete,
1747 ms___builtin_vec_delete,
1748 ms_realloc,
1749 0 );
1750
1751 // Events to track
1752 VG_(track_new_mem_stack_signal)( new_mem_stack_signal );
1753 VG_(track_die_mem_stack_signal)( die_mem_stack_signal );
1754
njn51d827b2005-05-09 01:02:08 +00001755 // HP_Chunks
njnf69f9452005-07-03 17:53:11 +00001756 malloc_list = VG_(HT_construct)( 80021 ); // prime, big
njn51d827b2005-05-09 01:02:08 +00001757
1758 // Dummy node at top of the context structure.
1759 alloc_xpt = new_XPt(0, NULL, /*is_bottom*/False);
1760
njn57ca7ab2005-06-21 23:44:58 +00001761 tl_assert( VG_(getcwd)(base_dir, VKI_PATH_MAX) );
njn51d827b2005-05-09 01:02:08 +00001762}
1763
sewardj45f4e7c2005-09-27 19:20:21 +00001764VG_DETERMINE_INTERFACE_VERSION(ms_pre_clo_init)
nethercotec9f36922004-02-14 16:40:02 +00001765
1766/*--------------------------------------------------------------------*/
njnf1c5def2005-08-11 02:17:07 +00001767/*--- end ---*/
nethercotec9f36922004-02-14 16:40:02 +00001768/*--------------------------------------------------------------------*/
1769