blob: 953027126c0242da2c91c7e9286e34bf10154e55 [file] [log] [blame]
njn734b8052007-11-01 04:40:37 +00001//--------------------------------------------------------------------*/
2//--- Massif: a heap profiling tool. ms_main.c ---*/
3//--------------------------------------------------------------------*/
nethercotec9f36922004-02-14 16:40:02 +00004
5/*
nethercote996901a2004-08-03 13:29:09 +00006 This file is part of Massif, a Valgrind tool for profiling memory
nethercotec9f36922004-02-14 16:40:02 +00007 usage of programs.
8
sewardj9ebd6e02007-01-08 06:01:59 +00009 Copyright (C) 2003-2007 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000010 njn@valgrind.org
nethercotec9f36922004-02-14 16:40:02 +000011
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 02111-1307, USA.
26
27 The GNU General Public License is contained in the file COPYING.
28*/
29
njn734b8052007-11-01 04:40:37 +000030//---------------------------------------------------------------------------
31// XXX:
32//---------------------------------------------------------------------------
33// Todo -- critical for release:
34// - do a graph-drawing test
35// - write a good basic test that shows how the tool works, suitable for
36// documentation
37// - write documentation
38// - make --threshold and --peak-inaccuracy fractional
39// - do filename properly, clean up Valgrind-wide log file naming mess
40// - currently recording asked-for sizes of heap blocks, not actual sizes.
41// Should add the difference to heap-admin, and change heap-admin name to
42// something else (heap-extra?).
43//
44// Todo -- nice, but less critical:
45// - make file format more generic. Obstacles:
46// - unit prefixes are not generic
47// - preset column widths for stats are not generic
48// - preset column headers are not generic
49// - "Massif arguments:" line is not generic
50// - do snapshots on client requests
51// - (Michael Meeks): have an interactive way to request a dump
52// (callgrind_control-style)
53// - "profile now"
54// - "show me the extra allocations since the last snapshot"
55// - "start/stop logging" (eg. quickly skip boring bits)
56// - Add ability to draw multiple graphs, eg. heap-only, stack-only, total.
57// Give each graph a title. (try to do it generically!)
58// - allow truncation of long fnnames if the exact line number is
59// identified? [hmm, could make getting the name of alloc-fns more
60// difficult] [could dump full names to file, truncate in ms_print]
61// - make --show-below-main=no work
62//
63// Performance:
64// - To run the benchmarks:
65//
66// perl perf/vg_perf --tools=massif --reps=3 perf/{heap,tinycc} massif
67// time valgrind --tool=massif --depth=100 konqueror
68//
69// The other benchmarks don't do much allocation, and so give similar speeds
70// to Nulgrind.
71//
72// Timing results on 'nevermore' (njn's machine) as of r7013:
73//
74// heap 0.53s ma:12.4s (23.5x, -----)
75// tinycc 0.46s ma: 4.9s (10.7x, -----)
76// many-xpts 0.08s ma: 2.0s (25.0x, -----)
77// konqueror 29.6s real 0:21.0s user
78//
79// - get_XCon accounts for about 9% of konqueror startup time. Try
80// keeping XPt children sorted by 'ip' and use binary search in get_XCon.
81// Requires factoring out binary search code from various places into a
82// VG_(bsearch) function.
83//
84// Todo -- low priority:
85// - Consider 'instructions executed' as a time unit -- more regular than
86// ms, less artificial than B (bug #121629).
87// - In each XPt, record both bytes and the number of allocations, and
88// possibly the global number of allocations.
89// - (Artur Wisz) add a feature to Massif to ignore any heap blocks larger
90// than a certain size! Because: "linux's malloc allows to set a
91// MMAP_THRESHOLD value, so we set it to 4096 - all blocks above that will
92// be handled directly by the kernel, and are guaranteed to be returned to
93// the system when freed. So we needed to profile only blocks below this
94// limit."
95//
96// Examine and fix bugs on bugzilla:
97// IGNORE:
98// 112163 nor MASSIF crashed with signal 7 (SIGBUS) after running 2 days
99// - weird, crashes in VEX, ignore
100// 82871 nor Massif output function names too short
101// - on .ps graph, now irrelevant, ignore
102// 129576 nor Massif loses track of memory, incorrect graphs
103// - dunno, hard to reproduce, ignore
104// 132132 nor massif --format=html output does not do html entity escaping
105// - only for HTML output, irrelevant, ignore
106// 132950 Heap alloc/usage summary
107// - doesn't seem that interesting or general
108//
109// FIXED/NOW IRRELEVANT:
110// 89061 cra Massif: ms_main.c:485 (get_XCon): Assertion `xpt->max_chi...
111// - relevant code now gone
112// 143062 cra massif crashes on app exit with signal 8 SIGFPE
113// - fixed
114// 95483 nor massif feature request: include peak allocation in report
115// - implemented in Massif2
116// 92615 nor Write output from Massif at crash
117// - this happens unless Massif2 itself crashes
118// 121629 add instruction-counting mode for timing
119// - time-unit=B is similar, plus I'm considering this above anyway
120// 142197 nor massif tool ignores --massif:alloc-fn parameters in .valg...
121// - fixed in trunk
122// 142491 nor Maximise use of alloc_fns array
123// - addressed, it's now an XArray and thus unlimited in size
124// 144453 (get_XCon): Assertion 'xpt->max_children != 0' failed.
125// - relevant code now gone
126//
127// POSSIBLY FIXED BY BETTER SANITY CHECKING, BUT HARD TO TELL:
128// 141631 Massif: percentages don't add up correctly
129// - better sanity-checking should help this greatly
130// 142706 massif numbers don't seem to add up
131// - better sanity-checking should help this greatly
132// 149504 Assertion hit on alloc_xpt->curr_space >= -space_delta
133// - better sanity-checking should help this greatly
134// 146456 (update_XCon): Assertion 'xpt->curr_space >= -space_delta' failed.
135// - better sanity-checking should help this greatly
136//
137// File format working notes:
138
139#if 0
140desc: --heap-admin=foo
141cmd: date
142time_unit: ms
143#-----------
144snapshot=0
145#-----------
146time=0
147mem_heap_B=0
148mem_heap_admin_B=0
149mem_stacks_B=0
150heap_tree=empty
151#-----------
152snapshot=1
153#-----------
154time=353
155mem_heap_B=5
156mem_heap_admin_B=0
157mem_stacks_B=0
158heap_tree=detailed
159n1: 5 (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
160 n1: 5 0x27F6E0: _nl_normalize_codeset (in /lib/libc-2.3.5.so)
161 n1: 5 0x279DE6: _nl_load_locale_from_archive (in /lib/libc-2.3.5.so)
162 n1: 5 0x278E97: _nl_find_locale (in /lib/libc-2.3.5.so)
163 n1: 5 0x278871: setlocale (in /lib/libc-2.3.5.so)
164 n1: 5 0x8049821: (within /bin/date)
165 n0: 5 0x26ED5E: (below main) (in /lib/libc-2.3.5.so)
166
167
168n_events: n time(ms) total(B) useful-heap(B) admin-heap(B) stacks(B)
169t_events: B
170n 0 0 0 0 0
171n 0 0 0 0 0
172t1: 5 <string...>
173 t1: 6 <string...>
174
175Ideas:
176- each snapshot specifies an x-axis value and one or more y-axis values.
177- can display the y-axis values separately if you like
178- can completely separate connection between snapshots and trees.
179
180Challenges:
181- how to specify and scale/abbreviate units on axes?
182- how to combine multiple values into the y-axis?
183
184--------------------------------------------------------------------------------Command: date
185Massif arguments: --heap-admin=foo
186ms_print arguments: massif.out
187--------------------------------------------------------------------------------
188 KB
1896.472^ :#
190 | :# :: . .
191 ...
192 | ::@ :@ :@ :@:::# :: : ::::
193 0 +-----------------------------------@---@---@-----@--@---#-------------->ms 0 713
194
195Number of snapshots: 50
196 Detailed snapshots: [2, 11, 13, 19, 25, 32 (peak)]
197-------------------------------------------------------------------------------- n time(ms) total(B) useful-heap(B) admin-heap(B) stacks(B)
198-------------------------------------------------------------------------------- 0 0 0 0 0 0
199 1 345 5 5 0 0
200 2 353 5 5 0 0
201100.00% (5B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
202->100.00% (5B) 0x27F6E0: _nl_normalize_codeset (in /lib/libc-2.3.5.so)
203#endif
204
205//---------------------------------------------------------------------------
nethercotec9f36922004-02-14 16:40:02 +0000206
njnc7561b92005-06-19 01:24:32 +0000207#include "pub_tool_basics.h"
sewardj4cfea4f2006-10-14 19:26:10 +0000208#include "pub_tool_vki.h"
sewardj45f4e7c2005-09-27 19:20:21 +0000209#include "pub_tool_aspacemgr.h"
njnea27e462005-05-31 02:38:09 +0000210#include "pub_tool_debuginfo.h"
njn81c00df2005-05-14 21:28:43 +0000211#include "pub_tool_hashtable.h"
njn97405b22005-06-02 03:39:33 +0000212#include "pub_tool_libcbase.h"
njn132bfcc2005-06-04 19:16:06 +0000213#include "pub_tool_libcassert.h"
njneb8896b2005-06-04 20:03:55 +0000214#include "pub_tool_libcfile.h"
njn36a20fa2005-06-03 03:08:39 +0000215#include "pub_tool_libcprint.h"
njnf39e9a32005-06-12 02:43:17 +0000216#include "pub_tool_libcproc.h"
njnb506bd82005-06-21 04:01:51 +0000217#include "pub_tool_machine.h"
njn717cde52005-05-10 02:47:21 +0000218#include "pub_tool_mallocfree.h"
njn20242342005-05-16 23:31:24 +0000219#include "pub_tool_options.h"
njn717cde52005-05-10 02:47:21 +0000220#include "pub_tool_replacemalloc.h"
njnd01fef72005-03-25 23:35:48 +0000221#include "pub_tool_stacktrace.h"
njn43b9a8a2005-05-10 04:37:01 +0000222#include "pub_tool_tooliface.h"
sewardj14c7cc52007-02-25 15:08:24 +0000223#include "pub_tool_xarray.h"
sewardj45f4e7c2005-09-27 19:20:21 +0000224#include "pub_tool_clientstate.h"
nethercotec9f36922004-02-14 16:40:02 +0000225
226#include "valgrind.h" // For {MALLOC,FREE}LIKE_BLOCK
227
njn734b8052007-11-01 04:40:37 +0000228//------------------------------------------------------------*/
229//--- Overview of operation ---*/
230//------------------------------------------------------------*/
nethercotec9f36922004-02-14 16:40:02 +0000231
njn734b8052007-11-01 04:40:37 +0000232// The size of the stacks and heap is tracked. The heap is tracked in a lot
233// of detail, enough to tell how many bytes each line of code is responsible
234// for, more or less. The main data structure is a tree representing the
235// call tree beneath all the allocation functions like malloc().
nethercotec9f36922004-02-14 16:40:02 +0000236//
njn734b8052007-11-01 04:40:37 +0000237// "Snapshots" are recordings of the memory usage. There are two basic
238// kinds:
239// - Normal: these record the current time, total memory size, total heap
240// size, heap admin size and stack size.
241// - Detailed: these record those things in a normal snapshot, plus a very
242// detailed XTree (see below) indicating how the heap is structured.
nethercotec9f36922004-02-14 16:40:02 +0000243//
njn734b8052007-11-01 04:40:37 +0000244// Snapshots are taken every so often. There are two storage classes of
245// snapshots:
246// - Temporary: Massif does a temporary snapshot every so often. The idea
247// is to always have a certain number of temporary snapshots around. So
248// we take them frequently to begin with, but decreasingly often as the
249// program continues to run. Also, we remove some old ones after a while.
250// Overall it's a kind of exponential decay thing. Most of these are
251// normal snapshots, a small fraction are detailed snapshots.
252// - Permanent: Massif takes a permanent (detailed) snapshot in some
253// circumstances. They are:
254// - Peak snapshot: When the memory usage peak is reached, it takes a
255// snapshot. It keeps this, unless the peak is subsequently exceeded,
256// in which case it will overwrite the peak snapshot.
257// - User-requested snapshots: These are done in response to client
258// requests. They are always kept.
nethercotec9f36922004-02-14 16:40:02 +0000259
njn734b8052007-11-01 04:40:37 +0000260// Used for printing things when clo_verbosity > 1.
261#define VERB(verb, format, args...) \
262 if (VG_(clo_verbosity) > verb) { \
263 VG_(message)(Vg_DebugMsg, "Massif: " format, ##args); \
nethercotec9f36922004-02-14 16:40:02 +0000264 }
nethercotec9f36922004-02-14 16:40:02 +0000265
nethercotec9f36922004-02-14 16:40:02 +0000266
nethercotec9f36922004-02-14 16:40:02 +0000267
njn734b8052007-11-01 04:40:37 +0000268//------------------------------------------------------------//
269//--- Statistics ---//
270//------------------------------------------------------------//
nethercotec9f36922004-02-14 16:40:02 +0000271
272// Konqueror startup, to give an idea of the numbers involved with a biggish
273// program, with default depth:
274//
275// depth=3 depth=40
276// - 310,000 allocations
277// - 300,000 frees
278// - 15,000 XPts 800,000 XPts
279// - 1,800 top-XPts
280
njn734b8052007-11-01 04:40:37 +0000281static UInt n_heap_allocs = 0;
282static UInt n_heap_reallocs = 0;
283static UInt n_heap_frees = 0;
284static UInt n_stack_allocs = 0;
285static UInt n_stack_frees = 0;
286static UInt n_xpts = 0;
287static UInt n_xpt_init_expansions = 0;
288static UInt n_xpt_later_expansions = 0;
289static UInt n_sxpt_allocs = 0;
290static UInt n_sxpt_frees = 0;
291static UInt n_skipped_snapshots = 0;
292static UInt n_real_snapshots = 0;
293static UInt n_detailed_snapshots = 0;
294static UInt n_peak_snapshots = 0;
295static UInt n_cullings = 0;
296static UInt n_XCon_redos = 0;
nethercotec9f36922004-02-14 16:40:02 +0000297
njn734b8052007-11-01 04:40:37 +0000298//------------------------------------------------------------//
299//--- Globals ---//
300//------------------------------------------------------------//
nethercotec9f36922004-02-14 16:40:02 +0000301
njn734b8052007-11-01 04:40:37 +0000302static SizeT heap_szB = 0; // Live heap size
303static SizeT stacks_szB = 0; // Live stacks size
nethercotec9f36922004-02-14 16:40:02 +0000304
njn734b8052007-11-01 04:40:37 +0000305// This is the total size from the current peak snapshot, or 0 if no peak
306// snapshot has been taken yet.
307static SizeT peak_snapshot_total_szB = 0;
nethercotec9f36922004-02-14 16:40:02 +0000308
njn734b8052007-11-01 04:40:37 +0000309// Incremented every time memory is allocated/deallocated, by the
310// allocated/deallocated amount; includes heap, heap-admin and stack
311// memory. An alternative to milliseconds as a unit of program "time".
312static ULong total_allocs_deallocs_szB = 0;
nethercotec9f36922004-02-14 16:40:02 +0000313
314static UInt n_heap_blocks = 0;
315
njn51d827b2005-05-09 01:02:08 +0000316// Current directory at startup.
njn734b8052007-11-01 04:40:37 +0000317static Char base_dir[VKI_PATH_MAX]; // XXX: currently unused
nethercotec9f36922004-02-14 16:40:02 +0000318
njn734b8052007-11-01 04:40:37 +0000319// We don't start taking snapshots until the first basic block is executed,
320// rather than doing it in ms_post_clo_init (which is the obvious spot), for
321// two reasons.
322// - It lets us ignore stack events prior to that, because they're not
323// really proper ones and just would screw things up.
324// - Because there's still some core initialisation to do, and so there
325// would be an artificial time gap between the first and second snapshots.
326//
327static Bool have_started_executing_code = False;
nethercotec9f36922004-02-14 16:40:02 +0000328
njn734b8052007-11-01 04:40:37 +0000329//------------------------------------------------------------//
330//--- Alloc fns ---//
331//------------------------------------------------------------//
nethercotec9f36922004-02-14 16:40:02 +0000332
njn734b8052007-11-01 04:40:37 +0000333static XArray* alloc_fns;
nethercotec9f36922004-02-14 16:40:02 +0000334
njn734b8052007-11-01 04:40:37 +0000335static void init_alloc_fns(void)
336{
337 // Create the list, and add the default elements.
338 alloc_fns = VG_(newXA)(VG_(malloc), VG_(free), sizeof(Char*));
339 #define DO(x) { Char* s = x; VG_(addToXA)(alloc_fns, &s); }
nethercotec9f36922004-02-14 16:40:02 +0000340
njn734b8052007-11-01 04:40:37 +0000341 // Ordered according to (presumed) frequency.
342 // Nb: The C++ "operator new*" ones are overloadable. We include them
343 // always anyway, because even if they're overloaded, it would be a
344 // prodigiously stupid overloading that caused them to not allocate
345 // memory.
346 DO("malloc" );
347 DO("__builtin_new" );
348 DO("operator new(unsigned)" );
349 DO("operator new(unsigned long)" );
350 DO("__builtin_vec_new" );
351 DO("operator new[](unsigned)" );
352 DO("operator new[](unsigned long)" );
353 DO("calloc" );
354 DO("realloc" );
355 DO("memalign" );
356 DO("operator new(unsigned, std::nothrow_t const&)" );
357 DO("operator new[](unsigned, std::nothrow_t const&)" );
358 DO("operator new(unsigned long, std::nothrow_t const&)" );
359 DO("operator new[](unsigned long, std::nothrow_t const&)");
360}
nethercotec9f36922004-02-14 16:40:02 +0000361
njn734b8052007-11-01 04:40:37 +0000362static Bool is_alloc_fn(Char* fnname)
363{
364 Char** alloc_fn_ptr;
365 Int i;
366
367 // Nb: It's a linear search through the list, because we're comparing
368 // strings rather than pointers to strings.
369 // Nb: This gets called a lot. It was an OSet, but they're quite slow to
370 // iterate through so it wasn't a good choice.
371 for (i = 0; i < VG_(sizeXA)(alloc_fns); i++) {
372 alloc_fn_ptr = VG_(indexXA)(alloc_fns, i);
373 if (VG_STREQ(fnname, *alloc_fn_ptr))
374 return True;
nethercotec9f36922004-02-14 16:40:02 +0000375 }
njn734b8052007-11-01 04:40:37 +0000376 return False;
377}
nethercotec9f36922004-02-14 16:40:02 +0000378
njn734b8052007-11-01 04:40:37 +0000379
380//------------------------------------------------------------//
381//--- Command line args ---//
382//------------------------------------------------------------//
383
384#define MAX_DEPTH 200
385
386typedef enum { TimeMS, TimeB } TimeUnit;
387
388static Char* TimeUnit_to_string(TimeUnit time_unit)
389{
390 switch (time_unit) {
391 case TimeMS: return "ms";
392 case TimeB: return "B";
393 default: tl_assert2(0, "TimeUnit_to_string: unrecognised TimeUnit");
394 }
395}
396
397static Bool clo_heap = True;
njn429afb42007-11-02 04:12:48 +0000398 // clo_heap_admin is deliberately a word-sized type. At one point it was
399 // a UInt, but this caused problems on 64-bit machines when it was
400 // multiplied by a small negative number and then promoted to a
401 // word-sized type -- it ended up with a value of 4.2 billion. Sigh.
402static SizeT clo_heap_admin = 8;
njn734b8052007-11-01 04:40:37 +0000403static Bool clo_stacks = False;
404static UInt clo_depth = 30;
405static UInt clo_threshold = 100; // 100 == 1%
406static UInt clo_peak_inaccuracy = 100; // 100 == 1%
407static UInt clo_time_unit = TimeMS;
408static UInt clo_detailed_freq = 10;
409static UInt clo_max_snapshots = 100;
410
411static XArray* args_for_massif;
nethercotec9f36922004-02-14 16:40:02 +0000412
njn51d827b2005-05-09 01:02:08 +0000413static Bool ms_process_cmd_line_option(Char* arg)
nethercotec9f36922004-02-14 16:40:02 +0000414{
njn734b8052007-11-01 04:40:37 +0000415 // Remember the arg for later use.
416 VG_(addToXA)(args_for_massif, &arg);
nethercotec9f36922004-02-14 16:40:02 +0000417
njn734b8052007-11-01 04:40:37 +0000418 VG_BOOL_CLO(arg, "--heap", clo_heap)
419 else VG_BOOL_CLO(arg, "--stacks", clo_stacks)
420
421 // XXX: "--heap-admin= " is accepted!
422 else VG_NUM_CLO(arg, "--heap-admin", clo_heap_admin)
423 else VG_NUM_CLO(arg, "--depth", clo_depth)
424
425 // XXX: use a fractional number, so no division by 100
426 else VG_NUM_CLO(arg, "--threshold", clo_threshold)
427
428 // XXX: use a fractional number, so no division by 100
429 else VG_NUM_CLO(arg, "--peak-inaccuracy", clo_peak_inaccuracy)
430
431 else VG_NUM_CLO(arg, "--detailed-freq", clo_detailed_freq)
432 else VG_NUM_CLO(arg, "--max-snapshots", clo_max_snapshots)
433
434 else if (VG_CLO_STREQ(arg, "--time-unit=ms")) clo_time_unit = TimeMS;
435 else if (VG_CLO_STREQ(arg, "--time-unit=B")) clo_time_unit = TimeB;
nethercotec9f36922004-02-14 16:40:02 +0000436
437 else if (VG_CLO_STREQN(11, arg, "--alloc-fn=")) {
njn734b8052007-11-01 04:40:37 +0000438 Char* alloc_fn = &arg[11];
439 VG_(addToXA)(alloc_fns, &alloc_fn);
nethercotec9f36922004-02-14 16:40:02 +0000440 }
441
nethercotec9f36922004-02-14 16:40:02 +0000442 else
443 return VG_(replacement_malloc_process_cmd_line_option)(arg);
nethercote27fec902004-06-16 21:26:32 +0000444
nethercotec9f36922004-02-14 16:40:02 +0000445 return True;
446}
447
njn51d827b2005-05-09 01:02:08 +0000448static void ms_print_usage(void)
nethercotec9f36922004-02-14 16:40:02 +0000449{
njn734b8052007-11-01 04:40:37 +0000450 VG_(printf)(
nethercotec9f36922004-02-14 16:40:02 +0000451" --heap=no|yes profile heap blocks [yes]\n"
njn734b8052007-11-01 04:40:37 +0000452" --heap-admin=<number> average admin bytes per heap block;"
453" ignored if --heap=no [8]\n"
454" --stacks=no|yes profile stack(s) [no]\n"
455" --depth=<number> depth of contexts [30]\n"
nethercotec9f36922004-02-14 16:40:02 +0000456" --alloc-fn=<name> specify <fn> as an alloc function [empty]\n"
njn734b8052007-11-01 04:40:37 +0000457" --threshold=<n> significance threshold, in 100ths of a percent\n"
458" (eg. <n>=100 shows nodes covering >= 1%% of\n"
459" total size, <n>=0 shows all nodes) [100]\n"
460" --peak-inaccuracy=<n> closeness of recorded peak to true peak,\n"
461" in 100ths of a percent\n"
462" --time-unit=ms|B time unit, milliseconds or bytes\n"
463" alloc'd/dealloc'd on the heap [ms]\n"
464" --detailed-freq=<N> every Nth snapshot should be detailed [10]\n"
465" --max-snapshots=<N> maximum number of snapshots recorded [100]\n"
nethercotec9f36922004-02-14 16:40:02 +0000466 );
467 VG_(replacement_malloc_print_usage)();
468}
469
njn51d827b2005-05-09 01:02:08 +0000470static void ms_print_debug_usage(void)
nethercotec9f36922004-02-14 16:40:02 +0000471{
472 VG_(replacement_malloc_print_debug_usage)();
473}
474
njn734b8052007-11-01 04:40:37 +0000475
476//------------------------------------------------------------//
477//--- XPts, XTrees and XCons ---//
478//------------------------------------------------------------//
479
480// An XPt represents an "execution point", ie. a code address. Each XPt is
481// part of a tree of XPts (an "execution tree", or "XTree"). The details of
482// the heap are represented by a single XTree.
483//
484// The root of the tree is 'alloc_xpt', which represents all allocation
485// functions, eg:
486// - malloc/calloc/realloc/memalign/new/new[];
487// - user-specified allocation functions (using --alloc-fn);
488// - custom allocation (MALLOCLIKE) points
489// It's a bit of a fake XPt (ie. its 'ip' is zero), and is only used because
490// it makes the code simpler.
491//
492// Any child of 'alloc_xpt' is called a "top-XPt". The XPts at the bottom
493// of an XTree (leaf nodes) are "bottom-XPTs".
494//
495// Each path from a top-XPt to a bottom-XPt through an XTree gives an
496// execution context ("XCon"), ie. a stack trace. (And sub-paths represent
497// stack sub-traces.) The number of XCons in an XTree is equal to the
498// number of bottom-XPTs in that XTree.
499//
500// alloc_xpt XTrees are bi-directional.
501// | ^
502// v |
503// > parent < Example: if child1() calls parent() and child2()
504// / | \ also calls parent(), and parent() calls malloc(),
505// | / \ | the XTree will look like this.
506// | v v |
507// child1 child2
508//
509// XTrees and XPts are mirrored by SXTrees and SXPts, where the 'S' is short
510// for "saved". When the XTree is duplicated for a snapshot, we duplicate
511// it as an SXTree, which is similar but omits some things it does not need,
512// and aggregates up insignificant nodes. This is important as an SXTree is
513// typically much smaller than an XTree.
514
515// XXX: make XPt and SXPt extensible arrays, to avoid having to do two
516// allocations per Pt.
517
518typedef struct _XPt XPt;
519struct _XPt {
520 Addr ip; // code address
521
522 // Bottom-XPts: space for the precise context.
523 // Other XPts: space of all the descendent bottom-XPts.
524 // Nb: this value goes up and down as the program executes.
525 SizeT szB;
526
527 XPt* parent; // pointer to parent XPt
528
529 // Children.
530 // n_children and max_children are 32-bit integers. 16-bit integers
531 // are too small -- a very big program might have more than 65536
532 // allocation points (ie. top-XPts) -- Konqueror starting up has 1800.
533 UInt n_children; // number of children
534 UInt max_children; // capacity of children array
535 XPt** children; // pointers to children XPts
536};
537
538typedef
539 enum {
540 SigSXPt,
541 InsigSXPt
542 }
543 SXPtTag;
544
545typedef struct _SXPt SXPt;
546struct _SXPt {
547 SXPtTag tag;
548 SizeT szB; // memory size for the node, be it Sig or Insig
549 union {
550 // An SXPt representing a single significant code location. Much like
551 // an XPt, minus the fields that aren't necessary.
552 struct {
553 Addr ip;
554 UInt n_children;
555 SXPt** children;
556 }
557 Sig;
558
559 // An SXPt representing one or more code locations, all below the
560 // significance threshold.
561 struct {
562 Int n_xpts; // number of aggregated XPts
563 }
564 Insig;
565 };
566};
nethercotec9f36922004-02-14 16:40:02 +0000567
568// Fake XPt representing all allocation functions like malloc(). Acts as
569// parent node to all top-XPts.
570static XPt* alloc_xpt;
571
572// Cheap allocation for blocks that never need to be freed. Saves about 10%
573// for Konqueror startup with --depth=40.
nethercote7ac7f7b2004-11-02 12:36:02 +0000574static void* perm_malloc(SizeT n_bytes)
nethercotec9f36922004-02-14 16:40:02 +0000575{
576 static Addr hp = 0; // current heap pointer
577 static Addr hp_lim = 0; // maximum usable byte in current block
578
579 #define SUPERBLOCK_SIZE (1 << 20) // 1 MB
580
581 if (hp + n_bytes > hp_lim) {
sewardj45f4e7c2005-09-27 19:20:21 +0000582 hp = (Addr)VG_(am_shadow_alloc)(SUPERBLOCK_SIZE);
583 if (hp == 0)
njn734b8052007-11-01 04:40:37 +0000584 VG_(out_of_memory_NORETURN)( "massif:perm_malloc",
sewardj45f4e7c2005-09-27 19:20:21 +0000585 SUPERBLOCK_SIZE);
nethercotec9f36922004-02-14 16:40:02 +0000586 hp_lim = hp + SUPERBLOCK_SIZE - 1;
587 }
588
589 hp += n_bytes;
590
591 return (void*)(hp - n_bytes);
592}
593
njn734b8052007-11-01 04:40:37 +0000594static XPt* new_XPt(Addr ip, XPt* parent)
nethercotec9f36922004-02-14 16:40:02 +0000595{
njn734b8052007-11-01 04:40:37 +0000596 // XPts are never freed, so we can use perm_malloc to allocate them.
597 // Note that we cannot use perm_malloc for the 'children' array, because
598 // that needs to be resizable.
599 XPt* xpt = perm_malloc(sizeof(XPt));
600 xpt->ip = ip;
601 xpt->szB = 0;
602 xpt->parent = parent;
nethercotec9f36922004-02-14 16:40:02 +0000603
njn734b8052007-11-01 04:40:37 +0000604 // We don't initially allocate any space for children. We let that
605 // happen on demand. Many XPts (ie. all the bottom-XPts) don't have any
606 // children anyway.
nethercotec9f36922004-02-14 16:40:02 +0000607 xpt->n_children = 0;
njn734b8052007-11-01 04:40:37 +0000608 xpt->max_children = 0;
609 xpt->children = NULL;
nethercotec9f36922004-02-14 16:40:02 +0000610
611 // Update statistics
612 n_xpts++;
613
614 return xpt;
615}
616
njn734b8052007-11-01 04:40:37 +0000617static void add_child_xpt(XPt* parent, XPt* child)
618{
619 // Expand 'children' if necessary.
620 tl_assert(parent->n_children <= parent->max_children);
621 if (parent->n_children == parent->max_children) {
622 if (parent->max_children == 0) {
623 parent->max_children = 4;
624 parent->children = VG_(malloc)( parent->max_children * sizeof(XPt*) );
625 n_xpt_init_expansions++;
626 } else {
627 parent->max_children *= 2; // Double size
628 parent->children = VG_(realloc)( parent->children,
629 parent->max_children * sizeof(XPt*) );
630 n_xpt_later_expansions++;
631 }
632 }
633
634 // Insert new child XPt in parent's children list.
635 parent->children[ parent->n_children++ ] = child;
636}
637
638// Reverse comparison for a reverse sort -- biggest to smallest.
639static Int SXPt_revcmp_szB(void* n1, void* n2)
640{
641 SXPt* sxpt1 = *(SXPt**)n1;
642 SXPt* sxpt2 = *(SXPt**)n2;
643 return ( sxpt1->szB < sxpt2->szB ? 1
644 : sxpt1->szB > sxpt2->szB ? -1
645 : 0);
646}
647
648//------------------------------------------------------------//
649//--- XTree Operations ---//
650//------------------------------------------------------------//
651
652// Duplicates an XTree as an SXTree.
653static SXPt* dup_XTree(XPt* xpt, SizeT total_szB)
654{
655 Int i, n_sig_children, n_insig_children, n_child_sxpts;
656 SizeT insig_children_szB, sig_child_threshold_szB;
657 SXPt* sxpt;
658
659 // Number of XPt children Action for SXPT
660 // ------------------ ---------------
661 // 0 sig, 0 insig alloc 0 children
662 // N sig, 0 insig alloc N children, dup all
663 // N sig, M insig alloc N+1, dup first N, aggregate remaining M
664 // 0 sig, M insig alloc 1, aggregate M
665
666 // Work out how big a child must be to be significant. If the current
667 // total_szB is zero, then we set it to 1, which means everything will be
668 // judged insignificant -- this is sensible, as there's no point showing
669 // any detail for this case. Unless they used --threshold=0, in which
670 // case we show them everything because that's what they asked for.
671 //
672 // Nb: We do this once now, rather than once per child, because if we do
673 // that the cost of all the divisions adds up to something significant.
674 if (total_szB == 0 && clo_threshold != 0) {
675 sig_child_threshold_szB = 1;
676 } else {
677 sig_child_threshold_szB = (((ULong)total_szB) * clo_threshold) / 10000ULL;
678 }
679
680 // How many children are significant? And do we need an aggregate SXPt?
681 n_sig_children = 0;
682 for (i = 0; i < xpt->n_children; i++) {
683 if (xpt->children[i]->szB >= sig_child_threshold_szB) {
684 n_sig_children++;
685 }
686 }
687 n_insig_children = xpt->n_children - n_sig_children;
688 n_child_sxpts = n_sig_children + ( n_insig_children > 0 ? 1 : 0 );
689
690 // Duplicate the XPt.
691 sxpt = VG_(malloc)(sizeof(SXPt));
692 n_sxpt_allocs++;
693 sxpt->tag = SigSXPt;
694 sxpt->szB = xpt->szB;
695 sxpt->Sig.ip = xpt->ip;
696 sxpt->Sig.n_children = n_child_sxpts;
697
698 // Create the SXPt's children.
699 if (n_child_sxpts > 0) {
700 Int j;
701 SizeT sig_children_szB = 0;
702 sxpt->Sig.children = VG_(malloc)(n_child_sxpts * sizeof(SXPt*));
703
704 // Duplicate the significant children.
705 j = 0;
706 for (i = 0; i < xpt->n_children; i++) {
707 if (xpt->children[i]->szB >= sig_child_threshold_szB) {
708 sxpt->Sig.children[j++] = dup_XTree(xpt->children[i], total_szB);
709 sig_children_szB += xpt->children[i]->szB;
710 }
711 }
712
713 // Create the SXPt for the insignificant children, if any, and put it
714 // in the last child entry.
715 insig_children_szB = sxpt->szB - sig_children_szB;
716 if (n_insig_children > 0) {
717 // Nb: We 'n_sxpt_allocs' here because creating an Insig SXPt
718 // doesn't involve a call to dup_XTree().
719 SXPt* insig_sxpt = VG_(malloc)(sizeof(SXPt));
720 n_sxpt_allocs++;
721 insig_sxpt->tag = InsigSXPt;
722 insig_sxpt->szB = insig_children_szB;
723 insig_sxpt->Insig.n_xpts = n_insig_children;
724 sxpt->Sig.children[n_sig_children] = insig_sxpt;
725 }
726 } else {
727 sxpt->Sig.children = NULL;
728 }
729
730 return sxpt;
731}
732
733static void free_SXTree(SXPt* sxpt)
734{
735 Int i;
736 tl_assert(sxpt != NULL);
737
738 switch (sxpt->tag) {
739 case SigSXPt:
740 // Free all children SXPts, then the children array.
741 for (i = 0; i < sxpt->Sig.n_children; i++) {
742 free_SXTree(sxpt->Sig.children[i]);
743 sxpt->Sig.children[i] = NULL;
744 }
745 VG_(free)(sxpt->Sig.children); sxpt->Sig.children = NULL;
746 break;
747
748 case InsigSXPt:
749 break;
750
751 default: tl_assert2(0, "free_SXTree: unknown SXPt tag");
752 }
753
754 // Free the SXPt itself.
755 VG_(free)(sxpt); sxpt = NULL;
756 n_sxpt_frees++;
757}
758
759// Sanity checking: we periodically check the heap XTree with
760// ms_expensive_sanity_check.
761static void sanity_check_XTree(XPt* xpt, XPt* parent)
nethercotec9f36922004-02-14 16:40:02 +0000762{
763 Int i;
764
njn734b8052007-11-01 04:40:37 +0000765 tl_assert(xpt != NULL);
766
767 // Check back-pointer.
768 tl_assert2(xpt->parent == parent,
769 "xpt->parent = %p, parent = %p\n", xpt->parent, parent);
770
771 // Check children counts look sane.
772 tl_assert(xpt->n_children <= xpt->max_children);
773
774 // Check the sum of any children szBs equals the XPt's szB. Check the
775 // children at the same time.
776 if (xpt->n_children > 0) {
777 SizeT children_sum_szB = 0;
778 for (i = 0; i < xpt->n_children; i++) {
779 sanity_check_XTree(xpt->children[i], xpt);
780 children_sum_szB += xpt->children[i]->szB;
nethercotec9f36922004-02-14 16:40:02 +0000781 }
njn734b8052007-11-01 04:40:37 +0000782 tl_assert(children_sum_szB == xpt->szB);
nethercotec9f36922004-02-14 16:40:02 +0000783 }
nethercotec9f36922004-02-14 16:40:02 +0000784}
785
njn734b8052007-11-01 04:40:37 +0000786// Sanity checking: we check SXTrees (which are in snapshots) after
787// snapshots are created, before they are deleted, and before they are
788// printed.
789static void sanity_check_SXTree(SXPt* sxpt)
nethercotec9f36922004-02-14 16:40:02 +0000790{
njn734b8052007-11-01 04:40:37 +0000791 Int i;
nethercotec9f36922004-02-14 16:40:02 +0000792
njn734b8052007-11-01 04:40:37 +0000793 tl_assert(sxpt != NULL);
nethercotec9f36922004-02-14 16:40:02 +0000794
njn734b8052007-11-01 04:40:37 +0000795 // Check the sum of any children szBs equals the SXPt's szB. Check the
796 // children at the same time.
797 switch (sxpt->tag) {
798 case SigSXPt: {
799 if (sxpt->Sig.n_children > 0) {
800 SizeT children_sum_szB = 0;
801 for (i = 0; i < sxpt->Sig.n_children; i++) {
802 sanity_check_SXTree(sxpt->Sig.children[i]);
803 children_sum_szB += sxpt->Sig.children[i]->szB;
804 }
805 tl_assert(children_sum_szB == sxpt->szB);
806 }
807 break;
808 }
809 case InsigSXPt:
810 break; // do nothing
811
812 default: tl_assert2(0, "sanity_check_SXTree: unknown SXPt tag");
813 }
814}
815
816
817//------------------------------------------------------------//
818//--- XCon Operations ---//
819//------------------------------------------------------------//
820
821// This is the limit on the number of removed alloc-fns that can be in a
822// single XCon.
823#define MAX_OVERESTIMATE 50
824#define MAX_IPS (MAX_DEPTH + MAX_OVERESTIMATE)
825
826// Get the stack trace for an XCon, filtering out uninteresting entries:
827// alloc-fns and entries above alloc-fns, and entries below main-or-below-main.
828// Eg: alloc-fn1 / alloc-fn2 / a / b / main / (below main) / c
829// becomes: a / b / main
830// Nb: it's possible to end up with an empty trace, eg. if 'main' is marked
831// as an alloc-fn. This is ok.
832static
833Int get_IPs( ThreadId tid, Bool is_custom_alloc, Addr ips[])
834{
835 #define BUF_LEN 1024
836 Char buf[BUF_LEN];
837 Int n_ips, i, n_alloc_fns_removed;
838 Int overestimate;
839 Bool redo;
840
841 // We ask for a few more IPs than clo_depth suggests we need. Then we
842 // remove every entry that is an alloc-fn. Depending on the
843 // circumstances, we may need to redo it all, asking for more IPs.
844 // Details:
845 // - If the original stack trace is smaller than asked-for, redo=False
846 // - Else if after filtering we have >= clo_depth IPs, redo=False
847 // - Else redo=True
848 // In other words, to redo, we'd have to get a stack trace as big as we
849 // asked for and remove more than 'overestimate' alloc-fns.
850
851 // Main loop.
852 redo = True; // Assume this to begin with.
853 for (overestimate = 3; redo; overestimate += 6) {
854 // This should never happen -- would require MAX_OVERESTIMATE
855 // alloc-fns to be removed from the stack trace.
856 if (overestimate > MAX_OVERESTIMATE)
857 VG_(tool_panic)("get_IPs: ips[] too small, inc. MAX_OVERESTIMATE?");
858
859 // Ask for more IPs than clo_depth suggests we need.
njnd01fef72005-03-25 23:35:48 +0000860 n_ips = VG_(get_StackTrace)( tid, ips, clo_depth + overestimate );
njn734b8052007-11-01 04:40:37 +0000861 tl_assert(n_ips > 0);
nethercotec9f36922004-02-14 16:40:02 +0000862
njn734b8052007-11-01 04:40:37 +0000863 // If the original stack trace is smaller than asked-for, redo=False.
864 if (n_ips < clo_depth + overestimate) { redo = False; }
nethercotec9f36922004-02-14 16:40:02 +0000865
njn734b8052007-11-01 04:40:37 +0000866 // If it's a non-custom block, we will always remove the first stack
867 // trace entry (which will be one of malloc, __builtin_new, etc).
868 n_alloc_fns_removed = ( is_custom_alloc ? 0 : 1 );
nethercotec9f36922004-02-14 16:40:02 +0000869
njn734b8052007-11-01 04:40:37 +0000870 // Filter out alloc fns. If it's a non-custom block, we remove the
871 // first entry (which will be one of malloc, __builtin_new, etc)
872 // without looking at it, because VG_(get_fnname) is expensive (it
873 // involves calls to VG_(malloc)/VG_(free)).
874 for (i = n_alloc_fns_removed; i < n_ips; i++) {
875 if (VG_(get_fnname)(ips[i], buf, BUF_LEN)) {
876 if (is_alloc_fn(buf)) {
877 n_alloc_fns_removed++;
878 } else {
879 break;
880 }
881 }
882 }
883 // Remove the alloc fns by shuffling the rest down over them.
884 n_ips -= n_alloc_fns_removed;
885 for (i = 0; i < n_ips; i++) {
886 ips[i] = ips[i + n_alloc_fns_removed];
887 }
nethercotec9f36922004-02-14 16:40:02 +0000888
njn734b8052007-11-01 04:40:37 +0000889 // If after filtering we have >= clo_depth IPs, redo=False
890 if (n_ips >= clo_depth) {
891 redo = False;
892 n_ips = clo_depth; // Ignore any IPs below --depth.
893 }
894
895 if (redo) {
896 n_XCon_redos++;
nethercotec9f36922004-02-14 16:40:02 +0000897 }
898 }
njn734b8052007-11-01 04:40:37 +0000899 return n_ips;
900}
nethercotec9f36922004-02-14 16:40:02 +0000901
njn734b8052007-11-01 04:40:37 +0000902// Gets an XCon and puts it in the tree. Returns the XCon's bottom-XPt.
903static XPt* get_XCon( ThreadId tid, Bool is_custom_alloc )
904{
905 Addr ips[MAX_IPS];
906 Int i;
907 XPt* xpt = alloc_xpt;
nethercotec9f36922004-02-14 16:40:02 +0000908
njn734b8052007-11-01 04:40:37 +0000909 // After this call, the IPs we want are in ips[0]..ips[n_ips-1].
910 Int n_ips = get_IPs(tid, is_custom_alloc, ips);
911
912 // Now do the search/insertion of the XCon.
913 for (i = 0; i < n_ips; i++) {
914 Addr ip = ips[i];
915 Int ch;
njnd01fef72005-03-25 23:35:48 +0000916 // Look for IP in xpt's children.
njn734b8052007-11-01 04:40:37 +0000917 // Linear search, ugh -- about 10% of time for konqueror startup tried
918 // caching last result, only hit about 4% for konqueror.
nethercotec9f36922004-02-14 16:40:02 +0000919 // Nb: this search hits about 98% of the time for konqueror
njn734b8052007-11-01 04:40:37 +0000920 for (ch = 0; True; ch++) {
921 if (ch == xpt->n_children) {
922 // IP not found in the children.
923 // Create and add new child XPt, then stop.
924 XPt* new_child_xpt = new_XPt(ip, xpt);
925 add_child_xpt(xpt, new_child_xpt);
926 xpt = new_child_xpt;
927 break;
nethercotec9f36922004-02-14 16:40:02 +0000928
njn734b8052007-11-01 04:40:37 +0000929 } else if (ip == xpt->children[ch]->ip) {
930 // Found the IP in the children, stop.
931 xpt = xpt->children[ch];
nethercotec9f36922004-02-14 16:40:02 +0000932 break;
933 }
nethercotec9f36922004-02-14 16:40:02 +0000934 }
nethercotec9f36922004-02-14 16:40:02 +0000935 }
njn734b8052007-11-01 04:40:37 +0000936 tl_assert(0 == xpt->n_children); // Must be bottom-XPt
937 return xpt;
nethercotec9f36922004-02-14 16:40:02 +0000938}
939
njn734b8052007-11-01 04:40:37 +0000940// Update 'szB' of every XPt in the XCon, by percolating upwards.
941static void update_XCon(XPt* xpt, SSizeT space_delta)
nethercotec9f36922004-02-14 16:40:02 +0000942{
njnca82cc02004-11-22 17:18:48 +0000943 tl_assert(True == clo_heap);
njnca82cc02004-11-22 17:18:48 +0000944 tl_assert(NULL != xpt);
945 tl_assert(0 == xpt->n_children); // must be bottom-XPt
nethercotec9f36922004-02-14 16:40:02 +0000946
njn734b8052007-11-01 04:40:37 +0000947 if (0 == space_delta)
948 return;
949
nethercotec9f36922004-02-14 16:40:02 +0000950 while (xpt != alloc_xpt) {
njn734b8052007-11-01 04:40:37 +0000951 if (space_delta < 0) tl_assert(xpt->szB >= -space_delta);
952 xpt->szB += space_delta;
nethercotec9f36922004-02-14 16:40:02 +0000953 xpt = xpt->parent;
njn734b8052007-11-01 04:40:37 +0000954 }
955 if (space_delta < 0) tl_assert(alloc_xpt->szB >= -space_delta);
956 alloc_xpt->szB += space_delta;
nethercotec9f36922004-02-14 16:40:02 +0000957}
958
959
njn734b8052007-11-01 04:40:37 +0000960//------------------------------------------------------------//
961//--- Snapshots ---//
962//------------------------------------------------------------//
963
964// Snapshots are done in a way so that we always have a reasonable number of
965// them. We start by taking them quickly. Once we hit our limit, we cull
966// some (eg. half), and start taking them more slowly. Once we hit the
967// limit again, we again cull and then take them even more slowly, and so
968// on.
969
970// Time is measured either in ms or bytes, depending on the --time-unit
971// option. It's a Long because it can exceed 32-bits reasonably easily, and
972// because we need to allow negative values to represent unset times.
973typedef Long Time;
974
975#define UNUSED_SNAPSHOT_TIME -333 // A conspicuous negative number.
976
977typedef
978 enum {
979 Normal = 77,
980 Peak,
981 Unused
982 }
983 SnapshotKind;
nethercotec9f36922004-02-14 16:40:02 +0000984
985typedef
986 struct {
njn734b8052007-11-01 04:40:37 +0000987 SnapshotKind kind;
988 Time time;
989 SizeT heap_szB;
990 SizeT heap_admin_szB;
991 SizeT stacks_szB;
992 SXPt* alloc_sxpt; // Heap XTree root, if a detailed snapshot,
993 } // otherwise NULL
994 Snapshot;
nethercotec9f36922004-02-14 16:40:02 +0000995
njn734b8052007-11-01 04:40:37 +0000996static UInt next_snapshot_i = 0; // Index of where next snapshot will go.
997static Snapshot* snapshots; // Array of snapshots.
998
999static Bool is_snapshot_in_use(Snapshot* snapshot)
nethercotec9f36922004-02-14 16:40:02 +00001000{
njn734b8052007-11-01 04:40:37 +00001001 if (Unused == snapshot->kind) {
1002 // If snapshot is unused, check all the fields are unset.
1003 tl_assert(snapshot->time == UNUSED_SNAPSHOT_TIME);
1004 tl_assert(snapshot->heap_admin_szB == 0);
1005 tl_assert(snapshot->heap_szB == 0);
1006 tl_assert(snapshot->stacks_szB == 0);
1007 tl_assert(snapshot->alloc_sxpt == NULL);
1008 return False;
nethercotec9f36922004-02-14 16:40:02 +00001009 } else {
njn734b8052007-11-01 04:40:37 +00001010 tl_assert(snapshot->time != UNUSED_SNAPSHOT_TIME);
1011 return True;
nethercotec9f36922004-02-14 16:40:02 +00001012 }
1013}
1014
njn734b8052007-11-01 04:40:37 +00001015static Bool is_detailed_snapshot(Snapshot* snapshot)
nethercotec9f36922004-02-14 16:40:02 +00001016{
njn734b8052007-11-01 04:40:37 +00001017 return (snapshot->alloc_sxpt ? True : False);
nethercotec9f36922004-02-14 16:40:02 +00001018}
1019
njn734b8052007-11-01 04:40:37 +00001020static Bool is_uncullable_snapshot(Snapshot* snapshot)
nethercotec9f36922004-02-14 16:40:02 +00001021{
njn734b8052007-11-01 04:40:37 +00001022 return &snapshots[0] == snapshot // First snapshot
1023 || &snapshots[next_snapshot_i-1] == snapshot // Last snapshot
1024 || snapshot->kind == Peak; // Peak snapshot
nethercotec9f36922004-02-14 16:40:02 +00001025}
1026
njn734b8052007-11-01 04:40:37 +00001027static void sanity_check_snapshot(Snapshot* snapshot)
nethercotec9f36922004-02-14 16:40:02 +00001028{
njn734b8052007-11-01 04:40:37 +00001029 if (snapshot->alloc_sxpt) {
1030 sanity_check_SXTree(snapshot->alloc_sxpt);
1031 }
nethercotec9f36922004-02-14 16:40:02 +00001032}
1033
njn734b8052007-11-01 04:40:37 +00001034// All the used entries should look used, all the unused ones should be clear.
1035static void sanity_check_snapshots_array(void)
1036{
1037 Int i;
1038 for (i = 0; i < next_snapshot_i; i++) {
1039 tl_assert( is_snapshot_in_use( & snapshots[i] ));
1040 }
1041 for ( ; i < clo_max_snapshots; i++) {
1042 tl_assert(!is_snapshot_in_use( & snapshots[i] ));
1043 }
1044}
nethercotec9f36922004-02-14 16:40:02 +00001045
njn734b8052007-11-01 04:40:37 +00001046// This zeroes all the fields in the snapshot, but does not free the heap
1047// XTree if present. It also does a sanity check unless asked not to; we
1048// can't sanity check at startup when clearing the initial snapshots because
1049// they're full of junk.
1050static void clear_snapshot(Snapshot* snapshot, Bool do_sanity_check)
1051{
1052 if (do_sanity_check) sanity_check_snapshot(snapshot);
1053 snapshot->kind = Unused;
1054 snapshot->time = UNUSED_SNAPSHOT_TIME;
1055 snapshot->heap_admin_szB = 0;
1056 snapshot->heap_szB = 0;
1057 snapshot->stacks_szB = 0;
1058 snapshot->alloc_sxpt = NULL;
1059}
1060
1061// This zeroes all the fields in the snapshot, and frees the heap XTree if
1062// present.
1063static void delete_snapshot(Snapshot* snapshot)
1064{
1065 // Nb: if there's an XTree, we free it after calling clear_snapshot,
1066 // because clear_snapshot does a sanity check which includes checking the
1067 // XTree.
1068 SXPt* tmp_sxpt = snapshot->alloc_sxpt;
1069 clear_snapshot(snapshot, /*do_sanity_check*/True);
1070 if (tmp_sxpt) {
1071 free_SXTree(tmp_sxpt);
1072 }
1073}
1074
1075static void VERB_snapshot(Int verbosity, Char* prefix, Int i)
1076{
1077 Snapshot* snapshot = &snapshots[i];
1078 Char* suffix;
1079 switch (snapshot->kind) {
1080 case Peak: suffix = "p"; break;
1081 case Normal: suffix = ( is_detailed_snapshot(snapshot) ? "d" : "." ); break;
1082 case Unused: suffix = "u"; break;
1083 default:
1084 tl_assert2(0, "VERB_snapshot: unknown snapshot kind: %d", snapshot->kind);
1085 }
1086 VERB(verbosity, "%s S%s%3d (t:%lld, hp:%ld, ad:%ld, st:%ld)",
1087 prefix, suffix, i,
1088 snapshot->time,
1089 snapshot->heap_szB,
1090 snapshot->heap_admin_szB,
1091 snapshot->stacks_szB
1092 );
1093}
1094
1095// Cull half the snapshots; we choose those that represent the smallest
1096// time-spans, because that gives us the most even distribution of snapshots
1097// over time. (It's possible to lose interesting spikes, however.)
1098//
1099// Algorithm for N snapshots: We find the snapshot representing the smallest
1100// timeframe, and remove it. We repeat this until (N/2) snapshots are gone.
1101// We have to do this one snapshot at a time, rather than finding the (N/2)
1102// smallest snapshots in one hit, because when a snapshot is removed, its
1103// neighbours immediately cover greater timespans. So it's O(N^2), but N is
1104// small, and it's not done very often.
1105//
1106// Once we're done, we return the new smallest interval between snapshots.
1107// That becomes our minimum time interval.
1108static UInt cull_snapshots(void)
1109{
1110 Int i, jp, j, jn, min_timespan_i;
1111 Int n_deleted = 0;
1112 Time min_timespan;
1113
1114 n_cullings++;
1115
1116 // Sets j to the index of the first not-yet-removed snapshot at or after i
1117 #define FIND_SNAPSHOT(i, j) \
1118 for (j = i; \
1119 j < clo_max_snapshots && !is_snapshot_in_use(&snapshots[j]); \
1120 j++) { }
1121
1122 VERB(2, "Culling...");
1123
1124 // First we remove enough snapshots by clearing them in-place. Once
1125 // that's done, we can slide the remaining ones down.
1126 for (i = 0; i < clo_max_snapshots/2; i++) {
1127 // Find the snapshot representing the smallest timespan. The timespan
1128 // for snapshot n = d(N-1,N)+d(N,N+1), where d(A,B) is the time between
1129 // snapshot A and B. We don't consider the first and last snapshots for
1130 // removal.
1131 Snapshot* min_snapshot;
1132 Int min_j;
1133
1134 // Initial triple: (prev, curr, next) == (jp, j, jn)
1135 // Initial min_timespan is the first one.
1136 jp = 0;
1137 FIND_SNAPSHOT(1, j);
1138 FIND_SNAPSHOT(j+1, jn);
1139 min_timespan = 0x7fffffffffffffffLL;
1140 min_j = -1;
1141 while (jn < clo_max_snapshots) {
1142 Time timespan = snapshots[jn].time - snapshots[jp].time;
1143 tl_assert(timespan >= 0);
1144 // Nb: We never cull the peak snapshot.
1145 if (Peak != snapshots[j].kind && timespan < min_timespan) {
1146 min_timespan = timespan;
1147 min_j = j;
1148 }
1149 // Move on to next triple
1150 jp = j;
1151 j = jn;
1152 FIND_SNAPSHOT(jn+1, jn);
1153 }
1154 // We've found the least important snapshot, now delete it. First
1155 // print it if necessary.
1156 tl_assert(-1 != min_j); // Check we found a minimum.
1157 min_snapshot = & snapshots[ min_j ];
1158 if (VG_(clo_verbosity) > 1) {
1159 Char buf[64];
1160 VG_(snprintf)(buf, 64, " %3d (t-span = %lld)", i, min_timespan);
1161 VERB_snapshot(2, buf, min_j);
1162 }
1163 delete_snapshot(min_snapshot);
1164 n_deleted++;
1165 }
1166
1167 // Slide down the remaining snapshots over the removed ones. First set i
1168 // to point to the first empty slot, and j to the first full slot after
1169 // i. Then slide everything down.
1170 for (i = 0; is_snapshot_in_use( &snapshots[i] ); i++) { }
1171 for (j = i; !is_snapshot_in_use( &snapshots[j] ); j++) { }
1172 for ( ; j < clo_max_snapshots; j++) {
1173 if (is_snapshot_in_use( &snapshots[j] )) {
1174 snapshots[i++] = snapshots[j];
1175 clear_snapshot(&snapshots[j], /*do_sanity_check*/True);
1176 }
1177 }
1178 next_snapshot_i = i;
1179
1180 // Check snapshots array looks ok after changes.
1181 sanity_check_snapshots_array();
1182
1183 // Find the minimum timespan remaining; that will be our new minimum
1184 // time interval. Note that above we were finding timespans by measuring
1185 // two intervals around a snapshot that was under consideration for
1186 // deletion. Here we only measure single intervals because all the
1187 // deletions have occurred.
1188 //
1189 // But we have to be careful -- some snapshots (eg. snapshot 0, and the
1190 // peak snapshot) are uncullable. If two uncullable snapshots end up
1191 // next to each other, they'll never be culled (assuming the peak doesn't
1192 // change), and the time gap between them will not change. However, the
1193 // time between the remaining cullable snapshots will grow ever larger.
1194 // This means that the min_timespan found will always be that between the
1195 // two uncullable snapshots, and it will be much smaller than it should
1196 // be. To avoid this problem, when computing the minimum timespan, we
1197 // ignore any timespans between two uncullable snapshots.
1198 tl_assert(next_snapshot_i > 1);
1199 min_timespan = 0x7fffffffffffffffLL;
1200 min_timespan_i = -1;
1201 for (i = 1; i < next_snapshot_i; i++) {
1202 if (is_uncullable_snapshot(&snapshots[i]) &&
1203 is_uncullable_snapshot(&snapshots[i-1]))
1204 {
1205 VERB(2, "(Ignoring interval %d--%d when computing minimum)", i-1, i);
1206 } else {
1207 Time timespan = snapshots[i].time - snapshots[i-1].time;
1208 tl_assert(timespan >= 0);
1209 if (timespan < min_timespan) {
1210 min_timespan = timespan;
1211 min_timespan_i = i;
1212 }
1213 }
1214 }
1215 tl_assert(-1 != min_timespan_i); // Check we found a minimum.
1216
1217 // Print remaining snapshots, if necessary.
1218 if (VG_(clo_verbosity) > 1) {
1219 VERB(2, "Finished culling (%3d of %3d deleted)",
1220 n_deleted, clo_max_snapshots);
1221 for (i = 0; i < next_snapshot_i; i++) {
1222 VERB_snapshot(2, " post-cull", i);
1223 }
1224 VERB(2, "New time interval = %lld (between snapshots %d and %d)",
1225 min_timespan, min_timespan_i-1, min_timespan_i);
1226 }
1227
1228 return min_timespan;
1229}
1230
1231static Time get_time(void)
1232{
1233 // Get current time, in whatever time unit we're using.
1234 if (clo_time_unit == TimeMS) {
1235 // Some stuff happens between the millisecond timer being initialised
1236 // to zero and us taking our first snapshot. We determine that time
1237 // gap so we can subtract it from all subsequent times so that our
1238 // first snapshot is considered to be at t = 0ms. Unfortunately, a
1239 // bunch of symbols get read after the first snapshot is taken but
1240 // before the second one (which is triggered by the first allocation),
1241 // so when the time-unit is 'ms' we always have a big gap between the
1242 // first two snapshots. But at least users won't have to wonder why
1243 // the first snapshot isn't at t=0.
1244 static Bool is_first_get_time = True;
1245 static Time start_time_ms;
1246 if (is_first_get_time) {
1247 start_time_ms = VG_(read_millisecond_timer)();
1248 is_first_get_time = False;
1249 return 0;
1250 } else {
1251 return VG_(read_millisecond_timer)() - start_time_ms;
1252 }
1253 } else if (clo_time_unit == TimeB) {
1254 return total_allocs_deallocs_szB;
1255 } else {
1256 tl_assert2(0, "bad --time-unit value");
1257 }
1258}
1259
1260// Take a snapshot, and only that -- decisions on whether to take a
1261// snapshot, or what kind of snapshot, are made elsewhere.
1262static void
1263take_snapshot(Snapshot* snapshot, SnapshotKind kind, Time time,
1264 Bool is_detailed, Char* what)
1265{
1266 tl_assert(!is_snapshot_in_use(snapshot));
1267 tl_assert(have_started_executing_code);
1268
1269 // Heap and heap admin.
1270 if (clo_heap) {
1271 snapshot->heap_szB = heap_szB;
1272 if (is_detailed) {
1273 // XXX: total_szB computed in various places -- factor it out
1274 SizeT total_szB = heap_szB + clo_heap_admin*n_heap_blocks + stacks_szB;
1275 snapshot->alloc_sxpt = dup_XTree(alloc_xpt, total_szB);
1276 tl_assert( alloc_xpt->szB == heap_szB);
1277 tl_assert(snapshot->alloc_sxpt->szB == heap_szB);
1278 }
1279 snapshot->heap_admin_szB = clo_heap_admin * n_heap_blocks;
1280 }
1281
1282 // Stack(s).
1283 if (clo_stacks) {
1284 snapshot->stacks_szB = stacks_szB;
1285 }
1286
1287 // Rest of snapshot.
1288 snapshot->kind = kind;
1289 snapshot->time = time;
1290 sanity_check_snapshot(snapshot);
1291
1292 // Update stats.
1293 if (Peak == kind) n_peak_snapshots++;
1294 if (is_detailed) n_detailed_snapshots++;
1295 n_real_snapshots++;
1296}
1297
1298
1299// Take a snapshot, if it's time, or if we've hit a peak.
1300static void
1301maybe_take_snapshot(SnapshotKind kind, Char* what)
1302{
1303 // 'min_time_interval' is the minimum time interval between snapshots.
1304 // If we try to take a snapshot and less than this much time has passed,
1305 // we don't take it. It gets larger as the program runs longer. It's
1306 // initialised to zero so that we begin by taking snapshots as quickly as
1307 // possible.
1308 static Time min_time_interval = 0;
1309 // Zero allows startup snapshot.
1310 static Time earliest_possible_time_of_next_snapshot = 0;
1311 static Int n_snapshots_since_last_detailed = 0;
1312 static Int n_skipped_snapshots_since_last_snapshot = 0;
1313
1314 Snapshot* snapshot;
1315 Bool is_detailed;
1316 Time time = get_time();
1317
1318 switch (kind) {
1319 case Normal:
1320 // Only do a snapshot if it's time.
1321 if (time < earliest_possible_time_of_next_snapshot) {
1322 n_skipped_snapshots++;
1323 n_skipped_snapshots_since_last_snapshot++;
1324 return;
1325 }
1326 is_detailed = (clo_detailed_freq-1 == n_snapshots_since_last_detailed);
1327 break;
1328
1329 case Peak: {
1330 // Because we're about to do a deallocation, we're coming down from a
1331 // local peak. If it is (a) actually a global peak, and (b) a certain
1332 // amount bigger than the previous peak, then we take a peak snapshot.
1333 // By not taking a snapshot for every peak, we save a lot of effort --
1334 // because many peaks remain peak only for a short time.
1335 SizeT total_szB = heap_szB + clo_heap_admin*n_heap_blocks + stacks_szB;
1336 SizeT excess_szB_for_new_peak =
1337 (((ULong)peak_snapshot_total_szB) * clo_peak_inaccuracy) / 10000ULL;
1338 if (total_szB <= peak_snapshot_total_szB + excess_szB_for_new_peak) {
1339 return;
1340 }
1341 is_detailed = True;
1342 break;
1343 }
1344
1345 default:
1346 tl_assert2(0, "maybe_take_snapshot: unrecognised snapshot kind");
1347 }
1348
1349 // Take the snapshot.
1350 snapshot = & snapshots[next_snapshot_i];
1351 take_snapshot(snapshot, kind, time, is_detailed, what);
1352
1353 // Record if it was detailed.
1354 if (is_detailed) {
1355 n_snapshots_since_last_detailed = 0;
1356 } else {
1357 n_snapshots_since_last_detailed++;
1358 }
1359
1360 // Update peak data, if it's a Peak snapshot.
1361 if (Peak == kind) {
1362 Int i, number_of_peaks_snapshots_found = 0;
1363
1364 // Sanity check the size, then update our recorded peak.
1365 SizeT snapshot_total_szB =
1366 snapshot->heap_szB + snapshot->heap_admin_szB + snapshot->stacks_szB;
1367 tl_assert2(snapshot_total_szB > peak_snapshot_total_szB,
1368 "%ld, %ld\n", snapshot_total_szB, peak_snapshot_total_szB);
1369 peak_snapshot_total_szB = snapshot_total_szB;
1370
1371 // Find the old peak snapshot, if it exists, and mark it as normal.
1372 for (i = 0; i < next_snapshot_i; i++) {
1373 if (Peak == snapshots[i].kind) {
1374 snapshots[i].kind = Normal;
1375 number_of_peaks_snapshots_found++;
1376 }
1377 }
1378 tl_assert(number_of_peaks_snapshots_found <= 1);
1379 }
1380
1381 // Finish up verbosity and stats stuff.
1382 if (n_skipped_snapshots_since_last_snapshot > 0) {
1383 VERB(2, " (skipped %d snapshot%s)",
1384 n_skipped_snapshots_since_last_snapshot,
1385 ( n_skipped_snapshots_since_last_snapshot == 1 ? "" : "s") );
1386 }
1387 VERB_snapshot(2, what, next_snapshot_i);
1388 n_skipped_snapshots_since_last_snapshot = 0;
1389
1390 // Cull the entries, if our snapshot table is full.
1391 next_snapshot_i++;
1392 if (clo_max_snapshots == next_snapshot_i) {
1393 min_time_interval = cull_snapshots();
1394 }
1395
1396 // Work out the earliest time when the next snapshot can happen.
1397 earliest_possible_time_of_next_snapshot = time + min_time_interval;
1398}
1399
1400
1401//------------------------------------------------------------//
1402//--- Sanity checking ---//
1403//------------------------------------------------------------//
1404
1405static Bool ms_cheap_sanity_check ( void )
1406{
1407 return True; // Nothing useful we can cheaply check.
1408}
1409
1410static Bool ms_expensive_sanity_check ( void )
1411{
1412 sanity_check_XTree(alloc_xpt, /*parent*/NULL);
1413 sanity_check_snapshots_array();
1414 return True;
1415}
1416
1417
1418//------------------------------------------------------------//
1419//--- Heap management ---//
1420//------------------------------------------------------------//
1421
1422// Metadata for heap blocks. Each one contains a pointer to a bottom-XPt,
1423// which is a foothold into the XCon at which it was allocated. From
1424// HP_Chunks, XPt 'space' fields are incremented (at allocation) and
1425// decremented (at deallocation).
1426//
1427// Nb: first two fields must match core's VgHashNode.
1428typedef
1429 struct _HP_Chunk {
1430 struct _HP_Chunk* next;
1431 Addr data; // Ptr to actual block
1432 SizeT szB; // Size requested
1433 XPt* where; // Where allocated; bottom-XPt
1434 }
1435 HP_Chunk;
1436
1437static VgHashTable malloc_list = NULL; // HP_Chunks
1438
1439static void update_alloc_stats(SSizeT szB_delta)
1440{
1441 // Update total_allocs_deallocs_szB.
1442 if (szB_delta < 0) szB_delta = -szB_delta;
1443 total_allocs_deallocs_szB += szB_delta;
1444}
1445
1446static void update_heap_stats(SSizeT heap_szB_delta, Int n_heap_blocks_delta)
1447{
1448 if (n_heap_blocks_delta<0) tl_assert(n_heap_blocks >= -n_heap_blocks_delta);
1449 if (heap_szB_delta <0) tl_assert(heap_szB >= -heap_szB_delta );
1450 n_heap_blocks += n_heap_blocks_delta;
1451 heap_szB += heap_szB_delta;
1452
1453 update_alloc_stats(heap_szB_delta + clo_heap_admin*n_heap_blocks_delta);
1454}
nethercotec9f36922004-02-14 16:40:02 +00001455
nethercote159dfef2004-09-13 13:27:30 +00001456static
njn734b8052007-11-01 04:40:37 +00001457void* new_block ( ThreadId tid, void* p, SizeT szB, SizeT alignB,
njn57735902004-11-25 18:04:54 +00001458 Bool is_zeroed )
nethercotec9f36922004-02-14 16:40:02 +00001459{
1460 HP_Chunk* hc;
njn734b8052007-11-01 04:40:37 +00001461 Bool is_custom_alloc = (NULL != p);
1462 if (szB < 0) return NULL;
nethercotec9f36922004-02-14 16:40:02 +00001463
nethercote57e36b32004-07-10 14:56:28 +00001464 // Allocate and zero if necessary
1465 if (!p) {
njn734b8052007-11-01 04:40:37 +00001466 p = VG_(cli_malloc)( alignB, szB );
nethercote57e36b32004-07-10 14:56:28 +00001467 if (!p) {
nethercote57e36b32004-07-10 14:56:28 +00001468 return NULL;
1469 }
njn734b8052007-11-01 04:40:37 +00001470 if (is_zeroed) VG_(memset)(p, 0, szB);
nethercote57e36b32004-07-10 14:56:28 +00001471 }
1472
njnf1c5def2005-08-11 02:17:07 +00001473 // Make new HP_Chunk node, add to malloc_list
nethercote57e36b32004-07-10 14:56:28 +00001474 hc = VG_(malloc)(sizeof(HP_Chunk));
njn734b8052007-11-01 04:40:37 +00001475 hc->szB = szB;
nethercote57e36b32004-07-10 14:56:28 +00001476 hc->data = (Addr)p;
njn734b8052007-11-01 04:40:37 +00001477 hc->where = NULL;
njn246a9d22005-08-14 06:24:20 +00001478 VG_(HT_add_node)(malloc_list, hc);
nethercote57e36b32004-07-10 14:56:28 +00001479
njn734b8052007-11-01 04:40:37 +00001480 if (clo_heap) {
1481 VERB(3, "<<< new_mem_heap (%lu)", szB);
1482
1483 // Update statistics.
1484 n_heap_allocs++;
1485
1486 // Update heap stats.
1487 update_heap_stats(hc->szB, /*n_heap_blocks_delta*/1);
1488
1489 // Update XTree.
1490 hc->where = get_XCon( tid, is_custom_alloc );
1491 update_XCon(hc->where, szB);
1492
1493 // Maybe take a snapshot.
1494 maybe_take_snapshot(Normal, " alloc");
1495
1496 VERB(3, ">>>");
1497 }
nethercotec9f36922004-02-14 16:40:02 +00001498
nethercotec9f36922004-02-14 16:40:02 +00001499 return p;
1500}
1501
1502static __inline__
1503void die_block ( void* p, Bool custom_free )
1504{
njnf1c5def2005-08-11 02:17:07 +00001505 HP_Chunk* hc;
njn734b8052007-11-01 04:40:37 +00001506 SizeT die_szB;
nethercotec9f36922004-02-14 16:40:02 +00001507
njnf1c5def2005-08-11 02:17:07 +00001508 // Remove HP_Chunk from malloc_list
njn9a463242005-08-16 03:29:50 +00001509 hc = VG_(HT_remove)(malloc_list, (UWord)p);
njn734b8052007-11-01 04:40:37 +00001510 if (NULL == hc) {
njn5cc5d7e2005-08-11 02:09:25 +00001511 return; // must have been a bogus free()
njn734b8052007-11-01 04:40:37 +00001512 }
1513 die_szB = hc->szB;
nethercotec9f36922004-02-14 16:40:02 +00001514
njn734b8052007-11-01 04:40:37 +00001515 if (clo_heap) {
1516 VERB(3, "<<< die_mem_heap");
nethercotec9f36922004-02-14 16:40:02 +00001517
njn734b8052007-11-01 04:40:37 +00001518 // Update statistics
1519 n_heap_frees++;
nethercote57e36b32004-07-10 14:56:28 +00001520
njn734b8052007-11-01 04:40:37 +00001521 // Maybe take a peak snapshot, since it's a deallocation.
1522 maybe_take_snapshot(Peak, "de-PEAK");
1523
1524 // Update heap stats.
1525 update_heap_stats(-die_szB, /*n_heap_blocks_delta*/-1);
1526
1527 // Update XTree.
1528 update_XCon(hc->where, -hc->szB);
1529
1530 // Maybe take a snapshot.
1531 maybe_take_snapshot(Normal, "dealloc");
1532
1533 VERB(3, ">>> (-%lu)", die_szB);
1534 }
1535
1536 // Actually free the chunk, and the heap block (if necessary)
1537 VG_(free)( hc ); hc = NULL;
nethercotec9f36922004-02-14 16:40:02 +00001538 if (!custom_free)
1539 VG_(cli_free)( p );
nethercotec9f36922004-02-14 16:40:02 +00001540}
1541
njn734b8052007-11-01 04:40:37 +00001542static __inline__
1543void* renew_block ( ThreadId tid, void* p_old, SizeT new_szB )
nethercotec9f36922004-02-14 16:40:02 +00001544{
njn734b8052007-11-01 04:40:37 +00001545 HP_Chunk* hc;
1546 void* p_new;
1547 SizeT old_szB;
1548 XPt *old_where, *new_where;
1549
1550 // Remove the old block
1551 hc = VG_(HT_remove)(malloc_list, (UWord)p_old);
1552 if (hc == NULL) {
1553 return NULL; // must have been a bogus realloc()
1554 }
1555
1556 old_szB = hc->szB;
1557
1558 if (clo_heap) {
1559 VERB(3, "<<< renew_mem_heap (%lu)", new_szB);
1560
1561 // Update statistics
1562 n_heap_reallocs++;
1563
1564 // Maybe take a peak snapshot, if it's (effectively) a deallocation.
1565 if (new_szB < old_szB) {
1566 maybe_take_snapshot(Peak, "re-PEAK");
1567 }
1568
1569 // Update heap stats.
1570 update_heap_stats(new_szB - old_szB, /*n_heap_blocks_delta*/0);
1571 }
1572
1573 // Actually do the allocation, if necessary.
1574 if (new_szB <= old_szB) {
1575 // new size is smaller or same; block not moved
1576 p_new = p_old;
1577
1578 } else {
1579 // new size is bigger; make new block, copy shared contents, free old
1580 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_szB);
1581 if (p_new) {
1582 VG_(memcpy)(p_new, p_old, old_szB);
1583 VG_(cli_free)(p_old);
1584 }
1585 }
1586
1587 if (p_new) {
1588 // Update HP_Chunk.
1589 hc->data = (Addr)p_new;
1590 hc->szB = new_szB;
1591 old_where = hc->where;
1592 hc->where = NULL;
1593
1594 // Update XTree.
1595 if (clo_heap) {
1596 new_where = get_XCon( tid, /*custom_malloc*/False);
1597 hc->where = new_where;
1598 update_XCon(old_where, -old_szB);
1599 update_XCon(new_where, new_szB);
1600 }
1601 }
1602
1603 // Now insert the new hc (with a possibly new 'data' field) into
1604 // malloc_list. If this realloc() did not increase the memory size, we
1605 // will have removed and then re-added hc unnecessarily. But that's ok
1606 // because shrinking a block with realloc() is (presumably) much rarer
1607 // than growing it, and this way simplifies the growing case.
1608 VG_(HT_add_node)(malloc_list, hc);
1609
1610 // Maybe take a snapshot.
1611 if (clo_heap) {
1612 maybe_take_snapshot(Normal, "realloc");
1613
1614 VERB(3, ">>> (%ld)", new_szB - old_szB);
1615 }
1616
1617 return p_new;
nethercotec9f36922004-02-14 16:40:02 +00001618}
1619
njn734b8052007-11-01 04:40:37 +00001620
1621//------------------------------------------------------------//
1622//--- malloc() et al replacement wrappers ---//
1623//------------------------------------------------------------//
1624
1625static void* ms_malloc ( ThreadId tid, SizeT szB )
nethercotec9f36922004-02-14 16:40:02 +00001626{
njn734b8052007-11-01 04:40:37 +00001627 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +00001628}
1629
njn734b8052007-11-01 04:40:37 +00001630static void* ms___builtin_new ( ThreadId tid, SizeT szB )
nethercotec9f36922004-02-14 16:40:02 +00001631{
njn734b8052007-11-01 04:40:37 +00001632 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
nethercotec9f36922004-02-14 16:40:02 +00001633}
1634
njn734b8052007-11-01 04:40:37 +00001635static void* ms___builtin_vec_new ( ThreadId tid, SizeT szB )
fitzhardinge51f3ff12004-03-04 22:42:03 +00001636{
njn734b8052007-11-01 04:40:37 +00001637 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
1638}
1639
1640static void* ms_calloc ( ThreadId tid, SizeT m, SizeT szB )
1641{
1642 return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
1643}
1644
1645static void *ms_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
1646{
1647 return new_block( tid, NULL, szB, alignB, False );
fitzhardinge51f3ff12004-03-04 22:42:03 +00001648}
1649
njn51d827b2005-05-09 01:02:08 +00001650static void ms_free ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +00001651{
1652 die_block( p, /*custom_free*/False );
1653}
1654
njn51d827b2005-05-09 01:02:08 +00001655static void ms___builtin_delete ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +00001656{
1657 die_block( p, /*custom_free*/False);
1658}
1659
njn51d827b2005-05-09 01:02:08 +00001660static void ms___builtin_vec_delete ( ThreadId tid, void* p )
nethercotec9f36922004-02-14 16:40:02 +00001661{
1662 die_block( p, /*custom_free*/False );
1663}
1664
njn734b8052007-11-01 04:40:37 +00001665static void* ms_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
nethercotec9f36922004-02-14 16:40:02 +00001666{
njn734b8052007-11-01 04:40:37 +00001667 return renew_block(tid, p_old, new_szB);
nethercotec9f36922004-02-14 16:40:02 +00001668}
1669
1670
njn734b8052007-11-01 04:40:37 +00001671//------------------------------------------------------------//
1672//--- Stacks ---//
1673//------------------------------------------------------------//
nethercotec9f36922004-02-14 16:40:02 +00001674
njn734b8052007-11-01 04:40:37 +00001675// We really want the inlining to occur...
1676#define INLINE inline __attribute__((always_inline))
nethercotec9f36922004-02-14 16:40:02 +00001677
njn734b8052007-11-01 04:40:37 +00001678static void update_stack_stats(SSizeT stack_szB_delta)
nethercotec9f36922004-02-14 16:40:02 +00001679{
njn734b8052007-11-01 04:40:37 +00001680 if (stack_szB_delta < 0) tl_assert(stacks_szB >= -stack_szB_delta);
1681 stacks_szB += stack_szB_delta;
nethercotec9f36922004-02-14 16:40:02 +00001682
njn734b8052007-11-01 04:40:37 +00001683 update_alloc_stats(stack_szB_delta);
nethercotec9f36922004-02-14 16:40:02 +00001684}
1685
njn734b8052007-11-01 04:40:37 +00001686static INLINE void new_mem_stack_2(Addr a, SizeT len, Char* what)
nethercotec9f36922004-02-14 16:40:02 +00001687{
njn734b8052007-11-01 04:40:37 +00001688 if (have_started_executing_code) {
1689 VERB(3, "<<< new_mem_stack (%ld)", len);
1690 n_stack_allocs++;
1691 update_stack_stats(len);
1692 maybe_take_snapshot(Normal, what);
1693 VERB(3, ">>>");
nethercotec9f36922004-02-14 16:40:02 +00001694 }
nethercotec9f36922004-02-14 16:40:02 +00001695}
1696
njn734b8052007-11-01 04:40:37 +00001697static INLINE void die_mem_stack_2(Addr a, SizeT len, Char* what)
nethercotec9f36922004-02-14 16:40:02 +00001698{
njn734b8052007-11-01 04:40:37 +00001699 if (have_started_executing_code) {
1700 VERB(3, "<<< die_mem_stack (%ld)", -len);
1701 n_stack_frees++;
1702 maybe_take_snapshot(Peak, "stkPEAK");
1703 update_stack_stats(-len);
1704 maybe_take_snapshot(Normal, what);
1705 VERB(3, ">>>");
nethercotec9f36922004-02-14 16:40:02 +00001706 }
nethercotec9f36922004-02-14 16:40:02 +00001707}
1708
njn734b8052007-11-01 04:40:37 +00001709static void new_mem_stack(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001710{
njn734b8052007-11-01 04:40:37 +00001711 new_mem_stack_2(a, len, "stk-new");
1712}
nethercotec9f36922004-02-14 16:40:02 +00001713
njn734b8052007-11-01 04:40:37 +00001714static void die_mem_stack(Addr a, SizeT len)
1715{
1716 die_mem_stack_2(a, len, "stk-die");
1717}
nethercotec9f36922004-02-14 16:40:02 +00001718
nethercote8b5f40c2004-11-02 13:29:50 +00001719static void new_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001720{
njn734b8052007-11-01 04:40:37 +00001721 new_mem_stack_2(a, len, "sig-new");
nethercotec9f36922004-02-14 16:40:02 +00001722}
1723
nethercote8b5f40c2004-11-02 13:29:50 +00001724static void die_mem_stack_signal(Addr a, SizeT len)
nethercotec9f36922004-02-14 16:40:02 +00001725{
njn734b8052007-11-01 04:40:37 +00001726 die_mem_stack_2(a, len, "sig-die");
nethercotec9f36922004-02-14 16:40:02 +00001727}
1728
njn734b8052007-11-01 04:40:37 +00001729
1730//------------------------------------------------------------//
1731//--- Client Requests ---//
1732//------------------------------------------------------------//
nethercotec9f36922004-02-14 16:40:02 +00001733
njn51d827b2005-05-09 01:02:08 +00001734static Bool ms_handle_client_request ( ThreadId tid, UWord* argv, UWord* ret )
nethercotec9f36922004-02-14 16:40:02 +00001735{
1736 switch (argv[0]) {
1737 case VG_USERREQ__MALLOCLIKE_BLOCK: {
nethercote57e36b32004-07-10 14:56:28 +00001738 void* res;
njn734b8052007-11-01 04:40:37 +00001739 void* p = (void*)argv[1];
1740 SizeT szB = argv[2];
1741 res = new_block( tid, p, szB, /*alignB--ignored*/0, /*is_zeroed*/False );
njnca82cc02004-11-22 17:18:48 +00001742 tl_assert(res == p);
njn734b8052007-11-01 04:40:37 +00001743 *ret = 0;
nethercotec9f36922004-02-14 16:40:02 +00001744 return True;
1745 }
1746 case VG_USERREQ__FREELIKE_BLOCK: {
njn734b8052007-11-01 04:40:37 +00001747 void* p = (void*)argv[1];
nethercotec9f36922004-02-14 16:40:02 +00001748 die_block( p, /*custom_free*/True );
njn734b8052007-11-01 04:40:37 +00001749 *ret = 0;
nethercotec9f36922004-02-14 16:40:02 +00001750 return True;
1751 }
1752 default:
1753 *ret = 0;
1754 return False;
1755 }
1756}
1757
njn734b8052007-11-01 04:40:37 +00001758//------------------------------------------------------------//
1759//--- Instrumentation ---//
1760//------------------------------------------------------------//
nethercotec9f36922004-02-14 16:40:02 +00001761
sewardj4ba057c2005-10-18 12:04:18 +00001762static
sewardj0b9d74a2006-12-24 02:24:11 +00001763IRSB* ms_instrument ( VgCallbackClosure* closure,
njn734b8052007-11-01 04:40:37 +00001764 IRSB* bb_in,
1765 VexGuestLayout* layout,
sewardj461df9c2006-01-17 02:06:39 +00001766 VexGuestExtents* vge,
sewardj4ba057c2005-10-18 12:04:18 +00001767 IRType gWordTy, IRType hWordTy )
nethercotec9f36922004-02-14 16:40:02 +00001768{
njn734b8052007-11-01 04:40:37 +00001769 if (! have_started_executing_code) {
1770 // Do an initial sample to guarantee that we have at least one.
1771 // We use 'maybe_take_snapshot' instead of 'take_snapshot' to ensure
1772 // 'maybe_take_snapshot's internal static variables are initialised.
1773 have_started_executing_code = True;
1774 maybe_take_snapshot(Normal, "startup");
1775 }
njnee8a5862004-11-22 21:08:46 +00001776 return bb_in;
nethercotec9f36922004-02-14 16:40:02 +00001777}
1778
nethercotec9f36922004-02-14 16:40:02 +00001779
njn734b8052007-11-01 04:40:37 +00001780//------------------------------------------------------------//
1781//--- Writing snapshots ---//
1782//------------------------------------------------------------//
nethercotec9f36922004-02-14 16:40:02 +00001783
njn734b8052007-11-01 04:40:37 +00001784// XXX: do the filename properly, eventually
1785static Char* massif_out_file = "massif.out";
nethercotec9f36922004-02-14 16:40:02 +00001786
njn734b8052007-11-01 04:40:37 +00001787#define FP_BUF_SIZE 1024
1788Char FP_buf[FP_BUF_SIZE];
nethercotec9f36922004-02-14 16:40:02 +00001789
njn734b8052007-11-01 04:40:37 +00001790// XXX: implement f{,n}printf in m_libcprint.c eventually, and use it here.
1791// Then change Cachegrind to use it too.
1792#define FP(format, args...) ({ \
1793 VG_(snprintf)(FP_buf, FP_BUF_SIZE, format, ##args); \
1794 VG_(write)(fd, (void*)FP_buf, VG_(strlen)(FP_buf)); \
1795})
nethercotec9f36922004-02-14 16:40:02 +00001796
1797// Nb: uses a static buffer, each call trashes the last string returned.
njn734b8052007-11-01 04:40:37 +00001798static Char* make_perc(ULong x, ULong y)
nethercotec9f36922004-02-14 16:40:02 +00001799{
1800 static Char mbuf[32];
njn734b8052007-11-01 04:40:37 +00001801
1802// tl_assert(x <= y); XXX; put back in later...
1803
1804// XXX: I'm not confident that VG_(percentify) works as it should...
1805 VG_(percentify)(x, y, 2, 6, mbuf);
1806 // XXX: this is bogus if the denominator was zero -- resulting string is
1807 // something like "0 --%")
1808 if (' ' == mbuf[0]) mbuf[0] = '0';
nethercotec9f36922004-02-14 16:40:02 +00001809 return mbuf;
1810}
1811
njn734b8052007-11-01 04:40:37 +00001812static void pp_snapshot_SXPt(Int fd, SXPt* sxpt, Int depth, Char* depth_str,
1813 Int depth_str_len,
1814 SizeT snapshot_heap_szB, SizeT snapshot_total_szB)
nethercotec9f36922004-02-14 16:40:02 +00001815{
njn734b8052007-11-01 04:40:37 +00001816 #define BUF_LEN 1024
1817 Int i, n_insig_children_sxpts;
1818 Char* perc;
1819 Char ip_desc_array[BUF_LEN];
1820 Char* ip_desc = ip_desc_array;
1821 SXPt* pred = NULL;
1822 SXPt* child = NULL;
nethercotec9f36922004-02-14 16:40:02 +00001823
njn734b8052007-11-01 04:40:37 +00001824 switch (sxpt->tag) {
1825 case SigSXPt:
1826 // Print the SXPt itself.
1827 if (sxpt->Sig.ip == 0) {
1828 ip_desc =
1829 "(heap allocation functions) malloc/new/new[], --alloc-fns, etc.";
1830 } else {
1831 // If it's main-or-below-main, we (if appropriate) ignore everything
1832 // below it by pretending it has no children.
1833 // XXX: get this properly. Also, don't hard-code "(below main)"
1834 // here -- look at the "(below main)"/"__libc_start_main" mess
1835 // (m_stacktrace.c and m_demangle.c).
1836 // [Nb: Josef wants --show-below-main to work for his fn entry/exit
1837 // tracing]
1838 Bool should_hide_below_main = /*!VG_(clo_show_below_main)*/True;
1839 if (should_hide_below_main &&
1840 VG_(get_fnname)(sxpt->Sig.ip, ip_desc, BUF_LEN) &&
1841 (VG_STREQ(ip_desc, "main") || VG_STREQ(ip_desc, "(below main)")))
1842 {
1843 sxpt->Sig.n_children = 0;
1844 }
1845 // We need the -1 to get the line number right, But I'm not sure why.
1846 ip_desc = VG_(describe_IP)(sxpt->Sig.ip-1, ip_desc, BUF_LEN);
1847 }
1848 perc = make_perc(sxpt->szB, snapshot_total_szB);
1849 FP("%sn%d: %lu %s\n",
1850 depth_str, sxpt->Sig.n_children, sxpt->szB, ip_desc);
nethercotec9f36922004-02-14 16:40:02 +00001851
njn734b8052007-11-01 04:40:37 +00001852 // Indent.
1853 tl_assert(depth+1 < depth_str_len-1); // -1 for end NUL char
1854 depth_str[depth+0] = ' ';
1855 depth_str[depth+1] = '\0';
1856
1857 // Sort SXPt's children by szB (reverse order: biggest to smallest).
1858 // Nb: we sort them here, rather than earlier (eg. in dup_XTree), for
1859 // two reasons. First, if we do it during dup_XTree, it can get
1860 // expensive (eg. 15% of execution time for konqueror
1861 // startup/shutdown). Second, this way we get the Insig SXPt (if one
1862 // is present) in its sorted position, not at the end.
1863 VG_(ssort)(sxpt->Sig.children, sxpt->Sig.n_children, sizeof(SXPt*),
1864 SXPt_revcmp_szB);
1865
1866 // Print the SXPt's children. They should already be in sorted order.
1867 n_insig_children_sxpts = 0;
1868 for (i = 0; i < sxpt->Sig.n_children; i++) {
1869 pred = child;
1870 child = sxpt->Sig.children[i];
1871
1872 if (InsigSXPt == child->tag)
1873 n_insig_children_sxpts++;
1874
1875 // Ok, print the child.
1876 pp_snapshot_SXPt(fd, child, depth+1, depth_str, depth_str_len,
1877 snapshot_heap_szB, snapshot_total_szB);
1878
1879 // Unindent.
1880 depth_str[depth+0] = '\0';
1881 depth_str[depth+1] = '\0';
1882 }
1883 // There should be 0 or 1 Insig children SXPts.
1884 tl_assert(n_insig_children_sxpts <= 1);
1885 break;
1886
1887 case InsigSXPt: {
1888 Char* s = ( sxpt->Insig.n_xpts == 1 ? "," : "s, all" );
1889 perc = make_perc(sxpt->szB, snapshot_total_szB);
1890 FP("%sn0: %lu in %d place%s below massif's threshold (%s)\n",
1891 depth_str, sxpt->szB, sxpt->Insig.n_xpts, s,
1892 make_perc(clo_threshold, 10000));
1893 break;
1894 }
1895
1896 default:
1897 tl_assert2(0, "pp_snapshot_SXPt: unrecognised SXPt tag");
nethercotec9f36922004-02-14 16:40:02 +00001898 }
nethercotec9f36922004-02-14 16:40:02 +00001899}
1900
njn734b8052007-11-01 04:40:37 +00001901static void pp_snapshot(Int fd, Snapshot* snapshot, Int snapshot_n)
nethercotec9f36922004-02-14 16:40:02 +00001902{
njn734b8052007-11-01 04:40:37 +00001903 sanity_check_snapshot(snapshot);
nethercotec9f36922004-02-14 16:40:02 +00001904
njn734b8052007-11-01 04:40:37 +00001905 FP("#-----------\n");
1906 FP("snapshot=%d\n", snapshot_n);
1907 FP("#-----------\n");
1908 FP("time=%lld\n", snapshot->time);
1909 FP("mem_heap_B=%lu\n", snapshot->heap_szB);
1910 FP("mem_heap_admin_B=%lu\n", snapshot->heap_admin_szB);
1911 FP("mem_stacks_B=%lu\n", snapshot->stacks_szB);
1912
1913 if (is_detailed_snapshot(snapshot)) {
1914 // Detailed snapshot -- print heap tree.
1915 Int depth_str_len = clo_depth + 3;
1916 Char* depth_str = VG_(malloc)(sizeof(Char) * depth_str_len);
1917 SizeT snapshot_total_szB =
1918 snapshot->heap_szB + snapshot->heap_admin_szB + snapshot->stacks_szB;
1919 depth_str[0] = '\0'; // Initialise depth_str to "".
1920
1921 FP("heap_tree=%s\n", ( Peak == snapshot->kind ? "peak" : "detailed" ));
1922 pp_snapshot_SXPt(fd, snapshot->alloc_sxpt, 0, depth_str,
1923 depth_str_len, snapshot->heap_szB,
1924 snapshot_total_szB);
1925
1926 VG_(free)(depth_str);
1927
1928 } else {
1929 FP("heap_tree=empty\n");
nethercote43a15ce2004-08-30 19:15:12 +00001930 }
nethercotec9f36922004-02-14 16:40:02 +00001931}
1932
njn734b8052007-11-01 04:40:37 +00001933static void write_snapshots_to_file(void)
nethercotec9f36922004-02-14 16:40:02 +00001934{
njn734b8052007-11-01 04:40:37 +00001935 Int i, fd;
sewardj92645592005-07-23 09:18:34 +00001936 SysRes sres;
nethercotec9f36922004-02-14 16:40:02 +00001937
njn734b8052007-11-01 04:40:37 +00001938 sres = VG_(open)(massif_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1939 VKI_S_IRUSR|VKI_S_IWUSR);
sewardj92645592005-07-23 09:18:34 +00001940 if (sres.isError) {
njn734b8052007-11-01 04:40:37 +00001941 // If the file can't be opened for whatever reason (conflict
1942 // between multiple cachegrinded processes?), give up now.
1943 VG_(message)(Vg_UserMsg,
1944 "error: can't open output file '%s'", massif_out_file );
1945 VG_(message)(Vg_UserMsg,
1946 " ... so profiling results will be missing.");
nethercotec9f36922004-02-14 16:40:02 +00001947 return;
sewardj92645592005-07-23 09:18:34 +00001948 } else {
sewardje8089302006-10-17 02:15:17 +00001949 fd = sres.res;
nethercotec9f36922004-02-14 16:40:02 +00001950 }
1951
njn734b8052007-11-01 04:40:37 +00001952 // Print massif-specific options that were used.
1953 // XXX: is it worth having a "desc:" line? Could just call it "options:"
1954 // -- this file format isn't as generic as Cachegrind's, so the
1955 // implied genericity of "desc:" is bogus.
1956 FP("desc:");
1957 for (i = 0; i < VG_(sizeXA)(args_for_massif); i++) {
1958 Char* arg = *(Char**)VG_(indexXA)(args_for_massif, i);
1959 FP(" %s", arg);
nethercotec9f36922004-02-14 16:40:02 +00001960 }
njn734b8052007-11-01 04:40:37 +00001961 if (0 == i) FP(" (none)");
1962 FP("\n");
nethercotec9f36922004-02-14 16:40:02 +00001963
njn734b8052007-11-01 04:40:37 +00001964 // Print "cmd:" line.
1965 FP("cmd: ");
sewardj45f4e7c2005-09-27 19:20:21 +00001966 if (VG_(args_the_exename)) {
njn734b8052007-11-01 04:40:37 +00001967 FP("%s", VG_(args_the_exename));
1968 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1969 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1970 if (arg)
1971 FP(" %s", arg);
1972 }
1973 } else {
1974 FP(" ???");
sewardj45f4e7c2005-09-27 19:20:21 +00001975 }
njn734b8052007-11-01 04:40:37 +00001976 FP("\n");
nethercotec9f36922004-02-14 16:40:02 +00001977
njn734b8052007-11-01 04:40:37 +00001978 FP("time_unit: %s\n", TimeUnit_to_string(clo_time_unit));
nethercotec9f36922004-02-14 16:40:02 +00001979
njn734b8052007-11-01 04:40:37 +00001980 for (i = 0; i < next_snapshot_i; i++) {
1981 Snapshot* snapshot = & snapshots[i];
1982 pp_snapshot(fd, snapshot, i); // Detailed snapshot!
nethercotec9f36922004-02-14 16:40:02 +00001983 }
1984}
1985
njn734b8052007-11-01 04:40:37 +00001986
1987//------------------------------------------------------------//
1988//--- Finalisation ---//
1989//------------------------------------------------------------//
1990
njn51d827b2005-05-09 01:02:08 +00001991static void ms_fini(Int exit_status)
nethercotec9f36922004-02-14 16:40:02 +00001992{
njn734b8052007-11-01 04:40:37 +00001993 // Output.
1994 write_snapshots_to_file();
nethercotec9f36922004-02-14 16:40:02 +00001995
njn734b8052007-11-01 04:40:37 +00001996 // Stats
1997 tl_assert(n_xpts > 0); // always have alloc_xpt
1998 VERB(1, "heap allocs: %u", n_heap_allocs);
1999 VERB(1, "heap reallocs: %u", n_heap_reallocs);
2000 VERB(1, "heap frees: %u", n_heap_frees);
2001 VERB(1, "stack allocs: %u", n_stack_allocs);
2002 VERB(1, "stack frees: %u", n_stack_frees);
2003 VERB(1, "XPts: %u", n_xpts);
2004 VERB(1, "top-XPts: %u (%d%%)",
2005 alloc_xpt->n_children,
2006 ( n_xpts ? alloc_xpt->n_children * 100 / n_xpts : 0));
2007 VERB(1, "XPt-init-expansions: %u", n_xpt_init_expansions);
2008 VERB(1, "XPt-later-expansions: %u", n_xpt_later_expansions);
2009 VERB(1, "SXPt allocs: %u", n_sxpt_allocs);
2010 VERB(1, "SXPt frees: %u", n_sxpt_frees);
2011 VERB(1, "skipped snapshots: %u", n_skipped_snapshots);
2012 VERB(1, "real snapshots: %u", n_real_snapshots);
2013 VERB(1, "detailed snapshots: %u", n_detailed_snapshots);
2014 VERB(1, "peak snapshots: %u", n_peak_snapshots);
2015 VERB(1, "cullings: %u", n_cullings);
2016 VERB(1, "XCon_redos: %u", n_XCon_redos);
nethercotec9f36922004-02-14 16:40:02 +00002017}
2018
njn734b8052007-11-01 04:40:37 +00002019
2020//------------------------------------------------------------//
2021//--- Initialisation ---//
2022//------------------------------------------------------------//
njn51d827b2005-05-09 01:02:08 +00002023
2024static void ms_post_clo_init(void)
2025{
njn734b8052007-11-01 04:40:37 +00002026 Int i;
njn51d827b2005-05-09 01:02:08 +00002027
njn734b8052007-11-01 04:40:37 +00002028 // Check options.
2029 if (clo_heap_admin < 0 || clo_heap_admin > 1024) {
2030 VG_(message)(Vg_UserMsg, "--heap-admin must be between 0 and 1024");
2031 VG_(err_bad_option)("--heap-admin");
2032 }
2033 if (clo_depth < 1 || clo_depth > MAX_DEPTH) {
2034 VG_(message)(Vg_UserMsg, "--depth must be between 1 and %d", MAX_DEPTH);
2035 VG_(err_bad_option)("--depth");
2036 }
2037 if (clo_threshold < 0 || clo_threshold > 10000) {
2038 VG_(message)(Vg_UserMsg, "--threshold must be between 0 and 10000");
2039 VG_(err_bad_option)("--threshold");
2040 }
2041 if (clo_detailed_freq < 1 || clo_detailed_freq > 10000) {
2042 VG_(message)(Vg_UserMsg, "--detailed-freq must be between 1 and 10000");
2043 VG_(err_bad_option)("--detailed-freq");
2044 }
2045 if (clo_max_snapshots < 10 || clo_max_snapshots > 1000) {
2046 VG_(message)(Vg_UserMsg, "--max-snapshots must be between 10 and 1000");
2047 VG_(err_bad_option)("--max-snapshots");
2048 }
2049
2050 // If we have --heap=no, set --heap-admin to zero, just to make sure we
2051 // don't accidentally use a non-zero heap-admin size somewhere.
2052 if (!clo_heap) {
2053 clo_heap_admin = 0;
2054 }
2055
2056 // Print alloc-fns, if necessary.
2057 if (VG_(clo_verbosity) > 1) {
2058 VERB(1, "alloc-fns:");
2059 for (i = 0; i < VG_(sizeXA)(alloc_fns); i++) {
2060 Char** alloc_fn_ptr = VG_(indexXA)(alloc_fns, i);
2061 VERB(1, " %d: %s", i, *alloc_fn_ptr);
2062 }
2063 }
2064
2065 // Events to track.
2066 if (clo_stacks) {
2067 VG_(track_new_mem_stack) ( new_mem_stack );
2068 VG_(track_die_mem_stack) ( die_mem_stack );
2069 VG_(track_new_mem_stack_signal) ( new_mem_stack_signal );
2070 VG_(track_die_mem_stack_signal) ( die_mem_stack_signal );
2071 }
2072
2073 // Initialise snapshot array, and sanity-check it.
2074 snapshots = VG_(malloc)(sizeof(Snapshot) * clo_max_snapshots);
2075 // We don't want to do snapshot sanity checks here, because they're
2076 // currently uninitialised.
2077 for (i = 0; i < clo_max_snapshots; i++) {
2078 clear_snapshot( & snapshots[i], /*do_sanity_check*/False );
2079 }
2080 sanity_check_snapshots_array();
njn51d827b2005-05-09 01:02:08 +00002081}
2082
tom151a6392005-11-11 12:30:36 +00002083static void ms_pre_clo_init(void)
njn734b8052007-11-01 04:40:37 +00002084{
njn51d827b2005-05-09 01:02:08 +00002085 VG_(details_name) ("Massif");
2086 VG_(details_version) (NULL);
2087 VG_(details_description) ("a space profiler");
njn9a0cba42007-04-15 22:15:57 +00002088 VG_(details_copyright_author)(
njn734b8052007-11-01 04:40:37 +00002089 "Copyright (C) 2003-2007, and GNU GPL'd, by Nicholas Nethercote");
njn51d827b2005-05-09 01:02:08 +00002090 VG_(details_bug_reports_to) (VG_BUGS_TO);
2091
2092 // Basic functions
2093 VG_(basic_tool_funcs) (ms_post_clo_init,
2094 ms_instrument,
2095 ms_fini);
2096
2097 // Needs
2098 VG_(needs_libc_freeres)();
2099 VG_(needs_command_line_options)(ms_process_cmd_line_option,
2100 ms_print_usage,
2101 ms_print_debug_usage);
2102 VG_(needs_client_requests) (ms_handle_client_request);
njn734b8052007-11-01 04:40:37 +00002103 VG_(needs_sanity_checks) (ms_cheap_sanity_check,
2104 ms_expensive_sanity_check);
njnfc51f8d2005-06-21 03:20:17 +00002105 VG_(needs_malloc_replacement) (ms_malloc,
njn51d827b2005-05-09 01:02:08 +00002106 ms___builtin_new,
2107 ms___builtin_vec_new,
2108 ms_memalign,
2109 ms_calloc,
2110 ms_free,
2111 ms___builtin_delete,
2112 ms___builtin_vec_delete,
2113 ms_realloc,
2114 0 );
2115
njn51d827b2005-05-09 01:02:08 +00002116 // HP_Chunks
njn734b8052007-11-01 04:40:37 +00002117 malloc_list = VG_(HT_construct)( "Massif's malloc list" );
njn51d827b2005-05-09 01:02:08 +00002118
2119 // Dummy node at top of the context structure.
njn734b8052007-11-01 04:40:37 +00002120 alloc_xpt = new_XPt(/*ip*/0, /*parent*/NULL);
2121
2122 // Initialise alloc_fns.
2123 init_alloc_fns();
2124
2125 // Initialise args_for_massif.
2126 args_for_massif = VG_(newXA)(VG_(malloc), VG_(free), sizeof(HChar*));
njn51d827b2005-05-09 01:02:08 +00002127
sewardj198f34f2007-07-09 23:13:07 +00002128 tl_assert( VG_(get_startup_wd)(base_dir, VKI_PATH_MAX) );
njn51d827b2005-05-09 01:02:08 +00002129}
2130
sewardj45f4e7c2005-09-27 19:20:21 +00002131VG_DETERMINE_INTERFACE_VERSION(ms_pre_clo_init)
nethercotec9f36922004-02-14 16:40:02 +00002132
njn734b8052007-11-01 04:40:37 +00002133//--------------------------------------------------------------------//
2134//--- end ---//
2135//--------------------------------------------------------------------//