Several improvements to (internal) statistics reporting about memory usage,
making it easier to understand the memory and/or oom situation.
No functional (user level) change.

* For --profile-heap=yes, sort the cost centers by decreasing size,
  so that the most relevant cost centers are close to the arena
  total.

* Factorise the duplicated code calling a series of print stat functions
  into a single function, VG_(print_all_stats).

* VG_(show_sched_status) can now optionally show:
    the host stacktrace
    the amount of valgrind stack used by each thread
    the exited threads

* various functions: update to add VG_(show_sched_status) new
  args, keeping the same info production as before.
 
* When an out of memory situation is detected by m_mallocfree.c,
  report more information:
    valgrind and tool stats
    scheduler status (full information)

* gdbserver v.info scheduler :
    show full information.

The oom behaviour/reporting was tested using a small
program causing an OOM, and having several threads
(some terminated, some still active).



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13897 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/m_gdbserver/server.c b/coregrind/m_gdbserver/server.c
index 6f45ba8..3027a8c 100644
--- a/coregrind/m_gdbserver/server.c
+++ b/coregrind/m_gdbserver/server.c
@@ -148,6 +148,30 @@
    return s;
 }
 
+void VG_(print_all_stats) (Bool memory_stats, Bool tool_stats)
+{
+   if (memory_stats) {
+      VG_(message)(Vg_DebugMsg, "\n");
+      VG_(message)(Vg_DebugMsg, 
+         "------ Valgrind's internal memory use stats follow ------\n" );
+      VG_(sanity_check_malloc_all)();
+      VG_(message)(Vg_DebugMsg, "------\n" );
+      VG_(print_all_arena_stats)();
+      if (VG_(clo_profile_heap))
+         VG_(print_arena_cc_analysis) ();
+      VG_(message)(Vg_DebugMsg, "\n");
+   }
+
+   VG_(print_translation_stats)();
+   VG_(print_tt_tc_stats)();
+   VG_(print_scheduler_stats)();
+   VG_(print_ExeContext_stats)( False /* with_stacktraces */ );
+   VG_(print_errormgr_stats)();
+   if (tool_stats && VG_(needs).print_stats) {
+      VG_TDICT_CALL(tool_print_stats);
+   }
+}
+
 /* handle_gdb_valgrind_command handles the provided mon string command.
    If command is recognised, return 1 else return 0.
    Note that in case of ambiguous command, 1 is returned.
@@ -333,18 +357,14 @@
          ret = 1;
          break;
       case  5: /* scheduler */
-         VG_(show_sched_status) ();
+         VG_(show_sched_status) (True,  // host_stacktrace
+                                 True,  // valgrind_stack_usage
+                                 True); // exited_threads
          ret = 1;
          break;
       case  6: /* stats */
-         VG_(print_translation_stats)();
-         VG_(print_tt_tc_stats)();
-         VG_(print_scheduler_stats)();
-         VG_(print_ExeContext_stats)( False /* with_stacktraces */ );
-         VG_(print_errormgr_stats)();
-         if (VG_(needs).print_stats) {
-            VG_TDICT_CALL(tool_print_stats);
-         }
+         VG_(print_all_stats)(False, /* Memory stats */
+                              True   /* Tool stats */);
          ret = 1;
          break;
       case  7: /* open_fds */
diff --git a/coregrind/m_libcassert.c b/coregrind/m_libcassert.c
index 65a373e..e88afb0 100644
--- a/coregrind/m_libcassert.c
+++ b/coregrind/m_libcassert.c
@@ -33,6 +33,7 @@
 #include "pub_core_vkiscnums.h"
 #include "pub_core_libcsetjmp.h"    // to keep threadstate.h happy
 #include "pub_core_threadstate.h"
+#include "pub_core_aspacemgr.h"
 #include "pub_core_libcbase.h"
 #include "pub_core_libcassert.h"
 #include "pub_core_libcprint.h"
@@ -245,56 +246,89 @@
 }
 
 // Print the scheduler status.
-void VG_(show_sched_status) ( void )
+static void show_sched_status_wrk ( Bool host_stacktrace,
+                                    Bool valgrind_stack_usage,
+                                    Bool exited_threads,
+                                    UnwindStartRegs* startRegsIN)
 {
    Int i; 
+   if (host_stacktrace) {
+      const Bool save_clo_xml = VG_(clo_xml);
+      Addr stacktop;
+      Addr ips[BACKTRACE_DEPTH];
+      Int  n_ips;
+      ThreadState *tst 
+         = VG_(get_ThreadState)( VG_(lwpid_to_vgtid)( VG_(gettid)() ) );
+ 
+      // If necessary, fake up an ExeContext which is of our actual real CPU
+      // state.  Could cause problems if we got the panic/exception within the
+      // execontext/stack dump/symtab code.  But it's better than nothing.
+      UnwindStartRegs startRegs;
+      VG_(memset)(&startRegs, 0, sizeof(startRegs));
+      
+      if (startRegsIN == NULL) {
+         GET_STARTREGS(&startRegs);
+      } else {
+         startRegs = *startRegsIN;
+      }
+ 
+      stacktop = tst->os_state.valgrind_stack_init_SP;
+
+      n_ips = 
+         VG_(get_StackTrace_wrk)(
+            0/*tid is unknown*/, 
+            ips, BACKTRACE_DEPTH, 
+            NULL/*array to dump SP values in*/,
+            NULL/*array to dump FP values in*/,
+            &startRegs, stacktop
+         );
+      VG_(printf)("\nhost stacktrace:\n"); 
+      VG_(clo_xml) = False;
+      VG_(pp_StackTrace) (ips, n_ips);
+      VG_(clo_xml) = save_clo_xml;
+   }
+
    VG_(printf)("\nsched status:\n"); 
    VG_(printf)("  running_tid=%d\n", VG_(get_running_tid)());
    for (i = 1; i < VG_N_THREADS; i++) {
-      if (VG_(threads)[i].status == VgTs_Empty) continue;
-      VG_(printf)( "\nThread %d: status = %s\n", i, 
-                   VG_(name_of_ThreadStatus)(VG_(threads)[i].status) );
-      VG_(get_and_pp_StackTrace)( i, BACKTRACE_DEPTH );
+      VgStack* stack 
+         = (VgStack*)VG_(threads)[i].os_state.valgrind_stack_base;
+      /* If a thread slot was never used (yet), valgrind_stack_base is 0.
+         If a thread slot is used by a thread or was used by a thread which
+         has exited, then valgrind_stack_base points to the stack base. */
+      if (VG_(threads)[i].status == VgTs_Empty
+          && (!exited_threads || stack == 0)) continue;
+      VG_(printf)("\nThread %d: status = %s\n", i, 
+                  VG_(name_of_ThreadStatus)(VG_(threads)[i].status) );
+      if (VG_(threads)[i].status != VgTs_Empty)
+         VG_(get_and_pp_StackTrace)( i, BACKTRACE_DEPTH );
+      if (valgrind_stack_usage && stack != 0)
+          VG_(printf)("valgrind stack top usage: %ld of %ld\n",
+                      VG_STACK_ACTIVE_SZB 
+                      - VG_(am_get_VgStack_unused_szB)(stack, VG_STACK_ACTIVE_SZB),
+                      (SizeT) VG_STACK_ACTIVE_SZB);
    }
    VG_(printf)("\n");
 }
 
+void VG_(show_sched_status) ( Bool host_stacktrace,
+                              Bool valgrind_stack_usage,
+                              Bool exited_threads)
+{
+   show_sched_status_wrk (host_stacktrace,
+                          valgrind_stack_usage,
+                          exited_threads,
+                          NULL);
+}
+
 __attribute__ ((noreturn))
 static void report_and_quit ( const HChar* report,
                               UnwindStartRegs* startRegsIN )
 {
-   Addr stacktop;
-   Addr ips[BACKTRACE_DEPTH];
-   Int  n_ips;
-   ThreadState *tst 
-      = VG_(get_ThreadState)( VG_(lwpid_to_vgtid)( VG_(gettid)() ) );
- 
-   // If necessary, fake up an ExeContext which is of our actual real CPU
-   // state.  Could cause problems if we got the panic/exception within the
-   // execontext/stack dump/symtab code.  But it's better than nothing.
-   UnwindStartRegs startRegs;
-   VG_(memset)(&startRegs, 0, sizeof(startRegs));
-
-   if (startRegsIN == NULL) {
-      GET_STARTREGS(&startRegs);
-   } else {
-      startRegs = *startRegsIN;
-   }
- 
-   stacktop = tst->os_state.valgrind_stack_init_SP;
-
-   n_ips = 
-      VG_(get_StackTrace_wrk)(
-         0/*tid is unknown*/, 
-         ips, BACKTRACE_DEPTH, 
-         NULL/*array to dump SP values in*/,
-         NULL/*array to dump FP values in*/,
-         &startRegs, stacktop
-      );
-   VG_(clo_xml) = False;
-   VG_(pp_StackTrace) (ips, n_ips);
- 
-   VG_(show_sched_status)();
+   show_sched_status_wrk (True,  // host_stacktrace
+                          False, // valgrind_stack_usage
+                          False, // exited_threads
+                          startRegsIN);
    VG_(printf)(
       "\n"
       "Note: see also the FAQ in the source distribution.\n"
@@ -402,7 +436,9 @@
    VG_(umsg)("\n");
    VG_(umsg)("Valgrind has to exit now.  Sorry.  Bye!\n");
    VG_(umsg)("\n");
-   VG_(show_sched_status)();
+   VG_(show_sched_status)(False,  // host_stacktrace
+                          False,  // valgrind_stack_usage
+                          False); // exited_threads
    VG_(exit)(1);
 }
 
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 82a5dde..98192de 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -73,31 +73,6 @@
 
 
 /*====================================================================*/
-/*=== Counters, for profiling purposes only                        ===*/
-/*====================================================================*/
-
-static void print_all_stats ( void )
-{
-   VG_(print_translation_stats)();
-   VG_(print_tt_tc_stats)();
-   VG_(print_scheduler_stats)();
-   VG_(print_ExeContext_stats)( False /* with_stacktraces */ );
-   VG_(print_errormgr_stats)();
-
-   // Memory stats
-   if (VG_(clo_verbosity) > 2) {
-      VG_(message)(Vg_DebugMsg, "\n");
-      VG_(message)(Vg_DebugMsg, 
-         "------ Valgrind's internal memory use stats follow ------\n" );
-      VG_(sanity_check_malloc_all)();
-      VG_(message)(Vg_DebugMsg, "------\n" );
-      VG_(print_all_arena_stats)();
-      VG_(message)(Vg_DebugMsg, "\n");
-   }
-}
-
-
-/*====================================================================*/
 /*=== Command-line: variables, processing, etc                     ===*/
 /*====================================================================*/
 
@@ -2489,7 +2464,8 @@
    VG_(sanity_check_general)( True /*include expensive checks*/ );
 
    if (VG_(clo_stats))
-      print_all_stats();
+      VG_(print_all_stats)(VG_(clo_verbosity) > 2, /* Memory stats */
+                           False /* tool prints stats in the tool fini */);
 
    /* Show a profile of the heap(s) at shutdown.  Optionally, first
       throw away all the debug info, as that makes it easy to spot
diff --git a/coregrind/m_mallocfree.c b/coregrind/m_mallocfree.c
index 43718c6..95806b4 100644
--- a/coregrind/m_mallocfree.c
+++ b/coregrind/m_mallocfree.c
@@ -40,6 +40,7 @@
 #include "pub_core_options.h"
 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
 #include "pub_core_threadstate.h"   // For VG_INVALID_THREADID
+#include "pub_core_gdbserver.h"
 #include "pub_core_transtab.h"
 #include "pub_core_tooliface.h"
 
@@ -742,11 +743,18 @@
    if (outputTrial <= 1) {
       if (outputTrial == 0) {
          outputTrial++;
+         // First print the memory stats with the aspacemgr data.
          VG_(am_show_nsegments) (0, "out_of_memory");
          VG_(print_all_arena_stats) ();
          if (VG_(clo_profile_heap))
             VG_(print_arena_cc_analysis) ();
-         /* In case we are an inner valgrind, asks the outer to report
+         // And then print some other information that might help.
+         VG_(print_all_stats) (False, /* Memory stats */
+                               True /* Tool stats */);
+         VG_(show_sched_status) (True,  // host_stacktrace
+                                 True,  // valgrind_stack_usage
+                                 True); // exited_threads
+        /* In case we are an inner valgrind, asks the outer to report
             its memory state in its log output. */
          INNER_REQUEST(VALGRIND_MONITOR_COMMAND("v.set log_output"));
          INNER_REQUEST(VALGRIND_MONITOR_COMMAND("v.info memory aspacemgr"));
@@ -1289,11 +1297,13 @@
 
 static AnCC anCCs[N_AN_CCS];
 
+/* Sorting by decreasing cost center nBytes, to have the biggest
+   cost centres at the top. */
 static Int cmp_AnCC_by_vol ( const void* v1, const void* v2 ) {
    const AnCC* ancc1 = v1;
    const AnCC* ancc2 = v2;
-   if (ancc1->nBytes < ancc2->nBytes) return -1;
-   if (ancc1->nBytes > ancc2->nBytes) return 1;
+   if (ancc1->nBytes < ancc2->nBytes) return 1;
+   if (ancc1->nBytes > ancc2->nBytes) return -1;
    return 0;
 }
 
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index 52b4d0f..8359aba 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -2101,7 +2101,9 @@
          lasttime = now;
          VG_(printf)("\n------------ Sched State at %d ms ------------\n",
                      (Int)now);
-         VG_(show_sched_status)();
+         VG_(show_sched_status)(True,  // host_stacktrace
+                                True,  // valgrind_stack_usage
+                                True); // exited_threads);
       }
    }
 
diff --git a/coregrind/pub_core_libcassert.h b/coregrind/pub_core_libcassert.h
index e08983a..0ca4e1f 100644
--- a/coregrind/pub_core_libcassert.h
+++ b/coregrind/pub_core_libcassert.h
@@ -71,8 +71,17 @@
 extern void VG_(unimplemented) ( const HChar* msg )
             __attribute__((__noreturn__));
 
-/* Show the state of all threads.  Mostly for debugging V. */
-extern void VG_(show_sched_status) ( void );
+/* Show various thread-related information, such as the
+   guest stacktrace for each thread.
+   Mostly for debugging V.
+   The following arguments activate optional output:
+     host_stacktrace : shows the host stacktrace.
+     valgrind_stack_usage : shows how much of the valgrind stack was used.
+     exited_threads : shows information for thread slots that were used
+        but whose thread has now exited. */
+extern void VG_(show_sched_status) ( Bool host_stacktrace,
+                                     Bool valgrind_stack_usage,
+                                     Bool exited_threads);
 
 #endif   // __PUB_CORE_LIBCASSERT_H
 
diff --git a/docs/xml/manual-core-adv.xml b/docs/xml/manual-core-adv.xml
index 382b86a..1e6e22e 100644
--- a/docs/xml/manual-core-adv.xml
+++ b/docs/xml/manual-core-adv.xml
@@ -1418,18 +1418,24 @@
   </listitem>
 
   <listitem>
-    <para><varname>v.info scheduler</varname> shows the state and
-    stack trace for all threads, as known by Valgrind.  This allows to
-    compare the stack traces produced by the Valgrind unwinder with
-    the stack traces produced by GDB+Valgrind gdbserver. Pay attention
-    that GDB and Valgrind scheduler status have their own thread
-    numbering scheme. To make the link between the GDB thread
-    number and the corresponding Valgrind scheduler thread number,
-    use the GDB command <computeroutput>info
-    threads</computeroutput>.  The output of this command shows the
-    GDB thread number and the valgrind 'tid'. The 'tid' is the thread number
-    output by <computeroutput>v.info scheduler</computeroutput>.
-    When using the callgrind tool, the callgrind monitor command
+    <para><varname>v.info scheduler</varname> shows various
+    information about threads. First, it outputs the host stack trace,
+    i.e. the Valgrind code being executed. Then, for each thread, it
+    outputs the thread state. For non terminated threads, the state is
+    followed by the guest (client) stack trace. Finally, for each
+    active thread or for each terminated thread slot not yet re-used,
+    it shows the max usage of the valgrind stack.</para>
+    <para>Showing the client stack traces allows to compare the stack
+    traces produced by the Valgrind unwinder with the stack traces
+    produced by GDB+Valgrind gdbserver. Pay attention that GDB and
+    Valgrind scheduler status have their own thread numbering
+    scheme. To make the link between the GDB thread number and the
+    corresponding Valgrind scheduler thread number, use the GDB
+    command <computeroutput>info threads</computeroutput>.  The output
+    of this command shows the GDB thread number and the valgrind
+    'tid'. The 'tid' is the thread number output
+    by <computeroutput>v.info scheduler</computeroutput>.  When using
+    the callgrind tool, the callgrind monitor command
     <computeroutput>status</computeroutput> outputs internal callgrind
     information about the stack/call graph it maintains.
     </para>
diff --git a/include/pub_tool_gdbserver.h b/include/pub_tool_gdbserver.h
index 0c2545f..868ca0a 100644
--- a/include/pub_tool_gdbserver.h
+++ b/include/pub_tool_gdbserver.h
@@ -186,6 +186,10 @@
                                               SizeT* szB, 
                                               HChar **ssaveptr);
 
+/* Print various statistics about Valgrind core,
+   and optionally tool and memory statistics. */
+extern void VG_(print_all_stats) (Bool memory_stats, Bool tool_stats);
+
 #endif   // __PUB_TOOL_GDBSERVER_H
 
 /*--------------------------------------------------------------------*/