perf report: Add support for callchain graph output

Currently, the printing of callchains is done in a single
vertical level, this is the "flat" mode:

8.25%  [k] copy_user_generic_string
             4.19%
                copy_user_generic_string
                generic_file_aio_read
                do_sync_read
                vfs_read
                sys_pread64
                system_call_fastpath
                pread64

This patch introduces a new "graph" mode which provides a
hierarchical output of factorized paths recursively sorted:

 8.25%  [k] copy_user_generic_string
                |
                |--4.31%-- generic_file_aio_read
                |          do_sync_read
                |          vfs_read
                |          |
                |          |--4.19%-- sys_pread64
                |          |          system_call_fastpath
                |          |          pread64
                |          |
                |           --0.12%-- sys_read
                |                     system_call_fastpath
                |                     __read
                |
                |--3.24%-- generic_file_buffered_write
                |          __generic_file_aio_write_nolock
                |          generic_file_aio_write
                |          do_sync_write
                |          reiserfs_file_write
                |          vfs_write
                |          |
                |          |--3.14%-- sys_pwrite64
                |          |          system_call_fastpath
                |          |          __pwrite64
                |          |
                |           --0.10%-- sys_write
[...]

The command line has then changed.

By providing the -c option, the callchain will output in the
flat mode by default.

But you can override it:

    perf report -c graph

or

    perf report -c flat

You can also pass the abreviated mode:

    perf report -c g

or

    perf report -c gra

will both make use of the graph mode.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1246550301-8954-3-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 3c4a91f..a9873aa 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -19,9 +19,9 @@
 #define chain_for_each_child(child, parent)	\
 	list_for_each_entry(child, &parent->children, brothers)
 
-
 static void
-rb_insert_callchain(struct rb_root *root, struct callchain_node *chain)
+rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
+		    enum chain_mode mode)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -31,10 +31,22 @@
 		parent = *p;
 		rnode = rb_entry(parent, struct callchain_node, rb_node);
 
-		if (rnode->hit < chain->hit)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
+		switch (mode) {
+		case FLAT:
+			if (rnode->hit < chain->hit)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			break;
+		case GRAPH:
+			if (rnode->cumul_hit < chain->cumul_hit)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			break;
+		default:
+			break;
+		}
 	}
 
 	rb_link_node(&chain->rb_node, parent, p);
@@ -45,15 +57,36 @@
  * Once we get every callchains from the stream, we can now
  * sort them by hit
  */
-void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node)
+void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node)
 {
 	struct callchain_node *child;
 
 	chain_for_each_child(child, node)
-		sort_chain_to_rbtree(rb_root, child);
+		sort_chain_flat(rb_root, child);
 
 	if (node->hit)
-		rb_insert_callchain(rb_root, node);
+		rb_insert_callchain(rb_root, node, FLAT);
+}
+
+static void __sort_chain_graph(struct callchain_node *node)
+{
+	struct callchain_node *child;
+
+	node->rb_root = RB_ROOT;
+	node->cumul_hit = node->hit;
+
+	chain_for_each_child(child, node) {
+		__sort_chain_graph(child);
+		rb_insert_callchain(&node->rb_root, child, GRAPH);
+		node->cumul_hit += child->cumul_hit;
+	}
+}
+
+void
+sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root)
+{
+	__sort_chain_graph(chain_root);
+	rb_root->rb_node = chain_root->rb_root.rb_node;
 }
 
 /*