perf report: Add support for callchain graph output

Currently, the printing of callchains is done in a single
vertical level, this is the "flat" mode:

8.25%  [k] copy_user_generic_string
             4.19%
                copy_user_generic_string
                generic_file_aio_read
                do_sync_read
                vfs_read
                sys_pread64
                system_call_fastpath
                pread64

This patch introduces a new "graph" mode which provides a
hierarchical output of factorized paths recursively sorted:

 8.25%  [k] copy_user_generic_string
                |
                |--4.31%-- generic_file_aio_read
                |          do_sync_read
                |          vfs_read
                |          |
                |          |--4.19%-- sys_pread64
                |          |          system_call_fastpath
                |          |          pread64
                |          |
                |           --0.12%-- sys_read
                |                     system_call_fastpath
                |                     __read
                |
                |--3.24%-- generic_file_buffered_write
                |          __generic_file_aio_write_nolock
                |          generic_file_aio_write
                |          do_sync_write
                |          reiserfs_file_write
                |          vfs_write
                |          |
                |          |--3.14%-- sys_pwrite64
                |          |          system_call_fastpath
                |          |          __pwrite64
                |          |
                |           --0.10%-- sys_write
[...]

The command line has then changed.

By providing the -c option, the callchain will output in the
flat mode by default.

But you can override it:

    perf report -c graph

or

    perf report -c flat

You can also pass the abreviated mode:

    perf report -c g

or

    perf report -c gra

will both make use of the graph mode.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1246550301-8954-3-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b44476c..0ca4638 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -59,6 +59,7 @@
 
 static int		exclude_other = 1;
 static int		callchain;
+static enum chain_mode	callchain_mode;
 
 static u64		sample_type;
 
@@ -787,8 +788,103 @@
 	return cmp;
 }
 
+static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask)
+{
+	int i;
+	size_t ret = 0;
+
+	ret += fprintf(fp, "%s", "                ");
+
+	for (i = 0; i < depth; i++)
+		if (depth_mask & (1 << i))
+			ret += fprintf(fp, "|          ");
+		else
+			ret += fprintf(fp, "           ");
+
+	ret += fprintf(fp, "\n");
+
+	return ret;
+}
 static size_t
-callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples)
+ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
+		       int depth_mask, int count, u64 total_samples,
+		       int hits)
+{
+	int i;
+	size_t ret = 0;
+
+	ret += fprintf(fp, "%s", "                ");
+	for (i = 0; i < depth; i++) {
+		if (depth_mask & (1 << i))
+			ret += fprintf(fp, "|");
+		else
+			ret += fprintf(fp, " ");
+		if (!count && i == depth - 1) {
+			double percent;
+
+			percent = hits * 100.0 / total_samples;
+			ret += fprintf(fp, "--%2.2f%%-- ", percent);
+		} else
+			ret += fprintf(fp, "%s", "          ");
+	}
+	if (chain->sym)
+		ret += fprintf(fp, "%s\n", chain->sym->name);
+	else
+		ret += fprintf(fp, "%p\n", (void *)(long)chain->ip);
+
+	return ret;
+}
+
+static size_t
+callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
+			u64 total_samples, int depth, int depth_mask)
+{
+	struct rb_node *node, *next;
+	struct callchain_node *child;
+	struct callchain_list *chain;
+	int new_depth_mask = depth_mask;
+	size_t ret = 0;
+	int i;
+
+	node = rb_first(&self->rb_root);
+	while (node) {
+		child = rb_entry(node, struct callchain_node, rb_node);
+
+		/*
+		 * The depth mask manages the output of pipes that show
+		 * the depth. We don't want to keep the pipes of the current
+		 * level for the last child of this depth
+		 */
+		next = rb_next(node);
+		if (!next)
+			new_depth_mask &= ~(1 << (depth - 1));
+
+		/*
+		 * But we keep the older depth mask for the line seperator
+		 * to keep the level link until we reach the last child
+		 */
+		ret += ipchain__fprintf_graph_line(fp, depth, depth_mask);
+		i = 0;
+		list_for_each_entry(chain, &child->val, list) {
+			if (chain->ip >= PERF_CONTEXT_MAX)
+				continue;
+			ret += ipchain__fprintf_graph(fp, chain, depth,
+						      new_depth_mask, i++,
+						      total_samples,
+						      child->cumul_hit);
+		}
+		ret += callchain__fprintf_graph(fp, child, total_samples,
+						depth + 1,
+						new_depth_mask | (1 << depth));
+		node = next;
+	}
+
+	return ret;
+}
+
+static size_t
+callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
+			u64 total_samples)
 {
 	struct callchain_list *chain;
 	size_t ret = 0;
@@ -796,7 +892,7 @@
 	if (!self)
 		return 0;
 
-	ret += callchain__fprintf(fp, self->parent, total_samples);
+	ret += callchain__fprintf_flat(fp, self->parent, total_samples);
 
 
 	list_for_each_entry(chain, &self->val, list) {
@@ -826,8 +922,13 @@
 
 		chain = rb_entry(rb_node, struct callchain_node, rb_node);
 		percent = chain->hit * 100.0 / total_samples;
-		ret += fprintf(fp, "           %6.2f%%\n", percent);
-		ret += callchain__fprintf(fp, chain, total_samples);
+		if (callchain_mode == FLAT) {
+			ret += fprintf(fp, "           %6.2f%%\n", percent);
+			ret += callchain__fprintf_flat(fp, chain, total_samples);
+		} else if (callchain_mode == GRAPH) {
+			ret += callchain__fprintf_graph(fp, chain,
+							total_samples, 1, 1);
+		}
 		ret += fprintf(fp, "\n");
 		rb_node = rb_next(rb_node);
 	}
@@ -1129,8 +1230,12 @@
 	struct rb_node *parent = NULL;
 	struct hist_entry *iter;
 
-	if (callchain)
-		sort_chain_to_rbtree(&he->sorted_chain, &he->callchain);
+	if (callchain) {
+		if (callchain_mode == FLAT)
+			sort_chain_flat(&he->sorted_chain, &he->callchain);
+		else if (callchain_mode == GRAPH)
+			sort_chain_graph(&he->sorted_chain, &he->callchain);
+	}
 
 	while (*p != NULL) {
 		parent = *p;
@@ -1702,6 +1807,26 @@
 	return rc;
 }
 
+static int
+parse_callchain_opt(const struct option *opt __used, const char *arg,
+		    int unset __used)
+{
+	callchain = 1;
+
+	if (!arg)
+		return 0;
+
+	if (!strncmp(arg, "graph", strlen(arg)))
+		callchain_mode = GRAPH;
+
+	else if (!strncmp(arg, "flat", strlen(arg)))
+		callchain_mode = FLAT;
+	else
+		return -1;
+
+	return 0;
+}
+
 static const char * const report_usage[] = {
 	"perf report [<options>] <command>",
 	NULL
@@ -1725,7 +1850,9 @@
 		   "regex filter to identify parent, see: '--sort parent'"),
 	OPT_BOOLEAN('x', "exclude-other", &exclude_other,
 		    "Only display entries with parent-match"),
-	OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"),
+	OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type",
+		     "Display callchains with output_type: flat, graph. "
+		     "Default to flat", &parse_callchain_opt, "flat"),
 	OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
 	OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]",
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 3c4a91f..a9873aa 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -19,9 +19,9 @@
 #define chain_for_each_child(child, parent)	\
 	list_for_each_entry(child, &parent->children, brothers)
 
-
 static void
-rb_insert_callchain(struct rb_root *root, struct callchain_node *chain)
+rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
+		    enum chain_mode mode)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -31,10 +31,22 @@
 		parent = *p;
 		rnode = rb_entry(parent, struct callchain_node, rb_node);
 
-		if (rnode->hit < chain->hit)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
+		switch (mode) {
+		case FLAT:
+			if (rnode->hit < chain->hit)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			break;
+		case GRAPH:
+			if (rnode->cumul_hit < chain->cumul_hit)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			break;
+		default:
+			break;
+		}
 	}
 
 	rb_link_node(&chain->rb_node, parent, p);
@@ -45,15 +57,36 @@
  * Once we get every callchains from the stream, we can now
  * sort them by hit
  */
-void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node)
+void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node)
 {
 	struct callchain_node *child;
 
 	chain_for_each_child(child, node)
-		sort_chain_to_rbtree(rb_root, child);
+		sort_chain_flat(rb_root, child);
 
 	if (node->hit)
-		rb_insert_callchain(rb_root, node);
+		rb_insert_callchain(rb_root, node, FLAT);
+}
+
+static void __sort_chain_graph(struct callchain_node *node)
+{
+	struct callchain_node *child;
+
+	node->rb_root = RB_ROOT;
+	node->cumul_hit = node->hit;
+
+	chain_for_each_child(child, node) {
+		__sort_chain_graph(child);
+		rb_insert_callchain(&node->rb_root, child, GRAPH);
+		node->cumul_hit += child->cumul_hit;
+	}
+}
+
+void
+sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root)
+{
+	__sort_chain_graph(chain_root);
+	rb_root->rb_node = chain_root->rb_root.rb_node;
 }
 
 /*
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index e9bd5e8..dfa5600 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -6,15 +6,21 @@
 #include <linux/rbtree.h>
 #include "symbol.h"
 
+enum chain_mode {
+	FLAT,
+	GRAPH
+};
 
 struct callchain_node {
 	struct callchain_node	*parent;
 	struct list_head	brothers;
 	struct list_head	children;
 	struct list_head	val;
-	struct rb_node		rb_node;
+	struct rb_node		rb_node; /* to sort nodes in an rbtree */
+	struct rb_root		rb_root; /* sorted tree of children */
 	unsigned int		val_nr;
 	u64			hit;
+	u64			cumul_hit; /* hit + hits of children */
 };
 
 struct callchain_list {
@@ -32,5 +38,6 @@
 
 void append_chain(struct callchain_node *root, struct ip_callchain *chain,
 		  struct symbol **syms);
-void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node);
+void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node);
+void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node);
 #endif