perf diff: Introduce tool to show performance difference

I guess it is enough to show some examples:

[root@doppio linux-2.6-tip]# rm -f perf.data*
[root@doppio linux-2.6-tip]# ls -la perf.data*
ls: cannot access perf.data*: No such file or directory
[root@doppio linux-2.6-tip]# perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699 samples) ]
[root@doppio linux-2.6-tip]# ls -la perf.data*
-rw------- 1 root root 74440 2009-12-14 20:03 perf.data
[root@doppio linux-2.6-tip]# perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2692 samples) ]
[root@doppio linux-2.6-tip]# ls -la perf.data*
-rw------- 1 root root 74280 2009-12-14 20:03 perf.data
-rw------- 1 root root 74440 2009-12-14 20:03 perf.data.old
[root@doppio linux-2.6-tip]# perf diff | head -5
   1        -34994580     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2        -15307806         [kernel.kallsyms]   __kmalloc
   3    +1   +3665941     /lib64/libc-2.10.1.so   __GI_memmove
   4    +4  +23508995     /lib64/libc-2.10.1.so   _int_malloc
   5    +7  +38538813         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]# perf diff -p | head -5
   1        +1.00%     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2                       [kernel.kallsyms]   __kmalloc
   3    +1             /lib64/libc-2.10.1.so   __GI_memmove
   4    +4             /lib64/libc-2.10.1.so   _int_malloc
   5    +7  -1.00%         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]# perf diff -v | head -5
   1        361449551 326454971 -34994580     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2        151009241 135701435 -15307806         [kernel.kallsyms]   __kmalloc
   3    +1  101805328 105471269  +3665941     /lib64/libc-2.10.1.so   __GI_memmove
   4    +4   78041440 101550435 +23508995     /lib64/libc-2.10.1.so   _int_malloc
   5    +7   59536172  98074985 +38538813         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]# perf diff -vp | head -5
   1        9.00% 8.00% +1.00%     /lib64/libc-2.10.1.so   _IO_vfprintf_internal
   2        3.00% 3.00%                [kernel.kallsyms]   __kmalloc
   3    +1  2.00% 2.00%            /lib64/libc-2.10.1.so   __GI_memmove
   4    +4  2.00% 2.00%            /lib64/libc-2.10.1.so   _int_malloc
   5    +7  1.00% 2.00% -1.00%         [kernel.kallsyms]   __d_lookup
[root@doppio linux-2.6-tip]#

This should be enough for diffs where the system is non
volatile, i.e. when one doesn't updates binaries.

For volatile environments, stay tuned for the next perf tool
feature: a buildid cache populated by 'perf record', managed by
'perf buildid-cache' a-la ccache, and used by all the report
tools.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
LKML-Reference: <1260828571-3613-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
new file mode 100644
index 0000000..bd1ee55
--- /dev/null
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -0,0 +1,31 @@
+perf-diff(1)
+==============
+
+NAME
+----
+perf-diff - Read perf.data (created by perf record) and display the profile
+
+SYNOPSIS
+--------
+[verse]
+'perf diff' [oldfile] [newfile]
+
+DESCRIPTION
+-----------
+This command displays the performance difference among two perf.data files
+captured via perf record.
+
+If no parameters are passed it will assume perf.data.old and perf.data.
+
+OPTIONS
+-------
+-p::
+--percentage::
+	Show percentages instead of raw counters
+-v::
+--verbose::
+	Be verbose, for instance, show the raw counters in addition to the
+	diff.
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index a4cb792..87a424e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -427,6 +427,7 @@
 BUILTIN_OBJS += bench/sched-pipe.o
 BUILTIN_OBJS += bench/mem-memcpy.o
 
+BUILTIN_OBJS += builtin-diff.o
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-sched.o
 BUILTIN_OBJS += builtin-buildid-list.o
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
new file mode 100644
index 0000000..0d52801
--- /dev/null
+++ b/tools/perf/builtin-diff.c
@@ -0,0 +1,288 @@
+/*
+ * builtin-diff.c
+ *
+ * Builtin diff command: Analyze two perf.data input files, look up and read
+ * DSOs and symbol information, sort them and produce a diff.
+ */
+#include "builtin.h"
+
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/hist.h"
+#include "util/session.h"
+#include "util/sort.h"
+#include "util/symbol.h"
+#include "util/util.h"
+
+#include <stdlib.h>
+
+static char	   const *input_old = "perf.data.old",
+			 *input_new = "perf.data";
+static int	   force;
+static bool 	   show_percent;
+
+struct symbol_conf symbol_conf;
+
+static int perf_session__add_hist_entry(struct perf_session *self,
+					struct addr_location *al, u64 count)
+{
+	bool hit;
+	struct hist_entry *he = __perf_session__add_hist_entry(self, al, NULL,
+							       count, &hit);
+	if (he == NULL)
+		return -ENOMEM;
+
+	if (hit)
+		he->count += count;
+
+	return 0;
+}
+
+static int diff__process_sample_event(event_t *event, struct perf_session *session)
+{
+	struct addr_location al;
+	struct sample_data data = { .period = 1, };
+
+	dump_printf("(IP, %d): %d: %p\n", event->header.misc,
+		    event->ip.pid, (void *)(long)event->ip.ip);
+
+	if (event__preprocess_sample(event, session, &al, NULL) < 0) {
+		pr_warning("problem processing %d event, skipping it.\n",
+			   event->header.type);
+		return -1;
+	}
+
+	event__parse_sample(event, session->sample_type, &data);
+
+	if (al.sym && perf_session__add_hist_entry(session, &al, data.period)) {
+		pr_warning("problem incrementing symbol count, skipping event\n");
+		return -1;
+	}
+
+	session->events_stats.total += data.period;
+	return 0;
+}
+
+static struct perf_event_ops event_ops = {
+	.process_sample_event = diff__process_sample_event,
+	.process_mmap_event   = event__process_mmap,
+	.process_comm_event   = event__process_comm,
+	.process_exit_event   = event__process_task,
+	.process_fork_event   = event__process_task,
+	.process_lost_event   = event__process_lost,
+};
+
+static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
+						    struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	while (*p != NULL) {
+		int cmp;
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = strcmp(he->map->dso->name, iter->map->dso->name);
+		if (cmp > 0)
+			p = &(*p)->rb_left;
+		else if (cmp < 0)
+			p = &(*p)->rb_right;
+		else {
+			cmp = strcmp(he->sym->name, iter->sym->name);
+			if (cmp > 0)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+		}
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+}
+
+static void perf_session__resort_by_name(struct perf_session *self)
+{
+	unsigned long position = 1;
+	struct rb_root tmp = RB_ROOT;
+	struct rb_node *next = rb_first(&self->hists);
+
+	while (next != NULL) {
+		struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
+
+		next = rb_next(&n->rb_node);
+		rb_erase(&n->rb_node, &self->hists);
+		n->position = position++;
+		perf_session__insert_hist_entry_by_name(&tmp, n);
+	}
+
+	self->hists = tmp;
+}
+
+static struct hist_entry *
+perf_session__find_hist_entry_by_name(struct perf_session *self,
+				      struct hist_entry *he)
+{
+	struct rb_node *n = self->hists.rb_node;
+
+	while (n) {
+		struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
+		int cmp = strcmp(he->map->dso->name, iter->map->dso->name);
+
+		if (cmp > 0)
+			n = n->rb_left;
+		else if (cmp < 0)
+			n = n->rb_right;
+		else {
+			cmp = strcmp(he->sym->name, iter->sym->name);
+			if (cmp > 0)
+				n = n->rb_left;
+			else if (cmp < 0)
+				n = n->rb_right;
+			else
+				return iter;
+		}
+	}
+
+	return NULL;
+}
+
+static void perf_session__match_hists(struct perf_session *old_session,
+				      struct perf_session *new_session)
+{
+	struct rb_node *nd;
+
+	perf_session__resort_by_name(old_session);
+
+	for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) {
+		struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
+		pos->pair = perf_session__find_hist_entry_by_name(old_session, pos);
+	}
+}
+
+static size_t hist_entry__fprintf_matched(struct hist_entry *self,
+					  unsigned long pos,
+					  struct perf_session *session,
+					  struct perf_session *pair_session,
+					  FILE *fp)
+{
+	u64 old_count = 0;
+	char displacement[16];
+	size_t printed;
+
+	if (self->pair != NULL) {
+		long pdiff = (long)self->pair->position - (long)pos;
+		old_count = self->pair->count;
+		if (pdiff == 0)
+			goto blank;
+		snprintf(displacement, sizeof(displacement), "%+4ld", pdiff);
+	} else {
+blank:		memset(displacement, ' ', sizeof(displacement));
+	}
+
+	printed = fprintf(fp, "%4lu %5.5s ", pos, displacement);
+
+	if (show_percent) {
+		double old_percent = (old_count * 100) / pair_session->events_stats.total,
+		       new_percent = (self->count * 100) / session->events_stats.total;
+		double diff = old_percent - new_percent;
+
+		if (verbose)
+			printed += fprintf(fp, " %3.2f%% %3.2f%%", old_percent, new_percent);
+
+		if ((u64)diff != 0)
+			printed += fprintf(fp, " %+4.2F%%", diff);
+		else
+			printed += fprintf(fp, "       ");
+	} else {
+		if (verbose)
+			printed += fprintf(fp, " %9Lu %9Lu", old_count, self->count);
+		printed += fprintf(fp, " %+9Ld", (s64)self->count - (s64)old_count);
+	}
+
+	return printed + fprintf(fp, " %25.25s   %s\n",
+				 self->map->dso->name, self->sym->name);
+}
+
+static size_t perf_session__fprintf_matched_hists(struct perf_session *self,
+						  struct perf_session *pair,
+						  FILE *fp)
+{
+	struct rb_node *nd;
+	size_t printed = 0;
+	unsigned long pos = 1;
+
+	for (nd = rb_first(&self->hists); nd; nd = rb_next(nd)) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+		printed += hist_entry__fprintf_matched(he, pos++, self, pair, fp);
+	}
+
+	return printed;
+}
+
+static int __cmd_diff(void)
+{
+	int ret, i;
+	struct perf_session *session[2];
+
+	session[0] = perf_session__new(input_old, O_RDONLY, force, &symbol_conf);
+	session[1] = perf_session__new(input_new, O_RDONLY, force, &symbol_conf);
+	if (session[0] == NULL || session[1] == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < 2; ++i) {
+		ret = perf_session__process_events(session[i], &event_ops);
+		if (ret)
+			goto out_delete;
+		perf_session__output_resort(session[i], session[i]->events_stats.total);
+	}
+
+	perf_session__match_hists(session[0], session[1]);
+	perf_session__fprintf_matched_hists(session[1], session[0], stdout);
+out_delete:
+	for (i = 0; i < 2; ++i)
+		perf_session__delete(session[i]);
+	return ret;
+}
+
+static const char *const diff_usage[] = {
+	"perf diff [<options>] [old_file] [new_file]",
+};
+
+static const struct option options[] = {
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
+		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
+	OPT_BOOLEAN('p', "percentages", &show_percent,
+		    "Don't shorten the pathnames taking into account the cwd"),
+	OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+		    "Don't shorten the pathnames taking into account the cwd"),
+	OPT_END()
+};
+
+int cmd_diff(int argc, const char **argv, const char *prefix __used)
+{
+	if (symbol__init(&symbol_conf) < 0)
+		return -1;
+
+	setup_sorting(diff_usage, options);
+
+	argc = parse_options(argc, argv, options, diff_usage, 0);
+	if (argc) {
+		if (argc > 2)
+			usage_with_options(diff_usage, options);
+		if (argc == 2) {
+			input_old = argv[0];
+			input_new = argv[1];
+		} else
+			input_new = argv[0];
+	}
+
+	setup_pager();
+	return __cmd_diff();
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index a3d8bf6..18035b1 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -17,6 +17,7 @@
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
 extern int cmd_bench(int argc, const char **argv, const char *prefix);
 extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
+extern int cmd_diff(int argc, const char **argv, const char *prefix);
 extern int cmd_help(int argc, const char **argv, const char *prefix);
 extern int cmd_sched(int argc, const char **argv, const char *prefix);
 extern int cmd_list(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 02b09ea..71dc7c3 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -5,6 +5,7 @@
 perf-annotate			mainporcelain common
 perf-bench			mainporcelain common
 perf-buildid-list		mainporcelain common
+perf-diff			mainporcelain common
 perf-list			mainporcelain common
 perf-sched			mainporcelain common
 perf-record			mainporcelain common
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index cf64049..873e55f 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -286,6 +286,7 @@
 	const char *cmd = argv[0];
 	static struct cmd_struct commands[] = {
 		{ "buildid-list", cmd_buildid_list, 0 },
+		{ "diff",	cmd_diff,	0 },
 		{ "help",	cmd_help,	0 },
 		{ "list",	cmd_list,	0 },
 		{ "record",	cmd_record,	0 },
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index cb6151c..925f083 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -49,9 +49,13 @@
 	struct symbol		*sym;
 	u64			ip;
 	char			level;
-	struct symbol		*parent;
+	struct symbol	  *parent;
 	struct callchain_node	callchain;
-	struct rb_root		sorted_chain;
+	union {
+		unsigned long	  position;
+		struct hist_entry *pair;
+		struct rb_root	  sorted_chain;
+	};
 };
 
 enum sort_type {