perf report: Implement --sort cpu

In a shared multi-core environment, users want to analyze why their
program was slow. In particular, if the code ran slower only on certain
CPUs due to interference from other programs or kernel threads, the user
should be able to notice that.

Sample usage:

perf record -f -a -- sleep 3
perf report --sort cpu,comm

Workload:

program is running on 16 CPUs
Experiencing interference from an antagonist only on 4 CPUs.

  Samples: 106218177676 cycles

  Overhead  CPU          Command
  ........  ...  ...............

     6.25%  2            program
     6.24%  6            program
     6.24%  11           program
     6.24%  5            program
     6.24%  9            program
     6.24%  10           program
     6.23%  15           program
     6.23%  7            program
     6.23%  3            program
     6.23%  14           program
     6.22%  1            program
     6.20%  13           program
     3.17%  12           program
     3.15%  8            program
     3.14%  0            program
     3.13%  4            program
     3.11%  4         antagonist
     3.11%  0         antagonist
     3.10%  8         antagonist
     3.07%  12        antagonist

Cc: David S. Miller <davem@davemloft.net>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <20100505181612.GA5091@sharma-home.net>
Signed-off-by: Arun Sharma <aruns@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f28c4bb..5e5c640 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -274,6 +274,9 @@
 	if (call_graph)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
+	if (system_wide)
+		attr->sample_type	|= PERF_SAMPLE_CPU;
+
 	if (raw_samples) {
 		attr->sample_type	|= PERF_SAMPLE_TIME;
 		attr->sample_type	|= PERF_SAMPLE_RAW;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index ed3e14f..a746086 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -710,6 +710,7 @@
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
 	al->sym = NULL;
+	al->cpu = data->cpu;
 
 	if (al->map) {
 		if (symbol_conf.dso_list &&
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 9e6baad..68d288c 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -70,6 +70,7 @@
 			.map	= al->map,
 			.sym	= al->sym,
 		},
+		.cpu	= al->cpu,
 		.ip	= al->addr,
 		.level	= al->level,
 		.period	= period,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2316cb5..c27b4b0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -13,6 +13,7 @@
 unsigned int dsos__col_width;
 unsigned int comms__col_width;
 unsigned int threads__col_width;
+unsigned int cpus__col_width;
 static unsigned int parent_symbol__col_width;
 char * field_sep;
 
@@ -28,6 +29,8 @@
 				    size_t size, unsigned int width);
 static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
 				       size_t size, unsigned int width);
+static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
+				    size_t size, unsigned int width);
 
 struct sort_entry sort_thread = {
 	.se_header	= "Command:  Pid",
@@ -63,6 +66,13 @@
 	.se_snprintf	= hist_entry__parent_snprintf,
 	.se_width	= &parent_symbol__col_width,
 };
+ 
+struct sort_entry sort_cpu = {
+	.se_header      = "CPU",
+	.se_cmp	        = sort__cpu_cmp,
+	.se_snprintf    = hist_entry__cpu_snprintf,
+	.se_width	= &cpus__col_width,
+};
 
 struct sort_dimension {
 	const char		*name;
@@ -76,6 +86,7 @@
 	{ .name = "dso",	.entry = &sort_dso,	},
 	{ .name = "symbol",	.entry = &sort_sym,	},
 	{ .name = "parent",	.entry = &sort_parent,	},
+	{ .name = "cpu",	.entry = &sort_cpu,	},
 };
 
 int64_t cmp_null(void *l, void *r)
@@ -242,6 +253,20 @@
 			      self->parent ? self->parent->name : "[other]");
 }
 
+/* --sort cpu */
+
+int64_t
+sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->cpu - left->cpu;
+}
+
+static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
+				       size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*d", width, self->cpu);
+}
+
 int sort_dimension__add(const char *tok)
 {
 	unsigned int i;
@@ -281,6 +306,8 @@
 				sort__first_dimension = SORT_SYM;
 			else if (!strcmp(sd->name, "parent"))
 				sort__first_dimension = SORT_PARENT;
+			else if (!strcmp(sd->name, "cpu"))
+				sort__first_dimension = SORT_CPU;
 		}
 
 		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 0d61c40..560c855 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -39,6 +39,7 @@
 extern unsigned int dsos__col_width;
 extern unsigned int comms__col_width;
 extern unsigned int threads__col_width;
+extern unsigned int cpus__col_width;
 extern enum sort_type sort__first_dimension;
 
 struct hist_entry {
@@ -51,6 +52,7 @@
 	struct map_symbol	ms;
 	struct thread		*thread;
 	u64			ip;
+	s32			cpu;
 	u32			nr_events;
 	char			level;
 	u8			filtered;
@@ -68,7 +70,8 @@
 	SORT_COMM,
 	SORT_DSO,
 	SORT_SYM,
-	SORT_PARENT
+	SORT_PARENT,
+	SORT_CPU,
 };
 
 /*
@@ -104,6 +107,7 @@
 extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *);
 extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *);
 extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *);
+int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right);
 extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int);
 extern int sort_dimension__add(const char *);
 void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 34760a2..10b7ff8 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -110,7 +110,8 @@
 	u64	      addr;
 	char	      level;
 	bool	      filtered;
-	unsigned int  cpumode;
+	u8	      cpumode;
+	s32	      cpu;
 };
 
 enum dso_kernel_type {