perf sched: Implement multidimensional sorting
Implement multidimensional sorting on perf sched so that
you can sort either by number of switches, latency average,
latency maximum, runtime.
perf sched -l -s avg,max (this is the default)
-----------------------------------------------------------------------------------
Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |
-----------------------------------------------------------------------------------
gnome-power-man | 0.113 ms | 1 | avg: 4998.531 ms | max: 4998.531 ms |
xfdesktop | 1.190 ms | 7 | avg: 136.475 ms | max: 940.933 ms |
xfce-mcs-manage | 2.194 ms | 22 | avg: 38.534 ms | max: 735.174 ms |
notification-da | 2.749 ms | 31 | avg: 27.436 ms | max: 731.791 ms |
xfce4-session | 3.343 ms | 28 | avg: 26.796 ms | max: 734.891 ms |
xfwm4 | 3.159 ms | 22 | avg: 12.406 ms | max: 241.333 ms |
xchat | 42.789 ms | 214 | avg: 11.886 ms | max: 100.349 ms |
xfce4-terminal | 5.386 ms | 22 | avg: 11.414 ms | max: 241.611 ms |
firefox | 151.992 ms | 123 | avg: 9.543 ms | max: 153.717 ms |
xfce4-panel | 24.324 ms | 47 | avg: 8.189 ms | max: 242.352 ms |
:5090 | 6.932 ms | 111 | avg: 8.131 ms | max: 102.665 ms |
events/0 | 0.758 ms | 12 | avg: 1.964 ms | max: 21.879 ms |
Xorg | 280.558 ms | 340 | avg: 1.864 ms | max: 99.526 ms |
geany | 63.391 ms | 295 | avg: 1.099 ms | max: 9.334 ms |
reiserfs/0 | 0.039 ms | 2 | avg: 0.854 ms | max: 1.487 ms |
kondemand/0 | 8.251 ms | 245 | avg: 0.691 ms | max: 34.372 ms |
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 67a0ba8..10fcd49 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -33,6 +33,9 @@
static int replay_mode;
static int lat_mode;
+static char default_sort_order[] = "avg, max, switch, runtime";
+static char *sort_order = default_sort_order;
+
/*
* Scheduler benchmarks
@@ -890,7 +893,17 @@
u64 total_runtime;
};
-static struct rb_root lat_snapshot_root;
+typedef int (*sort_thread_lat)(struct task_atoms *, struct task_atoms *);
+
+struct sort_dimension {
+ const char *name;
+ sort_thread_lat cmp;
+ struct list_head list;
+};
+
+static LIST_HEAD(cmp_pid);
+
+static struct rb_root lat_snapshot_root, sorted_lat_snapshot_root;
static struct task_atoms *
thread_atom_list_search(struct rb_root *root, struct thread *thread)
@@ -901,9 +914,9 @@
struct task_atoms *atoms;
atoms = container_of(node, struct task_atoms, node);
- if (thread->pid < atoms->thread->pid)
+ if (thread->pid > atoms->thread->pid)
node = node->rb_left;
- else if (thread->pid > atoms->thread->pid)
+ else if (thread->pid < atoms->thread->pid)
node = node->rb_right;
else {
return atoms;
@@ -912,22 +925,41 @@
return NULL;
}
+static int
+thread_lat_cmp(struct list_head *list, struct task_atoms *l,
+ struct task_atoms *r)
+{
+ struct sort_dimension *sort;
+ int ret = 0;
+
+ list_for_each_entry(sort, list, list) {
+ ret = sort->cmp(l, r);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
static void
-__thread_latency_insert(struct rb_root *root, struct task_atoms *data)
+__thread_latency_insert(struct rb_root *root, struct task_atoms *data,
+ struct list_head *sort_list)
{
struct rb_node **new = &(root->rb_node), *parent = NULL;
while (*new) {
struct task_atoms *this;
+ int cmp;
this = container_of(*new, struct task_atoms, node);
parent = *new;
- if (data->thread->pid < this->thread->pid)
+
+ cmp = thread_lat_cmp(sort_list, data, this);
+
+ if (cmp > 0)
new = &((*new)->rb_left);
- else if (data->thread->pid > this->thread->pid)
- new = &((*new)->rb_right);
else
- die("Double thread insertion\n");
+ new = &((*new)->rb_right);
}
rb_link_node(&data->node, parent, new);
@@ -943,7 +975,7 @@
atoms->thread = thread;
INIT_LIST_HEAD(&atoms->snapshot_list);
- __thread_latency_insert(&lat_snapshot_root, atoms);
+ __thread_latency_insert(&lat_snapshot_root, atoms, &cmp_pid);
}
static void
@@ -1134,18 +1166,151 @@
(double)atom_list->max_lat / 1e6);
}
+static int pid_cmp(struct task_atoms *l, struct task_atoms *r)
+{
+
+ if (l->thread->pid < r->thread->pid)
+ return -1;
+ if (l->thread->pid > r->thread->pid)
+ return 1;
+
+ return 0;
+}
+
+static struct sort_dimension pid_sort_dimension = {
+ .name = "pid",
+ .cmp = pid_cmp,
+};
+
+static int avg_cmp(struct task_atoms *l, struct task_atoms *r)
+{
+ u64 avgl, avgr;
+
+ if (!l->nb_atoms)
+ return -1;
+
+ if (!r->nb_atoms)
+ return 1;
+
+ avgl = l->total_lat / l->nb_atoms;
+ avgr = r->total_lat / r->nb_atoms;
+
+ if (avgl < avgr)
+ return -1;
+ if (avgl > avgr)
+ return 1;
+
+ return 0;
+}
+
+static struct sort_dimension avg_sort_dimension = {
+ .name = "avg",
+ .cmp = avg_cmp,
+};
+
+static int max_cmp(struct task_atoms *l, struct task_atoms *r)
+{
+ if (l->max_lat < r->max_lat)
+ return -1;
+ if (l->max_lat > r->max_lat)
+ return 1;
+
+ return 0;
+}
+
+static struct sort_dimension max_sort_dimension = {
+ .name = "max",
+ .cmp = max_cmp,
+};
+
+static int switch_cmp(struct task_atoms *l, struct task_atoms *r)
+{
+ if (l->nb_atoms < r->nb_atoms)
+ return -1;
+ if (l->nb_atoms > r->nb_atoms)
+ return 1;
+
+ return 0;
+}
+
+static struct sort_dimension switch_sort_dimension = {
+ .name = "switch",
+ .cmp = switch_cmp,
+};
+
+static int runtime_cmp(struct task_atoms *l, struct task_atoms *r)
+{
+ if (l->total_runtime < r->total_runtime)
+ return -1;
+ if (l->total_runtime > r->total_runtime)
+ return 1;
+
+ return 0;
+}
+
+static struct sort_dimension runtime_sort_dimension = {
+ .name = "runtime",
+ .cmp = runtime_cmp,
+};
+
+static struct sort_dimension *available_sorts[] = {
+ &pid_sort_dimension,
+ &avg_sort_dimension,
+ &max_sort_dimension,
+ &switch_sort_dimension,
+ &runtime_sort_dimension,
+};
+
+#define NB_AVAILABLE_SORTS (int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))
+
+static LIST_HEAD(sort_list);
+
+static int sort_dimension__add(char *tok, struct list_head *list)
+{
+ int i;
+
+ for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
+ if (!strcmp(available_sorts[i]->name, tok)) {
+ list_add_tail(&available_sorts[i]->list, list);
+
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+static void setup_sorting(void);
+
+static void sort_lat(void)
+{
+ struct rb_node *node;
+
+ for (;;) {
+ struct task_atoms *data;
+ node = rb_first(&lat_snapshot_root);
+ if (!node)
+ break;
+
+ rb_erase(node, &lat_snapshot_root);
+ data = rb_entry(node, struct task_atoms, node);
+ __thread_latency_insert(&sorted_lat_snapshot_root, data, &sort_list);
+ }
+}
+
static void __cmd_lat(void)
{
struct rb_node *next;
setup_pager();
read_events();
+ sort_lat();
printf("-----------------------------------------------------------------------------------\n");
printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n");
printf("-----------------------------------------------------------------------------------\n");
- next = rb_first(&lat_snapshot_root);
+ next = rb_first(&sorted_lat_snapshot_root);
while (next) {
struct task_atoms *atom_list;
@@ -1469,11 +1634,30 @@
"replay sched behaviour from traces"),
OPT_BOOLEAN('l', "latency", &lat_mode,
"measure various latencies"),
+ OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+ "sort by key(s): runtime, switch, avg, max"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_END()
};
+static void setup_sorting(void)
+{
+ char *tmp, *tok, *str = strdup(sort_order);
+
+ for (tok = strtok_r(str, ", ", &tmp);
+ tok; tok = strtok_r(NULL, ", ", &tmp)) {
+ if (sort_dimension__add(tok, &sort_list) < 0) {
+ error("Unknown --sort key: `%s'", tok);
+ usage_with_options(sched_usage, options);
+ }
+ }
+
+ free(str);
+
+ sort_dimension__add((char *)"pid", &cmp_pid);
+}
+
int cmd_sched(int argc, const char **argv, const char *prefix __used)
{
symbol__init();
@@ -1496,6 +1680,8 @@
else
usage_with_options(sched_usage, options);
+ setup_sorting();
+
if (replay_mode)
__cmd_replay();
else if (lat_mode)