perf report: Add front-entry cache for lookups
Before:
Performance counter stats for './perf report -i perf.data.big':
12453988058 instructions
After:
Performance counter stats for './perf report -i perf.data.big':
12379566017 instructions
0.60% reduction.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index e837bb9..33b3b15 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -229,6 +229,7 @@
}
static struct rb_root threads;
+static struct thread *last_match;
static struct thread *threads__findnew(pid_t pid)
{
@@ -236,12 +237,22 @@
struct rb_node *parent = NULL;
struct thread *th;
+ /*
+ * Front-end cache - PID lookups come in blocks,
+ * so most of the time we don't have to look up
+ * the full rbtree:
+ */
+ if (last_match && last_match->pid == pid)
+ return last_match;
+
while (*p != NULL) {
parent = *p;
th = rb_entry(parent, struct thread, rb_node);
- if (th->pid == pid)
+ if (th->pid == pid) {
+ last_match = th;
return th;
+ }
if (pid < th->pid)
p = &(*p)->rb_left;
@@ -253,7 +264,9 @@
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
rb_insert_color(&th->rb_node, &threads);
+ last_match = th;
}
+
return th;
}