perf report: Add front-entry cache for lookups

Before:

 Performance counter stats for './perf report -i perf.data.big':

     12453988058  instructions

After:

 Performance counter stats for './perf report -i perf.data.big':

     12379566017  instructions

0.60% reduction.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index e837bb9..33b3b15 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -229,6 +229,7 @@
 }
 
 static struct rb_root threads;
+static struct thread *last_match;
 
 static struct thread *threads__findnew(pid_t pid)
 {
@@ -236,12 +237,22 @@
 	struct rb_node *parent = NULL;
 	struct thread *th;
 
+	/*
+	 * Front-end cache - PID lookups come in blocks,
+	 * so most of the time we don't have to look up
+	 * the full rbtree:
+	 */
+	if (last_match && last_match->pid == pid)
+		return last_match;
+
 	while (*p != NULL) {
 		parent = *p;
 		th = rb_entry(parent, struct thread, rb_node);
 
-		if (th->pid == pid)
+		if (th->pid == pid) {
+			last_match = th;
 			return th;
+		}
 
 		if (pid < th->pid)
 			p = &(*p)->rb_left;
@@ -253,7 +264,9 @@
 	if (th != NULL) {
 		rb_link_node(&th->rb_node, parent, p);
 		rb_insert_color(&th->rb_node, &threads);
+		last_match = th;
 	}
+
 	return th;
 }