perf record: Use per-task-per-cpu events for inherited events

Create events with a pid and cpu constraint for inherited events
so that we get a stream per cpu, instead of all cpus contending
on a single stream.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: fweisbec@gmail.com
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20091216165904.987643843@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 9b7c6d8..63136d0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -278,7 +278,7 @@
 
 	attr->mmap		= track;
 	attr->comm		= track;
-	attr->inherit		= (cpu < 0) && inherit;
+	attr->inherit		= inherit;
 	attr->disabled		= 1;
 
 try_again:
@@ -537,7 +537,7 @@
 	}
 
 
-	if (!system_wide || profile_cpu != -1) {
+	if ((!system_wide && !inherit) || profile_cpu != -1) {
 		open_counters(profile_cpu, target_pid);
 	} else {
 		for (i = 0; i < nr_cpus; i++)