perf stat: Use stddev_mean instead of stddev

When we're computing the mean by sampling the distribution,
then the std dev of the mean is related to the std dev of the
sample set by:

  stddev_mean = std_dev / sqrt(N)

Which is exactly what we want.

This results in the error on the mean decreasing with
increasing number of samples.

Also fix the scaled == -1 (i.e. not counted) case.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9c6377f..e9424fa 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -75,7 +75,7 @@
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static u64			event_res[MAX_COUNTERS][3];
-static u64			event_scaled[MAX_COUNTERS];
+static int			event_scaled[MAX_COUNTERS];
 
 struct stats
 {
@@ -97,17 +97,31 @@
 }
 
 /*
- * stddev = sqrt(1/N (\Sum n_i^2) - avg(n)^2)
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ *
+ *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
+ * s^2 = -------------------------------
+ *                    n - 1
+ *
+ * http://en.wikipedia.org/wiki/Stddev
+ *
+ * The std dev of the mean is related to the std dev by:
+ *
+ *             s
+ * s_mean = -------
+ *          sqrt(n)
+ *
  */
 static double stddev_stats(struct stats *stats)
 {
 	double avg = stats->sum / run_count;
+	double variance = (stats->sum_sq - stats->sum*avg)/(run_count - 1);
+	double variance_mean = variance / run_count;
 
-	return sqrt(stats->sum_sq/run_count - avg*avg);
+	return sqrt(variance_mean);
 }
 
 struct stats			event_res_stats[MAX_COUNTERS][3];
-struct stats			event_scaled_stats[MAX_COUNTERS];
 struct stats			runtime_nsecs_stats;
 struct stats			walltime_nsecs_stats;
 struct stats			runtime_cycles_stats;
@@ -343,11 +357,10 @@
 static void print_counter(int counter)
 {
 	double avg, stddev;
-	int scaled;
+	int scaled = event_scaled[counter];
 
 	avg    = avg_stats(&event_res_stats[counter][0]);
 	stddev = stddev_stats(&event_res_stats[counter][0]);
-	scaled = avg_stats(&event_scaled_stats[counter]);
 
 	if (scaled == -1) {
 		fprintf(stderr, " %14s  %-24s\n",