sched/clock, x86: Rewrite cyc2ns() to avoid the need to disable IRQs

Use a ring-buffer-like multi-version object structure which allows
always having a coherent object; we use this to avoid having to
disable IRQs while reading sched_clock(), and to avoid a problem when
getting an NMI while changing the cyc2ns data.

                        MAINLINE   PRE        POST

    sched_clock_stable: 1          1          1
    (cold) sched_clock: 329841     331312     257223
    (cold) local_clock: 301773     310296     309889
    (warm) sched_clock: 38375      38247      25280
    (warm) local_clock: 100371     102713     85268
    (warm) rdtsc:       27340      27289      24247
    sched_clock_stable: 0          0          0
    (cold) sched_clock: 382634     372706     301224
    (cold) local_clock: 396890     399275     399870
    (warm) sched_clock: 38194      38124      25630
    (warm) local_clock: 143452     148698     129629
    (warm) rdtsc:       27345      27365      24307

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-s567in1e5ekq2nlyhn8f987r@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index b4c6676..3de54ef 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -13,7 +13,26 @@
 
 extern int no_timer_check;
 
-DECLARE_PER_CPU(unsigned long, cyc2ns);
-DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
+/*
+ * We use the full linear equation: f(x) = a + b*x, in order to allow
+ * a continuous function in the face of dynamic freq changes.
+ *
+ * Continuity means that when a frequency change alters our slope (b), we want
+ * to ensure that: f(t) == f'(t), which gives: a + b*t == a' + b'*t.
+ *
+ * Without an offset (a) the above would not be possible.
+ *
+ * See the comment near cycles_2_ns() for details on how we compute (b).
+ */
+struct cyc2ns_data {
+	u32 cyc2ns_mul;		/* multiplier part of slope (b), presumably */
+	u32 cyc2ns_shift;	/* shift part of slope (b), presumably */
+	u64 cyc2ns_offset;	/* the offset (a) in f(x) = a + b*x */
+	u32 __count;		/* NOTE(review): usage not shown in this header */
+	/* u32 hole */
+}; /* 24 bytes -- do not grow */
+
+extern struct cyc2ns_data *cyc2ns_read_begin(void); /* presumably paired with cyc2ns_read_end() */
+extern void cyc2ns_read_end(struct cyc2ns_data *); /* presumably takes the cyc2ns_read_begin() result */
 
 #endif /* _ASM_X86_TIMER_H */