Add check for invariant TSC on x86 and use TSC as default clock if reliable

TSC is by far the fastest clock we can use. Check the CPUID bits for
whether it is both constant rate AND synced across cores. If it is,
we can use it as our default clock source.

Fio will default to this clock source on x86 if no other clock source
is specifically given with clocksource= in the job file.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/arch/arch-x86-common.h b/arch/arch-x86-common.h
new file mode 100644
index 0000000..1e62354
--- /dev/null
+++ b/arch/arch-x86-common.h
@@ -0,0 +1,60 @@
+#ifndef FIO_ARCH_X86_COMMON
+#define FIO_ARCH_X86_COMMON
+
+/*
+ * Execute CPUID for the leaf passed in *eax (sub-leaf 0) and store the
+ * resulting register values through the four pointers.
+ *
+ * The registers are bound directly with the "a"/"b"/"c"/"d" constraints
+ * rather than copied through four extra "r" operands while clobbering
+ * eax-edx, which needs more free registers than 32-bit x86 has.
+ */
+static inline void do_cpuid(unsigned int *eax, unsigned int *ebx,
+			    unsigned int *ecx, unsigned int *edx)
+{
+	unsigned int id = *eax;
+
+	asm volatile("cpuid"
+		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+		: "0" (id), "2" (0U));
+}
+
+#define ARCH_HAVE_INIT
+extern int tsc_reliable;
+
+/*
+ * x86 arch init hook: set tsc_reliable to 1 if CPUID leaf 0x80000007
+ * reports EDX bit 8. envp is unused. Always returns 0.
+ */
+static inline int arch_init(char *envp[])
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	/*
+	 * Check for TSC
+	 */
+	eax = 1;
+	do_cpuid(&eax, &ebx, &ecx, &edx);
+	if (!(edx & (1U << 4)))
+		return 0;
+
+	/*
+	 * Leaf 0x80000007 is only meaningful if the CPU implements it; an
+	 * out-of-range leaf can return unrelated data, so check the maximum
+	 * extended leaf first.
+	 */
+	eax = 0x80000000;
+	do_cpuid(&eax, &ebx, &ecx, &edx);
+	if (eax < 0x80000007)
+		return 0;
+
+	/*
+	 * Check for constant rate and synced (across cores) TSC
+	 */
+	eax = 0x80000007;
+	do_cpuid(&eax, &ebx, &ecx, &edx);
+	tsc_reliable = (edx & (1U << 8)) != 0;
+	return 0;
+}
+
+#endif
diff --git a/arch/arch-x86.h b/arch/arch-x86.h
index 1ededd8..4803006 100644
--- a/arch/arch-x86.h
+++ b/arch/arch-x86.h
@@ -1,6 +1,8 @@
 #ifndef ARCH_X86_H
 #define ARCH_X86_H
 
+#include "arch-x86-common.h"
+
 #define FIO_ARCH	(arch_i386)
 
 #ifndef __NR_ioprio_set
diff --git a/arch/arch-x86_64.h b/arch/arch-x86_64.h
index 29e681f..d8b0933 100644
--- a/arch/arch-x86_64.h
+++ b/arch/arch-x86_64.h
@@ -1,6 +1,8 @@
 #ifndef ARCH_X86_64_h
 #define ARCH_X86_64_h
 
+#include "arch-x86-common.h"
+
 #define FIO_ARCH	(arch_x86_64)
 
 #ifndef __NR_ioprio_set
diff --git a/crc/crc32c-intel.c b/crc/crc32c-intel.c
index 8a6e6dc..8e1cd58 100644
--- a/crc/crc32c-intel.c
+++ b/crc/crc32c-intel.c
@@ -78,22 +78,6 @@
 	return crc;
 }
 
-static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
-		     unsigned int *edx)
-{
-	int id = *eax;
-
-	asm("movl %4, %%eax;"
-	    "cpuid;"
-	    "movl %%eax, %0;"
-	    "movl %%ebx, %1;"
-	    "movl %%ecx, %2;"
-	    "movl %%edx, %3;"
-		: "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx)
-		: "r" (id)
-		: "eax", "ebx", "ecx", "edx");
-}
-
 void crc32c_intel_probe(void)
 {
 	if (!crc32c_probed) {
diff --git a/fio.c b/fio.c
index ac026fb..f44273f 100644
--- a/fio.c
+++ b/fio.c
@@ -104,6 +104,8 @@
 	if (parse_options(argc, argv))
 		return 1;
 
+	fio_time_init();
+
 	if (nr_clients)
 		return fio_handle_clients();
 	else
diff --git a/fio.h b/fio.h
index 3de3f00..de4ca4d 100644
--- a/fio.h
+++ b/fio.h
@@ -598,6 +598,7 @@
 extern int fio_gtod_offload;
 extern int fio_gtod_cpu;
 extern enum fio_cs fio_clock_source;
+extern int fio_clock_source_set;
 extern int warnings_fatal;
 extern int terse_version;
 extern int is_backend;
diff --git a/gettime.c b/gettime.c
index 35d685e..89a3a01 100644
--- a/gettime.c
+++ b/gettime.c
@@ -15,11 +15,13 @@
 #ifdef ARCH_HAVE_CPU_CLOCK
 static unsigned long cycles_per_usec;
 static unsigned long last_cycles;
 #endif
+int tsc_reliable = 0;	/* read on all archs, keep outside the #ifdef */
 static struct timeval last_tv;
 static int last_tv_valid;
 
 enum fio_cs fio_clock_source = FIO_PREFERRED_CLOCK_SOURCE;
+int fio_clock_source_set = 0;
 
 #ifdef FIO_DEBUG_TIME
 
@@ -208,15 +210,17 @@
 	return c_e - c_s;
 }
 
+#define NR_TIME_ITERS	50
+
 static void calibrate_cpu_clock(void)
 {
 	double delta, mean, S;
-	unsigned long avg, cycles[10];
+	unsigned long avg, cycles[NR_TIME_ITERS];
 	int i, samples;
 
 	cycles[0] = get_cycles_per_usec();
 	S = delta = mean = 0.0;
-	for (i = 0; i < 10; i++) {
+	for (i = 0; i < NR_TIME_ITERS; i++) {
 		cycles[i] = get_cycles_per_usec();
 		delta = cycles[i] - mean;
 		if (delta) {
@@ -225,10 +229,10 @@
 		}
 	}
 
-	S = sqrt(S / (10 - 1.0));
+	S = sqrt(S / (NR_TIME_ITERS - 1.0));
 
 	samples = avg = 0;
-	for (i = 0; i < 10; i++) {
+	for (i = 0; i < NR_TIME_ITERS; i++) {
 		double this = cycles[i];
 
 		if ((fmax(this, mean) - fmin(this, mean)) > S)
@@ -237,10 +241,10 @@
 		avg += this;
 	}
 
-	S /= 10.0;
-	mean /= 10.0;
+	S /= (double) NR_TIME_ITERS;
+	mean /= (double) NR_TIME_ITERS;
 
-	for (i = 0; i < 10; i++)
+	for (i = 0; i < NR_TIME_ITERS; i++)
 		dprint(FD_TIME, "cycles[%d]=%lu\n", i, cycles[i] / 10);
 
 	avg /= (samples * 10);
@@ -248,7 +252,6 @@
 	dprint(FD_TIME, "mean=%f, S=%f\n", mean, S);
 
 	cycles_per_usec = avg;
-
 }
 #else
 static void calibrate_cpu_clock(void)
@@ -260,6 +263,17 @@
 {
 	last_tv_valid = 0;
 	calibrate_cpu_clock();
+
+	/*
+	 * If the arch sets tsc_reliable != 0, then it must be good enough
+	 * to use as THE clock source. For x86 CPUs, this means the TSC
+	 * runs at a constant rate and is synced across CPU cores.
+	 */
+	if (tsc_reliable) {
+		if (!fio_clock_source_set)
+			fio_clock_source = CS_CPUCLOCK;
+	} else if (fio_clock_source == CS_CPUCLOCK)
+		log_info("fio: clocksource=cpu may not be reliable\n");
 }
 
 unsigned long long utime_since(struct timeval *s, struct timeval *e)
diff --git a/options.c b/options.c
index d46bcbb..738c78b 100644
--- a/options.c
+++ b/options.c
@@ -370,7 +370,7 @@
 	struct thread_data *td = data;
 
 	fio_clock_source = td->o.clocksource;
-	fio_time_init();
+	fio_clock_source_set = 1;
 	return 0;
 }