Add gtod_cpu option for pinning gettimeofday() to a single CPU

Similar to what real life products sometimes do, offload gettimeofday()
calls to a single CPU and have that update the current time into a shared
memory location. This option pins a specific CPU for that job, and excludes
it from participating in any of the IO jobs.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index 7bd71a9..731684c 100644
--- a/HOWTO
+++ b/HOWTO
@@ -869,6 +869,18 @@
 		we only do about 0.4% of the gtod() calls we would have
 		done if all time keeping was enabled.
 
+gtod_cpu=int	Sometimes it's cheaper to dedicate a single thread of
+		execution to just getting the current time. Fio (and
+		databases, for instance) are very intensive on gettimeofday()
+		calls. With this option, you can set one CPU aside for
+		doing nothing but logging current time to a shared memory
+		location. Then the other threads/processes that run IO
+		workloads need only copy that segment, instead of entering
+		the kernel with a gettimeofday() call. The CPU set aside
+		for doing these time calls will be excluded from other
+		uses. Fio will manually clear it from the CPU mask of other
+		jobs.
+
 
 6.0 Interpreting the output
 ---------------------------
diff --git a/fio.c b/fio.c
index 5a87ae4..a58effc 100644
--- a/fio.c
+++ b/fio.c
@@ -55,6 +55,7 @@
 static volatile int fio_abort;
 static int exit_value;
 static struct itimerval itimer;
+static pthread_t gtod_thread;
 
 struct io_log *agg_io_log[2];
 
@@ -964,6 +965,18 @@
 		goto err;
 	}
 
+	if (td->o.gtod_offload) {	/* flag, not CPU nr: CPU 0 is valid */
+		if (fio_getaffinity(td->pid, &td->o.cpumask) == -1) {
+			td_verror(td, errno, "cpu_get_affinity");
+			goto err;
+		}
+		fio_cpu_clear(&td->o.cpumask, td->o.gtod_cpu);
+		if (fio_setaffinity(td) == -1) {
+			td_verror(td, errno, "cpu_set_affinity");
+			goto err;
+		}
+	}
+
 	if (td->ioprio_set) {
 		if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
 			td_verror(td, errno, "ioprio_set");
@@ -1229,6 +1242,39 @@
 		terminate_threads(TERMINATE_ALL);
 }
 
+static void *gtod_thread_main(void *data)
+{
+	fio_mutex_up(startup_mutex);
+
+	/*
+	 * The fio_mutex_up() above pairs with the fio_mutex_down() done in
+	 * fio_start_gtod_thread(). From here on, update the clock as long as
+	 * we have jobs around. It would be nice to NOT hammer that CPU with
+	 * gettimeofday(), but a simple CPU nop is all we use to relax it.
+	 */
+	while (threads) {
+		fio_gtod_update();
+		nop;
+	}
+
+	return NULL;
+}
+
+static int fio_start_gtod_thread(void)
+{
+	errno = pthread_create(&gtod_thread, NULL, gtod_thread_main, NULL);
+	if (errno) {	/* pthread_*() return the error, they don't set errno */
+		perror("Can't create gtod thread");
+		return 1;
+	}
+	if ((errno = pthread_detach(gtod_thread))) {
+		perror("Can't detach gtod thread");
+		return 1;
+	}
+	fio_mutex_down(startup_mutex);	/* pairs with up in gtod_thread_main */
+	return 0;
+}
+
 /*
  * Main function for kicking off and reaping jobs, as needed.
  */
@@ -1241,6 +1287,9 @@
 	if (fio_pin_memory())
 		return;
 
+	if (fio_gtod_offload && fio_start_gtod_thread())
+		return;
+
 	if (!terse_output) {
 		printf("Starting ");
 		if (nr_thread)
diff --git a/fio.h b/fio.h
index f8e6a4a..d21f5e4 100644
--- a/fio.h
+++ b/fio.h
@@ -500,6 +500,8 @@
 	unsigned int disable_slat;
 	unsigned int disable_bw;
 	unsigned int gtod_reduce;
+	unsigned int gtod_cpu;
+	unsigned int gtod_offload;
 
 	char *read_iolog_file;
 	char *write_iolog_file;
@@ -699,6 +701,8 @@
 extern int eta_print;
 extern unsigned long done_secs;
 extern char *job_section;
+extern int fio_gtod_offload;
+extern int fio_gtod_cpu;
 
 extern struct thread_data *threads;
 
@@ -828,6 +832,8 @@
 extern void rate_throttle(struct thread_data *, unsigned long, unsigned int);
 extern void fill_start_time(struct timeval *);
 extern void fio_gettime(struct timeval *, void *);
+extern void fio_gtod_init(void);
+extern void fio_gtod_update(void);
 extern void set_genesis_time(void);
 extern int ramp_time_over(struct thread_data *);
 extern int in_ramp_time(struct thread_data *);
diff --git a/gettime.c b/gettime.c
index 80eeaf1..b1431f3 100644
--- a/gettime.c
+++ b/gettime.c
@@ -6,6 +6,7 @@
 #include <sys/time.h>
 
 #include "fio.h"
+#include "smalloc.h"
 
 #include "hash.h"
 
@@ -13,6 +14,10 @@
 static struct timeval last_tv;
 static int last_tv_valid;
 
+static struct timeval *fio_tv;
+int fio_gtod_offload = 0;
+int fio_gtod_cpu = -1;
+
 #ifdef FIO_DEBUG_TIME
 
 #define HASH_BITS	8
@@ -116,7 +121,10 @@
 
 	gtod_log_caller(caller);
 #endif
-	if (!clock_gettime_works) {
+	if (fio_tv) {
+		memcpy(tp, fio_tv, sizeof(*tp));
+		return;
+	} else if (!clock_gettime_works) {
 gtod:
 		gettimeofday(tp, NULL);
 	} else {
@@ -145,3 +153,14 @@
 	last_tv_valid = 1;
 	memcpy(&last_tv, tp, sizeof(*tp));
 }
+
+void fio_gtod_init(void)
+{
+	fio_tv = smalloc(sizeof(*fio_tv));	/* filled by fio_gtod_update() */
+	assert(fio_tv);
+}
+
+void fio_gtod_update(void)
+{
+	(void) gettimeofday(fio_tv, NULL);	/* store current time in fio_tv */
+}
diff --git a/init.c b/init.c
index f00ced3..a8acdc0 100644
--- a/init.c
+++ b/init.c
@@ -27,7 +27,7 @@
 static int max_jobs = MAX_JOBS;
 static int dump_cmdline;
 
-struct thread_data def_thread;
+static struct thread_data def_thread;
 struct thread_data *threads = NULL;
 
 int exitall_on_terminate = 0;
@@ -214,6 +214,14 @@
 {
 	struct thread_options *o = &td->o;
 
+#ifndef FIO_HAVE_CPU_AFFINITY
+	if (td->o.gtod_cpu) {	/* gtod_cpu works by editing affinity masks */
+		log_err("fio: platform must support CPU affinity for"
+			" gettimeofday() offloading\n");
+		return 1;
+	}
+#endif
+
 	if (read_only && td_write(td)) {
 		log_err("fio: job <%s> has write bit set, but fio is in"
 			" read-only mode\n", td->o.name);
@@ -1104,5 +1112,11 @@
 		return 1;
 	}
 
+	if (def_thread.o.gtod_offload) {
+		fio_gtod_init();
+		fio_gtod_offload = 1;
+		fio_gtod_cpu = def_thread.o.gtod_cpu;
+	}
+
 	return 0;
 }
diff --git a/options.c b/options.c
index 1953e3d..5bbeb34 100644
--- a/options.c
+++ b/options.c
@@ -497,6 +497,16 @@
 	return 0;
 }
 
+static int str_gtod_cpu_cb(void *data, int *il)
+{
+	struct thread_options *o = &((struct thread_data *) data)->o;
+
+	/* record the requested CPU and flag that offloading was asked for */
+	o->gtod_cpu = *il;
+	o->gtod_offload = 1;
+	return 0;
+}
+
 #define __stringify_1(x)	#x
 #define __stringify(x)		__stringify_1(x)
 
@@ -1389,6 +1399,12 @@
 		.def	= "0",
 	},
 	{
+		.name	= "gtod_cpu",
+		.type	= FIO_OPT_INT,
+		.cb	= str_gtod_cpu_cb,
+		.help	= "Setup dedicated gettimeofday() thread on this CPU",
+	},
+	{
 		.name = NULL,
 	},
 };
diff --git a/os/os-linux.h b/os/os-linux.h
index c0f5327..6812acd 100644
--- a/os/os-linux.h
+++ b/os/os-linux.h
@@ -54,7 +54,6 @@
  * If you are on an ancient glibc (2.3.2), then define GLIBC_2_3_2 if you want
  * the affinity helpers to work.
  */
-#ifdef FIO_HAVE_CPU_AFFINITY
 #ifndef GLIBC_2_3_2
 #define fio_setaffinity(td)		\
 	sched_setaffinity((td)->pid, sizeof((td)->o.cpumask), &(td)->o.cpumask)
@@ -66,7 +65,8 @@
 #define fio_getaffinity(pid, ptr)	\
 	sched_getaffinity((pid), (ptr))
 #endif
-#endif
+
+#define fio_cpu_clear(mask, cpu)	CPU_CLR((cpu), (mask))
 
 static inline int ioprio_set(int which, int who, int ioprio)
 {
diff --git a/os/os.h b/os/os.h
index 1cff494..823b039 100644
--- a/os/os.h
+++ b/os/os.h
@@ -40,7 +40,8 @@
 
 #ifndef FIO_HAVE_CPU_AFFINITY
 #define fio_setaffinity(td)		(0)
-#define fio_getaffinity(pid, mask)	do { } while(0)
+#define fio_getaffinity(pid, mask)	(0)	/* callers compare the result */
+#define fio_cpu_clear(mask, cpu)	do { } while (0)
 #endif
 
 #ifndef FIO_HAVE_IOPRIO