Add gtod_reduce option

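This cuts down even more on gettimeofday() calls. While the three
disable_X options (disable_clat, disable_slat, disable_bw) halved the
gettimeofday() call count, gtod_reduce can reduce it to less than
1 percent of what it otherwise would have been. It enables all three
options and also caches the time used for the runtime check,
refreshing it only on every 64th I/O.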

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index 1d1aa61..f4efd28 100644
--- a/HOWTO
+++ b/HOWTO
@@ -848,6 +848,13 @@
 disable_bw=bool	Disable measurements of throughput/bandwidth numbers. See
 		disable_clat.
 
+gtod_reduce=bool Enable all of the gettimeofday() reducing options
+		(disable_clat, disable_slat, disable_bw) and also reduce
+		the precision of the runtime timeout check to shrink
+		the gettimeofday() call count further. With this option
+		enabled, fio only makes about 0.4% of the gettimeofday()
+		calls it would have made with all timekeeping enabled.
+
 
 6.0 Interpreting the output
 ---------------------------
diff --git a/fio.c b/fio.c
index 7fb9490..08011c9 100644
--- a/fio.c
+++ b/fio.c
@@ -156,6 +156,17 @@
 	sigaction(SIGILL, &act, NULL);
 }
 
+static inline int should_check_rate(struct thread_data *td)
+{
+	/*
+	 * No minimum rate set, always ok
+	 */
+	if (!td->o.ratemin && !td->o.rate_iops_min)
+		return 0;
+
+	return 1;
+}
+
 /*
  * Check if we are above the minimum rate given.
  */
@@ -167,12 +178,6 @@
 	unsigned long rate;
 
 	/*
-	 * No minimum rate set, always ok
-	 */
-	if (!td->o.ratemin && !td->o.rate_iops_min)
-		return 0;
-
-	/*
 	 * allow a 2 second settle period in the beginning
 	 */
 	if (mtime_since(&td->start, now) < 2000)
@@ -340,6 +345,12 @@
 	return 0;
 }
 
+static inline void update_tv_cache(struct thread_data *td)
+{
+	if ((++td->tv_cache_nr & td->tv_cache_mask) == td->tv_cache_mask)
+		fio_gettime(&td->tv_cache, NULL);
+}
+
 /*
  * The main verify engine. Runs over the writes we previously submitted,
  * reads the blocks back in, and checks the crc/md5 of the data.
@@ -377,7 +388,9 @@
 		if (!io_u)
 			break;
 
-		if (runtime_exceeded(td, &io_u->start_time)) {
+		update_tv_cache(td);
+
+		if (runtime_exceeded(td, &td->tv_cache)) {
 			put_io_u(td, io_u);
 			td->terminate = 1;
 			break;
@@ -490,7 +503,6 @@
  */
 static void do_io(struct thread_data *td)
 {
-	struct timeval s;
 	unsigned long usec;
 	unsigned int i;
 	int ret = 0;
@@ -514,9 +526,9 @@
 		if (!io_u)
 			break;
 
-		memcpy(&s, &io_u->start_time, sizeof(s));
+		update_tv_cache(td);
 
-		if (runtime_exceeded(td, &s)) {
+		if (runtime_exceeded(td, &td->tv_cache)) {
 			put_io_u(td, io_u);
 			td->terminate = 1;
 			break;
@@ -564,7 +576,9 @@
 				requeue_io_u(td, &io_u);
 			} else {
 sync_done:
-				fio_gettime(&comp_time, NULL);
+				if (should_check_rate(td))
+					fio_gettime(&comp_time, NULL);
+
 				bytes_done = io_u_sync_complete(td, io_u);
 				if (bytes_done < 0)
 					ret = bytes_done;
@@ -603,7 +617,8 @@
 			if (full && !min_evts)
 				min_evts = 1;
 
-			fio_gettime(&comp_time, NULL);
+			if (should_check_rate(td))
+				fio_gettime(&comp_time, NULL);
 
 			do {
 				ret = io_u_queued_complete(td, min_evts);
@@ -624,8 +639,8 @@
 		 * of completions except the very first one which may look
 		 * a little bursty
 		 */
-		if (!in_ramp_time(td)) {
-			usec = utime_since(&s, &comp_time);
+		if (!in_ramp_time(td) && should_check_rate(td)) {
+			usec = utime_since(&td->tv_cache, &comp_time);
 
 			rate_throttle(td, usec, bytes_done);
 
@@ -977,6 +992,7 @@
 	while (keep_running(td)) {
 		fio_gettime(&td->start, NULL);
 		memcpy(&td->ts.stat_sample_time, &td->start, sizeof(td->start));
+		memcpy(&td->tv_cache, &td->start, sizeof(td->start));
 
 		if (td->o.ratemin)
 			memcpy(&td->lastrate, &td->ts.stat_sample_time,
diff --git a/fio.h b/fio.h
index ed1257c..3e39aea 100644
--- a/fio.h
+++ b/fio.h
@@ -499,6 +499,7 @@
 	unsigned int disable_clat;
 	unsigned int disable_slat;
 	unsigned int disable_bw;
+	unsigned int gtod_reduce;
 
 	char *read_iolog_file;
 	char *write_iolog_file;
@@ -612,6 +613,9 @@
 	struct timeval epoch;	/* time job was started */
 	struct timeval rw_end[2];
 	struct timeval last_issue;
+	struct timeval tv_cache;
+	unsigned int tv_cache_nr;
+	unsigned int tv_cache_mask;
 	unsigned int rw_end_set[2];
 	unsigned int ramp_time_over;
 
diff --git a/io_u.c b/io_u.c
index 62a76b9..8863fa7 100644
--- a/io_u.c
+++ b/io_u.c
@@ -851,7 +851,8 @@
 
 out:
 	if (!td_io_prep(td, io_u)) {
-		fio_gettime(&io_u->start_time, NULL);
+		if (!td->o.disable_slat)
+			fio_gettime(&io_u->start_time, NULL);
 		return io_u;
 	}
 err_put:
diff --git a/options.c b/options.c
index d4fc184..f068ebd 100644
--- a/options.c
+++ b/options.c
@@ -432,6 +432,20 @@
 	return 0;
 }
 
+static int str_gtod_reduce_cb(void *data, int *il)
+{
+	struct thread_data *td = data;
+	int val = *il;
+
+	td->o.disable_clat = !!val;
+	td->o.disable_slat = !!val;
+	td->o.disable_bw = !!val;
+	if (val)
+		td->tv_cache_mask = 63;
+
+	return 0;
+}
+
 #define __stringify_1(x)	#x
 #define __stringify(x)		__stringify_1(x)
 
@@ -1291,10 +1305,18 @@
 	},
 #endif
 	{
+		.name	= "gtod_reduce",
+		.type	= FIO_OPT_BOOL,
+		.help	= "Greatly reduce number of gettimeofday() calls",
+		.cb	= str_gtod_reduce_cb,
+		.def	= "0",
+	},
+	{
 		.name	= "disable_clat",
 		.type	= FIO_OPT_BOOL,
 		.off1	= td_var_offset(disable_clat),
 		.help	= "Disable completion latency numbers",
+		.parent	= "gtod_reduce",
 		.def	= "0",
 	},
 	{
@@ -1302,6 +1324,7 @@
 		.type	= FIO_OPT_BOOL,
 		.off1	= td_var_offset(disable_slat),
 		.help	= "Disable submissionn latency numbers",
+		.parent	= "gtod_reduce",
 		.def	= "0",
 	},
 	{
@@ -1309,6 +1332,7 @@
 		.type	= FIO_OPT_BOOL,
 		.off1	= td_var_offset(disable_bw),
 		.help	= "Disable bandwidth logging",
+		.parent	= "gtod_reduce",
 		.def	= "0",
 	},
 	{