[PATCH] Time and seek optimizations

We did too many gettimeofday() calls, this patch cuts the number by
40%. Use clock_gettime() MONOTONIC instead, it is faster on my system
at least.

This patch also optimizes calling lseek() only when necessary for the
sync io engine.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/Makefile b/Makefile
index 07ec6a9..4a4434d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 CFLAGS	= -W -Wwrite-strings -Wall -O2 -g -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
 PROGS	= fio
 SCRIPTS = fio_generate_plots
-OBJS = fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
+OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
 	filesetup.o eta.o verify.o memory.o io_u.o parse.o
 
 OBJS += engines/fio-engine-cpu.o
diff --git a/Makefile.FreeBSD b/Makefile.FreeBSD
index 06cdc80..8947117 100644
--- a/Makefile.FreeBSD
+++ b/Makefile.FreeBSD
@@ -2,8 +2,7 @@
 CFLAGS	= -Wall -O2 -g -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
 PROGS	= fio
 SCRIPTS = fio_generate_plots
-
-OBJS = fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
+OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
 	filesetup.o eta.o verify.o memory.o io_u.o parse.o
 
 OBJS += engines/fio-engine-cpu.o
diff --git a/Makefile.solaris b/Makefile.solaris
index 442a103..b487a05 100644
--- a/Makefile.solaris
+++ b/Makefile.solaris
@@ -2,8 +2,7 @@
 CFLAGS	= -Wall -O2 -g -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
 PROGS	= fio
 SCRIPTS = fio_generate_plots
-
-OBJS = fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
+OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
 	filesetup.o eta.o verify.o memory.o io_u.o parse.o
 
 OBJS += engines/fio-engine-cpu.o
diff --git a/engines/fio-engine-sync.c b/engines/fio-engine-sync.c
index 5919830..c7ddd4c 100644
--- a/engines/fio-engine-sync.c
+++ b/engines/fio-engine-sync.c
@@ -45,6 +45,8 @@
 
 	if (io_u->ddir == DDIR_SYNC)
 		return 0;
+	if (io_u->offset == f->last_completed_pos)
+		return 0;
 
 	if (lseek(f->fd, io_u->offset, SEEK_SET) == -1) {
 		td_verror(td, errno);
diff --git a/eta.c b/eta.c
index 339f656..99e59ec 100644
--- a/eta.c
+++ b/eta.c
@@ -238,7 +238,7 @@
 	if (mtime > 1000) {
 		r_rate = (io_bytes[0] - prev_io_bytes[0]) / mtime;
 		w_rate = (io_bytes[1] - prev_io_bytes[1]) / mtime;
-		gettimeofday(&prev_time, NULL);
+		fio_gettime(&prev_time, NULL);
 		memcpy(prev_io_bytes, io_bytes, sizeof(io_bytes));
 	}
 
diff --git a/filesetup.c b/filesetup.c
index 50434e6..21854a9 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -434,7 +434,8 @@
 	int i;
 
 	for_each_file(td, f, i) {
-		if (td->unlink && td->filetype == FIO_TYPE_FILE) {
+		if (!td->filename && td->unlink &&
+		    td->filetype == FIO_TYPE_FILE) {
 			unlink(f->file_name);
 			free(f->file_name);
 			f->file_name = NULL;
diff --git a/fio.c b/fio.c
index 42ac0c9..a175be7 100644
--- a/fio.c
+++ b/fio.c
@@ -244,7 +244,6 @@
  */
 void do_verify(struct thread_data *td)
 {
-	struct timeval t;
 	struct io_u *io_u, *v_io_u = NULL;
 	struct io_completion_data icd;
 	struct fio_file *f;
@@ -265,14 +264,15 @@
 		if (td->terminate)
 			break;
 
-		gettimeofday(&t, NULL);
-		if (runtime_exceeded(td, &t))
-			break;
-
 		io_u = __get_io_u(td);
 		if (!io_u)
 			break;
 
+		if (runtime_exceeded(td, &io_u->start_time)) {
+			put_io_u(td, io_u);
+			break;
+		}
+
 		if (get_next_verify(td, io_u)) {
 			put_io_u(td, io_u);
 			break;
@@ -313,6 +313,7 @@
 		v_io_u = td->io_ops->event(td, 0);
 		icd.nr = 1;
 		icd.error = 0;
+		fio_gettime(&icd.time, NULL);
 		io_completed(td, v_io_u, &icd);
 
 		if (icd.error) {
@@ -349,7 +350,7 @@
 	int i = 0;
 
 	while (!td->terminate) {
-		gettimeofday(&e, NULL);
+		fio_gettime(&e, NULL);
 
 		if (runtime_exceeded(td, &e))
 			break;
@@ -370,7 +371,7 @@
 static void do_io(struct thread_data *td)
 {
 	struct io_completion_data icd;
-	struct timeval s, e;
+	struct timeval s;
 	unsigned long usec;
 	struct fio_file *f;
 	int i, ret = 0;
@@ -432,19 +433,18 @@
 		 * of completions except the very first one which may look
 		 * a little bursty
 		 */
-		gettimeofday(&e, NULL);
-		usec = utime_since(&s, &e);
+		usec = utime_since(&s, &icd.time);
 
 		rate_throttle(td, usec, icd.bytes_done[td->ddir], td->ddir);
 
-		if (check_min_rate(td, &e)) {
+		if (check_min_rate(td, &icd.time)) {
 			if (exitall_on_terminate)
 				terminate_threads(td->groupid);
 			td_verror(td, ENOMEM);
 			break;
 		}
 
-		if (runtime_exceeded(td, &e))
+		if (runtime_exceeded(td, &icd.time))
 			break;
 
 		if (td->thinktime)
@@ -658,14 +658,14 @@
 	if (open_files(td))
 		goto err;
 
-	gettimeofday(&td->epoch, NULL);
+	fio_gettime(&td->epoch, NULL);
 
 	if (td->exec_prerun)
 		system(td->exec_prerun);
 
 	while (td->loops--) {
 		getrusage(RUSAGE_SELF, &td->ru_start);
-		gettimeofday(&td->start, NULL);
+		fio_gettime(&td->start, NULL);
 		memcpy(&td->stat_sample_time, &td->start, sizeof(td->start));
 
 		if (td->ratemin)
@@ -692,7 +692,7 @@
 			continue;
 
 		clear_io_state(td);
-		gettimeofday(&td->start, NULL);
+		fio_gettime(&td->start, NULL);
 
 		do_verify(td);
 
@@ -836,8 +836,6 @@
 		}
 	}
 
-	time_init();
-
 	while (todo) {
 		struct thread_data *map[MAX_JOBS];
 		struct timeval this_start;
@@ -897,7 +895,7 @@
 		 * Wait for the started threads to transition to
 		 * TD_INITIALIZED.
 		 */
-		gettimeofday(&this_start, NULL);
+		fio_gettime(&this_start, NULL);
 		left = this_jobs;
 		while (left) {
 			if (mtime_since_now(&this_start) > JOB_START_TIMEOUT)
diff --git a/fio.h b/fio.h
index faa06a4..cb78f25 100644
--- a/fio.h
+++ b/fio.h
@@ -156,6 +156,7 @@
 	unsigned long long real_file_size;
 	unsigned long long file_offset;
 	unsigned long long last_pos;
+	unsigned long long last_completed_pos;
 
 	unsigned long *file_map;
 	unsigned int num_maps;
@@ -410,6 +411,7 @@
 
 	int error;			/* output */
 	unsigned long bytes_done[2];	/* output */
+	struct timeval time;		/* output */
 };
 
 #define DISK_UTIL_MSEC	(250)
@@ -436,7 +438,7 @@
  */
 extern void add_clat_sample(struct thread_data *, int, unsigned long);
 extern void add_slat_sample(struct thread_data *, int, unsigned long);
-extern void add_bw_sample(struct thread_data *, int);
+extern void add_bw_sample(struct thread_data *, int, struct timeval *);
 extern void show_run_stats(void);
 extern void init_disk_util(struct thread_data *);
 extern void update_rusage_stat(struct thread_data *);
@@ -449,7 +451,6 @@
 /*
  * Time functions
  */
-extern void time_init(void);
 extern unsigned long utime_since(struct timeval *, struct timeval *);
 extern unsigned long mtime_since(struct timeval *, struct timeval *);
 extern unsigned long mtime_since_now(struct timeval *);
@@ -459,6 +460,7 @@
 extern void usec_sleep(struct thread_data *, unsigned long);
 extern void rate_throttle(struct thread_data *, unsigned long, unsigned int, int);
 extern void fill_start_time(struct timeval *);
+extern void fio_gettime(struct timeval *, void *);
 
 /*
  * Init functions
diff --git a/gettime.c b/gettime.c
new file mode 100644
index 0000000..48aeb07
--- /dev/null
+++ b/gettime.c
@@ -0,0 +1,135 @@
+/*
+ * Memory helpers
+ */
+#include <unistd.h>
+#include <sys/time.h>
+
+#include "fio.h"
+#include "os.h"
+
+#include "hash.h"
+
+static int clock_gettime_works = 1;
+
+#ifdef FIO_DEBUG_TIME
+
+#define HASH_BITS	8
+#define HASH_SIZE	(1 << HASH_BITS)
+
+static struct list_head hash[HASH_SIZE];
+static int gtod_inited;
+
+struct gtod_log {
+	struct list_head list;
+	void *caller;
+	unsigned long calls;
+};
+
+static struct gtod_log *find_hash(void *caller)
+{
+	unsigned long h = hash_ptr(caller, HASH_BITS);
+	struct list_head *entry;
+
+	list_for_each(entry, &hash[h]) {
+		struct gtod_log *log = list_entry(entry, struct gtod_log, list);
+
+		if (log->caller == caller)
+			return log;
+	}
+
+	return NULL;
+}
+
+static struct gtod_log *find_log(void *caller)
+{
+	struct gtod_log *log = find_hash(caller);
+
+	if (!log) {
+		unsigned long h;
+
+		log = malloc(sizeof(*log));
+		INIT_LIST_HEAD(&log->list);
+		log->caller = caller;
+		log->calls = 0;
+
+		h = hash_ptr(caller, HASH_BITS);
+		list_add_tail(&log->list, &hash[h]);
+	}
+
+	return log;
+}
+
+static void gtod_log_caller(void *caller)
+{
+	if (gtod_inited) {
+		struct gtod_log *log = find_log(caller);
+
+		log->calls++;
+	}
+}
+
+static void fio_exit fio_dump_gtod(void)
+{
+	unsigned long total_calls = 0;
+	int i;
+
+	for (i = 0; i < HASH_SIZE; i++) {
+		struct list_head *entry;
+		struct gtod_log *log;
+
+		list_for_each(entry, &hash[i]) {
+			log = list_entry(entry, struct gtod_log, list);
+
+			printf("function %p, calls %lu\n", log->caller, log->calls);
+			total_calls += log->calls;
+		}
+	}
+
+	printf("Total %lu gettimeofday\n", total_calls);
+}
+
+static void fio_init gtod_init(void)
+{
+	int i;
+
+	for (i = 0; i < HASH_SIZE; i++)
+		INIT_LIST_HEAD(&hash[i]);
+
+	gtod_inited = 1;
+}
+
+#endif /* FIO_DEBUG_TIME */
+
+void fio_gettime(struct timeval *tp, void *caller)
+{
+#ifdef FIO_DEBUG_TIME
+	if (!caller)
+		caller = __builtin_return_address(0);
+
+	gtod_log_caller(caller);
+#else
+	caller = NULL;
+#endif
+repeat:
+	if (!clock_gettime_works)
+		gettimeofday(tp, NULL);
+	else {
+		struct timespec ts;
+
+		if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) {
+			clock_gettime_works = 0;
+			goto repeat;
+		}
+
+		tp->tv_sec = ts.tv_sec;
+		tp->tv_usec = ts.tv_nsec / 1000;
+	}
+}
+
+static void fio_init check_clock(void)
+{
+	struct timespec ts;
+
+	if (clock_getres(CLOCK_MONOTONIC, &ts) < 0)
+		clock_gettime_works = 0;
+}
diff --git a/io_u.c b/io_u.c
index 318614c..8ec9dd9 100644
--- a/io_u.c
+++ b/io_u.c
@@ -153,7 +153,7 @@
 		struct timeval now;
 		unsigned long elapsed;
 
-		gettimeofday(&now, NULL);
+		fio_gettime(&now, NULL);
 	 	elapsed = mtime_since_now(&td->rwmix_switch);
 
 		/*
@@ -296,14 +296,13 @@
 		return NULL;
 	}
 
-	gettimeofday(&io_u->start_time, NULL);
+	fio_gettime(&io_u->start_time, NULL);
 	return io_u;
 }
 
 void io_completed(struct thread_data *td, struct io_u *io_u,
 		  struct io_completion_data *icd)
 {
-	struct timeval e;
 	unsigned long msec;
 
 	if (io_u->ddir == DDIR_SYNC) {
@@ -313,8 +312,6 @@
 
 	td->last_was_sync = 0;
 
-	gettimeofday(&e, NULL);
-
 	if (!io_u->error) {
 		unsigned int bytes = io_u->buflen - io_u->resid;
 		const int idx = io_u->ddir;
@@ -324,10 +321,12 @@
 		td->zone_bytes += bytes;
 		td->this_io_bytes[idx] += bytes;
 
-		msec = mtime_since(&io_u->issue_time, &e);
+		io_u->file->last_completed_pos = io_u->offset + io_u->buflen;
+
+		msec = mtime_since(&io_u->issue_time, &icd->time);
 
 		add_clat_sample(td, idx, msec);
-		add_bw_sample(td, idx);
+		add_bw_sample(td, idx, &icd->time);
 
 		if ((td_rw(td) || td_write(td)) && idx == DDIR_WRITE)
 			log_io_piece(td, io_u);
@@ -342,6 +341,8 @@
 	struct io_u *io_u;
 	int i;
 
+	fio_gettime(&icd->time, NULL);
+
 	icd->error = 0;
 	icd->bytes_done[0] = icd->bytes_done[1] = 0;
 
diff --git a/ioengines.c b/ioengines.c
index 56a718c..1666612 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -176,7 +176,7 @@
 
 int td_io_queue(struct thread_data *td, struct io_u *io_u)
 {
-	gettimeofday(&io_u->issue_time, NULL);
+	fio_gettime(&io_u->issue_time, NULL);
 
 	return td->io_ops->queue(td, io_u);
 }
diff --git a/stat.c b/stat.c
index 0ccf37e..e6da421 100644
--- a/stat.c
+++ b/stat.c
@@ -60,7 +60,7 @@
 	dus->io_ticks += (__dus.io_ticks - ldus->io_ticks);
 	dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue);
 
-	gettimeofday(&t, NULL);
+	fio_gettime(&t, NULL);
 	du->msec += mtime_since(&du->time, &t);
 	memcpy(&du->time, &t, sizeof(t));
 	memcpy(ldus, &__dus, sizeof(__dus));
@@ -102,7 +102,7 @@
 	du->name = strdup(basename(path));
 	du->dev = dev;
 
-	gettimeofday(&du->time, NULL);
+	fio_gettime(&du->time, NULL);
 	get_io_ticks(du, &du->last_dus);
 
 	list_add_tail(&du->list, &disk_list);
@@ -574,9 +574,9 @@
 		add_log_sample(td, td->slat_log, msec, ddir);
 }
 
-void add_bw_sample(struct thread_data *td, int ddir)
+void add_bw_sample(struct thread_data *td, int ddir, struct timeval *t)
 {
-	unsigned long spent = mtime_since_now(&td->stat_sample_time[ddir]);
+	unsigned long spent = mtime_since(&td->stat_sample_time[ddir], t);
 	unsigned long rate;
 
 	if (spent < td->bw_avg_time)
@@ -588,7 +588,7 @@
 	if (td->bw_log)
 		add_log_sample(td, td->bw_log, rate, ddir);
 
-	gettimeofday(&td->stat_sample_time[ddir], NULL);
+	fio_gettime(&td->stat_sample_time[ddir], NULL);
 	td->stat_io_bytes[ddir] = td->this_io_bytes[ddir];
 }
 
diff --git a/time.c b/time.c
index d08c791..cb44c20 100644
--- a/time.c
+++ b/time.c
@@ -25,7 +25,7 @@
 {
 	struct timeval t;
 
-	gettimeofday(&t, NULL);
+	fio_gettime(&t, NULL);
 	return utime_since(s, &t);
 }
 
@@ -49,8 +49,9 @@
 unsigned long mtime_since_now(struct timeval *s)
 {
 	struct timeval t;
+	void *p = __builtin_return_address(0);
 
-	gettimeofday(&t, NULL);
+	fio_gettime(&t, p);
 	return mtime_since(s, &t);
 }
 
@@ -66,7 +67,7 @@
 {
 	struct timeval start;
 
-	gettimeofday(&start, NULL);
+	fio_gettime(&start, NULL);
 	while (utime_since_now(&start) < usec)
 		nop;
 }
@@ -128,9 +129,9 @@
 	return mtime_since_now(&genesis);
 }
 
-void time_init(void)
+static void fio_init time_init(void)
 {
-	gettimeofday(&genesis, NULL);
+	fio_gettime(&genesis, NULL);
 }
 
 void fill_start_time(struct timeval *t)