Initial support for sync_file_range()

This revs the ioengine to 11, as we now have another data direction.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/engines/sync.c b/engines/sync.c
index 12b85f6..a673ae9 100644
--- a/engines/sync.c
+++ b/engines/sync.c
@@ -64,6 +64,29 @@
 	return FIO_Q_COMPLETED;
 }
 
+/* Issue the sync variant named by io_u->ddir, then report via fio_io_end() */
+static int fio_syncio_sync(struct thread_data *td, struct io_u *io_u)
+{
+	int ret = io_u->xfer_buflen;
+
+	switch (io_u->ddir) {
+	case DDIR_SYNC:
+		ret = fsync(io_u->file->fd);
+		break;
+#ifdef FIO_HAVE_FDATASYNC
+	case DDIR_DATASYNC:
+		ret = fdatasync(io_u->file->fd);
+		break;
+#endif
+	case DDIR_SYNC_FILE_RANGE:
+		ret = do_sync_file_range(td, io_u->file);
+		break;
+	default:	/* not a sync we can perform here */
+		io_u->error = EINVAL;
+	}
+	return fio_io_end(td, io_u, ret);
+}
+
 static int fio_psyncio_queue(struct thread_data *td, struct io_u *io_u)
 {
 	struct fio_file *f = io_u->file;
@@ -76,7 +99,7 @@
 	else if (io_u->ddir == DDIR_WRITE)
 		ret = pwrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
 	else
-		ret = fsync(f->fd);
+		return fio_syncio_sync(td, io_u);
 
 	return fio_io_end(td, io_u, ret);
 }
@@ -93,7 +116,7 @@
 	else if (io_u->ddir == DDIR_WRITE)
 		ret = write(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
 	else
-		ret = fsync(f->fd);
+		return fio_syncio_sync(td, io_u);
 
 	return fio_io_end(td, io_u, ret);
 }
@@ -163,22 +186,9 @@
 		 */
 		if (sd->queued)
 			return FIO_Q_BUSY;
-		if (io_u->ddir == DDIR_SYNC) {
-			int ret = fsync(io_u->file->fd);
+		if (ddir_sync(io_u->ddir))
+			return fio_syncio_sync(td, io_u);
 
-			return fio_io_end(td, io_u, ret);
-		} else if (io_u->ddir == DDIR_DATASYNC) {
-			int ret;
-#ifdef FIO_HAVE_FDATASYNC
-			ret = fdatasync(io_u->file->fd);
-#else
-			ret = io_u->xfer_buflen;
-			io_u->error = EINVAL;
-#endif
-			return fio_io_end(td, io_u, ret);
-			
-		}
-	
 		sd->queued = 0;
 		sd->queued_bytes = 0;
 		fio_vsyncio_set_iov(sd, io_u, 0);
diff --git a/file.h b/file.h
index 2abe3ba..30293fc 100644
--- a/file.h
+++ b/file.h
@@ -74,6 +74,9 @@
 
 	unsigned long long last_pos;
 
+	unsigned long long first_write;
+	unsigned long long last_write;
+
 	/*
 	 * For use by the io engine
 	 */
diff --git a/fio.h b/fio.h
index f00f64a..05911c0 100644
--- a/fio.h
+++ b/fio.h
@@ -282,6 +282,8 @@
 
 	unsigned int uid;
 	unsigned int gid;
+
+	unsigned int sync_file_range;
 };
 
 #define FIO_VERROR_SIZE	128
@@ -417,6 +419,8 @@
 	unsigned int file_service_left;
 	struct fio_file *file_service_file;
 
+	unsigned int sync_file_range_nr;
+
 	/*
 	 * For generating file sizes
 	 */
diff --git a/io_ddir.h b/io_ddir.h
index 03eefdb..87cded4 100644
--- a/io_ddir.h
+++ b/io_ddir.h
@@ -6,6 +6,7 @@
 	DDIR_WRITE,
 	DDIR_SYNC,
 	DDIR_DATASYNC,
+	DDIR_SYNC_FILE_RANGE,
 	DDIR_WAIT,
 	DDIR_INVAL = -1,
 };
@@ -28,7 +29,8 @@
 
 static inline int ddir_sync(enum fio_ddir ddir)
 {
-	return ddir == DDIR_SYNC || ddir == DDIR_DATASYNC;
+	return ddir == DDIR_SYNC || ddir == DDIR_DATASYNC ||
+	       ddir == DDIR_SYNC_FILE_RANGE;
 }
 
 #endif
diff --git a/io_u.c b/io_u.c
index 9b9570e..a4bf0c0 100644
--- a/io_u.c
+++ b/io_u.c
@@ -394,6 +394,14 @@
 	     td->io_issues[DDIR_WRITE] && should_fsync(td))
 		return DDIR_DATASYNC;
 
+	/*
+	 * see if it's time to sync_file_range
+	 */
+	if (td->sync_file_range_nr &&
+	   !(td->io_issues[DDIR_WRITE] % td->sync_file_range_nr) &&
+	     td->io_issues[DDIR_WRITE] && should_fsync(td))
+		return DDIR_SYNC_FILE_RANGE;
+
 	if (td_rw(td)) {
 		/*
 		 * Check if it's time to seed a new data direction.
@@ -996,6 +1004,7 @@
 	 * initialized, silence that warning.
 	 */
 	unsigned long uninitialized_var(usec);
+	struct fio_file *f;
 
 	dprint_io_u(io_u, "io complete");
 
@@ -1006,6 +1015,11 @@
 
 	if (ddir_sync(io_u->ddir)) {
 		td->last_was_sync = 1;
+		f = io_u->file;
+		if (f) {
+			f->first_write = -1ULL;
+			f->last_write = -1ULL;
+		}
 		return;
 	}
 
@@ -1021,6 +1035,18 @@
 		td->io_bytes[idx] += bytes;
 		td->this_io_bytes[idx] += bytes;
 
+		if (idx == DDIR_WRITE) {
+			f = io_u->file;
+			if (f) {
+				if (f->first_write == -1ULL ||
+				    io_u->offset < f->first_write)
+					f->first_write = io_u->offset;
+				if (f->last_write == -1ULL ||
+				    ((io_u->offset + bytes) > f->last_write))
+					f->last_write = io_u->offset + bytes;
+			}
+		}
+
 		if (ramp_time_over(td)) {
 			unsigned long uninitialized_var(lusec);
 
diff --git a/ioengine.h b/ioengine.h
index eb6655d..5acfbd2 100644
--- a/ioengine.h
+++ b/ioengine.h
@@ -1,7 +1,7 @@
 #ifndef FIO_IOENGINE_H
 #define FIO_IOENGINE_H
 
-#define FIO_IOOPS_VERSION	10
+#define FIO_IOOPS_VERSION	11
 
 enum {
 	IO_U_F_FREE		= 1 << 0,
@@ -153,6 +153,8 @@
 void io_u_mark_complete(struct thread_data *, unsigned int);
 void io_u_mark_submit(struct thread_data *, unsigned int);
 
+int do_sync_file_range(struct thread_data *, struct fio_file *);
+
 #ifdef FIO_INC_DEBUG
 static inline void dprint_io_u(struct io_u *io_u, const char *p)
 {
diff --git a/ioengines.c b/ioengines.c
index 7f0a5c4..0e89534 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -431,3 +431,16 @@
 
 	return td->io_ops->get_file_size(td, f);
 }
+
+/*
+ * Sync f's [first_write, last_write) span with the sync_file_range job flags.
+ */
+int do_sync_file_range(struct thread_data *td, struct fio_file *f)
+{
+	off64_t offset = f->first_write;
+	off64_t nbytes = f->last_write - f->first_write;
+
+	if (!nbytes)
+		return 0;
+	return sync_file_range(f->fd, offset, nbytes, td->o.sync_file_range);
+}
diff --git a/log.c b/log.c
index ba52f07..99f20b5 100644
--- a/log.c
+++ b/log.c
@@ -20,9 +20,10 @@
 
 void log_io_u(struct thread_data *td, struct io_u *io_u)
 {
-	const char *act[] = { "read", "write", "sync", "datasync" };
+	const char *act[] = { "read", "write", "sync", "datasync",
+				"sync_file_range" };
 
-	assert(io_u->ddir < 3);
+	assert(io_u->ddir <= 4);
 
 	if (!td->o.write_iolog_file)
 		return;
diff --git a/options.c b/options.c
index e2daf37..bee15cf 100644
--- a/options.c
+++ b/options.c
@@ -429,6 +429,20 @@
 	return 0;
 }
 
+/* sync_file_range option callback: nr is get_opt_postfix()'s value, or 1 */
+static int str_sfr_cb(void *data, const char *str)
+{
+	struct thread_data *td = data;
+	char *postfix = get_opt_postfix(str);
+	unsigned int interval = 1;
+
+	if (postfix)
+		interval = atoi(postfix);
+	free(postfix);
+	td->sync_file_range_nr = interval;
+	return 0;
+}
+
 static int check_dir(struct thread_data *td, char *fname)
 {
 	char file[PATH_MAX], *dir;
@@ -1110,6 +1124,30 @@
 		.help	= "Issue fdatasync for writes every given number of blocks",
 		.def	= "0",
 	},
+#ifdef FIO_HAVE_SYNC_FILE_RANGE
+	{
+		.name	= "sync_file_range",
+		.posval	= {
+			  { .ival = "wait_before",
+			    .oval = SYNC_FILE_RANGE_WAIT_BEFORE,
+			    .help = "SYNC_FILE_RANGE_WAIT_BEFORE",
+			  },
+			  { .ival = "write",
+			    .oval = SYNC_FILE_RANGE_WRITE,
+			    .help = "SYNC_FILE_RANGE_WRITE",
+			  },
+			  {
+			    .ival = "wait_after",
+			    .oval = SYNC_FILE_RANGE_WAIT_AFTER,
+			    .help = "SYNC_FILE_RANGE_WAIT_AFTER",
+			  },
+		},
+		.type	= FIO_OPT_STR,
+		.cb	= str_sfr_cb,
+		.off1	= td_var_offset(sync_file_range),
+		.help	= "Use sync_file_range()",
+	},
+#endif
 	{
 		.name	= "direct",
 		.type	= FIO_OPT_BOOL,
diff --git a/os/os-linux.h b/os/os-linux.h
index 01140e0..8c61cc0 100644
--- a/os/os-linux.h
+++ b/os/os-linux.h
@@ -34,6 +34,7 @@
 #define FIO_HAVE_CL_SIZE
 #define FIO_HAVE_CGROUPS
 #define FIO_HAVE_FDATASYNC
+#define FIO_HAVE_SYNC_FILE_RANGE
 
 #define OS_MAP_ANON		MAP_ANONYMOUS