[PATCH] Support residual io counts from io engines

We need this for requeuing support, the network engine makes this
pretty apparent (it's not unusual to see short tranfers there).

Basically we add an xfer_buf and xfer_buflen member to the io_u,
and these are the fields that the io engine MUST use. That allows
fio to increment and reset these appropriately, and simply requeue
the io_u for service of the next part of it.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/engines/libaio.c b/engines/libaio.c
index da43f18..9a644f6 100644
--- a/engines/libaio.c
+++ b/engines/libaio.c
@@ -25,9 +25,9 @@
 	struct fio_file *f = io_u->file;
 
 	if (io_u->ddir == DDIR_READ)
-		io_prep_pread(&io_u->iocb, f->fd, io_u->buf, io_u->buflen, io_u->offset);
+		io_prep_pread(&io_u->iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
 	else if (io_u->ddir == DDIR_WRITE)
-		io_prep_pwrite(&io_u->iocb, f->fd, io_u->buf, io_u->buflen, io_u->offset);
+		io_prep_pwrite(&io_u->iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
 	else if (io_u->ddir == DDIR_SYNC)
 		io_prep_fsync(&io_u->iocb, f->fd);
 	else
@@ -87,7 +87,7 @@
 	} while (1);
 
 	if (ret <= 0) {
-		io_u->resid = io_u->buflen;
+		io_u->resid = io_u->xfer_buflen;
 		io_u->error = -ret;
 		return 1;
 	}
diff --git a/engines/mmap.c b/engines/mmap.c
index 20dcfd2..dc1fd33 100644
--- a/engines/mmap.c
+++ b/engines/mmap.c
@@ -48,9 +48,9 @@
 	struct mmapio_data *sd = td->io_ops->data;
 
 	if (io_u->ddir == DDIR_READ)
-		memcpy(io_u->buf, f->mmap + real_off, io_u->buflen);
+		memcpy(io_u->xfer_buf, f->mmap + real_off, io_u->xfer_buflen);
 	else if (io_u->ddir == DDIR_WRITE)
-		memcpy(f->mmap + real_off, io_u->buf, io_u->buflen);
+		memcpy(f->mmap + real_off, io_u->xfer_buf, io_u->xfer_buflen);
 	else if (io_u->ddir == DDIR_SYNC) {
 		if (msync(f->mmap, f->file_size, MS_SYNC))
 			io_u->error = errno;
@@ -60,9 +60,9 @@
 	 * not really direct, but should drop the pages from the cache
 	 */
 	if (td->odirect && io_u->ddir != DDIR_SYNC) {
-		if (msync(f->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
+		if (msync(f->mmap + real_off, io_u->xfer_buflen, MS_SYNC) < 0)
 			io_u->error = errno;
-		if (madvise(f->mmap + real_off, io_u->buflen,  MADV_DONTNEED) < 0)
+		if (madvise(f->mmap + real_off, io_u->xfer_buflen,  MADV_DONTNEED) < 0)
 			io_u->error = errno;
 	}
 
diff --git a/engines/net.c b/engines/net.c
index b1a4cdc..43026e5 100644
--- a/engines/net.c
+++ b/engines/net.c
@@ -77,17 +77,18 @@
 {
 	struct net_data *nd = td->io_ops->data;
 	struct fio_file *f = io_u->file;
-	unsigned int ret = 0;
+	int ret = 0;
 
 	if (io_u->ddir == DDIR_WRITE)
-		ret = write(f->fd, io_u->buf, io_u->buflen);
+		ret = write(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
 	else if (io_u->ddir == DDIR_READ)
-		ret = read(f->fd, io_u->buf, io_u->buflen);
+		ret = read(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
 
-	if (ret != io_u->buflen) {
+	if (ret != (int) io_u->xfer_buflen) {
 		if (ret > 0) {
-			io_u->resid = io_u->buflen - ret;
-			io_u->error = EIO;
+			io_u->resid = io_u->xfer_buflen - ret;
+			io_u->error = 0;
+			return ret;
 		} else
 			io_u->error = errno;
 	}
diff --git a/engines/posixaio.c b/engines/posixaio.c
index ef4d78e..71601fd 100644
--- a/engines/posixaio.c
+++ b/engines/posixaio.c
@@ -67,8 +67,8 @@
 	struct fio_file *f = io_u->file;
 
 	aiocb->aio_fildes = f->fd;
-	aiocb->aio_buf = io_u->buf;
-	aiocb->aio_nbytes = io_u->buflen;
+	aiocb->aio_buf = io_u->xfer_buf;
+	aiocb->aio_nbytes = io_u->xfer_buflen;
 	aiocb->aio_offset = io_u->offset;
 
 	io_u->seen = 0;
diff --git a/engines/sg.c b/engines/sg.c
index 3ea1e28..862cd60 100644
--- a/engines/sg.c
+++ b/engines/sg.c
@@ -40,8 +40,8 @@
 	hdr->usr_ptr = io_u;
 
 	if (fs) {
-		hdr->dxferp = io_u->buf;
-		hdr->dxfer_len = io_u->buflen;
+		hdr->dxferp = io_u->xfer_buf;
+		hdr->dxfer_len = io_u->xfer_buflen;
 	}
 }
 
@@ -160,7 +160,7 @@
 	struct sgio_data *sd = td->io_ops->data;
 	int nr_blocks, lba;
 
-	if (io_u->buflen & (sd->bs - 1)) {
+	if (io_u->xfer_buflen & (sd->bs - 1)) {
 		log_err("read/write not sector aligned\n");
 		return EINVAL;
 	}
@@ -183,7 +183,7 @@
 	}
 
 	if (hdr->dxfer_direction != SG_DXFER_NONE) {
-		nr_blocks = io_u->buflen / sd->bs;
+		nr_blocks = io_u->xfer_buflen / sd->bs;
 		lba = io_u->offset / sd->bs;
 		hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
 		hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
diff --git a/engines/splice.c b/engines/splice.c
index fa4a6ee..026a82b 100644
--- a/engines/splice.c
+++ b/engines/splice.c
@@ -57,8 +57,8 @@
 	void *p;
 
 	offset = io_u->offset;
-	buflen = io_u->buflen;
-	p = io_u->buf;
+	buflen = io_u->xfer_buflen;
+	p = io_u->xfer_buf;
 	while (buflen) {
 		int this_len = buflen;
 
@@ -85,7 +85,7 @@
 		}
 	}
 
-	return io_u->buflen;
+	return io_u->xfer_buflen;
 }
 
 /*
@@ -97,8 +97,8 @@
 	struct spliceio_data *sd = td->io_ops->data;
 	struct iovec iov[1] = {
 		{
-			.iov_base = io_u->buf,
-			.iov_len = io_u->buflen,
+			.iov_base = io_u->xfer_buf,
+			.iov_len = io_u->xfer_buflen,
 		}
 	};
 	struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
@@ -126,13 +126,13 @@
 		}
 	}
 
-	return io_u->buflen;
+	return io_u->xfer_buflen;
 }
 
 static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
 {
 	struct spliceio_data *sd = td->io_ops->data;
-	unsigned int ret;
+	int ret;
 
 	if (io_u->ddir == DDIR_READ)
 		ret = fio_splice_read(td, io_u);
@@ -141,10 +141,11 @@
 	else
 		ret = fsync(io_u->file->fd);
 
-	if (ret != io_u->buflen) {
+	if (ret != (int) io_u->xfer_buflen) {
 		if (ret > 0) {
-			io_u->resid = io_u->buflen - ret;
-			io_u->error = ENODATA;
+			io_u->resid = io_u->xfer_buflen - ret;
+			io_u->error = 0;
+			return ret;
 		} else
 			io_u->error = errno;
 	}
diff --git a/engines/sync.c b/engines/sync.c
index c7ddd4c..94dd710 100644
--- a/engines/sync.c
+++ b/engines/sync.c
@@ -60,19 +60,20 @@
 {
 	struct syncio_data *sd = td->io_ops->data;
 	struct fio_file *f = io_u->file;
-	unsigned int ret;
+	int ret;
 
 	if (io_u->ddir == DDIR_READ)
-		ret = read(f->fd, io_u->buf, io_u->buflen);
+		ret = read(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
 	else if (io_u->ddir == DDIR_WRITE)
-		ret = write(f->fd, io_u->buf, io_u->buflen);
+		ret = write(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
 	else
 		ret = fsync(f->fd);
 
-	if (ret != io_u->buflen) {
+	if (ret != (int) io_u->xfer_buflen) {
 		if (ret > 0) {
-			io_u->resid = io_u->buflen - ret;
-			io_u->error = EIO;
+			io_u->resid = io_u->xfer_buflen - ret;
+			io_u->error = 0;
+			return ret;
 		} else
 			io_u->error = errno;
 	}
diff --git a/fio.c b/fio.c
index bce13a7..127314e 100644
--- a/fio.c
+++ b/fio.c
@@ -406,11 +406,22 @@
 
 		memcpy(&s, &io_u->start_time, sizeof(s));
 
+requeue:
 		ret = td_io_queue(td, io_u);
 		if (ret) {
-			td_verror(td, io_u->error);
-			put_io_u(td, io_u);
-			break;
+			if (ret > 0 && (io_u->xfer_buflen != io_u->resid) &&
+			    io_u->resid) {
+				/*
+				 * short read/write. requeue.
+				 */
+				io_u->xfer_buflen = io_u->resid;
+				io_u->xfer_buf += ret;
+				goto requeue;
+			} else {
+				td_verror(td, io_u->error);
+				put_io_u(td, io_u);
+				break;
+			}
 		}
 
 		add_slat_sample(td, io_u->ddir, mtime_since(&io_u->start_time, &io_u->issue_time));
diff --git a/fio.h b/fio.h
index d635e99..56ed248 100644
--- a/fio.h
+++ b/fio.h
@@ -75,6 +75,9 @@
 	unsigned int buflen;
 	unsigned long long offset;
 
+	void *xfer_buf;
+	unsigned int xfer_buflen;
+
 	unsigned int resid;
 	unsigned int error;
 
diff --git a/io_u.c b/io_u.c
index 5e53b81..da6fe8f 100644
--- a/io_u.c
+++ b/io_u.c
@@ -322,6 +322,8 @@
 		return NULL;
 	}
 
+	io_u->xfer_buf = io_u->buf;
+	io_u->xfer_buflen = io_u->buflen;
 	fio_gettime(&io_u->start_time, NULL);
 	return io_u;
 }