mmap engine: allow large files on 32-bit archs

Map chunks of up to 2GB at a time, remapping as we go
along so that total mapped size stays bounded.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/engines/mmap.c b/engines/mmap.c
index 5b8d800..05a4d51 100644
--- a/engines/mmap.c
+++ b/engines/mmap.c
@@ -13,59 +13,19 @@
 
 #include "../fio.h"
 
-static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
+/*
+ * Limits us to 2GB of mapped files in total
+ */
+#define MMAP_TOTAL_SZ	(2 * 1024 * 1024 * 1024UL)
+
+static unsigned long mmap_map_size;
+static unsigned long mmap_map_mask;
+
+static int fio_mmap_file(struct thread_data *td, struct fio_file *f,
+			 size_t length, off_t off)
 {
-	struct fio_file *f = io_u->file;
-	unsigned long long real_off = io_u->offset - f->file_offset;
-
-	fio_ro_check(td, io_u);
-
-	if (io_u->ddir == DDIR_READ)
-		memcpy(io_u->xfer_buf, f->mmap + real_off, io_u->xfer_buflen);
-	else if (io_u->ddir == DDIR_WRITE)
-		memcpy(f->mmap + real_off, io_u->xfer_buf, io_u->xfer_buflen);
-	else if (io_u->ddir == DDIR_SYNC) {
-		size_t len = (f->io_size + page_size - 1) & ~page_mask;
-
-		if (msync(f->mmap, len, MS_SYNC)) {
-			io_u->error = errno;
-			td_verror(td, io_u->error, "msync");
-		}
-	}
-
-	/*
-	 * not really direct, but should drop the pages from the cache
-	 */
-	if (td->o.odirect && io_u->ddir != DDIR_SYNC) {
-		size_t len = (io_u->xfer_buflen + page_size - 1) & ~page_mask;
-		unsigned long long off = real_off & ~page_mask;
-
-		if (msync(f->mmap + off, len, MS_SYNC) < 0) {
-			io_u->error = errno;
-			td_verror(td, io_u->error, "msync");
-		}
-		if (madvise(f->mmap + off, len,  MADV_DONTNEED) < 0) {
-			io_u->error = errno;
-			td_verror(td, io_u->error, "madvise");
-		}
-	}
-
-	return FIO_Q_COMPLETED;
-}
-
-static int fio_mmapio_open(struct thread_data *td, struct fio_file *f)
-{
-	int ret, flags;
-
-	ret = generic_open_file(td, f);
-	if (ret)
-		return ret;
-
-	/*
-	 * for size checkup, don't mmap anything.
-	 */
-	if (!f->io_size)
-		return 0;
+	int flags = 0;
+	int ret = 0;
 
 	if (td_rw(td))
 		flags = PROT_READ | PROT_WRITE;
@@ -77,11 +37,11 @@
 	} else
 		flags = PROT_READ;
 
-	f->mmap = mmap(NULL, f->io_size, flags, MAP_SHARED, f->fd, f->file_offset);
-	if (f->mmap == MAP_FAILED) {
+	f->mmap_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
+	if (f->mmap_ptr == MAP_FAILED) {
 		int err = errno;
 
-		f->mmap = NULL;
+		f->mmap_ptr = NULL;
 		td_verror(td, err, "mmap");
 		if (err == EINVAL && f->io_size > 2*1024*1024*1024UL)
 			log_err("fio: mmap size likely too large\n");
@@ -92,49 +52,121 @@
 		goto err;
 
 	if (!td_random(td)) {
-		if (madvise(f->mmap, f->io_size, MADV_SEQUENTIAL) < 0) {
+		if (madvise(f->mmap_ptr, length, MADV_SEQUENTIAL) < 0) {
 			td_verror(td, errno, "madvise");
 			goto err;
 		}
 	} else {
-		if (madvise(f->mmap, f->io_size, MADV_RANDOM) < 0) {
+		if (madvise(f->mmap_ptr, length, MADV_RANDOM) < 0) {
 			td_verror(td, errno, "madvise");
 			goto err;
 		}
 	}
 
-	return 0;
-
 err:
-	td->io_ops->close_file(td, f);
-	return 1;
+	return ret;
 }
 
-static int fio_mmapio_close(struct thread_data fio_unused *td,
-			    struct fio_file *f)
+static int fio_mmapio_prep(struct thread_data *td, struct io_u *io_u)
 {
-	int ret = 0, ret2;
+	struct fio_file *f = io_u->file;
+	int ret = 0;
 
-	if (f->mmap) {
-		if (munmap(f->mmap, f->io_size) < 0)
-			ret = errno;
-
-		f->mmap = NULL;
+	if (io_u->buflen > mmap_map_size) {
+		log_err("fio: bs too big for mmap engine\n");
+		ret = EIO;
+		goto err;
 	}
 
-	ret2 = generic_close_file(td, f);
-	if (!ret && ret2)
-		ret = ret2;
+	if (io_u->offset >= f->mmap_off &&
+	    io_u->offset + io_u->buflen < f->mmap_off + f->mmap_sz)
+		goto done;
 
+	if (f->mmap_ptr) {
+		if (munmap(f->mmap_ptr, f->mmap_sz) < 0) {
+			ret = errno;
+			goto err;
+		}
+		f->mmap_ptr = NULL;
+	}
+
+	f->mmap_sz = mmap_map_size;
+	if (f->mmap_sz  > f->io_size)
+		f->mmap_sz = f->io_size;
+
+	f->mmap_off = io_u->offset & ~mmap_map_mask;
+	if (io_u->offset + io_u->buflen >= f->mmap_off + f->mmap_sz)
+		f->mmap_off -= io_u->buflen;
+
+	ret = fio_mmap_file(td, f, f->mmap_sz, f->mmap_off);
+done:
+	if (!ret)
+		io_u->mmap_data = f->mmap_ptr + io_u->offset - f->mmap_off -
+					f->file_offset;
+err:
 	return ret;
 }
 
+static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct fio_file *f = io_u->file;
+
+	fio_ro_check(td, io_u);
+
+	if (io_u->ddir == DDIR_READ)
+		memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
+	else if (io_u->ddir == DDIR_WRITE)
+		memcpy(io_u->mmap_data, io_u->xfer_buf, io_u->xfer_buflen);
+	else if (io_u->ddir == DDIR_SYNC) {
+		if (msync(f->mmap_ptr, f->mmap_sz, MS_SYNC)) {
+			io_u->error = errno;
+			td_verror(td, io_u->error, "msync");
+		}
+	}
+
+	/*
+	 * not really direct, but should drop the pages from the cache
+	 */
+	if (td->o.odirect && io_u->ddir != DDIR_SYNC) {
+		if (msync(io_u->mmap_data, io_u->xfer_buflen, MS_SYNC) < 0) {
+			io_u->error = errno;
+			td_verror(td, io_u->error, "msync");
+		}
+		if (madvise(io_u->mmap_data, io_u->xfer_buflen,  MADV_DONTNEED) < 0) {
+			io_u->error = errno;
+			td_verror(td, io_u->error, "madvise");
+		}
+	}
+
+	return FIO_Q_COMPLETED;
+}
+
+static int fio_mmapio_init(struct thread_data *td)
+{
+	unsigned long shift, mask;
+
+	mmap_map_size = MMAP_TOTAL_SZ / td->o.nr_files;
+	mask = mmap_map_size;
+	shift = 0;
+	do {
+		mask >>= 1;
+		if (!mask)
+			break;
+		shift++;
+	} while (1);
+
+	mmap_map_mask = 1UL << shift;
+	return 0;
+}
+
 static struct ioengine_ops ioengine = {
 	.name		= "mmap",
 	.version	= FIO_IOOPS_VERSION,
+	.init		= fio_mmapio_init,
+	.prep		= fio_mmapio_prep,
 	.queue		= fio_mmapio_queue,
-	.open_file	= fio_mmapio_open,
-	.close_file	= fio_mmapio_close,
+	.open_file	= generic_open_file,
+	.close_file	= generic_close_file,
 	.get_file_size	= generic_get_file_size,
 	.flags		= FIO_SYNCIO | FIO_NOEXTEND,
 };