mmap engine: allow large files on 32-bit archs
Map chunks totalling at most 2GB at a time, remapping as we go
along.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/engines/mmap.c b/engines/mmap.c
index 5b8d800..05a4d51 100644
--- a/engines/mmap.c
+++ b/engines/mmap.c
@@ -13,59 +13,19 @@
#include "../fio.h"
-static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
+/*
+ * Limits us to 2GB of mapped files in total. fio_mmapio_init() divides
+ * this budget by td->o.nr_files to get the size of each file's mapping
+ * window.
+ */
+#define MMAP_TOTAL_SZ (2 * 1024 * 1024 * 1024UL)
+
+static unsigned long mmap_map_size; /* per-file window size, set by fio_mmapio_init() */
+static unsigned long mmap_map_mask; /* derived from mmap_map_size by fio_mmapio_init() */
+
+static int fio_mmap_file(struct thread_data *td, struct fio_file *f,
+ size_t length, off_t off)
{
- struct fio_file *f = io_u->file;
- unsigned long long real_off = io_u->offset - f->file_offset;
-
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_READ)
- memcpy(io_u->xfer_buf, f->mmap + real_off, io_u->xfer_buflen);
- else if (io_u->ddir == DDIR_WRITE)
- memcpy(f->mmap + real_off, io_u->xfer_buf, io_u->xfer_buflen);
- else if (io_u->ddir == DDIR_SYNC) {
- size_t len = (f->io_size + page_size - 1) & ~page_mask;
-
- if (msync(f->mmap, len, MS_SYNC)) {
- io_u->error = errno;
- td_verror(td, io_u->error, "msync");
- }
- }
-
- /*
- * not really direct, but should drop the pages from the cache
- */
- if (td->o.odirect && io_u->ddir != DDIR_SYNC) {
- size_t len = (io_u->xfer_buflen + page_size - 1) & ~page_mask;
- unsigned long long off = real_off & ~page_mask;
-
- if (msync(f->mmap + off, len, MS_SYNC) < 0) {
- io_u->error = errno;
- td_verror(td, io_u->error, "msync");
- }
- if (madvise(f->mmap + off, len, MADV_DONTNEED) < 0) {
- io_u->error = errno;
- td_verror(td, io_u->error, "madvise");
- }
- }
-
- return FIO_Q_COMPLETED;
-}
-
-static int fio_mmapio_open(struct thread_data *td, struct fio_file *f)
-{
- int ret, flags;
-
- ret = generic_open_file(td, f);
- if (ret)
- return ret;
-
- /*
- * for size checkup, don't mmap anything.
- */
- if (!f->io_size)
- return 0;
+ int flags = 0;
+ int ret = 0;
if (td_rw(td))
flags = PROT_READ | PROT_WRITE;
@@ -77,11 +37,11 @@
} else
flags = PROT_READ;
- f->mmap = mmap(NULL, f->io_size, flags, MAP_SHARED, f->fd, f->file_offset);
- if (f->mmap == MAP_FAILED) {
+ f->mmap_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
+ if (f->mmap_ptr == MAP_FAILED) {
int err = errno;
- f->mmap = NULL;
+ f->mmap_ptr = NULL;
td_verror(td, err, "mmap");
if (err == EINVAL && f->io_size > 2*1024*1024*1024UL)
log_err("fio: mmap size likely too large\n");
@@ -92,49 +52,121 @@
goto err;
if (!td_random(td)) {
- if (madvise(f->mmap, f->io_size, MADV_SEQUENTIAL) < 0) {
+ if (madvise(f->mmap_ptr, length, MADV_SEQUENTIAL) < 0) {
td_verror(td, errno, "madvise");
goto err;
}
} else {
- if (madvise(f->mmap, f->io_size, MADV_RANDOM) < 0) {
+ if (madvise(f->mmap_ptr, length, MADV_RANDOM) < 0) {
td_verror(td, errno, "madvise");
goto err;
}
}
- return 0;
-
err:
- td->io_ops->close_file(td, f);
- return 1;
+ return ret;
}
-static int fio_mmapio_close(struct thread_data fio_unused *td,
- struct fio_file *f)
+/*
+ * Make sure the bytes [io_u->offset, io_u->offset + io_u->buflen) of the
+ * file are covered by its current mapping window, replacing the window if
+ * they are not, and point io_u->mmap_data at the first byte of the io.
+ *
+ * Returns 0 on success and a positive errno value on failure: EIO if the
+ * block size exceeds the window size or no mapping could be set up, or
+ * the errno left by a failed munmap().
+ */
+static int fio_mmapio_prep(struct thread_data *td, struct io_u *io_u)
{
- int ret = 0, ret2;
+	struct fio_file *f = io_u->file;
+	unsigned long long win_off, slack;
+	int ret = 0;
- if (f->mmap) {
- if (munmap(f->mmap, f->io_size) < 0)
- ret = errno;
-
- f->mmap = NULL;
+	if (io_u->buflen > mmap_map_size) {
+		log_err("fio: bs too big for mmap engine\n");
+		ret = EIO;
+		goto err;
}
- ret2 = generic_close_file(td, f);
- if (!ret && ret2)
- ret = ret2;
+
+	/*
+	 * Fast path: a live window that fully contains the io. An io that
+	 * ends exactly at the end of the window is still inside it, and a
+	 * stale mmap_off/mmap_sz pair left behind by a failed mapping must
+	 * not be trusted, hence the mmap_ptr check.
+	 */
+	if (f->mmap_ptr && io_u->offset >= f->mmap_off &&
+	    io_u->offset + io_u->buflen <= f->mmap_off + f->mmap_sz)
+		goto done;
+
+	if (f->mmap_ptr) {
+		if (munmap(f->mmap_ptr, f->mmap_sz) < 0) {
+			ret = errno;
+			goto err;
+		}
+		f->mmap_ptr = NULL;
+	}
+
+	/*
+	 * Start the new window on the page holding the first byte of the
+	 * io, since mmap() wants a page aligned file offset. page_mask is
+	 * widened before it is complemented so that offsets beyond 4GB are
+	 * not truncated where unsigned long is 32 bits wide. mmap_map_mask
+	 * is not used for rounding: fio_mmapio_init() sets it to a single
+	 * bit, so clearing it does not round anything down.
+	 */
+	win_off = io_u->offset & ~(unsigned long long) page_mask;
+	slack = io_u->offset - win_off;
+
+	f->mmap_sz = mmap_map_size;
+	if (f->mmap_sz > f->io_size)
+		f->mmap_sz = f->io_size;
+	/* the window must at least hold this io, alignment slack included */
+	if (f->mmap_sz < slack + io_u->buflen)
+		f->mmap_sz = slack + io_u->buflen;
+	f->mmap_off = win_off;
+
+	ret = fio_mmap_file(td, f, f->mmap_sz, f->mmap_off);
+	/*
+	 * fio_mmap_file() resets mmap_ptr to NULL when mmap() fails; treat
+	 * that as an error even if it handed back 0.
+	 */
+	if (!ret && !f->mmap_ptr)
+		ret = EIO;
+done:
+	/*
+	 * The window was mapped at file offset mmap_off, so the io lives
+	 * (offset - mmap_off) bytes into it.
+	 */
+	if (!ret)
+		io_u->mmap_data = (char *) f->mmap_ptr +
+					(io_u->offset - f->mmap_off);
+err:
return ret;
}
+/*
+ * Carry out one io against the mapping window set up for it:
+ * reads and writes are a memcpy() between io_u->xfer_buf and
+ * io_u->mmap_data, a sync is an msync() of the file's whole window.
+ *
+ * Failures are reported through io_u->error and td_verror(); the return
+ * value is always FIO_Q_COMPLETED.
+ */
+static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct fio_file *f = io_u->file;
+
+	fio_ro_check(td, io_u);
+
+	if (io_u->ddir == DDIR_READ)
+		memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
+	else if (io_u->ddir == DDIR_WRITE)
+		memcpy(io_u->mmap_data, io_u->xfer_buf, io_u->xfer_buflen);
+	else if (io_u->ddir == DDIR_SYNC) {
+		if (msync(f->mmap_ptr, f->mmap_sz, MS_SYNC)) {
+			io_u->error = errno;
+			td_verror(td, io_u->error, "msync");
+		}
+	}
+
+	/*
+	 * not really direct, but should drop the pages from the cache
+	 */
+	if (td->o.odirect && io_u->ddir != DDIR_SYNC) {
+		/*
+		 * msync() and madvise() want a page aligned start address,
+		 * and io_u->mmap_data is only as aligned as the io offset.
+		 * Widen the range to whole pages, as the code this function
+		 * replaces did with the same page_size/page_mask globals.
+		 */
+		unsigned long slack = (unsigned long) io_u->mmap_data & page_mask;
+		void *start = (char *) io_u->mmap_data - slack;
+		size_t len = (io_u->xfer_buflen + slack + page_size - 1) & ~page_mask;
+
+		if (msync(start, len, MS_SYNC) < 0) {
+			io_u->error = errno;
+			td_verror(td, io_u->error, "msync");
+		}
+		if (madvise(start, len, MADV_DONTNEED) < 0) {
+			io_u->error = errno;
+			td_verror(td, io_u->error, "madvise");
+		}
+	}
+
+	return FIO_Q_COMPLETED;
+}
+
+/*
+ * Engine init hook: divide MMAP_TOTAL_SZ by td->o.nr_files to get the
+ * per-file window size, then derive mmap_map_mask from it. Always
+ * returns 0.
+ *
+ * NOTE(review): mmap_map_mask ends up as a single set bit (the largest
+ * power of two not above mmap_map_size), not a run of low bits; confirm
+ * that this is what its users expect.
+ */
+static int fio_mmapio_init(struct thread_data *td)
+{
+	unsigned long rest, log2_sz = 0;
+
+	mmap_map_size = MMAP_TOTAL_SZ / td->o.nr_files;
+
+	/* log2_sz = floor(log2(mmap_map_size)); stays 0 for sizes 0 and 1 */
+	for (rest = mmap_map_size >> 1; rest; rest >>= 1)
+		log2_sz++;
+
+	mmap_map_mask = 1UL << log2_sz;
+	return 0;
+}
+
static struct ioengine_ops ioengine = {
.name = "mmap",
.version = FIO_IOOPS_VERSION,
+ .init = fio_mmapio_init, /* computes mmap_map_size and mmap_map_mask */
+ .prep = fio_mmapio_prep, /* (re)maps the file window and sets io_u->mmap_data */
.queue = fio_mmapio_queue,
- .open_file = fio_mmapio_open,
- .close_file = fio_mmapio_close,
+ .open_file = generic_open_file, /* mapping is no longer done at open time */
+ .close_file = generic_close_file, /* NOTE(review): the removed fio_mmapio_close() called munmap(); confirm where f->mmap_ptr is unmapped now */
.get_file_size = generic_get_file_size,
.flags = FIO_SYNCIO | FIO_NOEXTEND,
};
diff --git a/filesetup.c b/filesetup.c
index 1372177..a3f96a1 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -269,8 +269,8 @@
/*
* FIXME: add blockdev flushing too
*/
- if (f->mmap)
- ret = madvise(f->mmap, len, MADV_DONTNEED);
+ if (f->mmap_ptr)
+ ret = madvise(f->mmap_ptr, f->mmap_sz, MADV_DONTNEED);
else if (f->filetype == FIO_TYPE_FILE) {
ret = fadvise(f->fd, off, len, POSIX_FADV_DONTNEED);
} else if (f->filetype == FIO_TYPE_BD) {
diff --git a/fio.h b/fio.h
index c9fa6a9..2e5431e 100644
--- a/fio.h
+++ b/fio.h
@@ -135,6 +135,7 @@
#ifdef FIO_HAVE_SOLARISAIO
aio_result_t resultp;
#endif
+ void *mmap_data;
};
struct timeval start_time;
struct timeval issue_time;
@@ -313,9 +314,12 @@
* filename and possible memory mapping
*/
char *file_name;
- void *mmap;
unsigned int major, minor;
+ void *mmap_ptr;
+ size_t mmap_sz;
+ off_t mmap_off;
+
/*
* size of the file, offset into file, and io size from that offset
*/