[PATCH] First cut at supporting > 1 file per job

This is likely very buggy, a simple test works though.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/Makefile b/Makefile
index f6001ad..b1a4986 100644
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,7 @@
 all: depend $(PROGS) $(SCRIPTS)
 	$(MAKE) -C engines
 
-fio: fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o
+fio: fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o filesetup.o
 	$(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt -ldl
 
 clean:
diff --git a/README b/README
index 359fc2d..d063df9 100644
--- a/README
+++ b/README
@@ -88,6 +88,8 @@
 			usb-storage or sata/libata driven) devices.
 	iodepth=x	For async io, allow 'x' ios in flight
 	overwrite=x	If 'x', layout a write file first.
+	nrfiles=x	Spread io load over 'x' number of files per job,
+			if possible.
 	prio=x		Run io at prio X, 0-7 is the kernel allowed range
 	prioclass=x	Run io at prio class X
 	bs=x		Use 'x' for thread blocksize. May include k/m postfix.
diff --git a/engines/fio-engine-cpu.c b/engines/fio-engine-cpu.c
index 9a32330..538fc86 100644
--- a/engines/fio-engine-cpu.c
+++ b/engines/fio-engine-cpu.c
@@ -15,7 +15,7 @@
 		td->cpuload = 100;
 
 	td->read_iolog = td->write_iolog = 0;
-	td->fd = -1;
+	td->nr_files = 0;
 
 	return 0;
 }
diff --git a/engines/fio-engine-libaio.c b/engines/fio-engine-libaio.c
index 703808b..9197107 100644
--- a/engines/fio-engine-libaio.c
+++ b/engines/fio-engine-libaio.c
@@ -17,17 +17,19 @@
 	struct io_event *aio_events;
 };
 
-static int fio_libaio_sync(struct thread_data *td)
+static int fio_libaio_sync(struct thread_data *td, struct fio_file *f)
 {
-	return fsync(td->fd);
+	return fsync(f->fd);
 }
 
 static int fio_libaio_prep(struct thread_data *td, struct io_u *io_u)
 {
+	struct fio_file *f = io_u->file;
+
 	if (io_u->ddir == DDIR_READ)
-		io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+		io_prep_pread(&io_u->iocb, f->fd, io_u->buf, io_u->buflen, io_u->offset);
 	else
-		io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+		io_prep_pwrite(&io_u->iocb, f->fd, io_u->buf, io_u->buflen, io_u->offset);
 
 	return 0;
 }
diff --git a/engines/fio-engine-mmap.c b/engines/fio-engine-mmap.c
index abb42bf..ad294f5 100644
--- a/engines/fio-engine-mmap.c
+++ b/engines/fio-engine-mmap.c
@@ -42,21 +42,22 @@
 
 static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
 {
-	unsigned long long real_off = io_u->offset - td->file_offset;
+	struct fio_file *f = io_u->file;
+	unsigned long long real_off = io_u->offset - f->file_offset;
 	struct mmapio_data *sd = td->io_ops->data;
 
 	if (io_u->ddir == DDIR_READ)
-		memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
+		memcpy(io_u->buf, f->mmap + real_off, io_u->buflen);
 	else
-		memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
+		memcpy(f->mmap + real_off, io_u->buf, io_u->buflen);
 
 	/*
 	 * not really direct, but should drop the pages from the cache
 	 */
 	if (td->odirect) {
-		if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
+		if (msync(f->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
 			io_u->error = errno;
-		if (madvise(td->mmap + real_off, io_u->buflen,  MADV_DONTNEED) < 0)
+		if (madvise(f->mmap + real_off, io_u->buflen,  MADV_DONTNEED) < 0)
 			io_u->error = errno;
 	}
 
@@ -66,9 +67,9 @@
 	return io_u->error;
 }
 
-static int fio_mmapio_sync(struct thread_data *td)
+static int fio_mmapio_sync(struct thread_data *td, struct fio_file *f)
 {
-	return msync(td->mmap, td->file_size, MS_SYNC);
+	return msync(f->mmap, f->file_size, MS_SYNC);
 }
 
 static void fio_mmapio_cleanup(struct thread_data *td)
diff --git a/engines/fio-engine-posixaio.c b/engines/fio-engine-posixaio.c
index 871db77..8cd3cf4 100644
--- a/engines/fio-engine-posixaio.c
+++ b/engines/fio-engine-posixaio.c
@@ -45,14 +45,15 @@
 	return sec + nsec;
 }
 
-static int fio_posixaio_sync(struct thread_data *td)
+static int fio_posixaio_sync(struct thread_data *td, struct fio_file *f)
 {
-	return fsync(td->fd);
+	return fsync(f->fd);
 }
 
 static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
 {
-	int r = aio_cancel(td->fd, &io_u->aiocb);
+	struct fio_file *f = io_u->file;
+	int r = aio_cancel(f->fd, &io_u->aiocb);
 
 	if (r == 1 || r == AIO_CANCELED)
 		return 0;
@@ -63,8 +64,9 @@
 static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
 {
 	struct aiocb *aiocb = &io_u->aiocb;
+	struct fio_file *f = io_u->file;
 
-	aiocb->aio_fildes = td->fd;
+	aiocb->aio_fildes = f->fd;
 	aiocb->aio_buf = io_u->buf;
 	aiocb->aio_nbytes = io_u->buflen;
 	aiocb->aio_offset = io_u->offset;
diff --git a/engines/fio-engine-sg.c b/engines/fio-engine-sg.c
index 59eea1d..0db5ca9 100644
--- a/engines/fio-engine-sg.c
+++ b/engines/fio-engine-sg.c
@@ -61,8 +61,9 @@
 static int fio_sgio_getevents(struct thread_data *td, int min, int max,
 			      struct timespec fio_unused *t)
 {
+	struct fio_file *f = &td->files[0];
 	struct sgio_data *sd = td->io_ops->data;
-	struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
+	struct pollfd pfd = { .fd = f->fd, .events = POLLIN };
 	void *buf = malloc(max * sizeof(struct sg_io_hdr));
 	int left = max, ret, events, i, r = 0, fl = 0;
 
@@ -70,8 +71,8 @@
 	 * don't block for !events
 	 */
 	if (!min) {
-		fl = fcntl(td->fd, F_GETFL);
-		fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
+		fl = fcntl(f->fd, F_GETFL);
+		fcntl(f->fd, F_SETFL, fl | O_NONBLOCK);
 	}
 
 	while (left) {
@@ -83,7 +84,7 @@
 				break;
 		} while (1);
 
-		ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
+		ret = read(f->fd, buf, left * sizeof(struct sg_io_hdr));
 		if (ret < 0) {
 			if (errno == EAGAIN)
 				break;
@@ -105,33 +106,35 @@
 	}
 
 	if (!min)
-		fcntl(td->fd, F_SETFL, fl);
+		fcntl(f->fd, F_SETFL, fl);
 
 	free(buf);
 	return r;
 }
 
-static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
+static int fio_sgio_ioctl_doio(struct thread_data *td, struct fio_file *f,
+			       struct io_u *io_u)
 {
 	struct sgio_data *sd = td->io_ops->data;
 	struct sg_io_hdr *hdr = &io_u->hdr;
 
 	sd->events[0] = io_u;
 
-	return ioctl(td->fd, SG_IO, hdr);
+	return ioctl(f->fd, SG_IO, hdr);
 }
 
-static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
+static int fio_sgio_rw_doio(struct thread_data *td, struct fio_file *f,
+			    struct io_u *io_u, int sync)
 {
 	struct sg_io_hdr *hdr = &io_u->hdr;
 	int ret;
 
-	ret = write(td->fd, hdr, sizeof(*hdr));
+	ret = write(f->fd, hdr, sizeof(*hdr));
 	if (ret < 0)
 		return errno;
 
 	if (sync) {
-		ret = read(td->fd, hdr, sizeof(*hdr));
+		ret = read(f->fd, hdr, sizeof(*hdr));
 		if (ret < 0)
 			return errno;
 	}
@@ -141,13 +144,15 @@
 
 static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
 {
-	if (td->filetype == FIO_TYPE_BD)
-		return fio_sgio_ioctl_doio(td, io_u);
+	struct fio_file *f = io_u->file;
 
-	return fio_sgio_rw_doio(td, io_u, sync);
+	if (td->filetype == FIO_TYPE_BD)
+		return fio_sgio_ioctl_doio(td, f, io_u);
+
+	return fio_sgio_rw_doio(td, f, io_u, sync);
 }
 
-static int fio_sgio_sync(struct thread_data *td)
+static int fio_sgio_sync(struct thread_data *td, struct fio_file *f)
 {
 	struct sgio_data *sd = td->io_ops->data;
 	struct sg_io_hdr *hdr;
@@ -266,6 +271,7 @@
 
 static int fio_sgio_init(struct thread_data *td)
 {
+	struct fio_file *f = &td->files[0];
 	struct sgio_data *sd;
 	unsigned int bs;
 	int ret;
@@ -276,14 +282,14 @@
 	td->io_ops->data = sd;
 
 	if (td->filetype == FIO_TYPE_BD) {
-		if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
+		if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
 			td_verror(td, errno);
 			return 1;
 		}
 	} else if (td->filetype == FIO_TYPE_CHAR) {
 		int version;
 
-		if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
+		if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
 			td_verror(td, errno);
 			return 1;
 		}
diff --git a/engines/fio-engine-splice.c b/engines/fio-engine-splice.c
index 30984f1..4f5b86c 100644
--- a/engines/fio-engine-splice.c
+++ b/engines/fio-engine-splice.c
@@ -16,9 +16,9 @@
 	int pipe[2];
 };
 
-static int fio_spliceio_sync(struct thread_data *td)
+static int fio_spliceio_sync(struct thread_data *td, struct fio_file *f)
 {
-	return fsync(td->fd);
+	return fsync(f->fd);
 }
 
 static int fio_spliceio_getevents(struct thread_data *td, int fio_unused min,
@@ -53,6 +53,7 @@
 static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
 {
 	struct spliceio_data *sd = td->io_ops->data;
+	struct fio_file *f = io_u->file;
 	int ret, ret2, buflen;
 	off_t offset;
 	void *p;
@@ -66,7 +67,7 @@
 		if (this_len > SPLICE_DEF_SIZE)
 			this_len = SPLICE_DEF_SIZE;
 
-		ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
+		ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
 		if (ret < 0) {
 			if (errno == ENODATA || errno == EAGAIN)
 				continue;
@@ -103,6 +104,7 @@
 		}
 	};
 	struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
+	struct fio_file *f = io_u->file;
 	off_t off = io_u->offset;
 	int ret, ret2;
 
@@ -118,7 +120,7 @@
 		iov[0].iov_base += ret;
 
 		while (ret) {
-			ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
+			ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0);
 			if (ret2 < 0)
 				return errno;
 
diff --git a/engines/fio-engine-sync.c b/engines/fio-engine-sync.c
index abc29f4..1806d4d 100644
--- a/engines/fio-engine-sync.c
+++ b/engines/fio-engine-sync.c
@@ -14,9 +14,9 @@
 	struct io_u *last_io_u;
 };
 
-static int fio_syncio_sync(struct thread_data *td)
+static int fio_syncio_sync(struct thread_data *td, struct fio_file *f)
 {
-	return fsync(td->fd);
+	return fsync(f->fd);
 }
 
 static int fio_syncio_getevents(struct thread_data *td, int fio_unused min,
@@ -45,7 +45,9 @@
 
 static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
 {
-	if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
+	struct fio_file *f = io_u->file;
+
+	if (lseek(f->fd, io_u->offset, SEEK_SET) == -1) {
 		td_verror(td, errno);
 		return 1;
 	}
@@ -56,12 +58,13 @@
 static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
 {
 	struct syncio_data *sd = td->io_ops->data;
+	struct fio_file *f = io_u->file;
 	int ret;
 
 	if (io_u->ddir == DDIR_READ)
-		ret = read(td->fd, io_u->buf, io_u->buflen);
+		ret = read(f->fd, io_u->buf, io_u->buflen);
 	else
-		ret = write(td->fd, io_u->buf, io_u->buflen);
+		ret = write(f->fd, io_u->buf, io_u->buflen);
 
 	if ((unsigned int) ret != io_u->buflen) {
 		if (ret > 0) {
diff --git a/filesetup.c b/filesetup.c
new file mode 100644
index 0000000..adfe940
--- /dev/null
+++ b/filesetup.c
@@ -0,0 +1,380 @@
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "fio.h"
+#include "os.h"
+
+/* Zero-fill f->file_name to f->file_size; returns 1 on open/ftruncate/alloc failure, else 0 */
+static int create_file(struct thread_data *td, struct fio_file *f)
+{
+	unsigned long long left = f->file_size;
+	unsigned int bs;
+	char *b;
+	int r;
+
+	f->fd = open(f->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (f->fd < 0) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	if (ftruncate(f->fd, f->file_size) == -1) {
+		td_verror(td, errno);
+		goto err;
+	}
+
+	b = calloc(1, td->max_bs);	/* zero-filled source buffer for the writes */
+	if (!b) {
+		td_verror(td, ENOMEM);
+		goto err;
+	}
+
+	while (left && !td->terminate) {
+		bs = td->max_bs;
+		if (bs > left)
+			bs = left;
+
+		r = write(f->fd, b, bs);
+		if (r == (int) bs) {
+			left -= bs;
+			continue;
+		}
+		if (r < 0)	/* failed or short write: record it and stop the layout */
+			td_verror(td, errno);
+		else
+			td_verror(td, EIO);
+		break;
+	}
+
+	if (td->terminate)
+		unlink(f->file_name);
+	else if (td->create_fsync)
+		fsync(f->fd);
+
+	free(b);
+	close(f->fd);
+	f->fd = -1;
+	return 0;
+err:
+	close(f->fd);
+	f->fd = -1;
+	return 1;
+}
+
+/* Size each file as total_file_size / nr_files; with overwrite set, also lay them out on disk. */
+static int create_files(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i, err = 0;
+
+	/*
+	 * unless specifically asked for overwrite, let normal io extend it
+	 */
+	if (!td->overwrite) {
+		td->io_size = td->total_file_size;
+		for_each_file(td, f, i)
+			f->file_size = td->total_file_size / td->nr_files;
+
+		return 0;
+	}
+
+	if (!td->total_file_size) {
+		log_err("Need size for create\n");
+		td_verror(td, EINVAL);
+		return 1;
+	}
+
+	temp_stall_ts = 1;
+	fprintf(f_out, "%s: Laying out IO file(s) (%LuMiB)\n",
+					td->name, td->total_file_size >> 20);
+
+	for_each_file(td, f, i) {
+		f->file_size = td->total_file_size / td->nr_files;
+		err = create_file(td, f);
+		if (err)	/* stop at the first file that cannot be created */
+			break;
+		td->io_size += f->file_size;
+	}
+
+	temp_stall_ts = 0;
+	return err;
+}
+
+static int file_size(struct thread_data *td, struct fio_file *f)
+{
+	struct stat st;
+
+	if (td->overwrite) {
+		if (fstat(f->fd, &st) == -1) {
+			td_verror(td, errno);
+			return 1;
+		}
+
+		f->real_file_size = st.st_size;
+
+		if (!f->file_size || f->file_size > f->real_file_size)
+			f->file_size = f->real_file_size;
+	}
+
+	f->file_size -= f->file_offset;
+	return 0;
+}
+
+static int bdev_size(struct thread_data *td, struct fio_file *f)
+{
+	unsigned long long bytes;
+	int r;
+
+	r = blockdev_size(f->fd, &bytes);
+	if (r) {
+		td_verror(td, r);
+		return 1;
+	}
+
+	f->real_file_size = bytes;
+
+	/*
+	 * no extend possibilities, so limit size to device size if too large
+	 */
+	if (!f->file_size || f->file_size > f->real_file_size)
+		f->file_size = f->real_file_size;
+
+	f->file_size -= f->file_offset;
+	return 0;
+}
+
+static int get_file_size(struct thread_data *td, struct fio_file *f)	/* 0 on success, non-zero on error */
+{
+	int ret = 0;
+
+	if (td->filetype == FIO_TYPE_FILE)
+		ret = file_size(td, f);
+	else if (td->filetype == FIO_TYPE_BD)
+		ret = bdev_size(td, f);
+	else
+		f->real_file_size = -1;	/* unsigned field: wraps to the largest value, so the offset check below passes */
+
+	if (ret)
+		return ret;
+
+	if (f->file_offset > f->real_file_size) {	/* NOTE(review): file_size() only sets real_file_size when td->overwrite is set */
+		log_err("%s: offset extends end (%Lu > %Lu)\n", td->name, f->file_offset, f->real_file_size);
+		return 1;
+	}
+
+	td->io_size += f->file_size;	/* add this file's io region to the job total */
+	return 0;
+}
+
+static int __setup_file_mmap(struct thread_data *td, struct fio_file *f)
+{
+	int flags;
+
+	if (td_rw(td))
+		flags = PROT_READ | PROT_WRITE;
+	else if (td_write(td)) {
+		flags = PROT_WRITE;
+
+		if (td->verify != VERIFY_NONE)
+			flags |= PROT_READ;
+	} else
+		flags = PROT_READ;
+
+	f->mmap = mmap(NULL, f->file_size, flags, MAP_SHARED, f->fd, f->file_offset);
+	if (f->mmap == MAP_FAILED) {
+		f->mmap = NULL;
+		td_verror(td, errno);
+		return 1;
+	}
+
+	if (td->invalidate_cache) {
+		if (madvise(f->mmap, f->file_size, MADV_DONTNEED) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	}
+
+	if (td->sequential) {
+		if (madvise(f->mmap, f->file_size, MADV_SEQUENTIAL) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	} else {
+		if (madvise(f->mmap, f->file_size, MADV_RANDOM) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* mmap each of the job's files in turn; return the first non-zero status, else 0. */
+static int setup_files_mmap(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i, err = 0;
+
+	for_each_file(td, f, i) {
+		err = __setup_file_mmap(td, f);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int __setup_file_plain(struct thread_data *td, struct fio_file *f)
+{
+	if (td->invalidate_cache) {
+		if (fadvise(f->fd, f->file_offset, f->file_size, POSIX_FADV_DONTNEED) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	}
+
+	if (td->sequential) {
+		if (fadvise(f->fd, f->file_offset, f->file_size, POSIX_FADV_SEQUENTIAL) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	} else {
+		if (fadvise(f->fd, f->file_offset, f->file_size, POSIX_FADV_RANDOM) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* Apply the fadvise hints to each of the job's files; return the first non-zero status, else 0. */
+static int setup_files_plain(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i, err = 0;
+
+	for_each_file(td, f, i) {
+		err = __setup_file_plain(td, f);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int setup_file(struct thread_data *td, struct fio_file *f)
+{
+	struct stat st;
+	int flags = 0;
+
+	if (stat(f->file_name, &st) == -1) {
+		if (errno != ENOENT) {
+			td_verror(td, errno);
+			return 1;
+		}
+		if (!td->create_file) {
+			td_verror(td, ENOENT);
+			return 1;
+		}
+		if (create_file(td, f))
+			return 1;
+	} else if (td->filetype == FIO_TYPE_FILE &&
+		   st.st_size < (off_t) f->file_size) {
+		if (create_file(td, f))
+			return 1;
+	}
+
+	if (td->odirect)
+		flags |= OS_O_DIRECT;
+
+	if (td_write(td) || td_rw(td)) {
+		if (td->filetype == FIO_TYPE_FILE) {
+			if (!td->overwrite)
+				flags |= O_TRUNC;
+
+			flags |= O_CREAT;
+		}
+		if (td->sync_io)
+			flags |= O_SYNC;
+
+		flags |= O_RDWR;
+
+		f->fd = open(f->file_name, flags, 0600);
+	} else {
+		if (td->filetype == FIO_TYPE_CHAR)
+			flags |= O_RDWR;
+		else
+			flags |= O_RDONLY;
+
+		f->fd = open(f->file_name, flags);
+	}
+
+	if (f->fd == -1) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	if (get_file_size(td, f))
+		return 1;
+
+	return 0;
+}
+
+/* Create, open and size every file of the job, then mmap or fadvise them; non-zero on failure. */
+int setup_files(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i, err;
+
+	/*
+	 * if ioengine defines a setup() method, it's responsible for
+	 * setting up everything in the td->files[] area.
+	 */
+	if (td->io_ops->setup)
+		return td->io_ops->setup(td);
+
+	if (create_files(td))
+		return 1;
+
+	for_each_file(td, f, i) {
+		err = setup_file(td, f);
+		if (err)	/* a file failed to open/size: don't go on to advise or mmap */
+			return err;
+	}
+
+	if (td->io_size == 0) {
+		log_err("%s: no io blocks\n", td->name);
+		td_verror(td, EINVAL);
+		return 1;
+	}
+
+	if (!td->zone_size)
+		td->zone_size = td->io_size;
+
+	td->total_io_size = td->io_size * td->loops;
+
+	if (td->io_ops->flags & FIO_MMAPIO)
+		return setup_files_mmap(td);
+	return setup_files_plain(td);
+}
+
+/* Close every open descriptor and drop every mapping held by the job's files. */
+void close_files(struct thread_data *td)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < td->nr_files; idx++) {
+		struct fio_file *file = &td->files[idx];
+		if (file->fd != -1) {
+			close(file->fd);
+			file->fd = -1;
+		}
+		if (file->mmap) {
+			munmap(file->mmap, file->file_size);
+			file->mmap = NULL;
+		}
+	}
+}
diff --git a/fio.c b/fio.c
index ea57d78..dc228a8 100644
--- a/fio.c
+++ b/fio.c
@@ -44,7 +44,7 @@
 static char run_str[MAX_JOBS + 1];
 int shm_id = 0;
 static struct timeval genesis;
-static int temp_stall_ts;
+int temp_stall_ts;
 char *fio_inst_prefix = _INST_PREFIX;
 
 static void print_thread_status(void);
@@ -109,26 +109,28 @@
  * The ->file_map[] contains a map of blocks we have or have not done io
  * to yet. Used to make sure we cover the entire range in a fair fashion.
  */
-static int random_map_free(struct thread_data *td, unsigned long long block)
+static int random_map_free(struct thread_data *td, struct fio_file *f,
+			   unsigned long long block)
 {
-	unsigned int idx = RAND_MAP_IDX(td, block);
-	unsigned int bit = RAND_MAP_BIT(td, block);
+	unsigned int idx = RAND_MAP_IDX(td, f, block);
+	unsigned int bit = RAND_MAP_BIT(td, f, block);
 
-	return (td->file_map[idx] & (1UL << bit)) == 0;
+	return (f->file_map[idx] & (1UL << bit)) == 0;
 }
 
 /*
  * Return the next free block in the map.
  */
-static int get_next_free_block(struct thread_data *td, unsigned long long *b)
+static int get_next_free_block(struct thread_data *td, struct fio_file *f,
+			       unsigned long long *b)
 {
 	int i;
 
 	*b = 0;
 	i = 0;
-	while ((*b) * td->min_bs < td->io_size) {
-		if (td->file_map[i] != -1UL) {
-			*b += ffz(td->file_map[i]);
+	while ((*b) * td->min_bs < f->file_size) {
+		if (f->file_map[i] != -1UL) {
+			*b += ffz(f->file_map[i]);
 			return 0;
 		}
 
@@ -142,7 +144,8 @@
 /*
  * Mark a given offset as used in the map.
  */
-static void mark_random_map(struct thread_data *td, struct io_u *io_u)
+static void mark_random_map(struct thread_data *td, struct fio_file *f,
+			    struct io_u *io_u)
 {
 	unsigned long long block = io_u->offset / (unsigned long long) td->min_bs;
 	unsigned int blocks = 0;
@@ -150,15 +153,15 @@
 	while (blocks < (io_u->buflen / td->min_bs)) {
 		unsigned int idx, bit;
 
-		if (!random_map_free(td, block))
+		if (!random_map_free(td, f, block))
 			break;
 
-		idx = RAND_MAP_IDX(td, block);
-		bit = RAND_MAP_BIT(td, block);
+		idx = RAND_MAP_IDX(td, f, block);
+		bit = RAND_MAP_BIT(td, f, block);
 
-		assert(idx < td->num_maps);
+		assert(idx < f->num_maps);
 
-		td->file_map[idx] |= (1UL << bit);
+		f->file_map[idx] |= (1UL << bit);
 		block++;
 		blocks++;
 	}
@@ -172,7 +175,8 @@
  * until we find a free one. For sequential io, just return the end of
  * the last io issued.
  */
-static int get_next_offset(struct thread_data *td, unsigned long long *offset)
+static int get_next_offset(struct thread_data *td, struct fio_file *f,
+			   unsigned long long *offset)
 {
 	unsigned long long b, rb;
 	long r;
@@ -184,19 +188,19 @@
 		do {
 			r = os_random_long(&td->random_state);
 			b = ((max_blocks - 1) * r / (unsigned long long) (RAND_MAX+1.0));
-			rb = b + (td->file_offset / td->min_bs);
+			rb = b + (f->file_offset / td->min_bs);
 			loops--;
-		} while (!random_map_free(td, rb) && loops);
+		} while (!random_map_free(td, f, rb) && loops);
 
 		if (!loops) {
-			if (get_next_free_block(td, &b))
+			if (get_next_free_block(td, f, &b))
 				return 1;
 		}
 	} else
-		b = td->last_pos / td->min_bs;
+		b = f->last_pos / td->min_bs;
 
-	*offset = (b * td->min_bs) + td->file_offset;
-	if (*offset > td->real_file_size)
+	*offset = (b * td->min_bs) + f->file_offset;
+	if (*offset > f->file_size)
 		return 1;
 
 	return 0;
@@ -443,12 +447,14 @@
 
 void put_io_u(struct thread_data *td, struct io_u *io_u)
 {
+	io_u->file = NULL;
 	list_del(&io_u->list);
 	list_add(&io_u->list, &td->io_u_freelist);
 	td->cur_depth--;
 }
 
-static int fill_io_u(struct thread_data *td, struct io_u *io_u)
+static int fill_io_u(struct thread_data *td, struct fio_file *f,
+		     struct io_u *io_u)
 {
 	/*
 	 * If using an iolog, grab next piece if any available.
@@ -459,7 +465,7 @@
 	/*
 	 * No log, let the seq/rand engine retrieve the next position.
 	 */
-	if (!get_next_offset(td, &io_u->offset)) {
+	if (!get_next_offset(td, f, &io_u->offset)) {
 		io_u->buflen = get_next_buflen(td);
 
 		if (io_u->buflen) {
@@ -471,6 +477,7 @@
 			if (td->write_iolog)
 				write_iolog_put(td, io_u);
 
+			io_u->file = f;
 			return 0;
 		}
 	}
@@ -501,7 +508,7 @@
  * Return an io_u to be processed. Gets a buflen and offset, sets direction,
  * etc. The returned io_u is fully ready to be prepped and submitted.
  */
-static struct io_u *get_io_u(struct thread_data *td)
+static struct io_u *get_io_u(struct thread_data *td, struct fio_file *f)
 {
 	struct io_u *io_u;
 
@@ -511,16 +518,16 @@
 
 	if (td->zone_bytes >= td->zone_size) {
 		td->zone_bytes = 0;
-		td->last_pos += td->zone_skip;
+		f->last_pos += td->zone_skip;
 	}
 
-	if (fill_io_u(td, io_u)) {
+	if (fill_io_u(td, f, io_u)) {
 		put_io_u(td, io_u);
 		return NULL;
 	}
 
-	if (io_u->buflen + io_u->offset > td->real_file_size)
-		io_u->buflen = td->real_file_size - io_u->offset;
+	if (io_u->buflen + io_u->offset > f->file_size)
+		io_u->buflen = f->file_size - io_u->offset;
 
 	if (!io_u->buflen) {
 		put_io_u(td, io_u);
@@ -528,9 +535,9 @@
 	}
 
 	if (!td->read_iolog && !td->sequential)
-		mark_random_map(td, io_u);
+		mark_random_map(td, f, io_u);
 
-	td->last_pos += io_u->buflen;
+	f->last_pos += io_u->buflen;
 
 	if (td->verify != VERIFY_NONE)
 		populate_io_u(td, io_u);
@@ -568,10 +575,21 @@
 	return 1;
 }
 
-static int sync_td(struct thread_data *td)
+/* Hand out the job's files round-robin: return the current one and advance the cursor. */
+static struct fio_file *get_next_file(struct thread_data *td)
+{
+	unsigned int cur = td->next_file;
+
+	td->next_file = cur + 1;
+	if (td->next_file >= td->nr_files)
+		td->next_file = 0;
+	return &td->files[cur];
+}
+
+static int td_io_sync(struct thread_data *td, struct fio_file *f)
 {
 	if (td->io_ops->sync)
-		return td->io_ops->sync(td);
+		return td->io_ops->sync(td, f);
 
 	return 0;
 }
@@ -703,6 +721,7 @@
 	struct timeval t;
 	struct io_u *io_u, *v_io_u = NULL;
 	struct io_completion_data icd;
+	struct fio_file *f;
 	int ret;
 
 	td_set_runstate(td, TD_VERIFYING);
@@ -724,6 +743,12 @@
 			break;
 		}
 
+		f = get_next_file(td);
+		if (!f)
+			break;
+
+		io_u->file = f;
+
 		if (td_io_prep(td, io_u)) {
 			put_io_u(td, io_u);
 			break;
@@ -812,6 +837,8 @@
 	struct io_completion_data icd;
 	struct timeval s, e;
 	unsigned long usec;
+	struct fio_file *f;
+	int i;
 
 	td_set_runstate(td, TD_RUNNING);
 
@@ -824,7 +851,11 @@
 		if (td->terminate)
 			break;
 
-		io_u = get_io_u(td);
+		f = get_next_file(td);
+		if (!f)
+			break;
+
+		io_u = get_io_u(td, f);
 		if (!io_u)
 			break;
 
@@ -884,7 +915,7 @@
 
 		if (should_fsync(td) && td->fsync_blocks &&
 		    (td->io_blocks[DDIR_WRITE] % td->fsync_blocks) == 0)
-			sync_td(td);
+			td_io_sync(td, f);
 	}
 
 	if (td->cur_depth)
@@ -892,7 +923,8 @@
 
 	if (should_fsync(td) && td->end_fsync) {
 		td_set_runstate(td, TD_FSYNCING);
-		sync_td(td);
+		for_each_file(td, f, i)
+			td_io_sync(td, f);
 	}
 }
 
@@ -988,287 +1020,6 @@
 	return 0;
 }
 
-static int create_file(struct thread_data *td, unsigned long long size)
-{
-	unsigned long long left;
-	unsigned int bs;
-	char *b;
-	int r;
-
-	/*
-	 * unless specifically asked for overwrite, let normal io extend it
-	 */
-	if (!td->overwrite) {
-		td->real_file_size = size;
-		return 0;
-	}
-
-	if (!size) {
-		log_err("Need size for create\n");
-		td_verror(td, EINVAL);
-		return 1;
-	}
-
-	temp_stall_ts = 1;
-	fprintf(f_out, "%s: Laying out IO file (%LuMiB)\n",td->name,size >> 20);
-
-	td->fd = open(td->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
-	if (td->fd < 0) {
-		td_verror(td, errno);
-		goto done_noclose;
-	}
-
-	if (ftruncate(td->fd, td->file_size) == -1) {
-		td_verror(td, errno);
-		goto done;
-	}
-
-	td->io_size = td->file_size;
-	b = malloc(td->max_bs);
-	memset(b, 0, td->max_bs);
-
-	left = size;
-	while (left && !td->terminate) {
-		bs = td->max_bs;
-		if (bs > left)
-			bs = left;
-
-		r = write(td->fd, b, bs);
-
-		if (r == (int) bs) {
-			left -= bs;
-			continue;
-		} else {
-			if (r < 0)
-				td_verror(td, errno);
-			else
-				td_verror(td, EIO);
-
-			break;
-		}
-	}
-
-	if (td->terminate)
-		unlink(td->file_name);
-	else if (td->create_fsync)
-		fsync(td->fd);
-
-	free(b);
-done:
-	close(td->fd);
-	td->fd = -1;
-done_noclose:
-	temp_stall_ts = 0;
-	return 0;
-}
-
-static int file_size(struct thread_data *td)
-{
-	struct stat st;
-
-	if (td->overwrite) {
-		if (fstat(td->fd, &st) == -1) {
-			td_verror(td, errno);
-			return 1;
-		}
-
-		td->real_file_size = st.st_size;
-
-		if (!td->file_size || td->file_size > td->real_file_size)
-			td->file_size = td->real_file_size;
-	}
-
-	td->file_size -= td->file_offset;
-	return 0;
-}
-
-static int bdev_size(struct thread_data *td)
-{
-	unsigned long long bytes;
-	int r;
-
-	r = blockdev_size(td->fd, &bytes);
-	if (r) {
-		td_verror(td, r);
-		return 1;
-	}
-
-	td->real_file_size = bytes;
-
-	/*
-	 * no extend possibilities, so limit size to device size if too large
-	 */
-	if (!td->file_size || td->file_size > td->real_file_size)
-		td->file_size = td->real_file_size;
-
-	td->file_size -= td->file_offset;
-	return 0;
-}
-
-static int get_file_size(struct thread_data *td)
-{
-	int ret = 0;
-
-	if (td->filetype == FIO_TYPE_FILE)
-		ret = file_size(td);
-	else if (td->filetype == FIO_TYPE_BD)
-		ret = bdev_size(td);
-	else
-		td->real_file_size = -1;
-
-	if (ret)
-		return ret;
-
-	if (td->file_offset > td->real_file_size) {
-		log_err("%s: offset extends end (%Lu > %Lu)\n", td->name, td->file_offset, td->real_file_size);
-		return 1;
-	}
-
-	td->io_size = td->file_size;
-	if (td->io_size == 0) {
-		log_err("%s: no io blocks\n", td->name);
-		td_verror(td, EINVAL);
-		return 1;
-	}
-
-	if (!td->zone_size)
-		td->zone_size = td->io_size;
-
-	td->total_io_size = td->io_size * td->loops;
-	return 0;
-}
-
-static int setup_file_mmap(struct thread_data *td)
-{
-	int flags;
-
-	if (td_rw(td))
-		flags = PROT_READ | PROT_WRITE;
-	else if (td_write(td)) {
-		flags = PROT_WRITE;
-
-		if (td->verify != VERIFY_NONE)
-			flags |= PROT_READ;
-	} else
-		flags = PROT_READ;
-
-	td->mmap = mmap(NULL, td->file_size, flags, MAP_SHARED, td->fd, td->file_offset);
-	if (td->mmap == MAP_FAILED) {
-		td->mmap = NULL;
-		td_verror(td, errno);
-		return 1;
-	}
-
-	if (td->invalidate_cache) {
-		if (madvise(td->mmap, td->file_size, MADV_DONTNEED) < 0) {
-			td_verror(td, errno);
-			return 1;
-		}
-	}
-
-	if (td->sequential) {
-		if (madvise(td->mmap, td->file_size, MADV_SEQUENTIAL) < 0) {
-			td_verror(td, errno);
-			return 1;
-		}
-	} else {
-		if (madvise(td->mmap, td->file_size, MADV_RANDOM) < 0) {
-			td_verror(td, errno);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static int setup_file_plain(struct thread_data *td)
-{
-	if (td->invalidate_cache) {
-		if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_DONTNEED) < 0) {
-			td_verror(td, errno);
-			return 1;
-		}
-	}
-
-	if (td->sequential) {
-		if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_SEQUENTIAL) < 0) {
-			td_verror(td, errno);
-			return 1;
-		}
-	} else {
-		if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_RANDOM) < 0) {
-			td_verror(td, errno);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static int setup_file(struct thread_data *td)
-{
-	struct stat st;
-	int flags = 0;
-
-	if (td->io_ops->setup)
-		return td->io_ops->setup(td);
-
-	if (stat(td->file_name, &st) == -1) {
-		if (errno != ENOENT) {
-			td_verror(td, errno);
-			return 1;
-		}
-		if (!td->create_file) {
-			td_verror(td, ENOENT);
-			return 1;
-		}
-		if (create_file(td, td->file_size))
-			return 1;
-	} else if (td->filetype == FIO_TYPE_FILE &&
-		   st.st_size < (off_t) td->file_size) {
-		if (create_file(td, td->file_size))
-			return 1;
-	}
-
-	if (td->odirect)
-		flags |= OS_O_DIRECT;
-
-	if (td_write(td) || td_rw(td)) {
-		if (td->filetype == FIO_TYPE_FILE) {
-			if (!td->overwrite)
-				flags |= O_TRUNC;
-
-			flags |= O_CREAT;
-		}
-		if (td->sync_io)
-			flags |= O_SYNC;
-
-		flags |= O_RDWR;
-
-		td->fd = open(td->file_name, flags, 0600);
-	} else {
-		if (td->filetype == FIO_TYPE_CHAR)
-			flags |= O_RDWR;
-		else
-			flags |= O_RDONLY;
-
-		td->fd = open(td->file_name, flags);
-	}
-
-	if (td->fd == -1) {
-		td_verror(td, errno);
-		return 1;
-	}
-
-	if (get_file_size(td))
-		return 1;
-
-	if (td->io_ops->flags & FIO_MMAPIO)
-		return setup_file_mmap(td);
-	else
-		return setup_file_plain(td);
-}
-
 static int switch_ioscheduler(struct thread_data *td)
 {
 	char tmp[256], tmp2[128];
@@ -1319,16 +1070,21 @@
 
 static void clear_io_state(struct thread_data *td)
 {
-	if (td->io_ops->flags & FIO_SYNCIO)
-		lseek(td->fd, SEEK_SET, 0);
+	struct fio_file *f;
+	int i;
 
-	td->last_pos = 0;
 	td->stat_io_bytes[0] = td->stat_io_bytes[1] = 0;
 	td->this_io_bytes[0] = td->this_io_bytes[1] = 0;
 	td->zone_bytes = 0;
 
-	if (td->file_map)
-		memset(td->file_map, 0, td->num_maps * sizeof(long));
+	for_each_file(td, f, i) {
+		f->last_pos = 0;
+		if (td->io_ops->flags & FIO_SYNCIO)
+			lseek(f->fd, 0, SEEK_SET);	/* rewind to the start of the file */
+
+		if (f->file_map)
+			memset(f->file_map, 0, f->num_maps * sizeof(long));
+	}
 }
 
 /*
@@ -1385,7 +1141,7 @@
 	fio_sem_up(&startup_sem);
 	fio_sem_down(&td->mutex);
 
-	if (!td->create_serialize && setup_file(td))
+	if (!td->create_serialize && setup_files(td))
 		goto err;
 
 	gettimeofday(&td->epoch, NULL);
@@ -1447,12 +1203,7 @@
 		terminate_threads(td->groupid);
 
 err:
-	if (td->fd != -1) {
-		close(td->fd);
-		td->fd = -1;
-	}
-	if (td->mmap)
-		munmap(td->mmap, td->file_size);
+	close_files(td);
 	close_ioengine(td);
 	cleanup_io_u(td);
 	td_set_runstate(td, TD_EXITED);
@@ -1826,7 +1577,7 @@
 		 * we don't want X number of threads getting their
 		 * client data interspersed on disk
 		 */
-		if (setup_file(td)) {
+		if (setup_files(td)) {
 			td_set_runstate(td, TD_REAPED);
 			todo--;
 		}
diff --git a/fio.h b/fio.h
index db39ea1..915c0b5 100644
--- a/fio.h
+++ b/fio.h
@@ -39,6 +39,7 @@
 
 struct io_piece {
 	struct list_head list;
+	struct fio_file *file;
 	unsigned long long offset;
 	unsigned int len;
 	int ddir;
@@ -73,6 +74,8 @@
 	unsigned char seen;
 	unsigned char ddir;
 
+	struct fio_file *file;
+
 	struct list_head list;
 };
 
@@ -130,21 +133,40 @@
 	FIO_MMAPIO	= 1 << 2,
 };
 
+struct fio_file {
+	/*
+	 * A file may not be a file descriptor, let the io engine decide
+	 */
+	union {
+		unsigned long file_data;	/* alternative handle for engines that do not use an fd */
+		int fd;	/* file descriptor; init.c sets it to -1 when the entry is created */
+	};
+	char *file_name;	/* heap string, strdup()'ed in init.c */
+	void *mmap;	/* NOTE(review): presumably the mapping used by FIO_MMAPIO engines - confirm */
+	unsigned long long file_size;	/* init.c sets this to total_file_size / nr_files */
+	unsigned long long real_file_size;	/* NOTE(review): not set in the visible code - confirm meaning */
+	unsigned long long file_offset;	/* init.c copies td->start_offset here */
+	unsigned long long last_pos;	/* reset to 0 by clear_io_state() */
+
+	unsigned long *file_map;	/* per-file bitmap, allocated in init_random_state() */
+	unsigned int num_maps;	/* number of longs in file_map */
+};
+
 /*
  * This describes a single thread/process executing a fio job.
  */
 struct thread_data {
 	char name[32];
-	char *file_name;
 	char *directory;
 	char verror[80];
 	pthread_t thread;
 	int thread_number;
 	int groupid;
 	enum fio_filetype filetype;
+	struct fio_file *files;
+	unsigned int nr_files;
+	unsigned int next_file;
 	int error;
-	int fd;
-	void *mmap;
 	pid_t pid;
 	char *orig_buffer;
 	size_t orig_buffer_size;
@@ -178,9 +200,6 @@
 	unsigned int overwrite;
 	unsigned int bw_avg_time;
 	unsigned int loops;
-	unsigned long long file_size;
-	unsigned long long real_file_size;
-	unsigned long long file_offset;
 	unsigned long long zone_size;
 	unsigned long long zone_skip;
 	enum fio_memtype mem_type;
@@ -233,21 +252,20 @@
 
 	unsigned long runtime[2];		/* msec */
 	unsigned long long io_size;
+	unsigned long long total_file_size;
+	unsigned long long start_offset;
 	unsigned long long total_io_size;
 
 	unsigned long long io_blocks[2];
 	unsigned long long io_bytes[2];
 	unsigned long long zone_bytes;
 	unsigned long long this_io_bytes[2];
-	unsigned long long last_pos;
 	volatile int mutex;
 
 	/*
 	 * State for random io, a bitmap of blocks done vs not done
 	 */
 	os_random_state_t random_state;
-	unsigned long *file_map;
-	unsigned int num_maps;
 
 	/*
 	 * CPU "io" cycle burner
@@ -326,6 +344,7 @@
 extern FILE *f_out;
 extern FILE *f_err;
 extern char *fio_inst_prefix;
+extern int temp_stall_ts;
 
 extern struct thread_data *threads;
 
@@ -334,9 +353,9 @@
 #define td_rw(td)		((td)->iomix != 0)
 
 #define BLOCKS_PER_MAP		(8 * sizeof(long))
-#define TO_MAP_BLOCK(td, b)	((b) - ((td)->file_offset / (td)->min_bs))
-#define RAND_MAP_IDX(td, b)	(TO_MAP_BLOCK(td, b) / BLOCKS_PER_MAP)
-#define RAND_MAP_BIT(td, b)	(TO_MAP_BLOCK(td, b) & (BLOCKS_PER_MAP - 1))
+#define TO_MAP_BLOCK(td, f, b)	((b) - ((f)->file_offset / (td)->min_bs))	/* block b relative to file f's start offset, in min_bs units */
+#define RAND_MAP_IDX(td, f, b)	(TO_MAP_BLOCK(td, f, b) / BLOCKS_PER_MAP)	/* which BLOCKS_PER_MAP-sized group; presumably the index into f->file_map */
+#define RAND_MAP_BIT(td, f, b)	(TO_MAP_BLOCK(td, f, b) & (BLOCKS_PER_MAP - 1))	/* position inside that group; presumably the bit number */
 
 #define MAX_JOBS	(1024)
 
@@ -419,6 +438,12 @@
 extern int init_random_state(struct thread_data *);
 
 /*
+ * File setup/shutdown
+ */
+extern void close_files(struct thread_data *);
+extern int setup_files(struct thread_data *);
+
+/*
  * This is a pretty crappy semaphore implementation, but with the use that fio
  * has (just signalling start/go conditions), it doesn't have to be better.
  * Naturally this would not work for any type of contended semaphore or
@@ -463,12 +488,12 @@
 	struct io_u *(*event)(struct thread_data *, int);
 	int (*cancel)(struct thread_data *, struct io_u *);
 	void (*cleanup)(struct thread_data *);
-	int (*sync)(struct thread_data *);
+	int (*sync)(struct thread_data *, struct fio_file *);
 	void *data;
 	void *dlhandle;
 };
 
-#define FIO_IOOPS_VERSION	1
+#define FIO_IOOPS_VERSION	2
 
 extern struct ioengine_ops *load_ioengine(struct thread_data *, char *);
 extern void close_ioengine(struct thread_data *);
@@ -478,4 +503,7 @@
  */
 #define fio_unused	__attribute((__unused__))
 
+#define for_each_file(td, f, i)	/* walk td->files[]: i counts 0..nr_files-1, f points at files[i] */ \
+	for ((i) = 0, (f) = &(td)->files[(i)]; (i) < (td)->nr_files; (i)++, (f) = &(td)->files[(i)])
+
 #endif
diff --git a/init.c b/init.c
index 58aca3c..967f0b5 100644
--- a/init.c
+++ b/init.c
@@ -46,6 +46,7 @@
 #define DEF_RWMIX_CYCLE		(500)
 #define DEF_RWMIX_READ		(50)
 #define DEF_NICE		(0)
+#define DEF_NR_FILES		(1)
 
 static int def_timeout = DEF_TIMEOUT;
 
@@ -82,7 +83,6 @@
 	*td = *parent;
 	td->name[0] = '\0';
 
-	td->fd = -1;
 	td->thread_number = thread_number;
 	return td;
 }
@@ -103,7 +103,8 @@
 	char *ddir_str[] = { "read", "write", "randread", "randwrite",
 			     "rw", NULL, "randrw" };
 	struct stat sb;
-	int numjobs, ddir;
+	int numjobs, ddir, i;
+	struct fio_file *f;
 
 #ifndef FIO_HAVE_LIBAIO
 	if (td->io_engine == FIO_LIBAIO) {
@@ -143,9 +144,9 @@
 	}
 
 	/*
-	 * only really works for sequential io for now
+	 * only really works for sequential io for now, and with 1 file
 	 */
-	if (td->zone_size && !td->sequential)
+	if (td->zone_size && (!td->sequential || td->nr_files > 1))
 		td->zone_size = 0;
 
 	/*
@@ -164,15 +165,36 @@
 
 	if (td->filetype == FIO_TYPE_FILE) {
 		char tmp[PATH_MAX];
+		int len = 0;	/* length of the "directory/" prefix held in tmp */
+		int i;
 
 		if (td->directory && td->directory[0] != '\0')
-			sprintf(tmp, "%s/%s.%d", td->directory, jobname, td->thread_number);
-		else
-			sprintf(tmp, "%s.%d", jobname, td->thread_number);
-		td->file_name = strdup(tmp);
-	} else
-		td->file_name = strdup(jobname);
+			len = sprintf(tmp, "%s/", td->directory);	/* remember where the prefix ends */
 
+		td->files = malloc(sizeof(struct fio_file) * td->nr_files);
+
+		for_each_file(td, f, i) {
+			memset(f, 0, sizeof(*f));
+			f->fd = -1;
+
+			sprintf(tmp + len, "%s.%d.%d", jobname, td->thread_number, i);	/* append after the prefix */
+			f->file_name = strdup(tmp);
+		}
+	} else {
+		td->nr_files = 1;	/* non-FIO_TYPE_FILE jobs always use exactly one entry */
+		td->files = malloc(sizeof(struct fio_file));
+		f = &td->files[0];
+
+		memset(f, 0, sizeof(*f));
+		f->fd = -1;
+		f->file_name = strdup(jobname);
+	}
+
+	for_each_file(td, f, i) {	/* split the job size evenly; same start offset for all */
+		f->file_size = td->total_file_size / td->nr_files;
+		f->file_offset = td->start_offset;
+	}
+
 	fio_sem_init(&td->mutex, 0);
 
 	td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX;
@@ -247,7 +269,7 @@
 int init_random_state(struct thread_data *td)
 {
 	unsigned long seeds[4];
-	int fd, num_maps, blocks;
+	int fd, num_maps, blocks, i;
 
 	fd = open("/dev/urandom", O_RDONLY);
 	if (fd == -1) {
@@ -273,11 +295,15 @@
 	if (td->rand_repeatable)
 		seeds[3] = DEF_RANDSEED;
 
-	blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
-	num_maps = blocks / BLOCKS_PER_MAP;
-	td->file_map = malloc(num_maps * sizeof(long));
-	td->num_maps = num_maps;
-	memset(td->file_map, 0, num_maps * sizeof(long));
+	for (i = 0; i < td->nr_files; i++) {
+		struct fio_file *f = &td->files[i];
+
+		blocks = (f->file_size + td->min_bs - 1) / td->min_bs;	/* min_bs blocks in the file, rounded up */
+		num_maps = (blocks + BLOCKS_PER_MAP - 1) / BLOCKS_PER_MAP;	/* round up so a partial last word is covered */
+		f->file_map = malloc(num_maps * sizeof(long));
+		f->num_maps = num_maps;
+		memset(f->file_map, 0, num_maps * sizeof(long));
+	}
 
 	os_random_seed(seeds[3], &td->random_state);
 	return 0;
@@ -821,6 +847,10 @@
 				fgetpos(f, &off);
 				continue;
 			}
+			if (!check_int(p, "nrfiles", &td->nr_files)) {
+				fgetpos(f, &off);
+				continue;
+			}
 			if (!check_range_bytes(p, "bsrange", &ul1, &ul2)) {
 				if (ul1 > ul2) {
 					td->max_bs = ul1;
@@ -837,11 +867,11 @@
 				fgetpos(f, &off);
 				continue;
 			}
-			if (!check_str_bytes(p, "size", &td->file_size)) {
+			if (!check_str_bytes(p, "size", &td->total_file_size)) {
 				fgetpos(f, &off);
 				continue;
 			}
-			if (!check_str_bytes(p, "offset", &td->file_offset)) {
+			if (!check_str_bytes(p, "offset", &td->start_offset)) {
 				fgetpos(f, &off);
 				continue;
 			}
@@ -1007,6 +1037,7 @@
 	def_thread.rwmixread = DEF_RWMIX_READ;
 	def_thread.nice = DEF_NICE;
 	def_thread.rand_repeatable = DEF_RAND_REPEAT;
+	def_thread.nr_files = DEF_NR_FILES;
 #ifdef FIO_HAVE_DISK_UTIL
 	def_thread.do_disk_util = 1;
 #endif
diff --git a/log.c b/log.c
index 4c7742b..00e3913 100644
--- a/log.c
+++ b/log.c
@@ -18,6 +18,7 @@
 		io_u->offset = ipo->offset;
 		io_u->buflen = ipo->len;
 		io_u->ddir = ipo->ddir;
+		io_u->file = ipo->file;
 		free(ipo);
 		return 0;
 	}
@@ -46,6 +47,7 @@
 	struct list_head *entry;
 
 	INIT_LIST_HEAD(&ipo->list);
+	ipo->file = io_u->file;
 	ipo->offset = io_u->offset;
 	ipo->len = io_u->buflen;
 
diff --git a/stat.c b/stat.c
index 7f38d96..e43a336 100644
--- a/stat.c
+++ b/stat.c
@@ -187,6 +187,7 @@
 
 void init_disk_util(struct thread_data *td)
 {
+	struct fio_file *f;
 	struct stat st;
 	char foo[256], tmp[256];
 	dev_t dev;
@@ -195,7 +196,11 @@
 	if (!td->do_disk_util)
 		return;
 
-	if (!stat(td->file_name, &st)) {
+	/*
+	 * Just use the same file, they are on the same device.
+	 */
+	f = &td->files[0];
+	if (!stat(f->file_name, &st)) {
 		if (S_ISBLK(st.st_mode))
 			dev = st.st_rdev;
 		else
@@ -204,7 +209,7 @@
 		/*
 		 * must be a file, open "." in that path
 		 */
-		strcpy(foo, td->file_name);
+		strcpy(foo, f->file_name);
 		p = dirname(foo);
 		if (stat(p, &st)) {
 			perror("disk util stat");