Revamp file locking
Get rid of the semaphore implementation, no need to carry both.
Add different locking modes (exclusive and readwrite) to enable
a wider range of testing. Also combine lockfile and lockfile_batch,
the latter is now a postfix option to the former.
So to enable readers-excluding-writers locking mode with a lock batch
count of 4, you would write:
lockfile=readwrite:4
instead.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index 3e0a31b..3d92293 100644
--- a/HOWTO
+++ b/HOWTO
@@ -219,13 +219,24 @@
opendir=str Tell fio to recursively add any file it can find in this
directory and down the file system tree.
-lockfile=bool If set, fio will lock a file internally before doing IO to it.
- This makes it safe to share file descriptors across fio
- jobs that run at the same time.
+lockfile=str Fio defaults to not doing any locking files before it does
+ IO to them. If a file or file descriptor is shared, fio
+ can serialize IO to that file to make the end result
+ consistent. This is usual for emulating real workloads that
+ share files. The lock modes are:
-lockfile_batch=int Acquiring a semaphore can be quite expensive, so
- allow a process to complete this number of IOs before releasing
- the semaphore again. Defaults to 1.
+ none No locking. The default.
+ exclusive Only one thread/process may do IO,
+ excluding all others.
+ readwrite Read-write locking on the file. Many
+ readers may access the file at the
+ same time, but writes get exclusive
+ access.
+
+ The option may be post-fixed with a lock batch number. If
+ set, then each thread/process may do that amount of IOs to
+ the file before giving up the lock. Since lock acqusition is
+ expensive, batching the lock/unlocks will speed up IO.
readwrite=str
rw=str Type of io pattern. Accepted values are:
diff --git a/Makefile b/Makefile
index 1b77b25..e0ab0b8 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@
PROGS = fio
SCRIPTS = fio_generate_plots
OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o filesetup.o \
- eta.o verify.o memory.o io_u.o parse.o mutex.o sem.o options.o \
+ eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
rbtree.o diskutil.o fifo.o blktrace.o smalloc.o filehash.o
OBJS += crc/crc7.o
diff --git a/filesetup.c b/filesetup.c
index 4e2a36c..4d2017d 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -226,9 +226,33 @@
return ret;
}
-int generic_open_file(struct thread_data *td, struct fio_file *f)
+static int file_lookup_open(struct fio_file *f, int flags)
{
struct fio_file *__f;
+ int from_hash;
+
+ __f = lookup_file_hash(f->file_name);
+ if (__f) {
+ /*
+ * racy, need the __f->lock locked
+ */
+ f->lock = __f->lock;
+ f->lock_owner = __f->lock_owner;
+ f->lock_batch = __f->lock_batch;
+ f->lock_ddir = __f->lock_ddir;
+ f->fd = dup(__f->fd);
+ f->references++;
+ from_hash = 1;
+ } else {
+ f->fd = open(f->file_name, flags, 0600);
+ from_hash = 0;
+ }
+
+ return from_hash;
+}
+
+int generic_open_file(struct thread_data *td, struct fio_file *f)
+{
int is_std = 0;
int flags = 0;
int from_hash = 0;
@@ -267,16 +291,8 @@
if (is_std)
f->fd = dup(STDOUT_FILENO);
- else {
- __f = lookup_file_hash(f->file_name);
- if (__f) {
- f->sem = __f->sem;
- f->fd = dup(__f->fd);
- f->references++;
- from_hash = 1;
- } else
- f->fd = open(f->file_name, flags, 0600);
- }
+ else
+ from_hash = file_lookup_open(f, flags);
} else {
if (f->filetype == FIO_TYPE_CHAR && !read_only)
flags |= O_RDWR;
@@ -285,16 +301,8 @@
if (is_std)
f->fd = dup(STDIN_FILENO);
- else {
- __f = lookup_file_hash(f->file_name);
- if (__f) {
- f->sem = __f->sem;
- f->fd = dup(__f->fd);
- f->references++;
- from_hash = 1;
- } else
- f->fd = open(f->file_name, flags);
- }
+ else
+ from_hash = file_lookup_open(f, flags);
}
if (f->fd == -1) {
@@ -641,8 +649,19 @@
get_file_type(f);
- if (td->o.lockfile)
- f->sem = fio_sem_init(1);
+ switch (td->o.file_lock_mode) {
+ case FILE_LOCK_NONE:
+ break;
+ case FILE_LOCK_READWRITE:
+ f->lock = fio_mutex_rw_init();
+ break;
+ case FILE_LOCK_EXCLUSIVE:
+ f->lock = fio_mutex_init(1);
+ break;
+ default:
+ log_err("fio: unknown lock mode: %d\n", td->o.file_lock_mode);
+ assert(0);
+ }
td->files_index++;
if (f->filetype == FIO_TYPE_FILE)
@@ -682,31 +701,62 @@
return ret;
}
-void lock_file(struct thread_data *td, struct fio_file *f)
+void lock_file(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir)
{
- if (f && f->sem) {
- if (f->sem_owner == td && f->sem_batch--)
- return;
+ if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
+ return;
- fio_sem_down(f->sem);
- f->sem_owner = td;
- f->sem_batch = td->o.lockfile_batch;
+ if (f->lock_owner == td && f->lock_batch--)
+ return;
+
+ if (td->o.file_lock_mode == FILE_LOCK_READWRITE) {
+ if (ddir == DDIR_READ)
+ fio_mutex_down_read(f->lock);
+ else
+ fio_mutex_down_write(f->lock);
+ } else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
+ fio_mutex_down(f->lock);
+
+ f->lock_owner = td;
+ f->lock_batch = td->o.lockfile_batch;
+ f->lock_ddir = ddir;
+}
+
+void unlock_file(struct thread_data *td, struct fio_file *f)
+{
+ if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
+ return;
+ if (f->lock_batch)
+ return;
+
+ if (td->o.file_lock_mode == FILE_LOCK_READWRITE) {
+ const int is_read = f->lock_ddir == DDIR_READ;
+ int val = fio_mutex_getval(f->lock);
+
+ if ((is_read && val == 1) || (!is_read && val == -1))
+ f->lock_owner = NULL;
+
+ if (is_read)
+ fio_mutex_up_read(f->lock);
+ else
+ fio_mutex_up_write(f->lock);
+ } else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE) {
+ int val = fio_mutex_getval(f->lock);
+
+ if (val == 0)
+ f->lock_owner = NULL;
+
+ fio_mutex_up(f->lock);
}
}
-void unlock_file(struct fio_file *f)
+void unlock_file_all(struct thread_data *td, struct fio_file *f)
{
- if (f && f->sem) {
- int sem_val;
+ if (f->lock_owner != td)
+ return;
- if (f->sem_batch)
- return;
-
- sem_getvalue(&f->sem->sem, &sem_val);
- if (!sem_val)
- f->sem_owner = NULL;
- fio_sem_up(f->sem);
- }
+ f->lock_batch = 0;
+ unlock_file(td, f);
}
static int recurse_dir(struct thread_data *td, const char *dirname)
diff --git a/fio.h b/fio.h
index ffd3d7d..b22009b 100644
--- a/fio.h
+++ b/fio.h
@@ -22,7 +22,6 @@
#include "arch/arch.h"
#include "os/os.h"
#include "mutex.h"
-#include "sem.h"
#include "log.h"
#include "debug.h"
@@ -47,6 +46,12 @@
TD_DDIR_RANDRW = TD_DDIR_RW | TD_DDIR_RAND,
};
+enum file_lock_mode {
+ FILE_LOCK_NONE,
+ FILE_LOCK_EXCLUSIVE,
+ FILE_LOCK_READWRITE,
+};
+
/*
* Use for maintaining statistics
*/
@@ -313,9 +318,10 @@
/*
* if io is protected by a semaphore, this is set
*/
- struct fio_sem *sem;
- void *sem_owner;
- unsigned int sem_batch;
+ struct fio_mutex *lock;
+ void *lock_owner;
+ unsigned int lock_batch;
+ enum fio_ddir lock_ddir;
/*
* block map for random io
@@ -415,7 +421,7 @@
unsigned int nr_files;
unsigned int open_files;
- unsigned int lockfile;
+ enum file_lock_mode file_lock_mode;
unsigned int lockfile_batch;
unsigned int odirect;
@@ -820,8 +826,9 @@
extern int add_file(struct thread_data *, const char *);
extern void get_file(struct fio_file *);
extern int __must_check put_file(struct thread_data *, struct fio_file *);
-extern void lock_file(struct thread_data *, struct fio_file *);
-extern void unlock_file(struct fio_file *);
+extern void lock_file(struct thread_data *, struct fio_file *, enum fio_ddir);
+extern void unlock_file(struct thread_data *, struct fio_file *);
+extern void unlock_file_all(struct thread_data *, struct fio_file *);
extern int add_dir_files(struct thread_data *, const char *);
extern int init_random_map(struct thread_data *);
extern void dup_files(struct thread_data *, struct thread_data *);
diff --git a/io_u.c b/io_u.c
index 04d7dcb..5a3157a 100644
--- a/io_u.c
+++ b/io_u.c
@@ -665,7 +665,6 @@
* td_io_close() does a put_file() as well, so no need to
* do that here.
*/
- unlock_file(io_u->file);
io_u->file = NULL;
td_io_close_file(td, f);
f->flags |= FIO_FILE_DONE;
diff --git a/ioengines.c b/ioengines.c
index bd2eb4a..87db11c 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -168,14 +168,14 @@
dprint_io_u(io_u, "prep");
fio_ro_check(td, io_u);
- lock_file(td, io_u->file);
+ lock_file(td, io_u->file, io_u->ddir);
if (td->io_ops->prep) {
int ret = td->io_ops->prep(td, io_u);
dprint(FD_IO, "->prep(%p)=%d\n", io_u, ret);
if (ret)
- unlock_file(io_u->file);
+ unlock_file(td, io_u->file);
return ret;
}
@@ -232,7 +232,7 @@
ret = td->io_ops->queue(td, io_u);
- unlock_file(io_u->file);
+ unlock_file(td, io_u->file);
if (ret != FIO_Q_BUSY)
io_u_mark_depth(td, io_u);
@@ -359,10 +359,7 @@
*/
f->flags |= FIO_FILE_CLOSING;
- if (f->sem_owner == td && f->sem_batch) {
- f->sem_batch = 0;
- unlock_file(f);
- }
+ unlock_file_all(td, f);
return put_file(td, f);
}
diff --git a/mutex.c b/mutex.c
index bcc37ae..e6fb3f0 100644
--- a/mutex.c
+++ b/mutex.c
@@ -7,6 +7,7 @@
#include <sys/mman.h>
#include "mutex.h"
+#include "arch/arch.h"
void fio_mutex_remove(struct fio_mutex *mutex)
{
@@ -76,8 +77,13 @@
void fio_mutex_down(struct fio_mutex *mutex)
{
pthread_mutex_lock(&mutex->lock);
- while (mutex->value == 0)
+
+ while (!mutex->value) {
+ mutex->waiters++;
pthread_cond_wait(&mutex->cond, &mutex->lock);
+ mutex->waiters--;
+ }
+
mutex->value--;
pthread_mutex_unlock(&mutex->lock);
}
@@ -85,7 +91,8 @@
void fio_mutex_up(struct fio_mutex *mutex)
{
pthread_mutex_lock(&mutex->lock);
- if (!mutex->value)
+ read_barrier();
+ if (!mutex->value && mutex->waiters)
pthread_cond_signal(&mutex->cond);
mutex->value++;
pthread_mutex_unlock(&mutex->lock);
@@ -94,8 +101,13 @@
void fio_mutex_down_write(struct fio_mutex *mutex)
{
pthread_mutex_lock(&mutex->lock);
- while (mutex->value != 0)
+
+ while (mutex->value != 0) {
+ mutex->waiters++;
pthread_cond_wait(&mutex->cond, &mutex->lock);
+ mutex->waiters--;
+ }
+
mutex->value--;
pthread_mutex_unlock(&mutex->lock);
}
@@ -103,8 +115,13 @@
void fio_mutex_down_read(struct fio_mutex *mutex)
{
pthread_mutex_lock(&mutex->lock);
- while (mutex->value < 0)
+
+ while (mutex->value < 0) {
+ mutex->waiters++;
pthread_cond_wait(&mutex->cond, &mutex->lock);
+ mutex->waiters--;
+ }
+
mutex->value++;
pthread_mutex_unlock(&mutex->lock);
}
@@ -113,7 +130,8 @@
{
pthread_mutex_lock(&mutex->lock);
mutex->value--;
- if (mutex->value >= 0)
+ read_barrier();
+ if (mutex->value >= 0 && mutex->waiters)
pthread_cond_signal(&mutex->cond);
pthread_mutex_unlock(&mutex->lock);
}
@@ -122,7 +140,8 @@
{
pthread_mutex_lock(&mutex->lock);
mutex->value++;
- if (mutex->value >= 0)
+ read_barrier();
+ if (mutex->value >= 0 && mutex->waiters)
pthread_cond_signal(&mutex->cond);
pthread_mutex_unlock(&mutex->lock);
}
diff --git a/mutex.h b/mutex.h
index 40cfc1e..7be0ab1 100644
--- a/mutex.h
+++ b/mutex.h
@@ -7,6 +7,7 @@
pthread_mutex_t lock;
pthread_cond_t cond;
int value;
+ int waiters;
int mutex_fd;
};
@@ -25,4 +26,9 @@
return fio_mutex_init(0);
}
+static inline int fio_mutex_getval(struct fio_mutex *mutex)
+{
+ return mutex->value;
+}
+
#endif
diff --git a/options.c b/options.c
index 27b4fb6..bb77683 100644
--- a/options.c
+++ b/options.c
@@ -351,6 +351,18 @@
return 0;
}
+static int str_lockfile_cb(void *data, const char *str)
+{
+ struct thread_data *td = data;
+ char *nr = get_opt_postfix(str);
+
+ td->o.lockfile_batch = 1;
+ if (nr)
+ td->o.lockfile_batch = atoi(nr);
+
+ return 0;
+}
+
#define __stringify_1(x) #x
#define __stringify(x) __stringify_1(x)
@@ -386,19 +398,27 @@
},
{
.name = "lockfile",
- .type = FIO_OPT_BOOL,
- .off1 = td_var_offset(lockfile),
+ .type = FIO_OPT_STR,
+ .cb = str_lockfile_cb,
+ .off1 = td_var_offset(file_lock_mode),
.help = "Lock file when doing IO to it",
.parent = "filename",
- .def = "0",
- },
- {
- .name = "lockfile_batch",
- .type = FIO_OPT_INT,
- .off1 = td_var_offset(lockfile_batch),
- .help = "Number of IOs to allow per file lock",
- .parent = "lockfile",
- .def = "1",
+ .def = "none",
+ .posval = {
+ { .ival = "none",
+ .oval = FILE_LOCK_NONE,
+ .help = "No file locking",
+ },
+ { .ival = "exclusive",
+ .oval = FILE_LOCK_EXCLUSIVE,
+ .help = "Exclusive file lock",
+ },
+ {
+ .ival = "readwrite",
+ .oval = FILE_LOCK_READWRITE,
+ .help = "Read vs write lock",
+ },
+ },
},
{
.name = "opendir",
diff --git a/sem.c b/sem.c
deleted file mode 100644
index 8b25ad3..0000000
--- a/sem.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <stdio.h>
-
-#include "sem.h"
-#include "smalloc.h"
-
-void fio_sem_remove(struct fio_sem *sem)
-{
- sfree(sem);
-}
-
-struct fio_sem *fio_sem_init(int value)
-{
- struct fio_sem *sem;
-
- sem = smalloc(sizeof(*sem));
- if (!sem)
- return NULL;
-
- sem->sem_val = value;
-
- if (!sem_init(&sem->sem, 1, value))
- return sem;
-
- perror("sem_init");
- sfree(sem);
- return NULL;
-}
-
-void fio_sem_down(struct fio_sem *sem)
-{
- sem_wait(&sem->sem);
-}
-
-void fio_sem_up(struct fio_sem *sem)
-{
- sem_post(&sem->sem);
-}
diff --git a/sem.h b/sem.h
deleted file mode 100644
index a7b4664..0000000
--- a/sem.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef FIO_SEM_H
-#define FIO_SEM_H
-
-#include <semaphore.h>
-
-struct fio_sem {
- sem_t sem;
- int sem_val;
-};
-
-extern struct fio_sem *fio_sem_init(int);
-extern void fio_sem_remove(struct fio_sem *);
-extern void fio_sem_down(struct fio_sem *);
-extern void fio_sem_up(struct fio_sem *);
-
-#endif
diff --git a/smalloc.c b/smalloc.c
index 9a7c25b..85da781 100644
--- a/smalloc.c
+++ b/smalloc.c
@@ -268,11 +268,12 @@
void sinit(void)
{
- int ret = add_pool(&mp[0]);
+ int ret;
#ifdef MP_SAFE
lock = fio_mutex_rw_init();
#endif
+ ret = add_pool(&mp[0]);
assert(!ret);
}