iolog: don't serialize the writing of all logs

Serialize based on filename, so that independent logs can be
written at the same time. If the logs are big, this can take
a while to flush to disk. It's silly to have all jobs waiting
on each other, when they could write their own logs independently.

Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/Makefile b/Makefile
index 1113c2f..231b4ab 100644
--- a/Makefile
+++ b/Makefile
@@ -35,7 +35,7 @@
 		cconv.c lib/prio_tree.c json.c lib/zipf.c lib/axmap.c \
 		lib/lfsr.c gettime-thread.c helpers.c lib/flist_sort.c \
 		lib/hweight.c lib/getrusage.c idletime.c td_error.c \
-		profiles/tiobench.c profiles/act.c io_u_queue.c
+		profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c
 
 ifdef CONFIG_64BIT_LLP64
   CFLAGS += -DBITS_PER_LONG=32
diff --git a/backend.c b/backend.c
index 1ff8b3f..5e7a388 100644
--- a/backend.c
+++ b/backend.c
@@ -57,7 +57,6 @@
 static pthread_t disk_util_thread;
 static struct fio_mutex *disk_thread_mutex;
 static struct fio_mutex *startup_mutex;
-static struct fio_mutex *writeout_mutex;
 static struct flist_head *cgroup_list;
 static char *cgroup_mnt;
 static int exit_value;
@@ -1224,6 +1223,120 @@
 	return bytes_done[DDIR_WRITE] + bytes_done[DDIR_TRIM];
 }
 
+static int write_this_log(struct thread_data *td, struct io_log *log,
+			  const char *log_file, const char *name, int try)
+{
+	int ret;
+
+	if (!log)
+		return 0;
+
+	if (log_file)
+		ret = finish_log_named(td, log, log_file, name, try);
+	else
+		ret = finish_log(td, log, name, try);
+
+	return ret;
+}
+
+static int write_iops_log(struct thread_data *td, struct thread_options *o,
+			  int try)
+{
+	return write_this_log(td, td->iops_log, o->iops_log_file, "iops", try);
+}
+
+static int write_slat_log(struct thread_data *td, struct thread_options *o,
+			  int try)
+{
+	return write_this_log(td, td->slat_log, o->lat_log_file, "slat", try);
+}
+
+static int write_clat_log(struct thread_data *td, struct thread_options *o,
+			  int try)
+{
+	return write_this_log(td, td->clat_log, o->lat_log_file, "clat" , try);
+}
+
+static int write_lat_log(struct thread_data *td, struct thread_options *o,
+			 int try)
+{
+	return write_this_log(td, td->lat_log, o->lat_log_file, "lat", try);
+}
+
+static int write_bandw_log(struct thread_data *td, struct thread_options *o,
+			int try)
+{
+	return write_this_log(td, td->bw_log, o->bw_log_file, "bw", try);
+}
+
+enum {
+	BW_LOG_MASK	= 1,
+	LAT_LOG_MASK	= 2,
+	SLAT_LOG_MASK	= 4,
+	CLAT_LOG_MASK	= 8,
+	IOPS_LOG_MASK	= 16,
+
+	ALL_LOG_MASK	= 31,
+	ALL_LOG_NR	= 5,
+};
+
+static void writeout_logs(struct thread_data *td)
+{
+	struct thread_options *o = &td->o;
+	unsigned int log_mask = ALL_LOG_MASK;
+	unsigned int log_left = ALL_LOG_NR;
+	int old_state;
+
+	old_state = td_bump_runstate(td, TD_FINISHING);
+
+	finalize_logs(td);
+
+	while (log_left) {
+		int ret, prev_log_left = log_left;
+
+		if (log_mask & BW_LOG_MASK) {
+			ret = write_bandw_log(td, o, log_left != 1);
+			if (!ret) {
+				log_left--;
+				log_mask &= ~BW_LOG_MASK;
+			}
+		}
+		if (log_mask & LAT_LOG_MASK) {
+			ret = write_lat_log(td, o, log_left != 1);
+			if (!ret) {
+				log_left--;
+				log_mask &= ~LAT_LOG_MASK;
+			}
+		}
+		if (log_mask & SLAT_LOG_MASK) {
+			ret = write_slat_log(td, o, log_left != 1);
+			if (!ret) {
+				log_left--;
+				log_mask &= ~SLAT_LOG_MASK;
+			}
+		}
+		if (log_mask & CLAT_LOG_MASK) {
+			ret = write_clat_log(td, o, log_left != 1);
+			if (!ret) {
+				log_left--;
+				log_mask &= ~CLAT_LOG_MASK;
+			}
+		}
+		if (log_mask & IOPS_LOG_MASK) {
+			ret = write_iops_log(td, o, log_left != 1);
+			if (!ret) {
+				log_left--;
+				log_mask &= ~IOPS_LOG_MASK;
+			}
+		}
+
+		if (prev_log_left == log_left)
+			usleep(5000);
+	}
+
+	td_restore_runstate(td, old_state);
+}
+
 /*
  * Entry point for the thread based jobs. The process based jobs end up
  * here as well, after a little setup.
@@ -1490,45 +1603,8 @@
 
 	fio_unpin_memory(td);
 
-	fio_mutex_down(writeout_mutex);
-	finalize_logs(td);
-	if (td->bw_log) {
-		if (o->bw_log_file) {
-			finish_log_named(td, td->bw_log,
-						o->bw_log_file, "bw");
-		} else
-			finish_log(td, td->bw_log, "bw");
-	}
-	if (td->lat_log) {
-		if (o->lat_log_file) {
-			finish_log_named(td, td->lat_log,
-						o->lat_log_file, "lat");
-		} else
-			finish_log(td, td->lat_log, "lat");
-	}
-	if (td->slat_log) {
-		if (o->lat_log_file) {
-			finish_log_named(td, td->slat_log,
-						o->lat_log_file, "slat");
-		} else
-			finish_log(td, td->slat_log, "slat");
-	}
-	if (td->clat_log) {
-		if (o->lat_log_file) {
-			finish_log_named(td, td->clat_log,
-						o->lat_log_file, "clat");
-		} else
-			finish_log(td, td->clat_log, "clat");
-	}
-	if (td->iops_log) {
-		if (o->iops_log_file) {
-			finish_log_named(td, td->iops_log,
-						o->iops_log_file, "iops");
-		} else
-			finish_log(td, td->iops_log, "iops");
-	}
+	writeout_logs(td);
 
-	fio_mutex_up(writeout_mutex);
 	if (o->exec_postrun)
 		exec_string(o, o->exec_postrun, (const char *)"postrun");
 
@@ -2033,9 +2109,6 @@
 	startup_mutex = fio_mutex_init(FIO_MUTEX_LOCKED);
 	if (startup_mutex == NULL)
 		return 1;
-	writeout_mutex = fio_mutex_init(FIO_MUTEX_UNLOCKED);
-	if (writeout_mutex == NULL)
-		return 1;
 
 	set_genesis_time();
 	stat_init();
@@ -2066,7 +2139,6 @@
 	sfree(cgroup_mnt);
 
 	fio_mutex_remove(startup_mutex);
-	fio_mutex_remove(writeout_mutex);
 	fio_mutex_remove(disk_thread_mutex);
 	stat_exit();
 	return exit_value;
diff --git a/filelock.c b/filelock.c
new file mode 100644
index 0000000..b252a97
--- /dev/null
+++ b/filelock.c
@@ -0,0 +1,157 @@
+/*
+ * Really simple exclusive file locking based on filename.
+ * No hash indexing, just a list, so only works well for < 100 files or
+ * so. But that's more than what fio needs, so should be fine.
+ */
+#include <inttypes.h>
+#include <string.h>
+#include <assert.h>
+
+#include "flist.h"
+#include "filelock.h"
+#include "smalloc.h"
+#include "mutex.h"
+#include "hash.h"
+#include "log.h"
+
+struct fio_filelock {
+	uint32_t hash;
+	struct fio_mutex lock;
+	struct flist_head list;
+	unsigned int references;
+};
+	
+static struct flist_head *filelock_list;
+static struct fio_mutex *filelock_lock;
+
+int fio_filelock_init(void)
+{
+	filelock_list = smalloc(sizeof(*filelock_list));
+	if (!filelock_list)
+		return 1;
+
+	INIT_FLIST_HEAD(filelock_list);
+	filelock_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
+	if (!filelock_lock) {
+		sfree(filelock_list);
+		return 1;
+	}
+
+	return 0;
+}
+
+void fio_filelock_exit(void)
+{
+	if (!filelock_list)
+		return;
+
+	assert(flist_empty(filelock_list));
+	sfree(filelock_list);
+	filelock_list = NULL;
+	fio_mutex_remove(filelock_lock);
+	filelock_lock = NULL;
+}
+
+static struct fio_filelock *fio_hash_find(uint32_t hash)
+{
+	struct flist_head *entry;
+	struct fio_filelock *ff;
+
+	flist_for_each(entry, filelock_list) {
+		ff = flist_entry(entry, struct fio_filelock, list);
+		if (ff->hash == hash)
+			return ff;
+	}
+
+	return NULL;
+}
+
+static struct fio_filelock *fio_hash_get(uint32_t hash)
+{
+	struct fio_filelock *ff;
+
+	ff = fio_hash_find(hash);
+	if (!ff) {
+		ff = smalloc(sizeof(*ff));
+		ff->hash = hash;
+		__fio_mutex_init(&ff->lock, FIO_MUTEX_UNLOCKED);
+		ff->references = 0;
+		flist_add(&ff->list, filelock_list);
+	}
+
+	return ff;
+}
+
+int fio_trylock_file(const char *fname)
+{
+	struct fio_filelock *ff;
+	uint32_t hash;
+
+	hash = jhash(fname, strlen(fname), 0);
+
+	fio_mutex_down(filelock_lock);
+	ff = fio_hash_get(hash);
+	ff->references++;
+	fio_mutex_up(filelock_lock);
+
+	if (!fio_mutex_down_trylock(&ff->lock))
+		return 0;
+
+	fio_mutex_down(filelock_lock);
+
+	/*
+	 * If we raced and the only reference to the lock is us, we can
+	 * grab it
+	 */
+	if (ff->references != 1) {
+		ff->references--;
+		ff = NULL;
+	}
+
+	fio_mutex_up(filelock_lock);
+
+	if (ff) {
+		fio_mutex_down(&ff->lock);
+		return 0;
+	}
+
+	return 1;
+}
+
+void fio_lock_file(const char *fname)
+{
+	struct fio_filelock *ff;
+	uint32_t hash;
+
+	hash = jhash(fname, strlen(fname), 0);
+
+	fio_mutex_down(filelock_lock);
+	ff = fio_hash_get(hash);
+	ff->references++;
+	fio_mutex_up(filelock_lock);
+
+	fio_mutex_down(&ff->lock);
+}
+
+void fio_unlock_file(const char *fname)
+{
+	struct fio_filelock *ff;
+	uint32_t hash;
+
+	hash = jhash(fname, strlen(fname), 0);
+
+	fio_mutex_down(filelock_lock);
+
+	ff = fio_hash_find(hash);
+	if (ff) {
+		ff->references--;
+		fio_mutex_up(&ff->lock);
+		if (!ff->references) {
+			flist_del(&ff->list);
+			sfree(ff);
+		}
+	} else
+		log_err("fio: file not found for unlocking\n");
+
+	fio_mutex_up(filelock_lock);
+}
diff --git a/filelock.h b/filelock.h
new file mode 100644
index 0000000..97d13b7
--- /dev/null
+++ b/filelock.h
@@ -0,0 +1,11 @@
+#ifndef FIO_LOCK_FILE_H
+#define FIO_LOCK_FILE_H
+
+extern void fio_lock_file(const char *);
+extern int fio_trylock_file(const char *);
+extern void fio_unlock_file(const char *);
+
+extern int fio_filelock_init(void);
+extern void fio_filelock_exit(void);
+
+#endif
diff --git a/init.c b/init.c
index e944856..910f2ba 100644
--- a/init.c
+++ b/init.c
@@ -24,6 +24,7 @@
 #include "profile.h"
 #include "server.h"
 #include "idletime.h"
+#include "filelock.h"
 
 #include "lib/getopt.h"
 #include "lib/strcasestr.h"
@@ -259,6 +260,7 @@
 	}
 
 	options_free(fio_options, &def_thread);
+	fio_filelock_exit();
 	scleanup();
 }
 
diff --git a/iolog.c b/iolog.c
index 7cb633b..8589df6 100644
--- a/iolog.c
+++ b/iolog.c
@@ -10,6 +10,7 @@
 #include "fio.h"
 #include "verify.h"
 #include "trim.h"
+#include "filelock.h"
 
 static const char iolog_ver2[] = "fio version 2 iolog";
 
@@ -538,22 +539,32 @@
 	free(log);
 }
 
-void finish_log_named(struct thread_data *td, struct io_log *log,
-		       const char *prefix, const char *postfix)
+int finish_log_named(struct thread_data *td, struct io_log *log,
+		     const char *prefix, const char *postfix, int trylock)
 {
 	char file_name[256];
 
 	snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
 
+	if (trylock) {
+		if (fio_trylock_file(file_name))
+			return 1;
+	} else
+		fio_lock_file(file_name);
+
 	if (td->client_type == FIO_CLIENT_TYPE_GUI) {
 		fio_send_iolog(td, log, file_name);
 		free(log->log);
 		free(log);
 	} else
 		__finish_log(log, file_name);
+
+	fio_unlock_file(file_name);
+	return 0;
 }
 
-void finish_log(struct thread_data *td, struct io_log *log, const char *name)
+int finish_log(struct thread_data *td, struct io_log *log, const char *name,
+	       int trylock)
 {
-	finish_log_named(td, log, td->o.name, name);
+	return finish_log_named(td, log, td->o.name, name, trylock);
 }
diff --git a/iolog.h b/iolog.h
index 0716391..a2bc758 100644
--- a/iolog.h
+++ b/iolog.h
@@ -131,8 +131,8 @@
 extern void init_disk_util(struct thread_data *);
 extern void update_rusage_stat(struct thread_data *);
 extern void setup_log(struct io_log **, unsigned long, int);
-extern void finish_log(struct thread_data *, struct io_log *, const char *);
-extern void finish_log_named(struct thread_data *, struct io_log *, const char *, const char *);
+extern int finish_log(struct thread_data *, struct io_log *, const char *, int);
+extern int finish_log_named(struct thread_data *, struct io_log *, const char *, const char *, int);
 extern void __finish_log(struct io_log *, const char *);
 extern struct io_log *agg_io_log[DDIR_RWDIR_CNT];
 extern int write_bw_log;
diff --git a/libfio.c b/libfio.c
index 8eddab8..1fd77e4 100644
--- a/libfio.c
+++ b/libfio.c
@@ -31,6 +31,7 @@
 #include "fio.h"
 #include "smalloc.h"
 #include "os/os.h"
+#include "filelock.h"
 
 /*
  * Just expose an empty list, if the OS does not support disk util stats
@@ -265,6 +266,11 @@
 
 	sinit();
 
+	if (fio_filelock_init()) {
+		log_err("fio: failed initializing filelock subsys\n");
+		return 1;
+	}
+
 	/*
 	 * We need locale for number printing, if it isn't set then just
 	 * go with the US format.