Add a real semaphore implementation

I've seen races where job N+1 got started before job N, which breaks
dependent jobs. So give up and implement a real semaphore in mmap'ed
shared storage.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/fio.c b/fio.c
index 72cd02b..3cf2a9b 100644
--- a/fio.c
+++ b/fio.c
@@ -46,7 +46,7 @@
 int shm_id = 0;
 int temp_stall_ts;
 
-static volatile int startup_sem;
+static struct fio_sem *startup_sem;
 static volatile int fio_abort;
 static int exit_value;
 
@@ -731,8 +731,8 @@
 		goto err;
 
 	td_set_runstate(td, TD_INITIALIZED);
-	fio_sem_up(&startup_sem);
-	fio_sem_down(&td->mutex);
+	fio_sem_up(startup_sem);
+	fio_sem_down(td->mutex);
 
 	if (!td->create_serialize && setup_files(td))
 		goto err;
@@ -930,6 +930,8 @@
 				perror("pthread_join");
 		}
 
+		fio_sem_remove(td->mutex);
+
 		(*nr_running)--;
 		(*m_rate) -= td->ratemin;
 		(*t_rate) -= td->rate;
@@ -1030,7 +1032,6 @@
 			 */
 			td_set_runstate(td, TD_CREATED);
 			map[this_jobs++] = td;
-			fio_sem_init(&startup_sem, 1);
 			nr_started++;
 
 			if (td->use_thread) {
@@ -1039,14 +1040,13 @@
 					nr_started--;
 				}
 			} else {
-				if (fork())
-					fio_sem_down(&startup_sem);
-				else {
+				if (!fork()) {
 					int ret = fork_main(shm_id, i);
 
 					exit(ret);
 				}
 			}
+			fio_sem_down(startup_sem);
 		}
 
 		/*
@@ -1101,7 +1101,7 @@
 			m_rate += td->ratemin;
 			t_rate += td->rate;
 			todo--;
-			fio_sem_up(&td->mutex);
+			fio_sem_up(td->mutex);
 		}
 
 		reap_threads(&nr_running, &t_rate, &m_rate);
@@ -1151,6 +1151,8 @@
 		setup_log(&agg_io_log[DDIR_WRITE]);
 	}
 
+	startup_sem = fio_sem_init(0);
+
 	set_genesis_time();
 
 	disk_util_timer_arm();
@@ -1165,5 +1167,6 @@
 		}
 	}
 
+	fio_sem_remove(startup_sem);
 	return exit_value;
 }