Blame - backend.c - platform/external/fio

blob: 52791040ff21384d283b2a09de3ff9718e19d866 [file] [log] [blame]

Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	1	/*
				2	* fio - the flexible io tester
				3	*
				4	* Copyright (C) 2005 Jens Axboe <axboe@suse.de>
				5	* Copyright (C) 2006-2012 Jens Axboe <axboe@kernel.dk>
				6	*
				7	* The license below covers all files distributed with fio unless otherwise
				8	* noted in the file itself.
				9	*
				10	* This program is free software; you can redistribute it and/or modify
				11	* it under the terms of the GNU General Public License version 2 as
				12	* published by the Free Software Foundation.
				13	*
				14	* This program is distributed in the hope that it will be useful,
				15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				17	* GNU General Public License for more details.
				18	*
				19	* You should have received a copy of the GNU General Public License
				20	* along with this program; if not, write to the Free Software
				21	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				22	*
				23	*/
				24	#include <unistd.h>
				25	#include <fcntl.h>
				26	#include <string.h>
				27	#include <limits.h>
				28	#include <signal.h>
				29	#include <time.h>
				30	#include <locale.h>
				31	#include <assert.h>
				32	#include <time.h>
Bruce Cran	e43606c	2012-02-20 09:34:24 +0100	[diff] [blame]	33	#include <inttypes.h>
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	34	#include <sys/stat.h>
				35	#include <sys/wait.h>
				36	#include <sys/ipc.h>
				37	#include <sys/shm.h>
				38	#include <sys/mman.h>
				39
				40	#include "fio.h"
				41	#include "hash.h"
				42	#include "smalloc.h"
				43	#include "verify.h"
				44	#include "trim.h"
				45	#include "diskutil.h"
				46	#include "cgroup.h"
				47	#include "profile.h"
				48	#include "lib/rand.h"
				49	#include "memalign.h"
				50	#include "server.h"
				51
				52	static pthread_t disk_util_thread;	/* NOTE(review): not referenced in the visible part of this file */
				53	static struct fio_mutex *startup_mutex;	/* upped by thread_main() once a job reaches TD_INITIALIZED */
				54	static struct fio_mutex *writeout_mutex;	/* held by thread_main() while the per-job logs are finished */
				55	static struct flist_head *cgroup_list;	/* handed to cgroup_setup() by thread_main() */
				56	static char *cgroup_mnt;	/* passed by address to cgroup_setup() and cgroup_shutdown() */
				57	static int exit_value;	/* set to 128 by sig_int() when a signal arrives and is_backend is not set */
				58	static volatile int fio_abort;	/* NOTE(review): not referenced in the visible part of this file */
				59
				60	struct io_log *agg_io_log[2];	/* two entries; presumably one per data direction -- TODO confirm */
				61
Jens Axboe	a3efc91	2012-02-09 11:25:24 +0100	[diff] [blame]	62	int groupid = 0;
				63	unsigned int thread_number = 0;
				64	unsigned int nr_process = 0;
				65	unsigned int nr_thread = 0;
				66	int shm_id = 0;
				67	int temp_stall_ts;
				68	unsigned long done_secs = 0;
				69
/*
 * Round the address 'buf' up to the next multiple of (page_mask + 1) and
 * yield it as a char pointer. A 'page_mask' variable must be in scope where
 * the macro is used (presumably page size - 1 -- TODO confirm where it is
 * set up).
 *
 * The whole expansion is parenthesized so that postfix operators and any
 * surrounding arithmetic apply to the resulting pointer rather than to the
 * integer expression inside the cast.
 */
#define PAGE_ALIGN(buf)	\
	((char *) (((uintptr_t) (buf) + page_mask) & ~page_mask))

/* Value is 5 * 1000; presumably a job start timeout in msec -- TODO confirm. */
#define JOB_START_TIMEOUT	(5 * 1000)
				74
				75	static void sig_int(int sig)
				76	{
				77	if (threads) {
				78	if (is_backend)
				79	fio_server_got_signal(sig);
				80	else {
				81	log_info("\nfio: terminating on signal %d\n", sig);
				82	fflush(stdout);
				83	exit_value = 128;
				84	}
				85
				86	fio_terminate_threads(TERMINATE_ALL);
				87	}
				88	}
				89
				90	static void set_sig_handlers(void)
				91	{
				92	struct sigaction act;
				93
				94	memset(&act, 0, sizeof(act));
				95	act.sa_handler = sig_int;
				96	act.sa_flags = SA_RESTART;
				97	sigaction(SIGINT, &act, NULL);
				98
				99	memset(&act, 0, sizeof(act));
				100	act.sa_handler = sig_int;
				101	act.sa_flags = SA_RESTART;
				102	sigaction(SIGTERM, &act, NULL);
				103
				104	if (is_backend) {
				105	memset(&act, 0, sizeof(act));
				106	act.sa_handler = sig_int;
				107	act.sa_flags = SA_RESTART;
				108	sigaction(SIGPIPE, &act, NULL);
				109	}
				110	}
				111
				112	/*
				113	* Check if we are above the minimum rate given.
				114	*/
				115	static int __check_min_rate(struct thread_data td, struct timeval now,
				116	enum fio_ddir ddir)
				117	{
				118	unsigned long long bytes = 0;
				119	unsigned long iops = 0;
				120	unsigned long spent;
				121	unsigned long rate;
				122	unsigned int ratemin = 0;
				123	unsigned int rate_iops = 0;
				124	unsigned int rate_iops_min = 0;
				125
				126	assert(ddir_rw(ddir));
				127
				128	if (!td->o.ratemin[ddir] && !td->o.rate_iops_min[ddir])
				129	return 0;
				130
				131	/*
				132	* allow a 2 second settle period in the beginning
				133	*/
				134	if (mtime_since(&td->start, now) < 2000)
				135	return 0;
				136
				137	iops += td->this_io_blocks[ddir];
				138	bytes += td->this_io_bytes[ddir];
				139	ratemin += td->o.ratemin[ddir];
				140	rate_iops += td->o.rate_iops[ddir];
				141	rate_iops_min += td->o.rate_iops_min[ddir];
				142
				143	/*
				144	* if rate blocks is set, sample is running
				145	*/
				146	if (td->rate_bytes[ddir] \|\| td->rate_blocks[ddir]) {
				147	spent = mtime_since(&td->lastrate[ddir], now);
				148	if (spent < td->o.ratecycle)
				149	return 0;
				150
				151	if (td->o.rate[ddir]) {
				152	/*
				153	* check bandwidth specified rate
				154	*/
				155	if (bytes < td->rate_bytes[ddir]) {
				156	log_err("%s: min rate %u not met\n", td->o.name,
				157	ratemin);
				158	return 1;
				159	} else {
				160	rate = ((bytes - td->rate_bytes[ddir]) * 1000) / spent;
				161	if (rate < ratemin \|\|
				162	bytes < td->rate_bytes[ddir]) {
				163	log_err("%s: min rate %u not met, got"
				164	" %luKB/sec\n", td->o.name,
				165	ratemin, rate);
				166	return 1;
				167	}
				168	}
				169	} else {
				170	/*
				171	* checks iops specified rate
				172	*/
				173	if (iops < rate_iops) {
				174	log_err("%s: min iops rate %u not met\n",
				175	td->o.name, rate_iops);
				176	return 1;
				177	} else {
				178	rate = ((iops - td->rate_blocks[ddir]) * 1000) / spent;
				179	if (rate < rate_iops_min \|\|
				180	iops < td->rate_blocks[ddir]) {
				181	log_err("%s: min iops rate %u not met,"
				182	" got %lu\n", td->o.name,
				183	rate_iops_min, rate);
				184	}
				185	}
				186	}
				187	}
				188
				189	td->rate_bytes[ddir] = bytes;
				190	td->rate_blocks[ddir] = iops;
				191	memcpy(&td->lastrate[ddir], now, sizeof(*now));
				192	return 0;
				193	}
				194
				/*
				 * Run the minimum-rate check for every direction that completed data.
				 *
				 * td:         job being checked
				 * now:        timestamp handed on to __check_min_rate()
				 * bytes_done: two counters, indexed by direction (0 and 1); a direction
				 *             is only checked when its counter is non-zero
				 *
				 * Returns non-zero if __check_min_rate() failed for either direction.
				 */
				static int check_min_rate(struct thread_data *td, struct timeval *now,
							  unsigned long *bytes_done)
				{
					int ret = 0;

					if (bytes_done[0])
						ret |= __check_min_rate(td, now, 0);
					if (bytes_done[1])
						ret |= __check_min_rate(td, now, 1);

					return ret;
				}
				207
				208	/*
				209	* When job exits, we can cancel the in-flight IO if we are using async
				210	* io. Attempt to do so.
				211	*/
				212	static void cleanup_pending_aio(struct thread_data *td)
				213	{
				214	struct flist_head entry, n;
				215	struct io_u *io_u;
				216	int r;
				217
				218	/*
				219	* get immediately available events, if any
				220	*/
				221	r = io_u_queued_complete(td, 0, NULL);
				222	if (r < 0)
				223	return;
				224
				225	/*
				226	* now cancel remaining active events
				227	*/
				228	if (td->io_ops->cancel) {
				229	flist_for_each_safe(entry, n, &td->io_u_busylist) {
				230	io_u = flist_entry(entry, struct io_u, list);
				231
				232	/*
				233	* if the io_u isn't in flight, then that generally
				234	* means someone leaked an io_u. complain but fix
				235	* it up, so we don't stall here.
				236	*/
				237	if ((io_u->flags & IO_U_F_FLIGHT) == 0) {
				238	log_err("fio: non-busy IO on busy list\n");
				239	put_io_u(td, io_u);
				240	} else {
				241	r = td->io_ops->cancel(td, io_u);
				242	if (!r)
				243	put_io_u(td, io_u);
				244	}
				245	}
				246	}
				247
				248	if (td->cur_depth)
				249	r = io_u_queued_complete(td, td->cur_depth, NULL);
				250	}
				251
				252	/*
				253	* Helper to handle the final sync of a file. Works just like the normal
				254	* io path, just does everything sync.
				255	*/
				256	static int fio_io_sync(struct thread_data td, struct fio_file f)
				257	{
				258	struct io_u *io_u = __get_io_u(td);
				259	int ret;
				260
				261	if (!io_u)
				262	return 1;
				263
				264	io_u->ddir = DDIR_SYNC;
				265	io_u->file = f;
				266
				267	if (td_io_prep(td, io_u)) {
				268	put_io_u(td, io_u);
				269	return 1;
				270	}
				271
				272	requeue:
				273	ret = td_io_queue(td, io_u);
				274	if (ret < 0) {
				275	td_verror(td, io_u->error, "td_io_queue");
				276	put_io_u(td, io_u);
				277	return 1;
				278	} else if (ret == FIO_Q_QUEUED) {
				279	if (io_u_queued_complete(td, 1, NULL) < 0)
				280	return 1;
				281	} else if (ret == FIO_Q_COMPLETED) {
				282	if (io_u->error) {
				283	td_verror(td, io_u->error, "td_io_queue");
				284	return 1;
				285	}
				286
				287	if (io_u_sync_complete(td, io_u, NULL) < 0)
				288	return 1;
				289	} else if (ret == FIO_Q_BUSY) {
				290	if (td_io_commit(td))
				291	return 1;
				292	goto requeue;
				293	}
				294
				295	return 0;
				296	}
Jens Axboe	a3efc91	2012-02-09 11:25:24 +0100	[diff] [blame]	297
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	298	static inline void __update_tv_cache(struct thread_data *td)
				299	{
				300	fio_gettime(&td->tv_cache, NULL);
				301	}
				302
				303	static inline void update_tv_cache(struct thread_data *td)
				304	{
				305	if ((++td->tv_cache_nr & td->tv_cache_mask) == td->tv_cache_mask)
				306	__update_tv_cache(td);
				307	}
				308
				309	static inline int runtime_exceeded(struct thread_data td, struct timeval t)
				310	{
				311	if (in_ramp_time(td))
				312	return 0;
				313	if (!td->o.timeout)
				314	return 0;
				315	if (mtime_since(&td->epoch, t) >= td->o.timeout * 1000)
				316	return 1;
				317
				318	return 0;
				319	}
				320
				321	static int break_on_this_error(struct thread_data *td, enum fio_ddir ddir,
				322	int *retptr)
				323	{
				324	int ret = *retptr;
				325
				326	if (ret < 0 \|\| td->error) {
				327	int err;
				328
				329	if (ret < 0)
				330	err = -ret;
				331	else
				332	err = td->error;
				333
				334	if (!(td->o.continue_on_error & td_error_type(ddir, err)))
				335	return 1;
				336
				337	if (td_non_fatal_error(err)) {
				338	/*
				339	* Continue with the I/Os in case of
				340	* a non fatal error.
				341	*/
				342	update_error_count(td, err);
				343	td_clear_error(td);
				344	*retptr = 0;
				345	return 0;
				346	} else if (td->o.fill_device && err == ENOSPC) {
				347	/*
				348	* We expect to hit this error if
				349	* fill_device option is set.
				350	*/
				351	td_clear_error(td);
				352	td->terminate = 1;
				353	return 1;
				354	} else {
				355	/*
				356	* Stop the I/O in case of a fatal
				357	* error.
				358	*/
				359	update_error_count(td, err);
				360	return 1;
				361	}
				362	}
				363
				364	return 0;
				365	}
				366
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	367	/*
				368	* The main verify engine. Runs over the writes we previously submitted,
				369	* reads the blocks back in, and checks the crc/md5 of the data.
				370	*/
				371	static void do_verify(struct thread_data *td)
				372	{
				373	struct fio_file *f;
				374	struct io_u *io_u;
				375	int ret, min_events;
				376	unsigned int i;
				377
				378	dprint(FD_VERIFY, "starting loop\n");
				379
				380	/*
				381	* sync io first and invalidate cache, to make sure we really
				382	* read from disk.
				383	*/
				384	for_each_file(td, f, i) {
				385	if (!fio_file_open(f))
				386	continue;
				387	if (fio_io_sync(td, f))
				388	break;
				389	if (file_invalidate_cache(td, f))
				390	break;
				391	}
				392
				393	if (td->error)
				394	return;
				395
				396	td_set_runstate(td, TD_VERIFYING);
				397
				398	io_u = NULL;
				399	while (!td->terminate) {
				400	int ret2, full;
				401
				402	update_tv_cache(td);
				403
				404	if (runtime_exceeded(td, &td->tv_cache)) {
				405	__update_tv_cache(td);
				406	if (runtime_exceeded(td, &td->tv_cache)) {
				407	td->terminate = 1;
				408	break;
				409	}
				410	}
				411
Dan Ehrenberg	9e684a4	2012-02-20 11:05:14 +0100	[diff] [blame]	412	if (flow_threshold_exceeded(td))
				413	continue;
				414
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	415	io_u = __get_io_u(td);
				416	if (!io_u)
				417	break;
				418
				419	if (get_next_verify(td, io_u)) {
				420	put_io_u(td, io_u);
				421	break;
				422	}
				423
				424	if (td_io_prep(td, io_u)) {
				425	put_io_u(td, io_u);
				426	break;
				427	}
				428
				429	if (td->o.verify_async)
				430	io_u->end_io = verify_io_u_async;
				431	else
				432	io_u->end_io = verify_io_u;
				433
				434	ret = td_io_queue(td, io_u);
				435	switch (ret) {
				436	case FIO_Q_COMPLETED:
				437	if (io_u->error) {
				438	ret = -io_u->error;
				439	clear_io_u(td, io_u);
				440	} else if (io_u->resid) {
				441	int bytes = io_u->xfer_buflen - io_u->resid;
				442
				443	/*
				444	* zero read, fail
				445	*/
				446	if (!bytes) {
				447	td_verror(td, EIO, "full resid");
				448	put_io_u(td, io_u);
				449	break;
				450	}
				451
				452	io_u->xfer_buflen = io_u->resid;
				453	io_u->xfer_buf += bytes;
				454	io_u->offset += bytes;
				455
				456	if (ddir_rw(io_u->ddir))
				457	td->ts.short_io_u[io_u->ddir]++;
				458
				459	f = io_u->file;
				460	if (io_u->offset == f->real_file_size)
				461	goto sync_done;
				462
				463	requeue_io_u(td, &io_u);
				464	} else {
				465	sync_done:
				466	ret = io_u_sync_complete(td, io_u, NULL);
				467	if (ret < 0)
				468	break;
				469	}
				470	continue;
				471	case FIO_Q_QUEUED:
				472	break;
				473	case FIO_Q_BUSY:
				474	requeue_io_u(td, &io_u);
				475	ret2 = td_io_commit(td);
				476	if (ret2 < 0)
				477	ret = ret2;
				478	break;
				479	default:
				480	assert(ret < 0);
				481	td_verror(td, -ret, "td_io_queue");
				482	break;
				483	}
				484
				485	if (break_on_this_error(td, io_u->ddir, &ret))
				486	break;
				487
				488	/*
				489	* if we can queue more, do so. but check if there are
				490	* completed io_u's first. Note that we can get BUSY even
				491	* without IO queued, if the system is resource starved.
				492	*/
				493	full = queue_full(td) \|\| (ret == FIO_Q_BUSY && td->cur_depth);
				494	if (full \|\| !td->o.iodepth_batch_complete) {
				495	min_events = min(td->o.iodepth_batch_complete,
				496	td->cur_depth);
				497	if (full && !min_events && td->o.iodepth_batch_complete != 0)
				498	min_events = 1;
				499
				500	do {
				501	/*
				502	* Reap required number of io units, if any,
				503	* and do the verification on them through
				504	* the callback handler
				505	*/
				506	if (io_u_queued_complete(td, min_events, NULL) < 0) {
				507	ret = -1;
				508	break;
				509	}
				510	} while (full && (td->cur_depth > td->o.iodepth_low));
				511	}
				512	if (ret < 0)
				513	break;
				514	}
				515
				516	if (!td->error) {
				517	min_events = td->cur_depth;
				518
				519	if (min_events)
				520	ret = io_u_queued_complete(td, min_events, NULL);
				521	} else
				522	cleanup_pending_aio(td);
				523
				524	td_set_runstate(td, TD_RUNNING);
				525
				526	dprint(FD_VERIFY, "exiting loop\n");
				527	}
				528
				529	/*
				530	* Main IO worker function. It retrieves io_u's to process and queues
				531	* and reaps them, checking for rate and errors along the way.
				532	*/
					/*
					 * The loop leaves early when the job is told to terminate, its
					 * runtime is exceeded, get_io_u() has nothing to hand out, or an
					 * error / missed minimum rate stops it. Afterwards outstanding IO is
					 * either waited for (no error) or cancelled (error), and td->done is
					 * set if no bytes were transferred in this pass.
					 */
				533	static void do_io(struct thread_data *td)
				534	{
				535	unsigned int i;
				536	int ret = 0;
				537
				538	if (in_ramp_time(td))
				539	td_set_runstate(td, TD_RAMP);
				540	else
				541	td_set_runstate(td, TD_RUNNING);
				542
					/*
					 * Keep going while there are iolog entries left to replay, trims
					 * waiting on trim_list, or fewer than o.size bytes done in this pass.
					 */
				543	while ( (td->o.read_iolog_file && !flist_empty(&td->io_log_list)) \|\|
				544	(!flist_empty(&td->trim_list)) \|\|
				545	((td->this_io_bytes[0] + td->this_io_bytes[1]) < td->o.size) ) {
				546	struct timeval comp_time;
				547	unsigned long bytes_done[2] = { 0, 0 };
				548	int min_evts = 0;
				549	struct io_u *io_u;
				550	int ret2, full;
				551	enum fio_ddir ddir;
				552
				553	if (td->terminate)
				554	break;
				555
				556	update_tv_cache(td);
				557
					/*
					 * update_tv_cache() only refreshes the cached time now and then, so
					 * confirm against a fresh timestamp before terminating the job.
					 */
				558	if (runtime_exceeded(td, &td->tv_cache)) {
				559	__update_tv_cache(td);
				560	if (runtime_exceeded(td, &td->tv_cache)) {
				561	td->terminate = 1;
				562	break;
				563	}
				564	}
				565
Dan Ehrenberg	9e684a4	2012-02-20 11:05:14 +0100	[diff] [blame]	566	if (flow_threshold_exceeded(td))
				567	continue;
				568
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	569	io_u = get_io_u(td);
				570	if (!io_u)
				571	break;
				572
					/*
					 * Save the direction up front: the error check after the switch
					 * uses this copy instead of reading it back through io_u.
					 */
				573	ddir = io_u->ddir;
				574
				575	/*
				576	* Add verification end_io handler, if asked to verify
				577	* a previously written file.
				578	*/
				579	if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_READ &&
				580	!td_rw(td)) {
				581	if (td->o.verify_async)
				582	io_u->end_io = verify_io_u_async;
				583	else
				584	io_u->end_io = verify_io_u;
				585	td_set_runstate(td, TD_VERIFYING);
				586	} else if (in_ramp_time(td))
				587	td_set_runstate(td, TD_RAMP);
				588	else
				589	td_set_runstate(td, TD_RUNNING);
				590
				591	ret = td_io_queue(td, io_u);
				592	switch (ret) {
				593	case FIO_Q_COMPLETED:
					/* completed with an error: carry it in ret as a negative value */
				594	if (io_u->error) {
				595	ret = -io_u->error;
				596	clear_io_u(td, io_u);
				597	} else if (io_u->resid) {
				598	int bytes = io_u->xfer_buflen - io_u->resid;
				599	struct fio_file *f = io_u->file;
				600
				601	/*
				602	* zero read, fail
				603	*/
				604	if (!bytes) {
				605	td_verror(td, EIO, "full resid");
				606	put_io_u(td, io_u);
				607	break;
				608	}
				609
					/* short transfer: shrink the io_u to the part not yet transferred */
				610	io_u->xfer_buflen = io_u->resid;
				611	io_u->xfer_buf += bytes;
				612	io_u->offset += bytes;
				613
				614	if (ddir_rw(io_u->ddir))
				615	td->ts.short_io_u[io_u->ddir]++;
				616
					/*
					 * A short transfer that stops exactly at real_file_size is handled
					 * as a normal completion instead of being requeued.
					 */
				617	if (io_u->offset == f->real_file_size)
				618	goto sync_done;
				619
				620	requeue_io_u(td, &io_u);
				621	} else {
				622	sync_done:
					/*
					 * comp_time is only sampled when __should_check_rate() asks for it
					 * in either direction; it is consumed by check_min_rate() below.
					 */
				623	if (__should_check_rate(td, 0) \|\|
				624	__should_check_rate(td, 1))
				625	fio_gettime(&comp_time, NULL);
				626
				627	ret = io_u_sync_complete(td, io_u, bytes_done);
				628	if (ret < 0)
				629	break;
				630	}
				631	break;
				632	case FIO_Q_QUEUED:
				633	/*
				634	* if the engine doesn't have a commit hook,
				635	* the io_u is really queued. if it does have such
				636	* a hook, it has to call io_u_queued() itself.
				637	*/
				638	if (td->io_ops->commit == NULL)
				639	io_u_queued(td, io_u);
				640	break;
				641	case FIO_Q_BUSY:
					/* put the io_u back for a later retry and commit what is pending */
				642	requeue_io_u(td, &io_u);
				643	ret2 = td_io_commit(td);
				644	if (ret2 < 0)
				645	ret = ret2;
				646	break;
				647	default:
				648	assert(ret < 0);
				649	put_io_u(td, io_u);
				650	break;
				651	}
				652
				653	if (break_on_this_error(td, ddir, &ret))
				654	break;
				655
				656	/*
				657	* See if we need to complete some commands. Note that we
				658	* can get BUSY even without IO queued, if the system is
				659	* resource starved.
				660	*/
				661	full = queue_full(td) \|\| (ret == FIO_Q_BUSY && td->cur_depth);
				662	if (full \|\| !td->o.iodepth_batch_complete) {
					/*
					 * Wait for min(iodepth_batch_complete, cur_depth) events; if the
					 * queue is full and that comes out as 0 although
					 * iodepth_batch_complete is non-zero, wait for at least one.
					 */
				663	min_evts = min(td->o.iodepth_batch_complete,
				664	td->cur_depth);
				665	if (full && !min_evts && td->o.iodepth_batch_complete != 0)
				666	min_evts = 1;
				667
				668	if (__should_check_rate(td, 0) \|\|
				669	__should_check_rate(td, 1))
				670	fio_gettime(&comp_time, NULL);
				671
					/* when full, keep reaping until cur_depth is down to iodepth_low */
				672	do {
				673	ret = io_u_queued_complete(td, min_evts, bytes_done);
				674	if (ret < 0)
				675	break;
				676
				677	} while (full && (td->cur_depth > td->o.iodepth_low));
				678	}
				679
				680	if (ret < 0)
				681	break;
					/* nothing completed this round: skip rate checking and think time */
				682	if (!(bytes_done[0] + bytes_done[1]))
				683	continue;
				684
				685	if (!in_ramp_time(td) && should_check_rate(td, bytes_done)) {
				686	if (check_min_rate(td, &comp_time, bytes_done)) {
				687	if (exitall_on_terminate)
				688	fio_terminate_threads(td->groupid);
				689	td_verror(td, EIO, "check_min_rate");
				690	break;
				691	}
				692	}
				693
				694	if (td->o.thinktime) {
				695	unsigned long long b;
				696
					/*
					 * Think time applies whenever the total block count is a multiple
					 * of thinktime_blocks: spin for thinktime_spin, then sleep for
					 * whatever is left of thinktime.
					 */
				697	b = td->io_blocks[0] + td->io_blocks[1];
				698	if (!(b % td->o.thinktime_blocks)) {
				699	int left;
				700
				701	if (td->o.thinktime_spin)
				702	usec_spin(td->o.thinktime_spin);
				703
				704	left = td->o.thinktime - td->o.thinktime_spin;
				705	if (left)
				706	usec_sleep(td, left);
				707	}
				708	}
				709	}
				710
				711	if (td->trim_entries)
				712	log_err("fio: %d trim entries leaked?\n", td->trim_entries);
				713
					/*
					 * With fill_device set, ENOSPC is not treated as a failure: the
					 * error is cleared and the job is marked terminated.
					 */
				714	if (td->o.fill_device && td->error == ENOSPC) {
				715	td->error = 0;
				716	td->terminate = 1;
				717	}
				718	if (!td->error) {
				719	struct fio_file *f;
				720
					/* no error: wait for everything still in flight (cur_depth) */
				721	i = td->cur_depth;
				722	if (i) {
				723	ret = io_u_queued_complete(td, i, NULL);
				724	if (td->o.fill_device && td->error == ENOSPC)
				725	td->error = 0;
				726	}
				727
					/* optional final sync of every open file (end_fsync) */
				728	if (should_fsync(td) && td->o.end_fsync) {
				729	td_set_runstate(td, TD_FSYNCING);
				730
				731	for_each_file(td, f, i) {
				732	if (!fio_file_open(f))
				733	continue;
				734	fio_io_sync(td, f);
				735	}
				736	}
				737	} else
					/* an error is set: reap or cancel whatever is still pending */
				738	cleanup_pending_aio(td);
				739
				740	/*
				741	* stop job if we failed doing any IO
				742	*/
				743	if ((td->this_io_bytes[0] + td->this_io_bytes[1]) == 0)
				744	td->done = 1;
				745	}
				746
				747	static void cleanup_io_u(struct thread_data *td)
				748	{
				749	struct flist_head entry, n;
				750	struct io_u *io_u;
				751
				752	flist_for_each_safe(entry, n, &td->io_u_freelist) {
				753	io_u = flist_entry(entry, struct io_u, list);
				754
				755	flist_del(&io_u->list);
				756	fio_memfree(io_u, sizeof(*io_u));
				757	}
				758
				759	free_io_mem(td);
				760	}
				761
				762	static int init_io_u(struct thread_data *td)
				763	{
				764	struct io_u *io_u;
				765	unsigned int max_bs;
				766	int cl_align, i, max_units;
				767	char *p;
				768
				769	max_units = td->o.iodepth;
				770	max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
				771	td->orig_buffer_size = (unsigned long long) max_bs
				772	* (unsigned long long) max_units;
				773
				774	if (td->o.mem_type == MEM_SHMHUGE \|\| td->o.mem_type == MEM_MMAPHUGE) {
				775	unsigned long bs;
				776
				777	bs = td->orig_buffer_size + td->o.hugepage_size - 1;
				778	td->orig_buffer_size = bs & ~(td->o.hugepage_size - 1);
				779	}
				780
				781	if (td->orig_buffer_size != (size_t) td->orig_buffer_size) {
				782	log_err("fio: IO memory too large. Reduce max_bs or iodepth\n");
				783	return 1;
				784	}
				785
				786	if (allocate_io_mem(td))
				787	return 1;
				788
				789	if (td->o.odirect \|\| td->o.mem_align \|\|
				790	(td->io_ops->flags & FIO_RAWIO))
				791	p = PAGE_ALIGN(td->orig_buffer) + td->o.mem_align;
				792	else
				793	p = td->orig_buffer;
				794
				795	cl_align = os_cache_line_size();
				796
				797	for (i = 0; i < max_units; i++) {
				798	void *ptr;
				799
				800	if (td->terminate)
				801	return 1;
				802
				803	ptr = fio_memalign(cl_align, sizeof(*io_u));
				804	if (!ptr) {
				805	log_err("fio: unable to allocate aligned memory\n");
				806	break;
				807	}
				808
				809	io_u = ptr;
				810	memset(io_u, 0, sizeof(*io_u));
				811	INIT_FLIST_HEAD(&io_u->list);
				812	dprint(FD_MEM, "io_u alloc %p, index %u\n", io_u, i);
				813
				814	if (!(td->io_ops->flags & FIO_NOIO)) {
				815	io_u->buf = p;
				816	dprint(FD_MEM, "io_u %p, mem %p\n", io_u, io_u->buf);
				817
				818	if (td_write(td))
				819	io_u_fill_buffer(td, io_u, max_bs);
				820	if (td_write(td) && td->o.verify_pattern_bytes) {
				821	/*
				822	* Fill the buffer with the pattern if we are
				823	* going to be doing writes.
				824	*/
				825	fill_pattern(td, io_u->buf, max_bs, io_u, 0, 0);
				826	}
				827	}
				828
				829	io_u->index = i;
				830	io_u->flags = IO_U_F_FREE;
				831	flist_add(&io_u->list, &td->io_u_freelist);
				832	p += max_bs;
				833	}
				834
				835	return 0;
				836	}
				837
				838	static int switch_ioscheduler(struct thread_data *td)
				839	{
				840	char tmp[256], tmp2[128];
				841	FILE *f;
				842	int ret;
				843
				844	if (td->io_ops->flags & FIO_DISKLESSIO)
				845	return 0;
				846
				847	sprintf(tmp, "%s/queue/scheduler", td->sysfs_root);
				848
				849	f = fopen(tmp, "r+");
				850	if (!f) {
				851	if (errno == ENOENT) {
				852	log_err("fio: os or kernel doesn't support IO scheduler"
				853	" switching\n");
				854	return 0;
				855	}
				856	td_verror(td, errno, "fopen iosched");
				857	return 1;
				858	}
				859
				860	/*
				861	* Set io scheduler.
				862	*/
				863	ret = fwrite(td->o.ioscheduler, strlen(td->o.ioscheduler), 1, f);
				864	if (ferror(f) \|\| ret != 1) {
				865	td_verror(td, errno, "fwrite");
				866	fclose(f);
				867	return 1;
				868	}
				869
				870	rewind(f);
				871
				872	/*
				873	* Read back and check that the selected scheduler is now the default.
				874	*/
				875	ret = fread(tmp, 1, sizeof(tmp), f);
				876	if (ferror(f) \|\| ret < 0) {
				877	td_verror(td, errno, "fread");
				878	fclose(f);
				879	return 1;
				880	}
				881
				882	sprintf(tmp2, "[%s]", td->o.ioscheduler);
				883	if (!strstr(tmp, tmp2)) {
				884	log_err("fio: io scheduler %s not found\n", td->o.ioscheduler);
				885	td_verror(td, EINVAL, "iosched_switch");
				886	fclose(f);
				887	return 1;
				888	}
				889
				890	fclose(f);
				891	return 0;
				892	}
				893
				894	static int keep_running(struct thread_data *td)
				895	{
				896	unsigned long long io_done;
				897
				898	if (td->done)
				899	return 0;
				900	if (td->o.time_based)
				901	return 1;
				902	if (td->o.loops) {
				903	td->o.loops--;
				904	return 1;
				905	}
				906
				907	io_done = td->io_bytes[DDIR_READ] + td->io_bytes[DDIR_WRITE]
				908	+ td->io_skip_bytes;
				909	if (io_done < td->o.size)
				910	return 1;
				911
				912	return 0;
				913	}
				914
				/*
				 * Run 'string' as a shell command by passing "sh -c <string>" to
				 * system().
				 *
				 * Returns whatever system() returned, or -1 if the command line could
				 * not be allocated. A message is logged in both -1 cases.
				 */
				static int exec_string(const char *string)
				{
					static const char prefix[] = "sh -c ";
					/* sizeof(prefix) already accounts for the terminating NUL */
					size_t len = strlen(string) + sizeof(prefix);
					char *str;
					int ret;

					str = malloc(len);
					if (!str) {
						log_err("fio: out of memory for cmd <%s>\n", string);
						return -1;
					}

					snprintf(str, len, "%s%s", prefix, string);

					ret = system(str);
					if (ret == -1)
						log_err("fio: exec of cmd <%s> failed\n", str);

					free(str);
					return ret;
				}
				930
				931	/*
				932	* Entry point for the thread based jobs. The process based jobs end up
				933	* here as well, after a little setup.
				934	*/
				935	static void thread_main(void data)
				936	{
				937	unsigned long long elapsed;
				938	struct thread_data *td = data;
				939	pthread_condattr_t attr;
				940	int clear_state;
				941
				942	if (!td->o.use_thread) {
				943	setsid();
				944	td->pid = getpid();
				945	} else
				946	td->pid = gettid();
				947
				948	dprint(FD_PROCESS, "jobs pid=%d started\n", (int) td->pid);
				949
				950	INIT_FLIST_HEAD(&td->io_u_freelist);
				951	INIT_FLIST_HEAD(&td->io_u_busylist);
				952	INIT_FLIST_HEAD(&td->io_u_requeues);
				953	INIT_FLIST_HEAD(&td->io_log_list);
				954	INIT_FLIST_HEAD(&td->io_hist_list);
				955	INIT_FLIST_HEAD(&td->verify_list);
				956	INIT_FLIST_HEAD(&td->trim_list);
				957	pthread_mutex_init(&td->io_u_lock, NULL);
				958	td->io_hist_tree = RB_ROOT;
				959
				960	pthread_condattr_init(&attr);
				961	pthread_cond_init(&td->verify_cond, &attr);
				962	pthread_cond_init(&td->free_cond, &attr);
				963
				964	td_set_runstate(td, TD_INITIALIZED);
				965	dprint(FD_MUTEX, "up startup_mutex\n");
				966	fio_mutex_up(startup_mutex);
				967	dprint(FD_MUTEX, "wait on td->mutex\n");
				968	fio_mutex_down(td->mutex);
				969	dprint(FD_MUTEX, "done waiting on td->mutex\n");
				970
				971	/*
				972	* the ->mutex mutex is now no longer used, close it to avoid
				973	* eating a file descriptor
				974	*/
				975	fio_mutex_remove(td->mutex);
				976
				977	/*
				978	* A new gid requires privilege, so we need to do this before setting
				979	* the uid.
				980	*/
				981	if (td->o.gid != -1U && setgid(td->o.gid)) {
				982	td_verror(td, errno, "setgid");
				983	goto err;
				984	}
				985	if (td->o.uid != -1U && setuid(td->o.uid)) {
				986	td_verror(td, errno, "setuid");
				987	goto err;
				988	}
				989
				990	/*
				991	* If we have a gettimeofday() thread, make sure we exclude that
				992	* thread from this job
				993	*/
				994	if (td->o.gtod_cpu)
				995	fio_cpu_clear(&td->o.cpumask, td->o.gtod_cpu);
				996
				997	/*
				998	* Set affinity first, in case it has an impact on the memory
				999	* allocations.
				1000	*/
				1001	if (td->o.cpumask_set && fio_setaffinity(td->pid, td->o.cpumask) == -1) {
				1002	td_verror(td, errno, "cpu_set_affinity");
				1003	goto err;
				1004	}
				1005
				1006	/*
				1007	* May alter parameters that init_io_u() will use, so we need to
				1008	* do this first.
				1009	*/
				1010	if (init_iolog(td))
				1011	goto err;
				1012
				1013	if (init_io_u(td))
				1014	goto err;
				1015
				1016	if (td->o.verify_async && verify_async_init(td))
				1017	goto err;
				1018
				1019	if (td->ioprio_set) {
				1020	if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
				1021	td_verror(td, errno, "ioprio_set");
				1022	goto err;
				1023	}
				1024	}
				1025
				1026	if (td->o.cgroup_weight && cgroup_setup(td, cgroup_list, &cgroup_mnt))
				1027	goto err;
				1028
Bruce Cran	649c10c	2012-02-20 07:59:06 +0100	[diff] [blame]	1029	errno = 0;
				1030	if (nice(td->o.nice) == -1 && errno != 0) {
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	1031	td_verror(td, errno, "nice");
				1032	goto err;
				1033	}
				1034
				1035	if (td->o.ioscheduler && switch_ioscheduler(td))
				1036	goto err;
				1037
				1038	if (!td->o.create_serialize && setup_files(td))
				1039	goto err;
				1040
				1041	if (td_io_init(td))
				1042	goto err;
				1043
				1044	if (init_random_map(td))
				1045	goto err;
				1046
				1047	if (td->o.exec_prerun) {
				1048	if (exec_string(td->o.exec_prerun))
				1049	goto err;
				1050	}
				1051
				1052	if (td->o.pre_read) {
				1053	if (pre_read_files(td) < 0)
				1054	goto err;
				1055	}
				1056
				1057	fio_gettime(&td->epoch, NULL);
				1058	getrusage(RUSAGE_SELF, &td->ru_start);
				1059
				1060	clear_state = 0;
				1061	while (keep_running(td)) {
				1062	fio_gettime(&td->start, NULL);
				1063	memcpy(&td->bw_sample_time, &td->start, sizeof(td->start));
				1064	memcpy(&td->iops_sample_time, &td->start, sizeof(td->start));
				1065	memcpy(&td->tv_cache, &td->start, sizeof(td->start));
				1066
				1067	if (td->o.ratemin[0] \|\| td->o.ratemin[1]) {
				1068	memcpy(&td->lastrate[0], &td->bw_sample_time,
				1069	sizeof(td->bw_sample_time));
				1070	memcpy(&td->lastrate[1], &td->bw_sample_time,
				1071	sizeof(td->bw_sample_time));
				1072	}
				1073
				1074	if (clear_state)
				1075	clear_io_state(td);
				1076
				1077	prune_io_piece_log(td);
				1078
				1079	do_io(td);
				1080
				1081	clear_state = 1;
				1082
				1083	if (td_read(td) && td->io_bytes[DDIR_READ]) {
				1084	elapsed = utime_since_now(&td->start);
				1085	td->ts.runtime[DDIR_READ] += elapsed;
				1086	}
				1087	if (td_write(td) && td->io_bytes[DDIR_WRITE]) {
				1088	elapsed = utime_since_now(&td->start);
				1089	td->ts.runtime[DDIR_WRITE] += elapsed;
				1090	}
				1091
				1092	if (td->error \|\| td->terminate)
				1093	break;
				1094
				1095	if (!td->o.do_verify \|\|
				1096	td->o.verify == VERIFY_NONE \|\|
				1097	(td->io_ops->flags & FIO_UNIDIR))
				1098	continue;
				1099
				1100	clear_io_state(td);
				1101
				1102	fio_gettime(&td->start, NULL);
				1103
				1104	do_verify(td);
				1105
				1106	td->ts.runtime[DDIR_READ] += utime_since_now(&td->start);
				1107
				1108	if (td->error \|\| td->terminate)
				1109	break;
				1110	}
				1111
				1112	update_rusage_stat(td);
				1113	td->ts.runtime[0] = (td->ts.runtime[0] + 999) / 1000;
				1114	td->ts.runtime[1] = (td->ts.runtime[1] + 999) / 1000;
				1115	td->ts.total_run_time = mtime_since_now(&td->epoch);
				1116	td->ts.io_bytes[0] = td->io_bytes[0];
				1117	td->ts.io_bytes[1] = td->io_bytes[1];
				1118
				1119	fio_mutex_down(writeout_mutex);
				1120	if (td->bw_log) {
				1121	if (td->o.bw_log_file) {
				1122	finish_log_named(td, td->bw_log,
				1123	td->o.bw_log_file, "bw");
				1124	} else
				1125	finish_log(td, td->bw_log, "bw");
				1126	}
				1127	if (td->lat_log) {
				1128	if (td->o.lat_log_file) {
				1129	finish_log_named(td, td->lat_log,
				1130	td->o.lat_log_file, "lat");
				1131	} else
				1132	finish_log(td, td->lat_log, "lat");
				1133	}
				1134	if (td->slat_log) {
				1135	if (td->o.lat_log_file) {
				1136	finish_log_named(td, td->slat_log,
				1137	td->o.lat_log_file, "slat");
				1138	} else
				1139	finish_log(td, td->slat_log, "slat");
				1140	}
				1141	if (td->clat_log) {
				1142	if (td->o.lat_log_file) {
				1143	finish_log_named(td, td->clat_log,
				1144	td->o.lat_log_file, "clat");
				1145	} else
				1146	finish_log(td, td->clat_log, "clat");
				1147	}
				1148	if (td->iops_log) {
				1149	if (td->o.iops_log_file) {
				1150	finish_log_named(td, td->iops_log,
				1151	td->o.iops_log_file, "iops");
				1152	} else
				1153	finish_log(td, td->iops_log, "iops");
				1154	}
				1155
				1156	fio_mutex_up(writeout_mutex);
				1157	if (td->o.exec_postrun)
				1158	exec_string(td->o.exec_postrun);
				1159
				1160	if (exitall_on_terminate)
				1161	fio_terminate_threads(td->groupid);
				1162
				1163	err:
				1164	if (td->error)
				1165	log_info("fio: pid=%d, err=%d/%s\n", (int) td->pid, td->error,
				1166	td->verror);
				1167
				1168	if (td->o.verify_async)
				1169	verify_async_exit(td);
				1170
				1171	close_and_free_files(td);
				1172	close_ioengine(td);
				1173	cleanup_io_u(td);
				1174	cgroup_shutdown(td, &cgroup_mnt);
				1175
				1176	if (td->o.cpumask_set) {
				1177	int ret = fio_cpuset_exit(&td->o.cpumask);
				1178
				1179	td_verror(td, ret, "fio_cpuset_exit");
				1180	}
				1181
				1182	/*
				1183	* do this very late, it will log file closing as well
				1184	*/
				1185	if (td->o.write_iolog_file)
				1186	write_iolog_close(td);
				1187
				1188	td_set_runstate(td, TD_EXITED);
Bruce Cran	e43606c	2012-02-20 09:34:24 +0100	[diff] [blame]	1189	return (void *) (uintptr_t) td->error;
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	1190	}
				1191
				1192
				1193	/*
				1194	* We cannot pass the td data into a forked process, so attach the td and
				1195	* pass it to the thread worker.
				1196	*/
				1197	static int fork_main(int shmid, int offset)
				1198	{
				1199	struct thread_data *td;
				1200	void data, ret;
				1201
				1202	#ifndef __hpux
				1203	data = shmat(shmid, NULL, 0);
				1204	if (data == (void *) -1) {
				1205	int __err = errno;
				1206
				1207	perror("shmat");
				1208	return __err;
				1209	}
				1210	#else
				1211	/*
				1212	* HP-UX inherits shm mappings?
				1213	*/
				1214	data = threads;
				1215	#endif
				1216
				1217	td = data + offset * sizeof(struct thread_data);
				1218	ret = thread_main(td);
				1219	shmdt(data);
Bruce Cran	e43606c	2012-02-20 09:34:24 +0100	[diff] [blame]	1220	return (int) (uintptr_t) ret;
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	1221	}
				1222
				1223	/*
				1224	* Run over the job map and reap the threads that have exited, if any.
				1225	*/
				1226	static void reap_threads(unsigned int nr_running, unsigned int t_rate,
				1227	unsigned int *m_rate)
				1228	{
				1229	struct thread_data *td;
				1230	unsigned int cputhreads, realthreads, pending;
				1231	int i, status, ret;
				1232
				1233	/*
				1234	* reap exited threads (TD_EXITED -> TD_REAPED)
				1235	*/
				1236	realthreads = pending = cputhreads = 0;
				1237	for_each_td(td, i) {
				1238	int flags = 0;
				1239
				1240	/*
				1241	* ->io_ops is NULL for a thread that has closed its
				1242	* io engine
				1243	*/
				1244	if (td->io_ops && !strcmp(td->io_ops->name, "cpuio"))
				1245	cputhreads++;
				1246	else
				1247	realthreads++;
				1248
				1249	if (!td->pid) {
				1250	pending++;
				1251	continue;
				1252	}
				1253	if (td->runstate == TD_REAPED)
				1254	continue;
				1255	if (td->o.use_thread) {
				1256	if (td->runstate == TD_EXITED) {
				1257	td_set_runstate(td, TD_REAPED);
				1258	goto reaped;
				1259	}
				1260	continue;
				1261	}
				1262
				1263	flags = WNOHANG;
				1264	if (td->runstate == TD_EXITED)
				1265	flags = 0;
				1266
				1267	/*
				1268	* check if someone quit or got killed in an unusual way
				1269	*/
				1270	ret = waitpid(td->pid, &status, flags);
				1271	if (ret < 0) {
				1272	if (errno == ECHILD) {
				1273	log_err("fio: pid=%d disappeared %d\n",
				1274	(int) td->pid, td->runstate);
				1275	td_set_runstate(td, TD_REAPED);
				1276	goto reaped;
				1277	}
				1278	perror("waitpid");
				1279	} else if (ret == td->pid) {
				1280	if (WIFSIGNALED(status)) {
				1281	int sig = WTERMSIG(status);
				1282
				1283	if (sig != SIGTERM)
				1284	log_err("fio: pid=%d, got signal=%d\n",
				1285	(int) td->pid, sig);
				1286	td_set_runstate(td, TD_REAPED);
				1287	goto reaped;
				1288	}
				1289	if (WIFEXITED(status)) {
				1290	if (WEXITSTATUS(status) && !td->error)
				1291	td->error = WEXITSTATUS(status);
				1292
				1293	td_set_runstate(td, TD_REAPED);
				1294	goto reaped;
				1295	}
				1296	}
				1297
				1298	/*
				1299	* thread is not dead, continue
				1300	*/
				1301	pending++;
				1302	continue;
				1303	reaped:
				1304	(*nr_running)--;
				1305	(*m_rate) -= (td->o.ratemin[0] + td->o.ratemin[1]);
				1306	(*t_rate) -= (td->o.rate[0] + td->o.rate[1]);
				1307	if (!td->pid)
				1308	pending--;
				1309
				1310	if (td->error)
				1311	exit_value++;
				1312
				1313	done_secs += mtime_since_now(&td->epoch) / 1000;
				1314	}
				1315
				1316	if (*nr_running == cputhreads && !pending && realthreads)
				1317	fio_terminate_threads(TERMINATE_ALL);
				1318	}
				1319
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	1320	/*
				1321	* Main function for kicking off and reaping jobs, as needed.
				1322	*/
				1323	static void run_threads(void)
				1324	{
				1325	struct thread_data *td;
				1326	unsigned long spent;
				1327	unsigned int i, todo, nr_running, m_rate, t_rate, nr_started;
				1328
				1329	if (fio_pin_memory())
				1330	return;
				1331
				1332	if (fio_gtod_offload && fio_start_gtod_thread())
				1333	return;
				1334
				1335	set_sig_handlers();
				1336
				1337	if (!terse_output) {
				1338	log_info("Starting ");
				1339	if (nr_thread)
				1340	log_info("%d thread%s", nr_thread,
				1341	nr_thread > 1 ? "s" : "");
				1342	if (nr_process) {
				1343	if (nr_thread)
				1344	log_info(" and ");
				1345	log_info("%d process%s", nr_process,
				1346	nr_process > 1 ? "es" : "");
				1347	}
				1348	log_info("\n");
				1349	fflush(stdout);
				1350	}
				1351
				1352	todo = thread_number;
				1353	nr_running = 0;
				1354	nr_started = 0;
				1355	m_rate = t_rate = 0;
				1356
				1357	for_each_td(td, i) {
				1358	print_status_init(td->thread_number - 1);
				1359
				1360	if (!td->o.create_serialize)
				1361	continue;
				1362
				1363	/*
				1364	* do file setup here so it happens sequentially,
				1365	* we don't want X number of threads getting their
				1366	* client data interspersed on disk
				1367	*/
				1368	if (setup_files(td)) {
				1369	exit_value++;
				1370	if (td->error)
				1371	log_err("fio: pid=%d, err=%d/%s\n",
				1372	(int) td->pid, td->error, td->verror);
				1373	td_set_runstate(td, TD_REAPED);
				1374	todo--;
				1375	} else {
				1376	struct fio_file *f;
				1377	unsigned int j;
				1378
				1379	/*
				1380	* for sharing to work, each job must always open
				1381	* its own files. so close them, if we opened them
				1382	* for creation
				1383	*/
				1384	for_each_file(td, f, j) {
				1385	if (fio_file_open(f))
				1386	td_io_close_file(td, f);
				1387	}
				1388	}
				1389	}
				1390
				1391	set_genesis_time();
				1392
				1393	while (todo) {
				1394	struct thread_data *map[REAL_MAX_JOBS];
				1395	struct timeval this_start;
				1396	int this_jobs = 0, left;
				1397
				1398	/*
				1399	* create threads (TD_NOT_CREATED -> TD_CREATED)
				1400	*/
				1401	for_each_td(td, i) {
				1402	if (td->runstate != TD_NOT_CREATED)
				1403	continue;
				1404
				1405	/*
				1406	* never got a chance to start, killed by other
				1407	* thread for some reason
				1408	*/
				1409	if (td->terminate) {
				1410	todo--;
				1411	continue;
				1412	}
				1413
				1414	if (td->o.start_delay) {
				1415	spent = mtime_since_genesis();
				1416
				1417	if (td->o.start_delay * 1000 > spent)
				1418	continue;
				1419	}
				1420
				1421	if (td->o.stonewall && (nr_started \|\| nr_running)) {
				1422	dprint(FD_PROCESS, "%s: stonewall wait\n",
				1423	td->o.name);
				1424	break;
				1425	}
				1426
				1427	init_disk_util(td);
				1428
				1429	/*
				1430	* Set state to created. Thread will transition
				1431	* to TD_INITIALIZED when it's done setting up.
				1432	*/
				1433	td_set_runstate(td, TD_CREATED);
				1434	map[this_jobs++] = td;
				1435	nr_started++;
				1436
				1437	if (td->o.use_thread) {
				1438	int ret;
				1439
				1440	dprint(FD_PROCESS, "will pthread_create\n");
				1441	ret = pthread_create(&td->thread, NULL,
				1442	thread_main, td);
				1443	if (ret) {
				1444	log_err("pthread_create: %s\n",
				1445	strerror(ret));
				1446	nr_started--;
				1447	break;
				1448	}
				1449	ret = pthread_detach(td->thread);
				1450	if (ret)
				1451	log_err("pthread_detach: %s",
				1452	strerror(ret));
				1453	} else {
				1454	pid_t pid;
				1455	dprint(FD_PROCESS, "will fork\n");
				1456	pid = fork();
				1457	if (!pid) {
				1458	int ret = fork_main(shm_id, i);
				1459
				1460	_exit(ret);
				1461	} else if (i == fio_debug_jobno)
				1462	*fio_debug_jobp = pid;
				1463	}
				1464	dprint(FD_MUTEX, "wait on startup_mutex\n");
				1465	if (fio_mutex_down_timeout(startup_mutex, 10)) {
				1466	log_err("fio: job startup hung? exiting.\n");
				1467	fio_terminate_threads(TERMINATE_ALL);
				1468	fio_abort = 1;
				1469	nr_started--;
				1470	break;
				1471	}
				1472	dprint(FD_MUTEX, "done waiting on startup_mutex\n");
				1473	}
				1474
				1475	/*
				1476	* Wait for the started threads to transition to
				1477	* TD_INITIALIZED.
				1478	*/
				1479	fio_gettime(&this_start, NULL);
				1480	left = this_jobs;
				1481	while (left && !fio_abort) {
				1482	if (mtime_since_now(&this_start) > JOB_START_TIMEOUT)
				1483	break;
				1484
				1485	usleep(100000);
				1486
				1487	for (i = 0; i < this_jobs; i++) {
				1488	td = map[i];
				1489	if (!td)
				1490	continue;
				1491	if (td->runstate == TD_INITIALIZED) {
				1492	map[i] = NULL;
				1493	left--;
				1494	} else if (td->runstate >= TD_EXITED) {
				1495	map[i] = NULL;
				1496	left--;
				1497	todo--;
				1498	nr_running++; /* work-around... */
				1499	}
				1500	}
				1501	}
				1502
				1503	if (left) {
Jens Axboe	4e87c37	2012-02-15 14:27:08 +0100	[diff] [blame]	1504	log_err("fio: %d job%s failed to start\n", left,
				1505	left > 1 ? "s" : "");
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	1506	for (i = 0; i < this_jobs; i++) {
				1507	td = map[i];
				1508	if (!td)
				1509	continue;
				1510	kill(td->pid, SIGTERM);
				1511	}
				1512	break;
				1513	}
				1514
				1515	/*
				1516	* start created threads (TD_INITIALIZED -> TD_RUNNING).
				1517	*/
				1518	for_each_td(td, i) {
				1519	if (td->runstate != TD_INITIALIZED)
				1520	continue;
				1521
				1522	if (in_ramp_time(td))
				1523	td_set_runstate(td, TD_RAMP);
				1524	else
				1525	td_set_runstate(td, TD_RUNNING);
				1526	nr_running++;
				1527	nr_started--;
				1528	m_rate += td->o.ratemin[0] + td->o.ratemin[1];
				1529	t_rate += td->o.rate[0] + td->o.rate[1];
				1530	todo--;
				1531	fio_mutex_up(td->mutex);
				1532	}
				1533
				1534	reap_threads(&nr_running, &t_rate, &m_rate);
				1535
				1536	if (todo) {
				1537	if (is_backend)
				1538	fio_server_idle_loop();
				1539	else
				1540	usleep(100000);
				1541	}
				1542	}
				1543
				1544	while (nr_running) {
				1545	reap_threads(&nr_running, &t_rate, &m_rate);
				1546
				1547	if (is_backend)
				1548	fio_server_idle_loop();
				1549	else
				1550	usleep(10000);
				1551	}
				1552
				1553	update_io_ticks();
				1554	fio_unpin_memory();
				1555	}
				1556
				1557	static void disk_thread_main(void data)
				1558	{
				1559	fio_mutex_up(startup_mutex);
				1560
				1561	while (threads) {
				1562	usleep(DISK_UTIL_MSEC * 1000);
				1563	if (!threads)
				1564	break;
				1565	update_io_ticks();
				1566
				1567	if (!is_backend)
				1568	print_thread_status();
				1569	}
				1570
				1571	return NULL;
				1572	}
				1573
				1574	static int create_disk_util_thread(void)
				1575	{
				1576	int ret;
				1577
				1578	ret = pthread_create(&disk_util_thread, NULL, disk_thread_main, NULL);
				1579	if (ret) {
				1580	log_err("Can't create disk util thread: %s\n", strerror(ret));
				1581	return 1;
				1582	}
				1583
				1584	ret = pthread_detach(disk_util_thread);
				1585	if (ret) {
				1586	log_err("Can't detatch disk util thread: %s\n", strerror(ret));
				1587	return 1;
				1588	}
				1589
				1590	dprint(FD_MUTEX, "wait on startup_mutex\n");
				1591	fio_mutex_down(startup_mutex);
				1592	dprint(FD_MUTEX, "done waiting on startup_mutex\n");
				1593	return 0;
				1594	}
				1595
Jens Axboe	2e1df07	2012-02-09 11:15:02 +0100	[diff] [blame]	1596	int fio_backend(void)
				1597	{
				1598	struct thread_data *td;
				1599	int i;
				1600
				1601	if (exec_profile) {
				1602	if (load_profile(exec_profile))
				1603	return 1;
				1604	free(exec_profile);
				1605	exec_profile = NULL;
				1606	}
				1607	if (!thread_number)
				1608	return 0;
				1609
				1610	if (write_bw_log) {
				1611	setup_log(&agg_io_log[DDIR_READ], 0);
				1612	setup_log(&agg_io_log[DDIR_WRITE], 0);
				1613	}
				1614
				1615	startup_mutex = fio_mutex_init(0);
				1616	if (startup_mutex == NULL)
				1617	return 1;
				1618	writeout_mutex = fio_mutex_init(1);
				1619	if (writeout_mutex == NULL)
				1620	return 1;
				1621
				1622	set_genesis_time();
				1623	create_disk_util_thread();
				1624
				1625	cgroup_list = smalloc(sizeof(*cgroup_list));
				1626	INIT_FLIST_HEAD(cgroup_list);
				1627
				1628	run_threads();
				1629
				1630	if (!fio_abort) {
				1631	show_run_stats();
				1632	if (write_bw_log) {
				1633	__finish_log(agg_io_log[DDIR_READ], "agg-read_bw.log");
				1634	__finish_log(agg_io_log[DDIR_WRITE],
				1635	"agg-write_bw.log");
				1636	}
				1637	}
				1638
				1639	for_each_td(td, i)
				1640	fio_options_free(td);
				1641
				1642	cgroup_kill(cgroup_list);
				1643	sfree(cgroup_list);
				1644	sfree(cgroup_mnt);
				1645
				1646	fio_mutex_remove(startup_mutex);
				1647	fio_mutex_remove(writeout_mutex);
				1648	return exit_value;
				1649	}