Add support for trim as a workload type

Trim only works on Linux so far, and it is always synchronous because
of the kernel interface it relies on. It is also restricted to pure
block devices.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/backend.c b/backend.c
index e41e8f1..f6d1983 100644
--- a/backend.c
+++ b/backend.c
@@ -58,7 +58,7 @@
 static int exit_value;
 static volatile int fio_abort;
 
-struct io_log *agg_io_log[2];
+struct io_log *agg_io_log[DDIR_RWDIR_CNT];
 
 int groupid = 0;
 unsigned int thread_number = 0;
@@ -208,10 +208,12 @@
 {
 	int ret = 0;
 
-	if (bytes_done[0])
-		ret |= __check_min_rate(td, now, 0);
-	if (bytes_done[1])
-		ret |= __check_min_rate(td, now, 1);
+	if (bytes_done[DDIR_READ])
+		ret |= __check_min_rate(td, now, DDIR_READ);
+	if (bytes_done[DDIR_WRITE])
+		ret |= __check_min_rate(td, now, DDIR_WRITE);
+	if (bytes_done[DDIR_TRIM])
+		ret |= __check_min_rate(td, now, DDIR_TRIM);
 
 	return ret;
 }
@@ -545,11 +547,13 @@
 	unsigned long long bytes;
 
 	if (td_rw(td))
-		bytes = td->this_io_bytes[0] + td->this_io_bytes[1];
+		bytes = td->this_io_bytes[DDIR_READ] + td->this_io_bytes[DDIR_WRITE];
 	else if (td_write(td))
-		bytes = td->this_io_bytes[1];
+		bytes = td->this_io_bytes[DDIR_WRITE];
+	else if (td_read(td))
+		bytes = td->this_io_bytes[DDIR_READ];
 	else
-		bytes = td->this_io_bytes[0];
+		bytes = td->this_io_bytes[DDIR_TRIM];
 
 	return bytes >= td->o.size;
 }
@@ -572,7 +576,7 @@
 		(!flist_empty(&td->trim_list)) || !io_bytes_exceeded(td) ||
 		td->o.time_based) {
 		struct timeval comp_time;
-		unsigned long bytes_done[2] = { 0, 0 };
+		unsigned long bytes_done[DDIR_RWDIR_CNT] = { 0, 0, 0 };
 		int min_evts = 0;
 		struct io_u *io_u;
 		int ret2, full;
@@ -649,8 +653,9 @@
 				requeue_io_u(td, &io_u);
 			} else {
 sync_done:
-				if (__should_check_rate(td, 0) ||
-				    __should_check_rate(td, 1))
+				if (__should_check_rate(td, DDIR_READ) ||
+				    __should_check_rate(td, DDIR_WRITE) ||
+				    __should_check_rate(td, DDIR_TRIM))
 					fio_gettime(&comp_time, NULL);
 
 				ret = io_u_sync_complete(td, io_u, bytes_done);
@@ -697,8 +702,9 @@
 			if (full && !min_evts)
 				min_evts = 1;
 
-			if (__should_check_rate(td, 0) ||
-			    __should_check_rate(td, 1))
+			if (__should_check_rate(td, DDIR_READ) ||
+			    __should_check_rate(td, DDIR_WRITE) ||
+			    __should_check_rate(td, DDIR_TRIM))
 				fio_gettime(&comp_time, NULL);
 
 			do {
@@ -711,7 +717,8 @@
 
 		if (ret < 0)
 			break;
-		if (!(bytes_done[0] + bytes_done[1]))
+		if (!(bytes_done[DDIR_READ] + bytes_done[DDIR_WRITE]
+				+ bytes_done[DDIR_TRIM]))
 			continue;
 
 		if (!in_ramp_time(td) && should_check_rate(td, bytes_done)) {
@@ -726,7 +733,8 @@
 		if (td->o.thinktime) {
 			unsigned long long b;
 
-			b = td->io_blocks[0] + td->io_blocks[1];
+			b = td->io_blocks[DDIR_READ] + td->io_blocks[DDIR_WRITE] +
+				td->io_blocks[DDIR_TRIM];
 			if (!(b % td->o.thinktime_blocks)) {
 				int left;
 
@@ -772,7 +780,8 @@
 	/*
 	 * stop job if we failed doing any IO
 	 */
-	if ((td->this_io_bytes[0] + td->this_io_bytes[1]) == 0)
+	if ((td->this_io_bytes[DDIR_READ] + td->this_io_bytes[DDIR_WRITE] +
+			td->this_io_bytes[DDIR_TRIM]) == 0)
 		td->done = 1;
 }
 
@@ -800,6 +809,7 @@
 
 	max_units = td->o.iodepth;
 	max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
+	max_bs = max(td->o.max_bs[DDIR_TRIM], max_bs);
 	min_write = td->o.min_bs[DDIR_WRITE];
 	td->orig_buffer_size = (unsigned long long) max_bs
 					* (unsigned long long) max_units;
@@ -937,8 +947,8 @@
 		return 1;
 	}
 
-	io_done = td->io_bytes[DDIR_READ] + td->io_bytes[DDIR_WRITE]
-			+ td->io_skip_bytes;
+	io_done = td->io_bytes[DDIR_READ] + td->io_bytes[DDIR_WRITE] +
+			td->io_bytes[DDIR_TRIM] + td->io_skip_bytes;
 	if (io_done < td->o.size)
 		return 1;
 
@@ -1097,10 +1107,13 @@
 		memcpy(&td->iops_sample_time, &td->start, sizeof(td->start));
 		memcpy(&td->tv_cache, &td->start, sizeof(td->start));
 
-		if (td->o.ratemin[0] || td->o.ratemin[1]) {
-		        memcpy(&td->lastrate[0], &td->bw_sample_time,
+		if (td->o.ratemin[DDIR_READ] || td->o.ratemin[DDIR_WRITE] ||
+				td->o.ratemin[DDIR_TRIM]) {
+		        memcpy(&td->lastrate[DDIR_READ], &td->bw_sample_time,
 						sizeof(td->bw_sample_time));
-		        memcpy(&td->lastrate[1], &td->bw_sample_time,
+		        memcpy(&td->lastrate[DDIR_WRITE], &td->bw_sample_time,
+						sizeof(td->bw_sample_time));
+		        memcpy(&td->lastrate[DDIR_TRIM], &td->bw_sample_time,
 						sizeof(td->bw_sample_time));
 		}
 
@@ -1121,6 +1134,10 @@
 			elapsed = utime_since_now(&td->start);
 			td->ts.runtime[DDIR_WRITE] += elapsed;
 		}
+		if (td_trim(td) && td->io_bytes[DDIR_TRIM]) {
+			elapsed = utime_since_now(&td->start);
+			td->ts.runtime[DDIR_TRIM] += elapsed;
+		}
 
 		if (td->error || td->terminate)
 			break;
@@ -1143,11 +1160,13 @@
 	}
 
 	update_rusage_stat(td);
-	td->ts.runtime[0] = (td->ts.runtime[0] + 999) / 1000;
-	td->ts.runtime[1] = (td->ts.runtime[1] + 999) / 1000;
+	td->ts.runtime[DDIR_READ] = (td->ts.runtime[DDIR_READ] + 999) / 1000;
+	td->ts.runtime[DDIR_WRITE] = (td->ts.runtime[DDIR_WRITE] + 999) / 1000;
+	td->ts.runtime[DDIR_TRIM] = (td->ts.runtime[DDIR_TRIM] + 999) / 1000;
 	td->ts.total_run_time = mtime_since_now(&td->epoch);
-	td->ts.io_bytes[0] = td->io_bytes[0];
-	td->ts.io_bytes[1] = td->io_bytes[1];
+	td->ts.io_bytes[DDIR_READ] = td->io_bytes[DDIR_READ];
+	td->ts.io_bytes[DDIR_WRITE] = td->io_bytes[DDIR_WRITE];
+	td->ts.io_bytes[DDIR_TRIM] = td->io_bytes[DDIR_TRIM];
 
 	fio_mutex_down(writeout_mutex);
 	if (td->bw_log) {
@@ -1337,8 +1356,10 @@
 		continue;
 reaped:
 		(*nr_running)--;
-		(*m_rate) -= (td->o.ratemin[0] + td->o.ratemin[1]);
-		(*t_rate) -= (td->o.rate[0] + td->o.rate[1]);
+		(*m_rate) -= (td->o.ratemin[DDIR_READ] + td->o.ratemin[DDIR_WRITE] +
+			td->o.ratemin[DDIR_TRIM]);
+		(*t_rate) -= (td->o.rate[DDIR_READ] + td->o.rate[DDIR_WRITE] +
+			td->o.rate[DDIR_TRIM]);
 		if (!td->pid)
 			pending--;
 
@@ -1560,8 +1581,10 @@
 				td_set_runstate(td, TD_RUNNING);
 			nr_running++;
 			nr_started--;
-			m_rate += td->o.ratemin[0] + td->o.ratemin[1];
-			t_rate += td->o.rate[0] + td->o.rate[1];
+			m_rate += td->o.ratemin[DDIR_READ] +
+				td->o.ratemin[DDIR_WRITE] + td->o.ratemin[DDIR_TRIM];
+			t_rate += td->o.rate[DDIR_READ] +
+				td->o.rate[DDIR_WRITE] + td->o.rate[DDIR_TRIM];
 			todo--;
 			fio_mutex_up(td->mutex);
 		}
@@ -1659,6 +1682,7 @@
 	if (write_bw_log) {
 		setup_log(&agg_io_log[DDIR_READ], 0);
 		setup_log(&agg_io_log[DDIR_WRITE], 0);
+		setup_log(&agg_io_log[DDIR_TRIM], 0);
 	}
 
 	startup_mutex = fio_mutex_init(FIO_MUTEX_LOCKED);
@@ -1682,6 +1706,8 @@
 			__finish_log(agg_io_log[DDIR_READ], "agg-read_bw.log");
 			__finish_log(agg_io_log[DDIR_WRITE],
 					"agg-write_bw.log");
+			__finish_log(agg_io_log[DDIR_TRIM],
+					"agg-trim_bw.log");
 		}
 	}
 
diff --git a/engines/sync.c b/engines/sync.c
index 3377f81..bd912e7 100644
--- a/engines/sync.c
+++ b/engines/sync.c
@@ -75,9 +75,10 @@
 		ret = pread(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
 	else if (io_u->ddir == DDIR_WRITE)
 		ret = pwrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
-	else if (io_u->ddir == DDIR_TRIM)
-		ret = do_io_u_trim(td, io_u);
-	else
+	else if (io_u->ddir == DDIR_TRIM) {
+		do_io_u_trim(td, io_u);
+		return FIO_Q_COMPLETED;
+	} else
 		ret = do_io_u_sync(td, io_u);
 
 	return fio_io_end(td, io_u, ret);
@@ -94,9 +95,10 @@
 		ret = read(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
 	else if (io_u->ddir == DDIR_WRITE)
 		ret = write(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
-	else if (io_u->ddir == DDIR_TRIM)
-		ret = do_io_u_trim(td, io_u);
-	else
+	else if (io_u->ddir == DDIR_TRIM) {
+		do_io_u_trim(td, io_u);
+		return FIO_Q_COMPLETED;
+	} else
 		ret = do_io_u_sync(td, io_u);
 
 	return fio_io_end(td, io_u, ret);
diff --git a/eta.c b/eta.c
index 552845d..e2a5a29 100644
--- a/eta.c
+++ b/eta.c
@@ -53,11 +53,16 @@
 				c = 'r';
 			else
 				c = 'R';
-		} else {
+		} else if (td_write(td)) {
 			if (td_random(td))
 				c = 'w';
 			else
 				c = 'W';
+		} else {
+			if (td_random(td))
+				c = 'd';
+			else
+				c = 'D';
 		}
 		break;
 	case TD_PRE_READING:
@@ -150,7 +155,8 @@
 	if (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING) {
 		double perc, perc_t;
 
-		bytes_done = td->io_bytes[DDIR_READ] + td->io_bytes[DDIR_WRITE];
+		bytes_done = td->io_bytes[DDIR_READ] + td->io_bytes[DDIR_WRITE] +
+			td->io_bytes[DDIR_TRIM];
 		perc = (double) bytes_done / (double) bytes_total;
 		if (perc > 1.0)
 			perc = 1.0;
@@ -189,9 +195,11 @@
 					t_eta -= ramp_left;
 			}
 		}
-		if (td->o.rate[0] || td->o.rate[1]) {
+		if (td->o.rate[DDIR_READ] || td->o.rate[DDIR_WRITE] ||
+		    td->o.rate[DDIR_TRIM]) {
 			r_eta = (bytes_total / 1024) /
-					(td->o.rate[0] + td->o.rate[1]);
+				(td->o.rate[DDIR_READ] + td->o.rate[DDIR_WRITE] +
+				td->o.rate[DDIR_TRIM]);
 			r_eta += td->o.start_delay;
 		}
 
@@ -218,23 +226,25 @@
 {
 	int i;
 
-	for (i = 0; i <= DDIR_WRITE; i++) {
+	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 		unsigned long long diff;
 
 		diff = io_bytes[i] - prev_io_bytes[i];
 		rate[i] = ((1000 * diff) / mtime) / 1024;
+
+		prev_io_bytes[i] = io_bytes[i];
 	}
-	prev_io_bytes[0] = io_bytes[0];
-	prev_io_bytes[1] = io_bytes[1];
 }
 
 static void calc_iops(unsigned long mtime, unsigned long long *io_iops,
 		      unsigned long long *prev_io_iops, unsigned int *iops)
 {
-	iops[0] = ((io_iops[0] - prev_io_iops[0]) * 1000) / mtime;
-	iops[1] = ((io_iops[1] - prev_io_iops[1]) * 1000) / mtime;
-	prev_io_iops[0] = io_iops[0];
-	prev_io_iops[1] = io_iops[1];
+	int i;
+
+	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+		iops[i] = ((io_iops[i] - prev_io_iops[i]) * 1000) / mtime;
+		prev_io_iops[i] = io_iops[i];
+	}
 }
 
 /*
@@ -246,13 +256,13 @@
 	struct thread_data *td;
 	int i;
 	unsigned long rate_time, disp_time, bw_avg_time, *eta_secs;
-	unsigned long long io_bytes[2];
-	unsigned long long io_iops[2];
+	unsigned long long io_bytes[DDIR_RWDIR_CNT];
+	unsigned long long io_iops[DDIR_RWDIR_CNT];
 	struct timeval now;
 
-	static unsigned long long rate_io_bytes[2];
-	static unsigned long long disp_io_bytes[2];
-	static unsigned long long disp_io_iops[2];
+	static unsigned long long rate_io_bytes[DDIR_RWDIR_CNT];
+	static unsigned long long disp_io_bytes[DDIR_RWDIR_CNT];
+	static unsigned long long disp_io_iops[DDIR_RWDIR_CNT];
 	static struct timeval rate_prev_time, disp_prev_time;
 
 	if (!force) {
@@ -263,9 +273,11 @@
 			return 0;
 	}
 
-	if (!rate_io_bytes[0] && !rate_io_bytes[1])
+	if (!rate_io_bytes[DDIR_READ] && !rate_io_bytes[DDIR_WRITE] &&
+			!rate_io_bytes[DDIR_TRIM])
 		fill_start_time(&rate_prev_time);
-	if (!disp_io_bytes[0] && !disp_io_bytes[1])
+	if (!disp_io_bytes[DDIR_READ] && !disp_io_bytes[DDIR_WRITE] &&
+			!disp_io_bytes[DDIR_TRIM])
 		fill_start_time(&disp_prev_time);
 
 	eta_secs = malloc(thread_number * sizeof(unsigned long));
@@ -273,8 +285,8 @@
 
 	je->elapsed_sec = (mtime_since_genesis() + 999) / 1000;
 
-	io_bytes[0] = io_bytes[1] = 0;
-	io_iops[0] = io_iops[1] = 0;
+	io_bytes[DDIR_READ] = io_bytes[DDIR_WRITE] = io_bytes[DDIR_TRIM] = 0;
+	io_iops[DDIR_READ] = io_iops[DDIR_WRITE] = io_iops[DDIR_TRIM] = 0;
 	bw_avg_time = ULONG_MAX;
 	for_each_td(td, i) {
 		if (is_power_of_2(td->o.kb_base))
@@ -297,6 +309,13 @@
 				je->m_rate += td->o.ratemin[DDIR_WRITE];
 				je->m_iops += td->o.rate_iops_min[DDIR_WRITE];
 			}
+			if (td_trim(td)) {
+				je->t_rate += td->o.rate[DDIR_TRIM];
+				je->t_iops += td->o.rate_iops[DDIR_TRIM];
+				je->m_rate += td->o.ratemin[DDIR_TRIM];
+				je->m_iops += td->o.rate_iops_min[DDIR_TRIM];
+			}
+
 			je->files_open += td->nr_open_files;
 		} else if (td->runstate == TD_RAMP) {
 			je->nr_running++;
@@ -312,10 +331,11 @@
 		check_str_update(td);
 
 		if (td->runstate > TD_RAMP) {
-			io_bytes[0] += td->io_bytes[0];
-			io_bytes[1] += td->io_bytes[1];
-			io_iops[0] += td->io_blocks[0];
-			io_iops[1] += td->io_blocks[1];
+			int ddir;
+			for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) {
+				io_bytes[ddir] += td->io_bytes[ddir];
+				io_iops[ddir] += td->io_blocks[ddir];
+			}
 		}
 	}
 
@@ -344,6 +364,7 @@
 		memcpy(&rate_prev_time, &now, sizeof(now));
 		add_agg_sample(je->rate[DDIR_READ], DDIR_READ, 0);
 		add_agg_sample(je->rate[DDIR_WRITE], DDIR_WRITE, 0);
+		add_agg_sample(je->rate[DDIR_TRIM], DDIR_TRIM, 0);
 	}
 
 	disp_time = mtime_since(&disp_prev_time, &now);
@@ -394,10 +415,11 @@
 		p += sprintf(p, ", CR=%d/%d IOPS", je->t_iops, je->m_iops);
 	if (je->eta_sec != INT_MAX && je->nr_running) {
 		char perc_str[32];
-		char *iops_str[2];
-		char *rate_str[2];
+		char *iops_str[DDIR_RWDIR_CNT];
+		char *rate_str[DDIR_RWDIR_CNT];
 		size_t left;
 		int l;
+		int ddir;
 
 		if ((!je->eta_sec && !eta_good) || je->nr_ramp == je->nr_running)
 			strcpy(perc_str, "-.-% done");
@@ -407,26 +429,28 @@
 			sprintf(perc_str, "%3.1f%% done", perc);
 		}
 
-		rate_str[0] = num2str(je->rate[0], 5, 1024, je->is_pow2);
-		rate_str[1] = num2str(je->rate[1], 5, 1024, je->is_pow2);
-
-		iops_str[0] = num2str(je->iops[0], 4, 1, 0);
-		iops_str[1] = num2str(je->iops[1], 4, 1, 0);
+		for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) {
+			rate_str[ddir] = num2str(je->rate[ddir], 5,
+						1024, je->is_pow2);
+			iops_str[ddir] = num2str(je->iops[ddir], 4, 1, 0);
+		}
 
 		left = sizeof(output) - (p - output) - 1;
 
-		l = snprintf(p, left, ": [%s] [%s] [%s/%s /s] [%s/%s iops] [eta %s]",
-				je->run_str, perc_str, rate_str[0],
-				rate_str[1], iops_str[0], iops_str[1], eta_str);
+		l = snprintf(p, left, ": [%s] [%s] [%s/%s/%s /s] [%s/%s/%s iops] [eta %s]",
+				je->run_str, perc_str, rate_str[DDIR_READ],
+				rate_str[DDIR_WRITE], rate_str[DDIR_TRIM],
+				iops_str[DDIR_READ], iops_str[DDIR_WRITE],
+				iops_str[DDIR_TRIM], eta_str);
 		p += l;
 		if (l >= 0 && l < linelen_last)
 			p += sprintf(p, "%*s", linelen_last - l, "");
 		linelen_last = l;
 
-		free(rate_str[0]);
-		free(rate_str[1]);
-		free(iops_str[0]);
-		free(iops_str[1]);
+		for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) {
+			free(rate_str[ddir]);
+			free(iops_str[ddir]);
+		}
 	}
 	p += sprintf(p, "\r");
 
diff --git a/filesetup.c b/filesetup.c
index 3594a80..c284071 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -468,6 +468,11 @@
 
 	dprint(FD_FILE, "fd open %s\n", f->file_name);
 
+	if (td_trim(td) && f->filetype != FIO_TYPE_BD) {
+		log_err("fio: trim only applies to block device\n");
+		return 1;
+	}
+
 	if (!strcmp(f->file_name, "-")) {
 		if (td_rw(td)) {
 			log_err("fio: can't read/write to stdin/out\n");
@@ -482,14 +487,17 @@
 			f_out = stderr;
 	}
 
+	if (td_trim(td))
+		goto skip_flags;
 	if (td->o.odirect)
 		flags |= OS_O_DIRECT;
 	if (td->o.sync_io)
 		flags |= O_SYNC;
-	if (f->filetype != FIO_TYPE_FILE)
-		flags |= FIO_O_NOATIME;
 	if (td->o.create_on_open)
 		flags |= O_CREAT;
+skip_flags:
+	if (f->filetype != FIO_TYPE_FILE)
+		flags |= FIO_O_NOATIME;
 
 open_again:
 	if (td_write(td)) {
@@ -503,7 +511,7 @@
 			f->fd = dup(STDOUT_FILENO);
 		else
 			from_hash = file_lookup_open(f, flags);
-	} else {
+	} else if (td_read(td)) {
 		if (f->filetype == FIO_TYPE_CHAR && !read_only)
 			flags |= O_RDWR;
 		else
@@ -513,6 +521,9 @@
 			f->fd = dup(STDIN_FILENO);
 		else
 			from_hash = file_lookup_open(f, flags);
+	} else { //td trim
+		flags |= O_RDWR;
+		from_hash = file_lookup_open(f, flags);
 	}
 
 	if (f->fd == -1) {
diff --git a/fio.h b/fio.h
index f2a5a1f..0064a1d 100644
--- a/fio.h
+++ b/fio.h
@@ -108,12 +108,12 @@
 	unsigned long long file_size_high;
 	unsigned long long start_offset;
 
-	unsigned int bs[2];
-	unsigned int ba[2];
-	unsigned int min_bs[2];
-	unsigned int max_bs[2];
-	struct bssplit *bssplit[2];
-	unsigned int bssplit_nr[2];
+	unsigned int bs[DDIR_RWDIR_CNT];
+	unsigned int ba[DDIR_RWDIR_CNT];
+	unsigned int min_bs[DDIR_RWDIR_CNT];
+	unsigned int max_bs[DDIR_RWDIR_CNT];
+	struct bssplit *bssplit[DDIR_RWDIR_CNT];
+	unsigned int bssplit_nr[DDIR_RWDIR_CNT];
 
 	unsigned int nr_files;
 	unsigned int open_files;
@@ -228,11 +228,11 @@
 	char *exec_prerun;
 	char *exec_postrun;
 
-	unsigned int rate[2];
-	unsigned int ratemin[2];
+	unsigned int rate[DDIR_RWDIR_CNT];
+	unsigned int ratemin[DDIR_RWDIR_CNT];
 	unsigned int ratecycle;
-	unsigned int rate_iops[2];
-	unsigned int rate_iops_min[2];
+	unsigned int rate_iops[DDIR_RWDIR_CNT];
+	unsigned int rate_iops_min[DDIR_RWDIR_CNT];
 
 	char *ioscheduler;
 
@@ -290,10 +290,10 @@
 	struct io_log *bw_log;
 	struct io_log *iops_log;
 
-	uint64_t stat_io_bytes[2];
+	uint64_t stat_io_bytes[DDIR_RWDIR_CNT];
 	struct timeval bw_sample_time;
 
-	uint64_t stat_io_blocks[2];
+	uint64_t stat_io_blocks[DDIR_RWDIR_CNT];
 	struct timeval iops_sample_time;
 
 	struct rusage ru_start;
@@ -395,21 +395,21 @@
 	/*
 	 * Rate state
 	 */
-	unsigned long long rate_bps[2];
-	long rate_pending_usleep[2];
-	unsigned long rate_bytes[2];
-	unsigned long rate_blocks[2];
-	struct timeval lastrate[2];
+	unsigned long long rate_bps[DDIR_RWDIR_CNT];
+	long rate_pending_usleep[DDIR_RWDIR_CNT];
+	unsigned long rate_bytes[DDIR_RWDIR_CNT];
+	unsigned long rate_blocks[DDIR_RWDIR_CNT];
+	struct timeval lastrate[DDIR_RWDIR_CNT];
 
 	unsigned long long total_io_size;
 	unsigned long long fill_device_size;
 
-	unsigned long io_issues[2];
-	unsigned long long io_blocks[2];
-	unsigned long long this_io_blocks[2];
-	unsigned long long io_bytes[2];
+	unsigned long io_issues[DDIR_RWDIR_CNT];
+	unsigned long long io_blocks[DDIR_RWDIR_CNT];
+	unsigned long long this_io_blocks[DDIR_RWDIR_CNT];
+	unsigned long long io_bytes[DDIR_RWDIR_CNT];
 	unsigned long long io_skip_bytes;
-	unsigned long long this_io_bytes[2];
+	unsigned long long this_io_bytes[DDIR_RWDIR_CNT];
 	unsigned long long zone_bytes;
 	struct fio_mutex *mutex;
 
@@ -721,10 +721,12 @@
 {
 	int ret = 0;
 
-	if (bytes_done[0])
-		ret |= __should_check_rate(td, 0);
-	if (bytes_done[1])
-		ret |= __should_check_rate(td, 1);
+	if (bytes_done[DDIR_READ])
+		ret |= __should_check_rate(td, DDIR_READ);
+	if (bytes_done[DDIR_WRITE])
+		ret |= __should_check_rate(td, DDIR_WRITE);
+	if (bytes_done[DDIR_TRIM])
+		ret |= __should_check_rate(td, DDIR_TRIM);
 
 	return ret;
 }
diff --git a/init.c b/init.c
index 9f597a9..1840c8f 100644
--- a/init.c
+++ b/init.c
@@ -362,6 +362,8 @@
 		ret = __setup_rate(td, DDIR_READ);
 	if (td->o.rate[DDIR_WRITE] || td->o.rate_iops[DDIR_WRITE])
 		ret |= __setup_rate(td, DDIR_WRITE);
+	if (td->o.rate[DDIR_TRIM] || td->o.rate_iops[DDIR_TRIM])
+		ret |= __setup_rate(td, DDIR_TRIM);
 
 	return ret;
 }
@@ -370,7 +372,9 @@
 {
 	return o->min_bs[DDIR_READ] == o->max_bs[DDIR_READ] &&
 		o->min_bs[DDIR_WRITE] == o->max_bs[DDIR_WRITE] &&
-		o->min_bs[DDIR_READ] == o->min_bs[DDIR_WRITE];
+		o->min_bs[DDIR_TRIM] == o->max_bs[DDIR_TRIM] &&
+		o->min_bs[DDIR_READ] == o->min_bs[DDIR_WRITE] &&
+		o->min_bs[DDIR_READ] == o->min_bs[DDIR_TRIM];
 }
 
 /*
@@ -426,8 +430,14 @@
 		o->min_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];
 	if (!o->max_bs[DDIR_WRITE])
 		o->max_bs[DDIR_WRITE] = o->bs[DDIR_WRITE];
+	if (!o->min_bs[DDIR_TRIM])
+		o->min_bs[DDIR_TRIM] = o->bs[DDIR_TRIM];
+	if (!o->max_bs[DDIR_TRIM])
+		o->max_bs[DDIR_TRIM] = o->bs[DDIR_TRIM];
+
 
 	o->rw_min_bs = min(o->min_bs[DDIR_READ], o->min_bs[DDIR_WRITE]);
+	o->rw_min_bs = min(o->min_bs[DDIR_TRIM], o->rw_min_bs);
 
 	/*
 	 * For random IO, allow blockalign offset other than min_bs.
@@ -436,9 +446,12 @@
 		o->ba[DDIR_READ] = o->min_bs[DDIR_READ];
 	if (!o->ba[DDIR_WRITE] || !td_random(td))
 		o->ba[DDIR_WRITE] = o->min_bs[DDIR_WRITE];
+	if (!o->ba[DDIR_TRIM] || !td_random(td))
+		o->ba[DDIR_TRIM] = o->min_bs[DDIR_TRIM];
 
 	if ((o->ba[DDIR_READ] != o->min_bs[DDIR_READ] ||
-	    o->ba[DDIR_WRITE] != o->min_bs[DDIR_WRITE]) &&
+	    o->ba[DDIR_WRITE] != o->min_bs[DDIR_WRITE] ||
+	    o->ba[DDIR_TRIM] != o->min_bs[DDIR_TRIM]) &&
 	    !o->norandommap) {
 		log_err("fio: Any use of blockalign= turns off randommap\n");
 		o->norandommap = 1;
@@ -491,15 +504,19 @@
 	if (o->open_files > o->nr_files || !o->open_files)
 		o->open_files = o->nr_files;
 
-	if (((o->rate[0] + o->rate[1]) && (o->rate_iops[0] + o->rate_iops[1]))||
-	    ((o->ratemin[0] + o->ratemin[1]) && (o->rate_iops_min[0] +
-		o->rate_iops_min[1]))) {
+	if (((o->rate[DDIR_READ] + o->rate[DDIR_WRITE] + o->rate[DDIR_TRIM]) &&
+	    (o->rate_iops[DDIR_READ] + o->rate_iops[DDIR_WRITE] + o->rate_iops[DDIR_TRIM])) ||
+	    ((o->ratemin[DDIR_READ] + o->ratemin[DDIR_WRITE] + o->ratemin[DDIR_TRIM]) &&
+	    (o->rate_iops_min[DDIR_READ] + o->rate_iops_min[DDIR_WRITE] + o->rate_iops_min[DDIR_TRIM]))) {
 		log_err("fio: rate and rate_iops are mutually exclusive\n");
 		ret = 1;
 	}
-	if ((o->rate[0] < o->ratemin[0]) || (o->rate[1] < o->ratemin[1]) ||
-	    (o->rate_iops[0] < o->rate_iops_min[0]) ||
-	    (o->rate_iops[1] < o->rate_iops_min[1])) {
+	if ((o->rate[DDIR_READ] < o->ratemin[DDIR_READ]) ||
+	    (o->rate[DDIR_WRITE] < o->ratemin[DDIR_WRITE]) ||
+	    (o->rate[DDIR_TRIM] < o->ratemin[DDIR_TRIM]) ||
+	    (o->rate_iops[DDIR_READ] < o->rate_iops_min[DDIR_READ]) ||
+	    (o->rate_iops[DDIR_WRITE] < o->rate_iops_min[DDIR_WRITE]) ||
+	    (o->rate_iops[DDIR_TRIM] < o->rate_iops_min[DDIR_TRIM])) {
 		log_err("fio: minimum rate exceeds rate\n");
 		ret = 1;
 	}
@@ -740,7 +757,8 @@
 static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
 {
 	const char *ddir_str[] = { NULL, "read", "write", "rw", NULL,
-				   "randread", "randwrite", "randrw" };
+				   "randread", "randwrite", "randrw",
+				   "trim", NULL, NULL, NULL, "randtrim" };
 	unsigned int i;
 	char fname[PATH_MAX];
 	int numjobs, file_alloced;
@@ -815,10 +833,12 @@
 	else
 		memcpy(td->ts.percentile_list, def_percentile_list, sizeof(def_percentile_list));
 
-	td->ts.clat_stat[0].min_val = td->ts.clat_stat[1].min_val = ULONG_MAX;
-	td->ts.slat_stat[0].min_val = td->ts.slat_stat[1].min_val = ULONG_MAX;
-	td->ts.lat_stat[0].min_val = td->ts.lat_stat[1].min_val = ULONG_MAX;
-	td->ts.bw_stat[0].min_val = td->ts.bw_stat[1].min_val = ULONG_MAX;
+	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+		td->ts.clat_stat[i].min_val = ULONG_MAX;
+		td->ts.slat_stat[i].min_val = ULONG_MAX;
+		td->ts.lat_stat[i].min_val = ULONG_MAX;
+		td->ts.bw_stat[i].min_val = ULONG_MAX;
+	}
 	td->ddir_seq_nr = td->o.ddir_seq_nr;
 
 	if ((td->o.stonewall || td->o.new_group) && prev_group_jobs) {
@@ -858,18 +878,20 @@
 							td->o.cpuload,
 							td->o.cpucycle);
 			} else {
-				char *c1, *c2, *c3, *c4;
+				char *c1, *c2, *c3, *c4, *c5, *c6;
 
 				c1 = to_kmg(td->o.min_bs[DDIR_READ]);
 				c2 = to_kmg(td->o.max_bs[DDIR_READ]);
 				c3 = to_kmg(td->o.min_bs[DDIR_WRITE]);
 				c4 = to_kmg(td->o.max_bs[DDIR_WRITE]);
+				c5 = to_kmg(td->o.min_bs[DDIR_TRIM]);
+				c6 = to_kmg(td->o.max_bs[DDIR_TRIM]);
 
-				log_info("%s: (g=%d): rw=%s, bs=%s-%s/%s-%s,"
+				log_info("%s: (g=%d): rw=%s, bs=%s-%s/%s-%s/%s-%s,"
 					 " ioengine=%s, iodepth=%u\n",
 						td->o.name, td->groupid,
 						ddir_str[td->o.td_ddir],
-						c1, c2, c3, c4,
+						c1, c2, c3, c4, c5, c6,
 						td->io_ops->name,
 						td->o.iodepth);
 
@@ -877,6 +899,8 @@
 				free(c2);
 				free(c3);
 				free(c4);
+				free(c5);
+				free(c6);
 			}
 		} else if (job_add_num == 1)
 			log_info("...\n");
diff --git a/io_ddir.h b/io_ddir.h
index b234256..fa3b143 100644
--- a/io_ddir.h
+++ b/io_ddir.h
@@ -3,12 +3,13 @@
 
 enum fio_ddir {
 	DDIR_READ = 0,
-	DDIR_WRITE,
-	DDIR_SYNC,
+	DDIR_WRITE = 1,
+	DDIR_TRIM = 2,
+	DDIR_RWDIR_CNT = 3,
+	DDIR_SYNC = 3,
 	DDIR_DATASYNC,
 	DDIR_SYNC_FILE_RANGE,
 	DDIR_WAIT,
-	DDIR_TRIM,
 	DDIR_INVAL = -1,
 };
 
@@ -16,14 +17,17 @@
 	TD_DDIR_READ		= 1 << 0,
 	TD_DDIR_WRITE		= 1 << 1,
 	TD_DDIR_RAND		= 1 << 2,
+	TD_DDIR_TRIM		= 1 << 3,
 	TD_DDIR_RW		= TD_DDIR_READ | TD_DDIR_WRITE,
 	TD_DDIR_RANDREAD	= TD_DDIR_READ | TD_DDIR_RAND,
 	TD_DDIR_RANDWRITE	= TD_DDIR_WRITE | TD_DDIR_RAND,
 	TD_DDIR_RANDRW		= TD_DDIR_RW | TD_DDIR_RAND,
+	TD_DDIR_RANDTRIM	= TD_DDIR_TRIM | TD_DDIR_RAND,
 };
 
 #define td_read(td)		((td)->o.td_ddir & TD_DDIR_READ)
 #define td_write(td)		((td)->o.td_ddir & TD_DDIR_WRITE)
+#define td_trim(td)		((td)->o.td_ddir & TD_DDIR_TRIM)
 #define td_rw(td)		(((td)->o.td_ddir & TD_DDIR_RW) == TD_DDIR_RW)
 #define td_random(td)		((td)->o.td_ddir & TD_DDIR_RAND)
 #define file_randommap(td, f)	(!(td)->o.norandommap && (f)->file_map)
@@ -36,7 +40,9 @@
 
 static inline int ddir_rw(enum fio_ddir ddir)
 {
-	return ddir == DDIR_READ || ddir == DDIR_WRITE;
+	return ddir == DDIR_READ || ddir == DDIR_WRITE || ddir == DDIR_TRIM;
 }
 
+#define ddir_trim(ddir) ((ddir) == DDIR_TRIM)
+
 #endif
diff --git a/io_u.c b/io_u.c
index 2f54562..db0a6dc 100644
--- a/io_u.c
+++ b/io_u.c
@@ -15,7 +15,7 @@
 	int nr;				/* input */
 
 	int error;			/* output */
-	unsigned long bytes_done[2];	/* output */
+	unsigned long bytes_done[DDIR_RWDIR_CNT];	/* output */
 	struct timeval time;		/* output */
 };
 
@@ -543,6 +543,8 @@
 	if (td_rw(td) && __should_check_rate(td, odir))
 		td->rate_pending_usleep[odir] -= usec;
 
+	if (ddir_trim(ddir))
+		return ddir;
 	return ddir;
 }
 
@@ -599,8 +601,10 @@
 		ddir = td->rwmix_ddir;
 	} else if (td_read(td))
 		ddir = DDIR_READ;
-	else
+	else if (td_write(td))
 		ddir = DDIR_WRITE;
+	else
+		ddir = DDIR_TRIM;
 
 	td->rwmix_ddir = rate_ddir(td, ddir);
 	return td->rwmix_ddir;
@@ -1406,7 +1410,7 @@
 					(usec_for_io(td, idx) -
 					 utime_since_now(&td->start));
 			}
-			if (__should_check_rate(td, odx))
+			if (idx != DDIR_TRIM && __should_check_rate(td, odx))
 				td->rate_pending_usleep[odx] =
 					(usec_for_io(td, odx) -
 					 utime_since_now(&td->start));
@@ -1444,13 +1448,15 @@
 static void init_icd(struct thread_data *td, struct io_completion_data *icd,
 		     int nr)
 {
+	int ddir;
 	if (!td->o.disable_clat || !td->o.disable_bw)
 		fio_gettime(&icd->time, NULL);
 
 	icd->nr = nr;
 
 	icd->error = 0;
-	icd->bytes_done[0] = icd->bytes_done[1] = 0;
+	for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++)
+		icd->bytes_done[ddir] = 0;
 }
 
 static void ios_completed(struct thread_data *td,
@@ -1489,8 +1495,10 @@
 	}
 
 	if (bytes) {
-		bytes[0] += icd.bytes_done[0];
-		bytes[1] += icd.bytes_done[1];
+		int ddir;
+
+		for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++)
+			bytes[ddir] += icd.bytes_done[ddir];
 	}
 
 	return 0;
@@ -1527,8 +1535,10 @@
 	}
 
 	if (bytes) {
-		bytes[0] += icd.bytes_done[0];
-		bytes[1] += icd.bytes_done[1];
+		int ddir;
+
+		for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++)
+			bytes[ddir] += icd.bytes_done[ddir];
 	}
 
 	return 0;
diff --git a/ioengines.c b/ioengines.c
index bb7833f..b43374e 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -293,7 +293,7 @@
 			 "support direct IO, or iomem_align= is bad.\n");
 	}
 
-	if (!td->io_ops->commit) {
+	if (!td->io_ops->commit || ddir_trim(io_u->ddir)) {
 		io_u_mark_submit(td, 1);
 		io_u_mark_complete(td, 1);
 	}
@@ -302,8 +302,7 @@
 		if (ddir_rw(io_u->ddir)) {
 			io_u_mark_depth(td, 1);
 			td->ts.total_io_u[io_u->ddir]++;
-		} else if (io_u->ddir == DDIR_TRIM)
-			td->ts.total_io_u[2]++;
+		}
 	} else if (ret == FIO_Q_QUEUED) {
 		int r;
 
diff --git a/iolog.h b/iolog.h
index 1853846..bb5c9b7 100644
--- a/iolog.h
+++ b/iolog.h
@@ -40,7 +40,7 @@
 	 * Windowed average, for logging single entries average over some
 	 * period of time.
 	 */
-	struct io_stat avg_window[2];
+	struct io_stat avg_window[DDIR_RWDIR_CNT];
 	unsigned long avg_msec;
 	unsigned long avg_last;
 };
@@ -111,7 +111,7 @@
 extern void finish_log(struct thread_data *, struct io_log *, const char *);
 extern void finish_log_named(struct thread_data *, struct io_log *, const char *, const char *);
 extern void __finish_log(struct io_log *, const char *);
-extern struct io_log *agg_io_log[2];
+extern struct io_log *agg_io_log[DDIR_RWDIR_CNT];
 extern int write_bw_log;
 extern void add_agg_sample(unsigned long, enum fio_ddir, unsigned int);
 
diff --git a/libfio.c b/libfio.c
index 668df45..43e1a61 100644
--- a/libfio.c
+++ b/libfio.c
@@ -66,13 +66,16 @@
 
 static void reset_io_counters(struct thread_data *td)
 {
-	td->stat_io_bytes[0] = td->stat_io_bytes[1] = 0;
-	td->this_io_bytes[0] = td->this_io_bytes[1] = 0;
-	td->stat_io_blocks[0] = td->stat_io_blocks[1] = 0;
-	td->this_io_blocks[0] = td->this_io_blocks[1] = 0;
+	int ddir;
+	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
+		td->stat_io_bytes[ddir] = 0;
+		td->this_io_bytes[ddir] = 0;
+		td->stat_io_blocks[ddir] = 0;
+		td->this_io_blocks[ddir] = 0;
+		td->rate_bytes[ddir] = 0;
+		td->rate_blocks[ddir] = 0;
+	}
 	td->zone_bytes = 0;
-	td->rate_bytes[0] = td->rate_bytes[1] = 0;
-	td->rate_blocks[0] = td->rate_blocks[1] = 0;
 
 	td->last_was_sync = 0;
 
@@ -107,16 +110,15 @@
 
 	reset_io_counters(td);
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 		td->io_bytes[i] = 0;
 		td->io_blocks[i] = 0;
 		td->io_issues[i] = 0;
 		td->ts.total_io_u[i] = 0;
+		td->ts.runtime[i] = 0;
 	}
 
 	fio_gettime(&tv, NULL);
-	td->ts.runtime[0] = 0;
-	td->ts.runtime[1] = 0;
 	memcpy(&td->epoch, &tv, sizeof(tv));
 	memcpy(&td->start, &tv, sizeof(tv));
 }
diff --git a/options.c b/options.c
index 28a228c..54a62d9 100644
--- a/options.c
+++ b/options.c
@@ -166,7 +166,7 @@
 static int str_bssplit_cb(void *data, const char *input)
 {
 	struct thread_data *td = data;
-	char *str, *p, *odir;
+	char *str, *p, *odir, *ddir;
 	int ret = 0;
 
 	p = str = strdup(input);
@@ -176,7 +176,21 @@
 
 	odir = strchr(str, ',');
 	if (odir) {
-		ret = bssplit_ddir(td, DDIR_WRITE, odir + 1);
+		ddir = strchr(odir + 1, ',');
+		if (ddir) {
+			ret = bssplit_ddir(td, DDIR_TRIM, ddir + 1);
+			if (!ret)
+				*ddir = '\0';
+		} else {
+			char *op;
+
+			op = strdup(odir + 1);
+			ret = bssplit_ddir(td, DDIR_TRIM, op);
+
+			free(op);
+		}
+		if (!ret) 
+			ret = bssplit_ddir(td, DDIR_WRITE, odir + 1);
 		if (!ret) {
 			*odir = '\0';
 			ret = bssplit_ddir(td, DDIR_READ, str);
@@ -185,12 +199,15 @@
 		char *op;
 
 		op = strdup(str);
-
-		ret = bssplit_ddir(td, DDIR_READ, str);
-		if (!ret)
-			ret = bssplit_ddir(td, DDIR_WRITE, op);
-
+		ret = bssplit_ddir(td, DDIR_WRITE, op);
 		free(op);
+		if (!ret) {
+			op = strdup(str);
+			ret = bssplit_ddir(td, DDIR_TRIM, op);
+			free(op);
+		}
+		if (!ret)	/* don't mask an earlier write/trim parse error */
+			ret = bssplit_ddir(td, DDIR_READ, str);
 	}
 
 	free(p);
@@ -932,6 +949,10 @@
 			    .oval = TD_DDIR_WRITE,
 			    .help = "Sequential write",
 			  },
+			  { .ival = "trim",
+			    .oval = TD_DDIR_TRIM,
+			    .help = "Sequential trim",
+			  },
 			  { .ival = "randread",
 			    .oval = TD_DDIR_RANDREAD,
 			    .help = "Random read",
@@ -940,6 +961,10 @@
 			    .oval = TD_DDIR_RANDWRITE,
 			    .help = "Random write",
 			  },
+			  { .ival = "randtrim",
+			    .oval = TD_DDIR_RANDTRIM,
+			    .help = "Random trim",
+			  },
 			  { .ival = "rw",
 			    .oval = TD_DDIR_RW,
 			    .help = "Sequential read and write mix",
@@ -1136,6 +1161,7 @@
 		.type	= FIO_OPT_INT,
 		.off1	= td_var_offset(bs[DDIR_READ]),
 		.off2	= td_var_offset(bs[DDIR_WRITE]),
+		.off3	= td_var_offset(bs[DDIR_TRIM]),
 		.minval = 1,
 		.help	= "Block size unit",
 		.def	= "4k",
@@ -1147,6 +1173,7 @@
 		.type	= FIO_OPT_INT,
 		.off1	= td_var_offset(ba[DDIR_READ]),
 		.off2	= td_var_offset(ba[DDIR_WRITE]),
+		.off3	= td_var_offset(ba[DDIR_TRIM]),
 		.minval	= 1,
 		.help	= "IO block offset alignment",
 		.parent	= "rw",
@@ -1159,6 +1186,8 @@
 		.off2	= td_var_offset(max_bs[DDIR_READ]),
 		.off3	= td_var_offset(min_bs[DDIR_WRITE]),
 		.off4	= td_var_offset(max_bs[DDIR_WRITE]),
+		.off5	= td_var_offset(min_bs[DDIR_TRIM]),
+		.off6	= td_var_offset(max_bs[DDIR_TRIM]),
 		.minval = 1,
 		.help	= "Set block size range (in more detail than bs)",
 		.parent = "rw",
@@ -1796,30 +1825,34 @@
 	{
 		.name	= "rate",
 		.type	= FIO_OPT_INT,
-		.off1	= td_var_offset(rate[0]),
-		.off2	= td_var_offset(rate[1]),
+		.off1	= td_var_offset(rate[DDIR_READ]),
+		.off2	= td_var_offset(rate[DDIR_WRITE]),
+		.off3	= td_var_offset(rate[DDIR_TRIM]),
 		.help	= "Set bandwidth rate",
 	},
 	{
 		.name	= "ratemin",
 		.type	= FIO_OPT_INT,
-		.off1	= td_var_offset(ratemin[0]),
-		.off2	= td_var_offset(ratemin[1]),
+		.off1	= td_var_offset(ratemin[DDIR_READ]),
+		.off2	= td_var_offset(ratemin[DDIR_WRITE]),
+		.off3	= td_var_offset(ratemin[DDIR_TRIM]),
 		.help	= "Job must meet this rate or it will be shutdown",
 		.parent	= "rate",
 	},
 	{
 		.name	= "rate_iops",
 		.type	= FIO_OPT_INT,
-		.off1	= td_var_offset(rate_iops[0]),
-		.off2	= td_var_offset(rate_iops[1]),
+		.off1	= td_var_offset(rate_iops[DDIR_READ]),
+		.off2	= td_var_offset(rate_iops[DDIR_WRITE]),
+		.off3	= td_var_offset(rate_iops[DDIR_TRIM]),
 		.help	= "Limit IO used to this number of IO operations/sec",
 	},
 	{
 		.name	= "rate_iops_min",
 		.type	= FIO_OPT_INT,
-		.off1	= td_var_offset(rate_iops_min[0]),
-		.off2	= td_var_offset(rate_iops_min[1]),
+		.off1	= td_var_offset(rate_iops_min[DDIR_READ]),
+		.off2	= td_var_offset(rate_iops_min[DDIR_WRITE]),
+		.off3	= td_var_offset(rate_iops_min[DDIR_TRIM]),
 		.help	= "Job must meet this rate or it will be shut down",
 		.parent	= "rate_iops",
 	},
diff --git a/parse.c b/parse.c
index f1d5f8f..c8b7545 100644
--- a/parse.c
+++ b/parse.c
@@ -414,11 +414,17 @@
 	case FIO_OPT_INT:
 	case FIO_OPT_STR_VAL: {
 		fio_opt_str_val_fn *fn = o->cb;
+		char tmp[128] = "", *p;
+
+		strncpy(tmp, ptr, sizeof(tmp) - 1);
+		p = strchr(tmp, ',');
+		if (p)
+			*p = '\0';
 
 		if (is_time)
-			ret = check_str_time(ptr, &ull);
+			ret = check_str_time(tmp, &ull);
 		else
-			ret = check_str_bytes(ptr, &ull, data);
+			ret = check_str_bytes(tmp, &ull, data);
 
 		if (ret)
 			break;
@@ -444,12 +450,32 @@
 					else
 						val_store(ilp, ull, o->off1, 0, data);
 				}
-				if (!more) {
+				if (curr == 1) {
 					if (o->roff2)
 						*(unsigned int *) o->roff2 = ull;
 					else if (o->off2)
 						val_store(ilp, ull, o->off2, 0, data);
 				}
+				if (curr == 2) {
+					if (o->roff3)
+						*(unsigned int *) o->roff3 = ull;
+					else if (o->off3)
+						val_store(ilp, ull, o->off3, 0, data);
+				}
+				if (!more) {
+					if (curr < 1) {
+						if (o->roff2)
+							*(unsigned int *) o->roff2 = ull;
+						else if (o->off2)
+							val_store(ilp, ull, o->off2, 0, data);
+					}
+					if (curr < 2) {
+						if (o->roff3)
+							*(unsigned int *) o->roff3 = ull;
+						else if (o->off3)
+							val_store(ilp, ull, o->off3, 0, data);
+					}
+				}
 			} else {
 				if (first) {
 					if (o->roff1)
@@ -598,12 +624,43 @@
 				else
 					val_store(ilp, ul2, o->off2, 0, data);
 			}
-			if (o->roff3 && o->roff4) {
-				*(unsigned int *) o->roff3 = ul1;
-				*(unsigned int *) o->roff4 = ul2;
-			} else if (o->off3 && o->off4) {
-				val_store(ilp, ul1, o->off3, 0, data);
-				val_store(ilp, ul2, o->off4, 0, data);
+			if (curr == 1) {
+				if (o->roff3 && o->roff4) {
+					*(unsigned int *) o->roff3 = ul1;
+					*(unsigned int *) o->roff4 = ul2;
+				} else if (o->off3 && o->off4) {
+					val_store(ilp, ul1, o->off3, 0, data);
+					val_store(ilp, ul2, o->off4, 0, data);
+				}
+			}
+			if (curr == 2) {
+				if (o->roff5 && o->roff6) {
+					*(unsigned int *) o->roff5 = ul1;
+					*(unsigned int *) o->roff6 = ul2;
+				} else if (o->off5 && o->off6) {
+					val_store(ilp, ul1, o->off5, 0, data);
+					val_store(ilp, ul2, o->off6, 0, data);
+				}
+			}
+			if (!more) {
+				if (curr < 1) {
+					if (o->roff3 && o->roff4) {
+						*(unsigned int *) o->roff3 = ul1;
+						*(unsigned int *) o->roff4 = ul2;
+					} else if (o->off3 && o->off4) {
+						val_store(ilp, ul1, o->off3, 0, data);
+						val_store(ilp, ul2, o->off4, 0, data);
+					}
+				}
+				if (curr < 2) {
+					if (o->roff5 && o->roff6) {
+						*(unsigned int *) o->roff5 = ul1;
+						*(unsigned int *) o->roff6 = ul2;
+					} else if (o->off5 && o->off6) {
+						val_store(ilp, ul1, o->off5, 0, data);
+						val_store(ilp, ul2, o->off6, 0, data);
+					}
+				}
 			}
 		}
 
@@ -707,7 +764,7 @@
 			ptr2 = strchr(ptr, ',');
 			if (ptr2 && *(ptr2 + 1) == '\0')
 				*ptr2 = '\0';
-			if (o->type != FIO_OPT_STR_MULTI) {
+			if (o->type != FIO_OPT_STR_MULTI && o->type != FIO_OPT_RANGE) {
 				if (!ptr2)
 					ptr2 = strchr(ptr, ':');
 				if (!ptr2)
diff --git a/parse.h b/parse.h
index 4edf75e..8a9e2f4 100644
--- a/parse.h
+++ b/parse.h
@@ -46,7 +46,9 @@
 	unsigned int off2;
 	unsigned int off3;
 	unsigned int off4;
-	void *roff1, *roff2, *roff3, *roff4;
+	unsigned int off5;
+	unsigned int off6;
+	void *roff1, *roff2, *roff3, *roff4, *roff5, *roff6;
 	unsigned int maxval;		/* max and min value */
 	int minval;
 	double maxfp;			/* max and min floating value */
diff --git a/stat.c b/stat.c
index 0a04798..e29bf0c 100644
--- a/stat.c
+++ b/stat.c
@@ -259,12 +259,12 @@
 void show_group_stats(struct group_run_stats *rs)
 {
 	char *p1, *p2, *p3, *p4;
-	const char *ddir_str[] = { "   READ", "  WRITE" };
+	const char *ddir_str[] = { "   READ", "  WRITE", "   TRIM" };
 	int i;
 
 	log_info("\nRun status group %d (all jobs):\n", rs->groupid);
 
-	for (i = 0; i <= DDIR_WRITE; i++) {
+	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 		const int i2p = is_power_of_2(rs->kb_base);
 
 		if (!rs->max_run[i])
@@ -288,7 +288,8 @@
 }
 
 #define ts_total_io_u(ts)	\
-	((ts)->total_io_u[0] + (ts)->total_io_u[1])
+	((ts)->total_io_u[DDIR_READ] + (ts)->total_io_u[DDIR_WRITE] +\
+		(ts)->total_io_u[DDIR_TRIM])
 
 static void stat_calc_dist(unsigned int *map, unsigned long total,
 			   double *io_u_dist)
@@ -356,7 +357,7 @@
 static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
 			     int ddir)
 {
-	const char *ddir_str[] = { "read ", "write" };
+	const char *ddir_str[] = { "read ", "write", "trim " };
 	unsigned long min, max, runt;
 	unsigned long long bw, iops;
 	double mean, dev;
@@ -526,8 +527,9 @@
 	time_t time_p;
 	char time_buf[64];
 
-	if (!(ts->io_bytes[0] + ts->io_bytes[1]) &&
-	    !(ts->total_io_u[0] + ts->total_io_u[1]))
+	if (!(ts->io_bytes[DDIR_READ] + ts->io_bytes[DDIR_WRITE] +
+	    ts->io_bytes[DDIR_TRIM]) && !(ts->total_io_u[DDIR_READ] +
+	    ts->total_io_u[DDIR_WRITE] + ts->total_io_u[DDIR_TRIM]))
 		return;
 
 	time(&time_p);
@@ -551,6 +553,8 @@
 		show_ddir_status(rs, ts, DDIR_READ);
 	if (ts->io_bytes[DDIR_WRITE])
 		show_ddir_status(rs, ts, DDIR_WRITE);
+	if (ts->io_bytes[DDIR_TRIM])
+		show_ddir_status(rs, ts, DDIR_TRIM);
 
 	stat_calc_lat_u(ts, io_u_lat_u);
 	stat_calc_lat_m(ts, io_u_lat_m);
@@ -686,9 +690,11 @@
 	/* General Info */
 	log_info("2;%s;%d;%d", ts->name, ts->groupid, ts->error);
 	/* Log Read Status */
-	show_ddir_status_terse(ts, rs, 0);
+	show_ddir_status_terse(ts, rs, DDIR_READ);
 	/* Log Write Status */
-	show_ddir_status_terse(ts, rs, 1);
+	show_ddir_status_terse(ts, rs, DDIR_WRITE);
+	/* Log Trim Status */
+	show_ddir_status_terse(ts, rs, DDIR_TRIM);
 
 	/* CPU Usage */
 	if (ts->total_run_time) {
@@ -747,9 +753,11 @@
 	log_info("%s;%s;%s;%d;%d", FIO_TERSE_VERSION, fio_version_string,
 					ts->name, ts->groupid, ts->error);
 	/* Log Read Status */
-	show_ddir_status_terse(ts, rs, 0);
+	show_ddir_status_terse(ts, rs, DDIR_READ);
 	/* Log Write Status */
-	show_ddir_status_terse(ts, rs, 1);
+	show_ddir_status_terse(ts, rs, DDIR_WRITE);
+	/* Log Trim Status */
+	show_ddir_status_terse(ts, rs, DDIR_TRIM);
 
 	/* CPU Usage */
 	if (ts->total_run_time) {
@@ -846,7 +854,7 @@
 {
 	int i;
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 		if (dst->max_run[i] < src->max_run[i])
 			dst->max_run[i] = src->max_run[i];
 		if (dst->min_run[i] && dst->min_run[i] > src->min_run[i])
@@ -866,7 +874,7 @@
 {
 	int l, k;
 
-	for (l = 0; l <= DDIR_WRITE; l++) {
+	for (l = 0; l < DDIR_RWDIR_CNT; l++) {
 		sum_stat(&dst->clat_stat[l], &src->clat_stat[l], nr);
 		sum_stat(&dst->slat_stat[l], &src->slat_stat[l], nr);
 		sum_stat(&dst->lat_stat[l], &src->lat_stat[l], nr);
@@ -895,12 +903,12 @@
 	for (k = 0; k < FIO_IO_U_LAT_M_NR; k++)
 		dst->io_u_lat_m[k] += src->io_u_lat_m[k];
 
-	for (k = 0; k <= 2; k++) {
+	for (k = 0; k < DDIR_RWDIR_CNT; k++) {
 		dst->total_io_u[k] += src->total_io_u[k];
 		dst->short_io_u[k] += src->short_io_u[k];
 	}
 
-	for (k = 0; k <= DDIR_WRITE; k++) {
+	for (k = 0; k < DDIR_RWDIR_CNT; k++) {
 		int m;
 		for (m = 0; m < FIO_IO_U_PLAT_NR; m++)
 			dst->io_u_plat[k][m] += src->io_u_plat[k][m];
@@ -913,9 +921,11 @@
 
 void init_group_run_stat(struct group_run_stats *gs)
 {
+	int i;
 	memset(gs, 0, sizeof(*gs));
-	gs->min_bw[0] = gs->min_run[0] = ~0UL;
-	gs->min_bw[1] = gs->min_run[1] = ~0UL;
+
+	for (i = 0; i < DDIR_RWDIR_CNT; i++)
+		gs->min_bw[i] = gs->min_run[i] = ~0UL;
 }
 
 void init_thread_stat(struct thread_stat *ts)
@@ -924,7 +934,7 @@
 
 	memset(ts, 0, sizeof(*ts));
 
-	for (j = 0; j <= DDIR_WRITE; j++) {
+	for (j = 0; j < DDIR_RWDIR_CNT; j++) {
 		ts->lat_stat[j].min_val = -1UL;
 		ts->clat_stat[j].min_val = -1UL;
 		ts->slat_stat[j].min_val = -1UL;
@@ -1041,7 +1051,7 @@
 		rs = &runstats[ts->groupid];
 		rs->kb_base = ts->kb_base;
 
-		for (j = 0; j <= DDIR_WRITE; j++) {
+		for (j = 0; j < DDIR_RWDIR_CNT; j++) {
 			if (!ts->runtime[j])
 				continue;
 			if (ts->runtime[j] < rs->min_run[j] || !rs->min_run[j])
@@ -1067,12 +1077,15 @@
 	}
 
 	for (i = 0; i < groupid + 1; i++) {
+		int ddir;
+
 		rs = &runstats[i];
 
-		if (rs->max_run[0])
-			rs->agg[0] = (rs->io_kb[0] * 1000) / rs->max_run[0];
-		if (rs->max_run[1])
-			rs->agg[1] = (rs->io_kb[1] * 1000) / rs->max_run[1];
+		for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
+			if (rs->max_run[ddir])
+				rs->agg[ddir] = (rs->io_kb[ddir] * 1000) /
+						rs->max_run[ddir];
+		}
 	}
 
 	/*
@@ -1128,10 +1141,13 @@
 			td->ts.runtime[DDIR_READ] += rt[i];
 		if (td_write(td) && td->io_bytes[DDIR_WRITE])
 			td->ts.runtime[DDIR_WRITE] += rt[i];
+		if (td_trim(td) && td->io_bytes[DDIR_TRIM])
+			td->ts.runtime[DDIR_TRIM] += rt[i];
 
 		update_rusage_stat(td);
-		td->ts.io_bytes[0] = td->io_bytes[0];
-		td->ts.io_bytes[1] = td->io_bytes[1];
+		td->ts.io_bytes[DDIR_READ] = td->io_bytes[DDIR_READ];
+		td->ts.io_bytes[DDIR_WRITE] = td->io_bytes[DDIR_WRITE];
+		td->ts.io_bytes[DDIR_TRIM] = td->io_bytes[DDIR_TRIM];
 		td->ts.total_run_time = mtime_since(&td->epoch, &tv);
 	}
 
@@ -1142,6 +1158,8 @@
 			td->ts.runtime[DDIR_READ] -= rt[i];
 		if (td_write(td) && td->io_bytes[DDIR_WRITE])
 			td->ts.runtime[DDIR_WRITE] -= rt[i];
+		if (td_trim(td) && td->io_bytes[DDIR_TRIM])
+			td->ts.runtime[DDIR_TRIM] -= rt[i];
 	}
 
 	free(rt);
@@ -1259,9 +1277,17 @@
 		mw = iolog->avg_window[DDIR_WRITE].mean.u.f + 0.50;
 		__add_log_sample(iolog, mw, DDIR_WRITE, 0, elapsed);
 	}
+	if (iolog->avg_window[DDIR_TRIM].samples) {
+		unsigned long mw;
+
+		mw = iolog->avg_window[DDIR_TRIM].mean.u.f + 0.50;
+		__add_log_sample(iolog, mw, DDIR_TRIM, 0, elapsed);
+	}
+
 
 	reset_io_stat(&iolog->avg_window[DDIR_READ]);
 	reset_io_stat(&iolog->avg_window[DDIR_WRITE]);
+	reset_io_stat(&iolog->avg_window[DDIR_TRIM]);
 	iolog->avg_last = elapsed;
 }
 
@@ -1346,7 +1372,7 @@
 	/*
 	 * Compute both read and write rates for the interval.
 	 */
-	for (ddir = DDIR_READ; ddir <= DDIR_WRITE; ddir++) {
+	for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) {
 		uint64_t delta;
 
 		delta = td->this_io_bytes[ddir] - td->stat_io_bytes[ddir];
@@ -1381,7 +1407,7 @@
 	/*
 	 * Compute both read and write rates for the interval.
 	 */
-	for (ddir = DDIR_READ; ddir <= DDIR_WRITE; ddir++) {
+	for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) {
 		uint64_t delta;
 
 		delta = td->this_io_blocks[ddir] - td->stat_io_blocks[ddir];
diff --git a/stat.h b/stat.h
index c15f26e..b9e0448 100644
--- a/stat.h
+++ b/stat.h
@@ -2,10 +2,10 @@
 #define FIO_STAT_H
 
 struct group_run_stats {
-	uint64_t max_run[2], min_run[2];
-	uint64_t max_bw[2], min_bw[2];
-	uint64_t io_kb[2];
-	uint64_t agg[2];
+	uint64_t max_run[DDIR_RWDIR_CNT], min_run[DDIR_RWDIR_CNT];
+	uint64_t max_bw[DDIR_RWDIR_CNT], min_bw[DDIR_RWDIR_CNT];
+	uint64_t io_kb[DDIR_RWDIR_CNT];
+	uint64_t agg[DDIR_RWDIR_CNT];
 	uint32_t kb_base;
 	uint32_t groupid;
 };
@@ -124,11 +124,11 @@
 	/*
 	 * bandwidth and latency stats
 	 */
-	struct io_stat clat_stat[2];		/* completion latency */
-	struct io_stat slat_stat[2];		/* submission latency */
-	struct io_stat lat_stat[2];		/* total latency */
-	struct io_stat bw_stat[2];		/* bandwidth stats */
-	struct io_stat iops_stat[2];		/* IOPS stats */
+	struct io_stat clat_stat[DDIR_RWDIR_CNT]; /* completion latency */
+	struct io_stat slat_stat[DDIR_RWDIR_CNT]; /* submission latency */
+	struct io_stat lat_stat[DDIR_RWDIR_CNT]; /* total latency */
+	struct io_stat bw_stat[DDIR_RWDIR_CNT]; /* bandwidth stats */
+	struct io_stat iops_stat[DDIR_RWDIR_CNT]; /* IOPS stats */
 
 	/*
 	 * fio system usage accounting
@@ -149,14 +149,14 @@
 	uint32_t io_u_complete[FIO_IO_U_MAP_NR];
 	uint32_t io_u_lat_u[FIO_IO_U_LAT_U_NR];
 	uint32_t io_u_lat_m[FIO_IO_U_LAT_M_NR];
-	uint32_t io_u_plat[2][FIO_IO_U_PLAT_NR];
+	uint32_t io_u_plat[DDIR_RWDIR_CNT][FIO_IO_U_PLAT_NR];
 	uint64_t total_io_u[3];
 	uint64_t short_io_u[3];
 	uint64_t total_submit;
 	uint64_t total_complete;
 
-	uint64_t io_bytes[2];
-	uint64_t runtime[2];
+	uint64_t io_bytes[DDIR_RWDIR_CNT];
+	uint64_t runtime[DDIR_RWDIR_CNT];
 	uint64_t total_run_time;
 
 	/*
@@ -176,8 +176,8 @@
 	uint32_t files_open;
 	uint32_t m_rate, t_rate;
 	uint32_t m_iops, t_iops;
-	uint32_t rate[2];
-	uint32_t iops[2];
+	uint32_t rate[DDIR_RWDIR_CNT];
+	uint32_t iops[DDIR_RWDIR_CNT];
 	uint64_t elapsed_sec;
 	uint64_t eta_sec;
 	uint32_t is_pow2;