[PATCH] Add separate read/write block size options

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index 79d31df..78a9be0 100644
--- a/HOWTO
+++ b/HOWTO
@@ -220,11 +220,21 @@
 
 bs=siint	The block size used for the io units. Defaults to 4k.
 
+read_bs=siint
+write_bs=siint	If the workload is a mixed read-write workload, you can use
+		these options to set separate block sizes.
+
 bsrange=irange	Instead of giving a single block size, specify a range
 		and fio will mix the issued io block sizes. The issued
 		io unit will always be a multiple of the minimum value
 		given (also see bs_unaligned).
 
+read_bsrange=irange
+write_bsrange=irange
+		If the workload is a mixed read-write workload, you can use
+		one of these options to set separate block size ranges for
+		reads and writes.
+
 bs_unaligned	If this option is given, any byte size value within bsrange
 		may be used as a block range. This typically wont work with
 		direct IO, as that normally requires sector alignment.
diff --git a/filesetup.c b/filesetup.c
index 0b95640..a163382 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -49,12 +49,12 @@
 		goto err;
 	}
 
-	b = malloc(td->max_bs);
-	memset(b, 0, td->max_bs);
+	b = malloc(td->max_bs[DDIR_WRITE]);
+	memset(b, 0, td->max_bs[DDIR_WRITE]);
 
 	left = f->file_size;
 	while (left && !td->terminate) {
-		bs = td->max_bs;
+		bs = td->max_bs[DDIR_WRITE];
 		if (bs > left)
 			bs = left;
 
diff --git a/fio.c b/fio.c
index 76710e1..011c13a 100644
--- a/fio.c
+++ b/fio.c
@@ -436,7 +436,7 @@
 		gettimeofday(&e, NULL);
 		usec = utime_since(&s, &e);
 
-		rate_throttle(td, usec, icd.bytes_done[td->ddir]);
+		rate_throttle(td, usec, icd.bytes_done[td->ddir], td->ddir);
 
 		if (check_min_rate(td, &e)) {
 			if (rate_quit)
@@ -495,6 +495,7 @@
 static int init_io_u(struct thread_data *td)
 {
 	struct io_u *io_u;
+	unsigned int max_bs;
 	int i, max_units;
 	char *p;
 
@@ -506,7 +507,8 @@
 	else
 		max_units = td->iodepth;
 
-	td->orig_buffer_size = td->max_bs * max_units + MASK;
+	max_bs = max(td->max_bs[DDIR_READ], td->max_bs[DDIR_WRITE]);
+	td->orig_buffer_size = max_bs * max_units + MASK;
 
 	if (allocate_io_mem(td))
 		return 1;
@@ -517,9 +519,9 @@
 		memset(io_u, 0, sizeof(*io_u));
 		INIT_LIST_HEAD(&io_u->list);
 
-		io_u->buf = p + td->max_bs * i;
+		io_u->buf = p + max_bs * i;
 		if (td_write(td) || td_rw(td))
-			fill_rand_buf(io_u, td->max_bs);
+			fill_rand_buf(io_u, max_bs);
 
 		io_u->index = i;
 		list_add(&io_u->list, &td->io_u_freelist);
diff --git a/fio.h b/fio.h
index 6a68ec8..53e174e 100644
--- a/fio.h
+++ b/fio.h
@@ -205,9 +205,10 @@
 	unsigned int norandommap;
 	unsigned int bs_unaligned;
 
-	unsigned int bs;
-	unsigned int min_bs;
-	unsigned int max_bs;
+	unsigned int bs[2];
+	unsigned int min_bs[2];
+	unsigned int max_bs[2];
+	unsigned int rw_min_bs;
 	unsigned int thinktime;
 	unsigned int fsync_blocks;
 	unsigned int start_delay;
@@ -364,7 +365,7 @@
 #define td_rw(td)		((td)->iomix != 0)
 
 #define BLOCKS_PER_MAP		(8 * sizeof(long))
-#define TO_MAP_BLOCK(td, f, b)	((b) - ((f)->file_offset / (td)->min_bs))
+#define TO_MAP_BLOCK(td, f, b)	((b) - ((f)->file_offset / (td)->rw_min_bs))
 #define RAND_MAP_IDX(td, f, b)	(TO_MAP_BLOCK(td, f, b) / BLOCKS_PER_MAP)
 #define RAND_MAP_BIT(td, f, b)	(TO_MAP_BLOCK(td, f, b) & (BLOCKS_PER_MAP - 1))
 
@@ -417,6 +418,9 @@
 #ifndef min
 #define min(a, b)	((a) < (b) ? (a) : (b))
 #endif
+#ifndef max
+#define max(a, b)	((a) > (b) ? (a) : (b))
+#endif
 
 /*
  * Log exports
@@ -454,7 +458,7 @@
 extern unsigned long mtime_since_genesis(void);
 extern void __usec_sleep(unsigned int);
 extern void usec_sleep(struct thread_data *, unsigned long);
-extern void rate_throttle(struct thread_data *, unsigned long, unsigned int);
+extern void rate_throttle(struct thread_data *, unsigned long, unsigned int, int);
 
 /*
  * Init functions
diff --git a/init.c b/init.c
index 3532c49..616c51f 100644
--- a/init.c
+++ b/init.c
@@ -139,7 +139,17 @@
 	{
 		.name	= "bs",
 		.type	= FIO_OPT_STR_VAL,
-		.off1	= td_var_offset(bs),
+		.off1	= td_var_offset(bs[DDIR_READ]),
+	},
+	{
+		.name	= "read_bs",
+		.type	= FIO_OPT_STR_VAL,
+		.off1	= td_var_offset(bs[DDIR_READ]),
+	},
+	{
+		.name	= "write_bs",
+		.type	= FIO_OPT_STR_VAL,
+		.off1	= td_var_offset(bs[DDIR_WRITE]),
 	},
 	{
 		.name	= "offset",
@@ -164,8 +174,20 @@
 	{
 		.name	= "bsrange",
 		.type	= FIO_OPT_RANGE,
-		.off1	= td_var_offset(min_bs),
-		.off2	= td_var_offset(max_bs),
+		.off1	= td_var_offset(min_bs[DDIR_READ]),
+		.off2	= td_var_offset(max_bs[DDIR_READ]),
+	},
+	{
+		.name	= "read_bsrange",
+		.type	= FIO_OPT_RANGE,
+		.off1	= td_var_offset(min_bs[DDIR_READ]),
+		.off2	= td_var_offset(max_bs[DDIR_READ]),
+	},
+	{
+		.name	= "write_bsrange",
+		.type	= FIO_OPT_RANGE,
+		.off1	= td_var_offset(min_bs[DDIR_WRITE]),
+		.off2	= td_var_offset(max_bs[DDIR_WRITE]),
 	},
 	{
 		.name	= "nrfiles",
@@ -484,10 +506,19 @@
 	if (td_read(td) || td_rw(td))
 		td->overwrite = 1;
 
-	if (!td->min_bs)
-		td->min_bs = td->bs;
-	if (!td->max_bs)
-		td->max_bs = td->bs;
+	if (td->bs[DDIR_READ] != DEF_BS)
+		td->bs[DDIR_WRITE] = td->bs[DDIR_READ];
+	if (!td->min_bs[DDIR_READ])
+		td->min_bs[DDIR_READ]= td->bs[DDIR_READ];
+	if (!td->max_bs[DDIR_READ])
+		td->max_bs[DDIR_READ] = td->bs[DDIR_READ];
+	if (!td->min_bs[DDIR_WRITE])
+		td->min_bs[DDIR_WRITE]= td->bs[DDIR_READ];
+	if (!td->max_bs[DDIR_WRITE])
+		td->max_bs[DDIR_WRITE] = td->bs[DDIR_READ];
+
+	td->rw_min_bs = min(td->min_bs[DDIR_READ], td->min_bs[DDIR_WRITE]);
+
 	if (td_read(td) && !td_rw(td))
 		td->verify = 0;
 
@@ -626,7 +657,7 @@
 			if (td->io_ops->flags & FIO_CPUIO)
 				fprintf(f_out, "%s: ioengine=cpu, cpuload=%u, cpucycle=%u\n", td->name, td->cpuload, td->cpucycle);
 			else
-				fprintf(f_out, "%s: (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->name, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_ops->name, td->iodepth);
+				fprintf(f_out, "%s: (g=%d): rw=%s, odir=%d, bs=%d-%d/%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->name, td->groupid, ddir_str[ddir], td->odirect, td->min_bs[DDIR_READ], td->max_bs[DDIR_READ], td->min_bs[DDIR_WRITE], td->max_bs[DDIR_WRITE], td->rate, td->io_ops->name, td->iodepth);
 		} else if (job_add_num == 1)
 			fprintf(f_out, "...\n");
 	}
@@ -694,7 +725,7 @@
 
 	if (!td->norandommap) {
 		for_each_file(td, f, i) {
-			blocks = (f->file_size + td->min_bs - 1) / td->min_bs;
+			blocks = (f->file_size + td->rw_min_bs - 1) / td->rw_min_bs;
 			num_maps = (blocks + BLOCKS_PER_MAP-1)/ BLOCKS_PER_MAP;
 			f->file_map = malloc(num_maps * sizeof(long));
 			f->num_maps = num_maps;
@@ -958,9 +989,10 @@
 	 */
 	def_thread.ddir = DDIR_READ;
 	def_thread.iomix = 0;
-	def_thread.bs = DEF_BS;
-	def_thread.min_bs = 0;
-	def_thread.max_bs = 0;
+	def_thread.bs[DDIR_READ] = DEF_BS;
+	def_thread.bs[DDIR_WRITE] = DEF_BS;
+	def_thread.min_bs[DDIR_READ] = def_thread.min_bs[DDIR_WRITE] = 0;
+	def_thread.max_bs[DDIR_READ] = def_thread.max_bs[DDIR_WRITE] = 0;
 	def_thread.odirect = DEF_ODIRECT;
 	def_thread.ratecycle = DEF_RATE_CYCLE;
 	def_thread.sequential = DEF_SEQUENTIAL;
diff --git a/io_u.c b/io_u.c
index 3000ea7..7698e84 100644
--- a/io_u.c
+++ b/io_u.c
@@ -27,10 +27,13 @@
 static void mark_random_map(struct thread_data *td, struct fio_file *f,
 			    struct io_u *io_u)
 {
-	unsigned long long block = io_u->offset / (unsigned long long) td->min_bs;
-	unsigned int blocks = 0;
+	unsigned int min_bs = td->min_bs[io_u->ddir];
+	unsigned long long block;
+	unsigned int blocks;
 
-	while (blocks < (io_u->buflen / td->min_bs)) {
+	block = io_u->offset / (unsigned long long) min_bs;
+	blocks = 0;
+	while (blocks < (io_u->buflen / min_bs)) {
 		unsigned int idx, bit;
 
 		if (!random_map_free(td, f, block))
@@ -46,8 +49,8 @@
 		blocks++;
 	}
 
-	if ((blocks * td->min_bs) < io_u->buflen)
-		io_u->buflen = blocks * td->min_bs;
+	if ((blocks * min_bs) < io_u->buflen)
+		io_u->buflen = blocks * min_bs;
 }
 
 /*
@@ -60,7 +63,7 @@
 
 	*b = 0;
 	i = 0;
-	while ((*b) * td->min_bs < f->file_size) {
+	while ((*b) * td->rw_min_bs < f->file_size) {
 		if (f->file_map[i] != -1UL) {
 			*b += ffz(f->file_map[i]);
 			return 0;
@@ -79,13 +82,13 @@
  * the last io issued.
  */
 static int get_next_offset(struct thread_data *td, struct fio_file *f,
-			   unsigned long long *offset)
+			   unsigned long long *offset, int ddir)
 {
 	unsigned long long b, rb;
 	long r;
 
 	if (!td->sequential) {
-		unsigned long long max_blocks = td->io_size / td->min_bs;
+		unsigned long long max_blocks = td->io_size / td->min_bs[ddir];
 		int loops = 50;
 
 		do {
@@ -93,7 +96,7 @@
 			b = ((max_blocks - 1) * r / (unsigned long long) (RAND_MAX+1.0));
 			if (td->norandommap)
 				break;
-			rb = b + (f->file_offset / td->min_bs);
+			rb = b + (f->file_offset / td->min_bs[ddir]);
 			loops--;
 		} while (!random_map_free(td, f, rb) && loops);
 
@@ -102,30 +105,30 @@
 				return 1;
 		}
 	} else
-		b = f->last_pos / td->min_bs;
+		b = f->last_pos / td->min_bs[ddir];
 
-	*offset = (b * td->min_bs) + f->file_offset;
+	*offset = (b * td->min_bs[ddir]) + f->file_offset;
 	if (*offset > f->file_size)
 		return 1;
 
 	return 0;
 }
 
-static unsigned int get_next_buflen(struct thread_data *td)
+static unsigned int get_next_buflen(struct thread_data *td, int ddir)
 {
 	unsigned int buflen;
 	long r;
 
-	if (td->min_bs == td->max_bs)
-		buflen = td->min_bs;
+	if (td->min_bs[ddir] == td->max_bs[ddir])
+		buflen = td->min_bs[ddir];
 	else {
 		r = os_random_long(&td->bsrange_state);
-		buflen = (1 + (double) (td->max_bs - 1) * r / (RAND_MAX + 1.0));
+		buflen = (1 + (double) (td->max_bs[ddir] - 1) * r / (RAND_MAX + 1.0));
 		if (!td->bs_unaligned)
-			buflen = (buflen + td->min_bs - 1) & ~(td->min_bs - 1);
+			buflen = (buflen + td->min_bs[ddir] - 1) & ~(td->min_bs[ddir] - 1);
 	}
 
-	if (buflen > td->io_size - td->this_io_bytes[td->ddir]) {
+	if (buflen > td->io_size - td->this_io_bytes[ddir]) {
 		/*
 		 * if using direct/raw io, we may not be able to
 		 * shrink the size. so just fail it.
@@ -133,7 +136,7 @@
 		if (td->io_ops->flags & FIO_RAWIO)
 			return 0;
 
-		buflen = td->io_size - td->this_io_bytes[td->ddir];
+		buflen = td->io_size - td->this_io_bytes[ddir];
 	}
 
 	return buflen;
@@ -202,15 +205,14 @@
 		return 0;
 	}
 
+	io_u->ddir = get_rw_ddir(td);
+
 	/*
 	 * No log, let the seq/rand engine retrieve the next position.
 	 */
-	if (!get_next_offset(td, f, &io_u->offset)) {
-		io_u->buflen = get_next_buflen(td);
-
+	if (!get_next_offset(td, f, &io_u->offset, io_u->ddir)) {
+		io_u->buflen = get_next_buflen(td, io_u->ddir);
 		if (io_u->buflen) {
-			io_u->ddir = get_rw_ddir(td);
-
 			/*
 			 * If using a write iolog, store this entry.
 			 */
diff --git a/log.c b/log.c
index b151164..a112a31 100644
--- a/log.c
+++ b/log.c
@@ -125,9 +125,9 @@
 		INIT_LIST_HEAD(&ipo->list);
 		ipo->offset = offset;
 		ipo->len = bytes;
-		if (bytes > td->max_bs)
-			td->max_bs = bytes;
 		ipo->ddir = rw;
+		if (bytes > td->max_bs[rw])
+			td->max_bs[rw] = bytes;
 		list_add_tail(&ipo->list, &td->io_log_list);
 	}
 
@@ -195,7 +195,7 @@
 		return -1;
 	}
 
-	nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
+	nr_reads_per_sec = (td->rate * 1024) / td->min_bs[DDIR_READ];
 	td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
 	td->rate_pending_usleep = 0;
 	return 0;
diff --git a/time.c b/time.c
index ad5ee3f..5d3dee2 100644
--- a/time.c
+++ b/time.c
@@ -99,14 +99,14 @@
 }
 
 void rate_throttle(struct thread_data *td, unsigned long time_spent,
-		   unsigned int bytes)
+		   unsigned int bytes, int ddir)
 {
 	unsigned long usec_cycle;
 
 	if (!td->rate)
 		return;
 
-	usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs);
+	usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs[ddir]);
 
 	if (time_spent < usec_cycle) {
 		unsigned long s = usec_cycle - time_spent;