Add support for the iomem_align option

This allows detailed control of the alignment of the IO buffers
that fio uses.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index f323c20..3107d3a 100644
--- a/HOWTO
+++ b/HOWTO
@@ -704,6 +704,15 @@
 		location should point there. So if it's mounted in /huge,
 		you would use mem=mmaphuge:/huge/somefile.
 
+iomem_align=int	This indicates the memory alignment of the IO memory buffers.
+		Note that the given alignment is applied to the first IO unit
+		buffer; if using iodepth, the alignment of the following buffers
+		is given by the bs used. In other words, if using a bs that is
+		a multiple of the page size in the system, all buffers will
+		be aligned to this value. If using a bs that is not page
+		aligned, the alignment of subsequent IO memory buffers is the
+		sum of the iomem_align and bs used.
+
 hugepage-size=int
 		Defines the size of a huge page. Must at least be equal
 		to the system setting, see /proc/meminfo. Defaults to 4MiB.
diff --git a/fio.1 b/fio.1
index fc055e4..aa7b9d6 100644
--- a/fio.1
+++ b/fio.1
@@ -512,6 +512,15 @@
 have hugetlbfs mounted, and \fIfile\fR must point there.
 .RE
 .TP
+.BI iomem_align \fR=\fPint
+This indicates the memory alignment of the IO memory buffers. Note that the
+given alignment is applied to the first IO unit buffer; if using \fBiodepth\fR,
+the alignment of the following buffers is given by the \fBbs\fR used. In
+other words, if using a \fBbs\fR that is a multiple of the page size in the
+system, all buffers will be aligned to this value. If using a \fBbs\fR that
+is not page aligned, the alignment of subsequent IO memory buffers is the
+sum of the \fBiomem_align\fR and \fBbs\fR used.
+.TP
 .BI hugepage\-size \fR=\fPint
 Defines the size of a huge page.  Must be at least equal to the system setting.
 Should be a multiple of 1MiB. Default: 4MiB.
diff --git a/fio.c b/fio.c
index 4927f1c..63d9a1b 100644
--- a/fio.c
+++ b/fio.c
@@ -42,7 +42,8 @@
 
 unsigned long page_mask;
 unsigned long page_size;
-#define ALIGN(buf)	\
+
+#define PAGE_ALIGN(buf)	\
 	(char *) (((unsigned long) (buf) + page_mask) & ~page_mask)
 
 int groupid = 0;
@@ -790,8 +791,8 @@
 	if (allocate_io_mem(td))
 		return 1;
 
-	if (td->o.odirect)
-		p = ALIGN(td->orig_buffer);
+	if (td->o.odirect || td->o.mem_align) /* O_DIRECT needs alignment */
+		p = PAGE_ALIGN(td->orig_buffer) + td->o.mem_align;
 	else
 		p = td->orig_buffer;
 
@@ -811,9 +812,11 @@
 		io_u = ptr;
 		memset(io_u, 0, sizeof(*io_u));
 		INIT_FLIST_HEAD(&io_u->list);
+		dprint(FD_MEM, "io_u alloc %p, index %u\n", io_u, i);
 
 		if (!(td->io_ops->flags & FIO_NOIO)) {
 			io_u->buf = p + max_bs * i;
+			dprint(FD_MEM, "io_u %p, mem %p\n", io_u, io_u->buf);
 
 			if (td_write(td) && !td->o.refill_buffers)
 				io_u_fill_buffer(td, io_u, max_bs);
diff --git a/fio.h b/fio.h
index 71ae71d..b19101c 100644
--- a/fio.h
+++ b/fio.h
@@ -202,6 +202,7 @@
 	unsigned long long zone_size;
 	unsigned long long zone_skip;
 	enum fio_memtype mem_type;
+	unsigned int mem_align;
 
 	unsigned int stonewall;
 	unsigned int new_group;
@@ -655,4 +656,9 @@
 	return ret;
 }
 
+static inline int is_power_of_2(unsigned int val)
+{
+	return (val != 0 && ((val & (val - 1)) == 0));
+}
+
 #endif
diff --git a/init.c b/init.c
index 02e544a..84c312f 100644
--- a/init.c
+++ b/init.c
@@ -367,6 +367,14 @@
 	if (td->o.pre_read)
 		td->o.invalidate_cache = 0;
 
+	if (td->o.mem_align) {
+		if (td->o.odirect && !is_power_of_2(td->o.mem_align)) {
+			log_err("fio: given IO mem alignment conflicts with"
+					" direct=1. Resetting.\n");
+			td->o.mem_align = page_mask;
+		}
+	}
+			
 	return 0;
 }
 
diff --git a/io_u.c b/io_u.c
index 41b5cdb..2e9dac0 100644
--- a/io_u.c
+++ b/io_u.c
@@ -231,11 +231,6 @@
 	return 0;
 }
 
-static inline int is_power_of_2(unsigned int val)
-{
-	return (val != 0 && ((val & (val - 1)) == 0));
-}
-
 static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
 {
 	const int ddir = io_u->ddir;
diff --git a/memory.c b/memory.c
index d3ef2cb..00339e4 100644
--- a/memory.c
+++ b/memory.c
@@ -188,8 +188,12 @@
 		return 0;
 
 	total_mem = td->orig_buffer_size;
-	if (td->o.odirect)
+
+	if (td->o.odirect || td->o.mem_align) {
 		total_mem += page_mask;
+		if (td->o.mem_align && td->o.mem_align > page_size)
+			total_mem += td->o.mem_align - page_size;
+	}
 
 	if (td->o.mem_type == MEM_MALLOC)
 		ret = alloc_mem_malloc(td, total_mem);
diff --git a/options.c b/options.c
index 394472a..7135231 100644
--- a/options.c
+++ b/options.c
@@ -1037,6 +1037,16 @@
 		  },
 	},
 	{
+		.name	= "iomem_align",
+		.alias	= "mem_align",
+		.type	= FIO_OPT_INT,
+		.off1	= td_var_offset(mem_align),
+		.minval	= 0,
+		.help	= "IO memory buffer offset alignment",
+		.def	= "0",
+		.parent	= "iomem",
+	},
+	{
 		.name	= "verify",
 		.type	= FIO_OPT_STR,
 		.off1	= td_var_offset(verify),