Add support for the iomem_align option
This allows detailed control of the alignment of the IO buffers
that fio uses.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index f323c20..3107d3a 100644
--- a/HOWTO
+++ b/HOWTO
@@ -704,6 +704,15 @@
location should point there. So if it's mounted in /huge,
you would use mem=mmaphuge:/huge/somefile.
+iomem_align=int This indicates the memory alignment of the IO memory buffers.
+ Note that the given alignment is applied to the first IO unit
+ buffer; if using iodepth, the alignment of the following buffers
+ is given by the bs used. In other words, if using a bs that is
+ a multiple of the page size in the system, all buffers will
+ be aligned to this value. If using a bs that is not page
+ aligned, the alignment of subsequent IO memory buffers is the
+ sum of the iomem_align and bs used.
+
hugepage-size=int
Defines the size of a huge page. Must at least be equal
to the system setting, see /proc/meminfo. Defaults to 4MiB.
diff --git a/fio.1 b/fio.1
index fc055e4..aa7b9d6 100644
--- a/fio.1
+++ b/fio.1
@@ -512,6 +512,15 @@
have hugetlbfs mounted, and \fIfile\fR must point there.
.RE
.TP
+.BI iomem_align \fR=\fPint
+This indicates the memory alignment of the IO memory buffers. Note that the
+given alignment is applied to the first IO unit buffer; if using \fBiodepth\fR,
+the alignment of the following buffers is given by the \fBbs\fR used. In
+other words, if using a \fBbs\fR that is a multiple of the page size in the
+system, all buffers will be aligned to this value. If using a \fBbs\fR that
+is not page aligned, the alignment of subsequent IO memory buffers is the
+sum of the \fBiomem_align\fR and \fBbs\fR used.
+.TP
.BI hugepage\-size \fR=\fPint
Defines the size of a huge page. Must be at least equal to the system setting.
Should be a multiple of 1MiB. Default: 4MiB.
diff --git a/fio.c b/fio.c
index 4927f1c..63d9a1b 100644
--- a/fio.c
+++ b/fio.c
@@ -42,7 +42,8 @@
unsigned long page_mask;
unsigned long page_size;
-#define ALIGN(buf) \
+
+#define PAGE_ALIGN(buf) \
(char *) (((unsigned long) (buf) + page_mask) & ~page_mask)
int groupid = 0;
@@ -790,8 +791,8 @@
if (allocate_io_mem(td))
return 1;
- if (td->o.odirect)
- p = ALIGN(td->orig_buffer);
+ if (td->o.odirect || td->o.mem_align) /* O_DIRECT must stay aligned */
+ p = PAGE_ALIGN(td->orig_buffer) + td->o.mem_align;
else
p = td->orig_buffer;
@@ -811,9 +812,11 @@
io_u = ptr;
memset(io_u, 0, sizeof(*io_u));
INIT_FLIST_HEAD(&io_u->list);
+ dprint(FD_MEM, "io_u alloc %p, index %u\n", io_u, i);
if (!(td->io_ops->flags & FIO_NOIO)) {
io_u->buf = p + max_bs * i;
+ dprint(FD_MEM, "io_u %p, mem %p\n", io_u, io_u->buf);
if (td_write(td) && !td->o.refill_buffers)
io_u_fill_buffer(td, io_u, max_bs);
diff --git a/fio.h b/fio.h
index 71ae71d..b19101c 100644
--- a/fio.h
+++ b/fio.h
@@ -202,6 +202,7 @@
unsigned long long zone_size;
unsigned long long zone_skip;
enum fio_memtype mem_type;
+ unsigned int mem_align;
unsigned int stonewall;
unsigned int new_group;
@@ -655,4 +656,9 @@
return ret;
}
+static inline int is_power_of_2(unsigned int val)
+{
+ return (val != 0 && ((val & (val - 1)) == 0));
+}
+
#endif
diff --git a/init.c b/init.c
index 02e544a..84c312f 100644
--- a/init.c
+++ b/init.c
@@ -367,6 +367,14 @@
if (td->o.pre_read)
td->o.invalidate_cache = 0;
+ if (td->o.mem_align) {
+ if (td->o.odirect && !is_power_of_2(td->o.mem_align)) {
+ log_err("fio: given IO mem alignment conflicts with"
+ " direct=1. Resetting.\n");
+ td->o.mem_align = page_mask;
+ }
+ }
+
return 0;
}
diff --git a/io_u.c b/io_u.c
index 41b5cdb..2e9dac0 100644
--- a/io_u.c
+++ b/io_u.c
@@ -231,11 +231,6 @@
return 0;
}
-static inline int is_power_of_2(unsigned int val)
-{
- return (val != 0 && ((val & (val - 1)) == 0));
-}
-
static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
{
const int ddir = io_u->ddir;
diff --git a/memory.c b/memory.c
index d3ef2cb..00339e4 100644
--- a/memory.c
+++ b/memory.c
@@ -188,8 +188,12 @@
return 0;
total_mem = td->orig_buffer_size;
- if (td->o.odirect)
+
+ if (td->o.odirect || td->o.mem_align) {
total_mem += page_mask;
+ if (td->o.mem_align && td->o.mem_align > page_size)
+ total_mem += td->o.mem_align - page_size;
+ }
if (td->o.mem_type == MEM_MALLOC)
ret = alloc_mem_malloc(td, total_mem);
diff --git a/options.c b/options.c
index 394472a..7135231 100644
--- a/options.c
+++ b/options.c
@@ -1037,6 +1037,16 @@
},
},
{
+ .name = "iomem_align",
+ .alias = "mem_align",
+ .type = FIO_OPT_INT,
+ .off1 = td_var_offset(mem_align),
+ .minval = 0,
+ .help = "IO memory buffer offset alignment",
+ .def = "0",
+ .parent = "iomem",
+ },
+ {
.name = "verify",
.type = FIO_OPT_STR,
.off1 = td_var_offset(verify),