Add support for O_ATOMIC
O_ATOMIC makes writes atomic: once a write has been acknowledged by
the device and OS, it is fully stable on media, even in the event of
a power cut.
This only truly works on Linux with the pending patches to
add O_ATOMIC.
Updated by Jens to:
- Add man page and HOWTO description of the option
- Make O_ATOMIC imply O_DIRECT, so that it actually works if you
don't set O_DIRECT manually.
- Add the option to the conversion list so it works for
client/server.
- Add error handling so that if atomic=1 is set and the OS does not
support it, fio errors out instead of just pretending it works.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/HOWTO b/HOWTO
index eb2ed25..250bc58 100644
--- a/HOWTO
+++ b/HOWTO
@@ -695,6 +695,11 @@
O_DIRECT. Note that ZFS on Solaris doesn't support direct io.
On Windows the synchronous ioengines don't support direct io.
+atomic=bool If value is true, attempt to use atomic direct IO (implies
+ direct=1). Atomic writes are guaranteed to be stable once
+ acknowledged by the operating system. Only Linux supports
+ O_ATOMIC right now.
+
buffered=bool If value is true, use buffered io. This is the opposite
of the 'direct' option. Defaults to true.
diff --git a/backend.c b/backend.c
index 00a23db..2ec478c 100644
--- a/backend.c
+++ b/backend.c
@@ -926,7 +926,8 @@
* overflow later. this adjustment may be too much if we get
* lucky and the allocator gives us an aligned address.
*/
- if (td->o.odirect || td->o.mem_align || (td->io_ops->flags & FIO_RAWIO))
+ if (td->o.odirect || td->o.mem_align || td->o.oatomic ||
+ (td->io_ops->flags & FIO_RAWIO))
td->orig_buffer_size += page_mask + td->o.mem_align;
if (td->o.mem_type == MEM_SHMHUGE || td->o.mem_type == MEM_MMAPHUGE) {
@@ -944,7 +945,7 @@
if (data_xfer && allocate_io_mem(td))
return 1;
- if (td->o.odirect || td->o.mem_align ||
+ if (td->o.odirect || td->o.mem_align || td->o.oatomic ||
(td->io_ops->flags & FIO_RAWIO))
p = PAGE_ALIGN(td->orig_buffer) + td->o.mem_align;
else
diff --git a/cconv.c b/cconv.c
index 21e3a51..82383b2 100644
--- a/cconv.c
+++ b/cconv.c
@@ -89,6 +89,7 @@
o->open_files = le32_to_cpu(top->open_files);
o->file_lock_mode = le32_to_cpu(top->file_lock_mode);
o->odirect = le32_to_cpu(top->odirect);
+ o->oatomic = le32_to_cpu(top->oatomic);
o->invalidate_cache = le32_to_cpu(top->invalidate_cache);
o->create_serialize = le32_to_cpu(top->create_serialize);
o->create_fsync = le32_to_cpu(top->create_fsync);
@@ -252,6 +253,7 @@
top->open_files = cpu_to_le32(o->open_files);
top->file_lock_mode = cpu_to_le32(o->file_lock_mode);
top->odirect = cpu_to_le32(o->odirect);
+ top->oatomic = cpu_to_le32(o->oatomic);
top->invalidate_cache = cpu_to_le32(o->invalidate_cache);
top->create_serialize = cpu_to_le32(o->create_serialize);
top->create_fsync = cpu_to_le32(o->create_fsync);
diff --git a/filesetup.c b/filesetup.c
index 4265e38..c9b060b 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -519,6 +519,13 @@
goto skip_flags;
if (td->o.odirect)
flags |= OS_O_DIRECT;
+ if (td->o.oatomic) {
+ if (!FIO_O_ATOMIC) {
+ td_verror(td, EINVAL, "OS does not support atomic IO");
+ return 1;
+ }
+ flags |= OS_O_DIRECT | FIO_O_ATOMIC;
+ }
if (td->o.sync_io)
flags |= O_SYNC;
if (td->o.create_on_open)
diff --git a/fio.1 b/fio.1
index e910e01..15a1ac5 100644
--- a/fio.1
+++ b/fio.1
@@ -569,6 +569,11 @@
.BI direct \fR=\fPbool
If true, use non-buffered I/O (usually O_DIRECT). Default: false.
.TP
+.BI atomic \fR=\fPbool
+If value is true, attempt to use atomic direct IO (implies \fBdirect\fR). Atomic
+writes are guaranteed to be stable once acknowledged by the operating system.
+Only Linux supports O_ATOMIC right now.
+.TP
.BI buffered \fR=\fPbool
If true, use buffered I/O. This is the opposite of the \fBdirect\fR parameter.
Default: true.
diff --git a/init.c b/init.c
index b45b039..1841ffc 100644
--- a/init.c
+++ b/init.c
@@ -629,6 +629,12 @@
ret = 1;
}
+ /*
+ * O_ATOMIC implies O_DIRECT
+ */
+ if (td->o.oatomic)
+ td->o.odirect = 1;
+
return ret;
}
diff --git a/memory.c b/memory.c
index e06cab2..b208320 100644
--- a/memory.c
+++ b/memory.c
@@ -209,7 +209,7 @@
total_mem = td->orig_buffer_size;
- if (td->o.odirect || td->o.mem_align ||
+ if (td->o.odirect || td->o.mem_align || td->o.oatomic ||
(td->io_ops->flags & FIO_MEMALIGN)) {
total_mem += page_mask;
if (td->o.mem_align && td->o.mem_align > page_size)
@@ -240,7 +240,7 @@
unsigned int total_mem;
total_mem = td->orig_buffer_size;
- if (td->o.odirect)
+ if (td->o.odirect || td->o.oatomic)
total_mem += page_mask;
if (td->o.mem_type == MEM_MALLOC)
diff --git a/options.c b/options.c
index f26ff77..4b4c251 100644
--- a/options.c
+++ b/options.c
@@ -1893,6 +1893,16 @@
.group = FIO_OPT_G_IO_TYPE,
},
{
+ .name = "atomic",
+ .lname = "Atomic I/O",
+ .type = FIO_OPT_BOOL,
+ .off1 = td_var_offset(oatomic),
+ .help = "Use Atomic IO with O_DIRECT (implies O_DIRECT)",
+ .def = "0",
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_IO_TYPE,
+ },
+ {
.name = "buffered",
.lname = "Buffered I/O",
.type = FIO_OPT_BOOL,
diff --git a/os/os-linux.h b/os/os-linux.h
index 869a25d..5d1d62d 100644
--- a/os/os-linux.h
+++ b/os/os-linux.h
@@ -196,6 +196,12 @@
#define FIO_O_NOATIME 0
#endif
+#ifdef O_ATOMIC
+#define OS_O_ATOMIC O_ATOMIC
+#else
+#define OS_O_ATOMIC 040000000
+#endif
+
#ifdef MADV_REMOVE
#define FIO_MADV_FREE MADV_REMOVE
#endif
diff --git a/os/os.h b/os/os.h
index 4416ae4..715f226 100644
--- a/os/os.h
+++ b/os/os.h
@@ -90,6 +90,12 @@
#define OS_O_DIRECT O_DIRECT
#endif
+#ifdef OS_O_ATOMIC
+#define FIO_O_ATOMIC OS_O_ATOMIC
+#else
+#define FIO_O_ATOMIC 0
+#endif
+
#ifndef FIO_HAVE_HUGETLB
#define SHM_HUGETLB 0
#define MAP_HUGETLB 0
diff --git a/server.h b/server.h
index 5d9b6cc..405370e 100644
--- a/server.h
+++ b/server.h
@@ -38,7 +38,7 @@
};
enum {
- FIO_SERVER_VER = 26,
+ FIO_SERVER_VER = 27,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
diff --git a/thread_options.h b/thread_options.h
index 484b16a..44cbf91 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -72,6 +72,7 @@
enum file_lock_mode file_lock_mode;
unsigned int odirect;
+ unsigned int oatomic;
unsigned int invalidate_cache;
unsigned int create_serialize;
unsigned int create_fsync;
@@ -286,6 +287,7 @@
uint32_t file_lock_mode;
uint32_t odirect;
+ uint32_t oatomic;
uint32_t invalidate_cache;
uint32_t create_serialize;
uint32_t create_fsync;