Add support for blkio cgroups on Linux
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index 9b3a684..7a7d14e 100644
--- a/HOWTO
+++ b/HOWTO
@@ -994,6 +994,7 @@
for doing these time calls will be excluded from other
uses. Fio will manually clear it from the CPU mask of other
jobs.
+
continue_on_error=bool Normally fio will exit the job on the first observed
failure. If this option is set, fio will continue the job when
there is a 'non-fatal error' (EIO or EILSEQ) until the runtime
@@ -1003,6 +1004,21 @@
given in the stats is the first error that was hit during the
run.
+cgroup_root=str Root of the mounted blkio cgroup file systems. This is a Linux
+ specific IO controller. If your system doesn't have it mounted,
+ you can do so with:
+
+ # mount -t cgroup -o blkio none /cgroup
+
+ The cgroup_root defaults to /cgroup, if mounted elsewhere
+ please specify this option.
+
+cgroup=str Add job to this control group, creating it if it doesn't
+		exist. If not given, the job name is used as the group name.
+
+cgroup_weight=int Set the weight of the cgroup to this value. A cgroup is
+		only set up for the job when this option is given. See the
+		kernel documentation; allowed values are in the range 100..1000.
6.0 Interpreting the output
---------------------------
diff --git a/Makefile b/Makefile
index 4f95a5d..ce63cfc 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,8 @@
SCRIPTS = fio_generate_plots
OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o filesetup.o \
eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
- rbtree.o diskutil.o fifo.o blktrace.o smalloc.o filehash.o helpers.o
+ rbtree.o diskutil.o fifo.o blktrace.o smalloc.o filehash.o helpers.o \
+ cgroup.o
OBJS += crc/crc7.o
OBJS += crc/crc16.o
diff --git a/cgroup.c b/cgroup.c
new file mode 100644
index 0000000..15641e6
--- /dev/null
+++ b/cgroup.c
@@ -0,0 +1,176 @@
+/*
+ * Code related to setting up a blkio cgroup
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "fio.h"
+#include "cgroup.h"
+
+/*
+ * Build the path of the job's cgroup directory: "<cgroup_root>/<group>",
+ * where <group> is the cgroup option if set and the job name otherwise.
+ *
+ * Returns a malloc()ed string that the caller must free(), or NULL if the
+ * allocation failed. The buffer is sized from its inputs, so arbitrarily
+ * long roots and names cannot overflow it.
+ */
+static char *get_cgroup_root(struct thread_data *td)
+{
+	const char *group = td->o.cgroup ? td->o.cgroup : td->o.name;
+	size_t len = strlen(td->o.cgroup_root) + strlen(group) + 2;
+	char *str = malloc(len);
+
+	if (str)
+		snprintf(str, len, "%s/%s", td->o.cgroup_root, group);
+
+	return str;
+}
+
+/*
+ * Write 'val' in decimal to the file 'name' inside directory 'dir'.
+ *
+ * Returns 0 on success, or a positive errno value if the path does not fit
+ * in PATH_MAX or if opening, writing or closing the file failed. The close
+ * result is checked because stdio buffers the data, so a value the kernel
+ * rejects is typically only reported when the stream is flushed.
+ */
+static int cgroup_write_int(const char *dir, const char *name, long val)
+{
+	char path[PATH_MAX];
+	FILE *f;
+	int len, err = 0;
+
+	len = snprintf(path, sizeof(path), "%s/%s", dir, name);
+	if (len < 0 || (size_t) len >= sizeof(path))
+		return ENAMETOOLONG;
+
+	f = fopen(path, "w");
+	if (!f)
+		return errno;
+
+	if (fprintf(f, "%ld", val) < 0)
+		err = errno;
+	/* always close, but report the first error seen */
+	if (fclose(f) && !err)
+		err = errno;
+
+	return err;
+}
+
+/*
+ * Add pid to given class
+ *
+ * Returns 0 on success. On failure the error is recorded on 'td' and 1 is
+ * returned.
+ */
+static int cgroup_add_pid(struct thread_data *td)
+{
+	char *root;
+	int err;
+
+	root = get_cgroup_root(td);
+	if (!root) {
+		td_verror(td, ENOMEM, "cgroup malloc");
+		return 1;
+	}
+
+	err = cgroup_write_int(root, "tasks", td->pid);
+	free(root);
+	if (err) {
+		td_verror(td, err, "cgroup open tasks");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Move pid to root class
+ *
+ * Returns 0 on success. On failure the error is recorded on 'td' and 1 is
+ * returned.
+ */
+static int cgroup_del_pid(struct thread_data *td)
+{
+	int err;
+
+	err = cgroup_write_int(td->o.cgroup_root, "tasks", td->pid);
+	if (err) {
+		td_verror(td, err, "cgroup open tasks");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Create the job's cgroup if needed, set its blkio weight from the
+ * cgroup_weight option and move the job's pid into it.
+ *
+ * Returns 0 on success. On failure the error is recorded on 'td' and 1 is
+ * returned.
+ */
+int cgroup_setup(struct thread_data *td)
+{
+	char *root;
+	int err;
+
+	root = get_cgroup_root(td);
+	if (!root) {
+		td_verror(td, ENOMEM, "cgroup malloc");
+		return 1;
+	}
+
+	/*
+	 * Create container, if it doesn't exist. Remember whether we made
+	 * it, so that cgroup_shutdown() only removes what fio created.
+	 */
+	if (mkdir(root, 0755) < 0) {
+		err = errno;
+		if (err != EEXIST) {
+			free(root);
+			td_verror(td, err, "cgroup mkdir");
+			return 1;
+		}
+	} else
+		td->o.cgroup_was_created = 1;
+
+	err = cgroup_write_int(root, "blkio.weight", td->o.cgroup_weight);
+	free(root);
+	if (err) {
+		td_verror(td, err, "cgroup open weight");
+		return 1;
+	}
+
+	return cgroup_add_pid(td);
+}
+
+/*
+ * Undo cgroup_setup(): move the pid back to the root group and remove the
+ * job's cgroup directory if cgroup_setup() created it. Does nothing when
+ * no cgroup_weight was set.
+ */
+void cgroup_shutdown(struct thread_data *td)
+{
+	char *root;
+
+	if (!td->o.cgroup_weight)
+		return;
+
+	cgroup_del_pid(td);
+
+	if (!td->o.cgroup_was_created)
+		return;
+
+	root = get_cgroup_root(td);
+	if (root) {
+		rmdir(root);
+		free(root);
+	}
+}
diff --git a/cgroup.h b/cgroup.h
new file mode 100644
index 0000000..65fa3ad
--- /dev/null
+++ b/cgroup.h
@@ -0,0 +1,37 @@
+#ifndef FIO_CGROUP_H
+#define FIO_CGROUP_H
+
+/*
+ * blkio cgroup setup/teardown for a job. Builds that do not define
+ * FIO_HAVE_CGROUPS get the inline stubs below instead of cgroup.c.
+ */
+
+/* Lets the prototypes stand on their own if fio.h was not included yet */
+struct thread_data;
+
+#ifdef FIO_HAVE_CGROUPS
+
+/* Set up the cgroup for this job. Returns 0 on success, 1 on failure. */
+int cgroup_setup(struct thread_data *td);
+/* Undo cgroup_setup() for this job. */
+void cgroup_shutdown(struct thread_data *td);
+
+#else
+
+/*
+ * No cgroup support: record EINVAL on the job and fail. Uses td_verror(),
+ * so its definition (presumably from fio.h) must already be in scope here.
+ */
+static inline int cgroup_setup(struct thread_data *td)
+{
+	td_verror(td, EINVAL, "cgroup_setup");
+	return 1;
+}
+
+/* Nothing was set up, so there is nothing to undo. */
+static inline void cgroup_shutdown(struct thread_data *td)
+{
+}
+
+#endif
+#endif
diff --git a/fio.1 b/fio.1
index 4445d0a..648b4e9 100644
--- a/fio.1
+++ b/fio.1
@@ -725,13 +725,30 @@
 these time calls will be excluded from other uses. Fio will manually clear it
 from the CPU mask of other jobs.
 .TP
 .BI continue_on_error \fR=\fPbool
 Normally fio will exit the job on the first observed failure. If this option is
 set, fio will continue the job when there is a 'non-fatal error'
 (\fBEIO\fR or \fBEILSEQ\fR) until the runtime is exceeded or the I/O size
 specified is completed. If this option is used, there are two more stats that
 are appended, the total error count and the first error. The error field given
 in the stats is the first error that was hit during the run.
+.TP
+.BI cgroup_root \fR=\fPstr
+Root of the mounted blkio cgroup file systems. This is a Linux
+specific IO controller. If your system doesn't have it mounted,
+you can do so with:
+
+# mount -t cgroup -o blkio none /cgroup
+
+The cgroup_root defaults to /cgroup, if mounted elsewhere please specify this
+option.
+.TP
+.BI cgroup \fR=\fPstr
+Add job to this control group (default: the job name), creating it if needed.
+.TP
+.BI cgroup_weight \fR=\fPint
+Set the weight of the cgroup to this value. A cgroup is only set up when this
+option is given. See the kernel documentation; the allowed range is 100..1000.
.SH OUTPUT
While running, \fBfio\fR will display the status of the created jobs. For
example:
diff --git a/fio.c b/fio.c
index 434b503..4bbab5a 100644
--- a/fio.c
+++ b/fio.c
@@ -39,6 +39,7 @@
#include "smalloc.h"
#include "verify.h"
#include "diskutil.h"
+#include "cgroup.h"
unsigned long page_mask;
unsigned long page_size;
@@ -1075,6 +1076,9 @@
}
}
+ if (td->o.cgroup_weight && cgroup_setup(td))
+ goto err;
+
if (nice(td->o.nice) == -1) {
td_verror(td, errno, "nice");
goto err;
@@ -1204,6 +1208,7 @@
close_and_free_files(td);
close_ioengine(td);
cleanup_io_u(td);
+ cgroup_shutdown(td);
if (td->o.cpumask_set) {
int ret = fio_cpuset_exit(&td->o.cpumask);
diff --git a/fio.h b/fio.h
index 214fbd2..aa5124c 100644
--- a/fio.h
+++ b/fio.h
@@ -271,6 +271,14 @@
* Benchmark profile type
*/
unsigned int profile;
+
+ /*
+ * blkio cgroup support
+ */
+ char *cgroup_root;
+ char *cgroup;
+ unsigned int cgroup_weight;
+ unsigned int cgroup_was_created;
};
#define FIO_VERROR_SIZE 128
diff --git a/options.c b/options.c
index ff27765..cb6337c 100644
--- a/options.c
+++ b/options.c
@@ -1727,6 +1727,28 @@
.help = "Select a specific builtin performance test",
},
{
+ .name = "cgroup_root",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = td_var_offset(cgroup_root),
+ .help = "Root of mounted blkio cgroup",
+ .def = "/cgroup",
+ },
+ {
+ .name = "cgroup",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = td_var_offset(cgroup),
+ .help = "Add job to cgroup of this name",
+ },
+ {
+ .name = "cgroup_weight",
+ .type = FIO_OPT_INT,
+ .off1 = td_var_offset(cgroup_weight),
+ .help = "Use given weight for cgroup",
+ .minval = 100,
+ .maxval = 1000,
+ .def = "0",
+ },
+ {
.name = NULL,
},
};
diff --git a/os/os-linux.h b/os/os-linux.h
index e4c4c3f..ac42264 100644
--- a/os/os-linux.h
+++ b/os/os-linux.h
@@ -31,6 +31,7 @@
#define FIO_HAVE_POSIXAIO_FSYNC
#define FIO_HAVE_PSHARED_MUTEX
#define FIO_HAVE_CL_SIZE
+#define FIO_HAVE_CGROUPS
#define OS_MAP_ANON MAP_ANONYMOUS