Add verify trim support
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
diff --git a/Makefile b/Makefile
index 288480f..9fec137 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@
OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o filesetup.o \
eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
rbtree.o diskutil.o fifo.o blktrace.o smalloc.o filehash.o helpers.o \
- cgroup.o profile.o debug.o
+ cgroup.o profile.o debug.o trim.o
OBJS += lib/rand.o
OBJS += lib/flist_sort.o
@@ -62,7 +62,7 @@
all: depend $(PROGS) $(SCRIPTS)
clean:
- -rm -f .depend cscope.out $(OBJS) $(PROGS) core.* core
+	-rm -f .depend cscope.out $(OBJS) $(PROGS) core.* core
cscope:
@cscope -b -R
diff --git a/blktrace.c b/blktrace.c
index 9ce4ae2..297a8a9 100644
--- a/blktrace.c
+++ b/blktrace.c
@@ -168,6 +168,7 @@
struct io_piece *ipo;
ipo = calloc(1, sizeof(*ipo));
+ init_ipo(ipo);
ipo->ddir = DDIR_INVAL;
ipo->fileno = fileno;
@@ -215,8 +216,8 @@
{
struct io_piece *ipo = malloc(sizeof(*ipo));
- memset(ipo, 0, sizeof(*ipo));
- INIT_FLIST_HEAD(&ipo->list);
+ init_ipo(ipo);
+
/*
* the 512 is wrong here, it should be the hardware sector size...
*/
@@ -256,6 +257,7 @@
{
struct io_piece *ipo = malloc(sizeof(*ipo));
+ init_ipo(ipo);
trace_add_file(td, t->device);
ios[DDIR_WRITE]++;
diff --git a/fio.c b/fio.c
index 1d20cf7..c8de2ee 100644
--- a/fio.c
+++ b/fio.c
@@ -1022,6 +1022,7 @@
INIT_FLIST_HEAD(&td->io_log_list);
INIT_FLIST_HEAD(&td->io_hist_list);
INIT_FLIST_HEAD(&td->verify_list);
+ INIT_FLIST_HEAD(&td->trim_list);
pthread_mutex_init(&td->io_u_lock, NULL);
td->io_hist_tree = RB_ROOT;
diff --git a/fio.h b/fio.h
index 729604d..e8c025d 100644
--- a/fio.h
+++ b/fio.h
@@ -120,8 +120,8 @@
unsigned int io_u_complete[FIO_IO_U_MAP_NR];
unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR];
unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR];
- unsigned long total_io_u[2];
- unsigned long short_io_u[2];
+ unsigned long total_io_u[3];
+ unsigned long short_io_u[3];
unsigned long total_submit;
unsigned long total_complete;
@@ -255,6 +255,10 @@
unsigned int gtod_offload;
enum fio_cs clocksource;
unsigned int no_stall;
+ unsigned int trim_percentage;
+ unsigned int trim_batch;
+ unsigned int trim_zero;
+ unsigned long long trim_backlog;
char *read_iolog_file;
char *write_iolog_file;
@@ -347,12 +351,14 @@
char *sysfs_root;
- unsigned long rand_seeds[6];
+ unsigned long rand_seeds[7];
os_random_state_t bsrange_state;
os_random_state_t verify_state;
+ os_random_state_t trim_state;
unsigned int verify_batch;
+ unsigned int trim_batch;
int shm_id;
@@ -437,6 +443,12 @@
struct flist_head io_log_list;
/*
+ * For tracking/handling discards
+ */
+ struct flist_head trim_list;
+ unsigned long trim_entries;
+
+ /*
* for fileservice, how often to switch to a new file
*/
unsigned int file_service_nr;
diff --git a/init.c b/init.c
index f0ee37a..fe4dbf2 100644
--- a/init.c
+++ b/init.c
@@ -447,6 +447,7 @@
os_random_seed(td->rand_seeds[3], &td->next_file_state);
os_random_seed(td->rand_seeds[5], &td->file_size_state);
+ os_random_seed(td->rand_seeds[6], &td->trim_state);
if (!td_random(td))
return;
diff --git a/io_u.c b/io_u.c
index 21a801f..ea0d46c 100644
--- a/io_u.c
+++ b/io_u.c
@@ -8,6 +8,7 @@
#include "fio.h"
#include "hash.h"
#include "verify.h"
+#include "trim.h"
#include "lib/rand.h"
struct io_completion_data {
@@ -982,21 +983,31 @@
return io_u;
}
-/*
- * Return an io_u to be processed. Gets a buflen and offset, sets direction,
- * etc. The returned io_u is fully ready to be prepped and submitted.
- */
-struct io_u *get_io_u(struct thread_data *td)
+static int check_get_trim(struct thread_data *td, struct io_u *io_u)
{
- struct fio_file *f;
- struct io_u *io_u;
+ if (td->o.trim_backlog && td->trim_entries) {
+ int get_trim = 0;
- io_u = __get_io_u(td);
- if (!io_u) {
- dprint(FD_IO, "__get_io_u failed\n");
- return NULL;
+ if (td->trim_batch) {
+ td->trim_batch--;
+ get_trim = 1;
+ } else if (!(td->io_hist_len % td->o.trim_backlog) &&
+ td->last_ddir != DDIR_READ) {
+ td->trim_batch = td->o.trim_batch;
+ if (!td->trim_batch)
+ td->trim_batch = td->o.trim_backlog;
+ get_trim = 1;
+ }
+
+ if (get_trim && !get_next_trim(td, io_u))
+ return 1;
}
+ return 0;
+}
+
+static int check_get_verify(struct thread_data *td, struct io_u *io_u)
+{
if (td->o.verify_backlog && td->io_hist_len) {
int get_verify = 0;
@@ -1012,9 +1023,32 @@
}
if (get_verify && !get_next_verify(td, io_u))
- goto out;
+ return 1;
}
+ return 0;
+}
+
+/*
+ * Return an io_u to be processed. Gets a buflen and offset, sets direction,
+ * etc. The returned io_u is fully ready to be prepped and submitted.
+ */
+struct io_u *get_io_u(struct thread_data *td)
+{
+ struct fio_file *f;
+ struct io_u *io_u;
+
+ io_u = __get_io_u(td);
+ if (!io_u) {
+ dprint(FD_IO, "__get_io_u failed\n");
+ return NULL;
+ }
+
+ if (check_get_verify(td, io_u))
+ goto out;
+ if (check_get_trim(td, io_u))
+ goto out;
+
/*
* from a requeue, io_u already setup
*/
@@ -1064,6 +1098,7 @@
io_u->xfer_buflen = io_u->buflen;
out:
+ assert(io_u->file);
if (!td_io_prep(td, io_u)) {
if (!td->o.disable_slat)
fio_gettime(&io_u->start_time, NULL);
diff --git a/ioengine.h b/ioengine.h
index 389e95a..f6238f8 100644
--- a/ioengine.h
+++ b/ioengine.h
@@ -9,6 +9,7 @@
IO_U_F_FREE_DEF = 1 << 2,
IO_U_F_IN_CUR_DEPTH = 1 << 3,
IO_U_F_BUSY_OK = 1 << 4,
+ IO_U_F_TRIMMED = 1 << 5,
};
/*
diff --git a/ioengines.c b/ioengines.c
index f976efb..7df0aba 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -269,7 +269,8 @@
if (ddir_rw(io_u->ddir)) {
io_u_mark_depth(td, 1);
td->ts.total_io_u[io_u->ddir]++;
- }
+ } else if (io_u->ddir == DDIR_TRIM)
+ td->ts.total_io_u[2]++;
} else if (ret == FIO_Q_QUEUED) {
int r;
diff --git a/iolog.h b/iolog.h
index 2a97e28..c59e6aa 100644
--- a/iolog.h
+++ b/iolog.h
@@ -32,6 +32,12 @@
struct io_sample *log;
};
+enum {					/* bits for io_piece->flags */
+	IP_F_ONRB	= 1 << 0,	/* linked into td->io_hist_tree */
+	IP_F_ONLIST	= 1 << 1,	/* linked into td->io_hist_list */
+	IP_F_TRIMMED	= 1 << 2,	/* picked up by get_next_trim() */
+};
+
/*
* When logging io actions, this matches a single sent io_u
*/
@@ -40,12 +46,14 @@
struct rb_node rb_node;
struct flist_head list;
};
+ struct flist_head trim_list;
union {
int fileno;
struct fio_file *file;
};
unsigned long long offset;
unsigned long len;
+ unsigned long flags;
enum fio_ddir ddir;
union {
unsigned long delay;
@@ -95,4 +103,10 @@
extern int write_bw_log;
extern void add_agg_sample(unsigned long, enum fio_ddir, unsigned int);
+/* Zero a new io_piece; an empty trim_list means "not queued for trim" */
+static inline void init_ipo(struct io_piece *ipo)
+{
+	memset(ipo, 0, sizeof(struct io_piece));
+	INIT_FLIST_HEAD(&ipo->trim_list);
+}
#endif
diff --git a/log.c b/log.c
index ce4ac9f..266dc06 100644
--- a/log.c
+++ b/log.c
@@ -9,6 +9,7 @@
#include "flist.h"
#include "fio.h"
#include "verify.h"
+#include "trim.h"
static const char iolog_ver2[] = "fio version 2 iolog";
@@ -115,6 +116,7 @@
ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
flist_del(&ipo->list);
+ remove_trim_entry(td, ipo);
ret = ipo_special(td, ipo);
if (ret < 0) {
@@ -160,6 +162,7 @@
while ((n = rb_first(&td->io_hist_tree)) != NULL) {
ipo = rb_entry(n, struct io_piece, rb_node);
rb_erase(n, &td->io_hist_tree);
+ remove_trim_entry(td, ipo);
td->io_hist_len--;
free(ipo);
}
@@ -167,6 +170,7 @@
while (!flist_empty(&td->io_hist_list)) {
ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
flist_del(&ipo->list);
+ remove_trim_entry(td, ipo);
td->io_hist_len--;
free(ipo);
}
@@ -181,10 +185,16 @@
struct io_piece *ipo, *__ipo;
ipo = malloc(sizeof(struct io_piece));
+ init_ipo(ipo);
ipo->file = io_u->file;
ipo->offset = io_u->offset;
ipo->len = io_u->buflen;
+ if (io_u_should_trim(td, io_u)) {
+ flist_add_tail(&ipo->trim_list, &td->trim_list);
+ td->trim_entries++;
+ }
+
/*
* We don't need to sort the entries, if:
*
@@ -203,6 +213,7 @@
(file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
INIT_FLIST_HEAD(&ipo->list);
flist_add_tail(&ipo->list, &td->io_hist_list);
+ ipo->flags |= IP_F_ONLIST;
td->io_hist_len++;
return;
}
@@ -231,6 +242,7 @@
assert(ipo->len == __ipo->len);
td->io_hist_len--;
rb_erase(parent, &td->io_hist_tree);
+ remove_trim_entry(td, __ipo);
free(__ipo);
goto restart;
}
@@ -238,6 +250,7 @@
rb_link_node(&ipo->rb_node, parent, p);
rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
+ ipo->flags |= IP_F_ONRB;
td->io_hist_len++;
}
@@ -345,8 +358,7 @@
* Make note of file
*/
ipo = malloc(sizeof(*ipo));
- memset(ipo, 0, sizeof(*ipo));
- INIT_FLIST_HEAD(&ipo->list);
+ init_ipo(ipo);
ipo->ddir = rw;
if (rw == DDIR_WAIT) {
ipo->delay = offset;
diff --git a/options.c b/options.c
index 3d32c8e..e255e94 100644
--- a/options.c
+++ b/options.c
@@ -440,6 +440,16 @@
}
#endif
+#ifdef FIO_HAVE_TRIM
+/* trim_percentage option callback: store *val in the job's options */
+static int str_verify_trim_cb(void *data, unsigned long long *val)
+{
+	((struct thread_data *) data)->o.trim_percentage = *val;
+
+	return 0;
+}
+#endif
+
static int str_fst_cb(void *data, const char *str)
{
struct thread_data *td = data;
@@ -1458,7 +1468,7 @@
.type = FIO_OPT_INT,
.off1 = td_var_offset(verify_batch),
.help = "Verify this number of IO blocks",
- .parent = "verify_backlog",
+ .parent = "verify",
},
#ifdef FIO_HAVE_CPU_AFFINITY
{
@@ -1469,6 +1479,39 @@
.parent = "verify_async",
},
#endif
+#ifdef FIO_HAVE_TRIM
+ {
+ .name = "trim_percentage",
+ .type = FIO_OPT_INT,
+ .cb = str_verify_trim_cb,
+ .maxval = 100,
+ .help = "Number of verify blocks to discard/trim",
+ .parent = "verify",
+ .def = "0",
+ },
+ {
+ .name = "trim_verify_zero",
+ .type = FIO_OPT_INT,
+ .help = "Verify that trim/discarded blocks are returned as zeroes",
+ .off1 = td_var_offset(trim_zero),
+ .parent = "trim_percentage",
+ .def = "1",
+ },
+ {
+ .name = "trim_backlog",
+ .type = FIO_OPT_STR_VAL,
+ .off1 = td_var_offset(trim_backlog),
+ .help = "Trim after this number of blocks are written",
+ .parent = "trim_percentage",
+ },
+ {
+ .name = "trim_backlog_batch",
+ .type = FIO_OPT_INT,
+ .off1 = td_var_offset(trim_batch),
+ .help = "Trim this number of IO blocks",
+ .parent = "trim_percentage",
+ },
+#endif
{
.name = "write_iolog",
.type = FIO_OPT_STR_STORE,
diff --git a/stat.c b/stat.c
index 8e9fba0..326b1f7 100644
--- a/stat.c
+++ b/stat.c
@@ -351,9 +351,11 @@
io_u_dist[1], io_u_dist[2],
io_u_dist[3], io_u_dist[4],
io_u_dist[5], io_u_dist[6]);
- log_info(" issued r/w: total=%lu/%lu, short=%lu/%lu\n",
+ log_info(" issued r/w/d: total=%lu/%lu/%lu, short=%lu/%lu/%lu\n",
ts->total_io_u[0], ts->total_io_u[1],
- ts->short_io_u[0], ts->short_io_u[1]);
+ ts->total_io_u[2],
+ ts->short_io_u[0], ts->short_io_u[1],
+ ts->short_io_u[2]);
stat_calc_lat_u(ts, io_u_lat_u);
stat_calc_lat_m(ts, io_u_lat_m);
show_latencies(io_u_lat_u, io_u_lat_m);
@@ -615,7 +617,7 @@
ts->io_u_lat_m[k] += td->ts.io_u_lat_m[k];
- for (k = 0; k <= DDIR_WRITE; k++) {
+ for (k = 0; k <= 2; k++) {
ts->total_io_u[k] += td->ts.total_io_u[k];
ts->short_io_u[k] += td->ts.short_io_u[k];
}
diff --git a/trim.c b/trim.c
new file mode 100644
index 0000000..cf42625
--- /dev/null
+++ b/trim.c
@@ -0,0 +1,84 @@
+/*
+ * TRIM/DISCARD support
+ */
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "fio.h"
+#include "trim.h"
+
+#ifdef FIO_HAVE_TRIM
+/*
+ * Fill io_u with the next queued trim. Returns 0 if io_u is ready, 1 if not.
+ */
+int get_next_trim(struct thread_data *td, struct io_u *io_u)
+{
+	struct io_piece *ipo;
+	int unlinked = 0;
+
+	/* this io_u is from a requeue, we already filled the offsets */
+	if (io_u->file)
+		return 0;
+	/* nothing queued: fail, so the caller never preps an unfilled io_u */
+	if (flist_empty(&td->trim_list))
+		return 1;
+
+	assert(td->trim_entries);
+	ipo = flist_entry(td->trim_list.next, struct io_piece, trim_list);
+	remove_trim_entry(td, ipo);
+	ipo->flags |= IP_F_TRIMMED;
+
+	/*
+	 * If not verifying that trimmed ranges return zeroed data,
+	 * remove this from the to-read verify lists
+	 */
+	if (!td->o.trim_zero) {
+		if (ipo->flags & IP_F_ONLIST)
+			flist_del(&ipo->list);
+		else {
+			assert(ipo->flags & IP_F_ONRB);
+			rb_erase(&ipo->rb_node, &td->io_hist_tree);
+		}
+		td->io_hist_len--;
+		unlinked = 1;
+	}
+
+	io_u->offset = ipo->offset;
+	io_u->buflen = ipo->len;
+	io_u->file = ipo->file;
+	/* ipo is still on the verify history unless it was unlinked above */
+	if (unlinked)
+		free(ipo);
+
+	if (!fio_file_open(io_u->file) && td_io_open_file(td, io_u->file)) {
+		dprint(FD_VERIFY, "failed file %s open\n",
+				io_u->file->file_name);
+		return 1;
+	}
+	get_file(io_u->file);
+	assert(fio_file_open(io_u->file));
+	io_u->ddir = DDIR_TRIM;
+	io_u->xfer_buf = NULL;
+	io_u->xfer_buflen = io_u->buflen;
+	dprint(FD_VERIFY, "get_next_trim: ret io_u %p\n", io_u);
+	return 0;
+}
+
+/* Draw a random number to decide if this block gets queued for trim */
+int io_u_should_trim(struct thread_data *td, struct io_u *io_u)
+{
+	unsigned long long threshold;
+	long rnd;
+
+	if (td->o.trim_percentage == 0)
+		return 0;
+
+	threshold = (OS_RAND_MAX / 100ULL) *
+			(unsigned long long) td->o.trim_percentage;
+	rnd = os_random_long(&td->trim_state);
+	return rnd <= threshold;
+}
+#endif
diff --git a/trim.h b/trim.h
new file mode 100644
index 0000000..d0d7a8d
--- /dev/null
+++ b/trim.h
@@ -0,0 +1,37 @@
+#ifndef FIO_TRIM_H
+#define FIO_TRIM_H
+
+#include "fio.h"
+
+#ifdef FIO_HAVE_TRIM
+extern int __must_check get_next_trim(struct thread_data *td, struct io_u *io_u);
+extern int io_u_should_trim(struct thread_data *td, struct io_u *io_u);
+
+/*
+ * Take ipo off the pending trim list, if it is queued there, and keep
+ * td->trim_entries in step with the list.
+ */
+static inline void remove_trim_entry(struct thread_data *td, struct io_piece *ipo)
+{
+	if (flist_empty(&ipo->trim_list))
+		return;
+	flist_del_init(&ipo->trim_list);
+	td->trim_entries--;
+}
+
+#else
+/* Built without FIO_HAVE_TRIM: stubs that never queue or hand out a trim */
+static inline int get_next_trim(struct thread_data *td, struct io_u *io_u)
+{
+	return 1;
+}
+static inline int io_u_should_trim(struct thread_data *td, struct io_u *io_u)
+{
+	return 0;
+}
+static inline void remove_trim_entry(struct thread_data *td, struct io_piece *ipo)
+{
+}
+#endif
+
+#endif
diff --git a/verify.c b/verify.c
index 7957bd4..073eec5 100644
--- a/verify.c
+++ b/verify.c
@@ -10,6 +10,7 @@
#include "fio.h"
#include "verify.h"
#include "smalloc.h"
+#include "trim.h"
#include "lib/rand.h"
#include "crc/md5.h"
@@ -470,6 +471,38 @@
return 0;
}
+/*
+ * Check that a block read back after being trimmed contains only zeroes.
+ * Returns 0 if it does (or if trim_verify_zero is off), EILSEQ otherwise.
+ */
+static int verify_trimmed_io_u(struct thread_data *td, struct io_u *io_u)
+{
+	static char zero_buf[1024];
+	unsigned int left, chunk;
+	void *pos;
+
+	if (!td->o.trim_zero)
+		return 0;
+
+	/* compare the data against zero_buf, at most one zero_buf at a time */
+	pos = io_u->buf;
+	left = io_u->buflen;
+	do {
+		chunk = left < sizeof(zero_buf) ? left : sizeof(zero_buf);
+		if (memcmp(pos, zero_buf, chunk))
+			goto mismatch;
+		pos += chunk;
+		left -= chunk;
+	} while (left);
+
+	return 0;
+
+mismatch:
+	log_err("trims: verify failed at file %s offset %llu, length %lu\n",
+			io_u->file->file_name, io_u->offset, io_u->buflen);
+	return EILSEQ;
+}
+
int verify_io_u(struct thread_data *td, struct io_u *io_u)
{
struct verify_header *hdr;
@@ -479,6 +512,10 @@
if (td->o.verify == VERIFY_NULL || io_u->ddir != DDIR_READ)
return 0;
+ if (io_u->flags & IO_U_F_TRIMMED) {
+ ret = verify_trimmed_io_u(td, io_u);
+ goto done;
+ }
hdr_inc = io_u->buflen;
if (td->o.verify_interval)
@@ -570,6 +607,7 @@
}
}
+done:
if (ret && td->o.verify_fatal)
td->terminate = 1;
@@ -778,18 +816,21 @@
ipo = rb_entry(n, struct io_piece, rb_node);
rb_erase(n, &td->io_hist_tree);
- td->io_hist_len--;
} else if (!flist_empty(&td->io_hist_list)) {
ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
- td->io_hist_len--;
flist_del(&ipo->list);
}
if (ipo) {
+ td->io_hist_len--;
+
io_u->offset = ipo->offset;
io_u->buflen = ipo->len;
io_u->file = ipo->file;
+ if (ipo->flags & IP_F_TRIMMED)
+ io_u->flags |= IO_U_F_TRIMMED;
+
if (!fio_file_open(io_u->file)) {
int r = td_io_open_file(td, io_u->file);
@@ -805,6 +846,8 @@
io_u->ddir = DDIR_READ;
io_u->xfer_buf = io_u->buf;
io_u->xfer_buflen = io_u->buflen;
+
+ remove_trim_entry(td, ipo);
free(ipo);
dprint(FD_VERIFY, "get_next_verify: ret io_u %p\n", io_u);
return 0;