Fix io piece logging to not have O(n) runtime
Use an rbtree for that log instead.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/Makefile b/Makefile
index c79d36b..10b4ffc 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,8 @@
PROGS = fio
SCRIPTS = fio_generate_plots
OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
- filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o
+ filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
+ rbtree.o
OBJS += engines/cpu.o
OBJS += engines/libaio.o
diff --git a/Makefile.FreeBSD b/Makefile.FreeBSD
index d5e675a..334395b 100644
--- a/Makefile.FreeBSD
+++ b/Makefile.FreeBSD
@@ -3,7 +3,8 @@
PROGS = fio
SCRIPTS = fio_generate_plots
OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
- filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o
+ filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
+ rbtree.o
OBJS += engines/cpu.o
OBJS += engines/mmap.o
diff --git a/Makefile.solaris b/Makefile.solaris
index bdb8274..f386041 100644
--- a/Makefile.solaris
+++ b/Makefile.solaris
@@ -3,7 +3,8 @@
PROGS = fio
SCRIPTS = fio_generate_plots
OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
- filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o
+ filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
+ rbtree.o
OBJS += engines/cpu.o
OBJS += engines/mmap.o
diff --git a/fio.c b/fio.c
index f6c3fc8..504b78d 100644
--- a/fio.c
+++ b/fio.c
@@ -735,7 +735,6 @@
INIT_LIST_HEAD(&td->io_u_freelist);
INIT_LIST_HEAD(&td->io_u_busylist);
INIT_LIST_HEAD(&td->io_u_requeues);
- INIT_LIST_HEAD(&td->io_hist_list);
INIT_LIST_HEAD(&td->io_log_list);
if (init_io_u(td))
diff --git a/fio.h b/fio.h
index 736fefb..4111bff 100644
--- a/fio.h
+++ b/fio.h
@@ -14,6 +14,7 @@
#include <getopt.h>
#include "list.h"
+#include "rbtree.h"
#include "md5.h"
#include "crc32.h"
#include "arch.h"
@@ -78,7 +79,10 @@
* When logging io actions, this matches a single sent io_u
*/
struct io_piece {
- struct list_head list;
+ union {
+ struct rb_node rb_node;
+ struct list_head list;
+ };
struct fio_file *file;
unsigned long long offset;
unsigned long len;
@@ -510,7 +514,7 @@
/*
* IO historic logs
*/
- struct list_head io_hist_list;
+ struct rb_root io_hist_tree;
struct list_head io_log_list;
/*
diff --git a/log.c b/log.c
index dbca3cc..2b90f45 100644
--- a/log.c
+++ b/log.c
@@ -29,11 +29,11 @@
void prune_io_piece_log(struct thread_data *td)
{
struct io_piece *ipo;
+ struct rb_node *n;
- while (!list_empty(&td->io_hist_list)) {
- ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
-
- list_del(&ipo->list);
+ while ((n = rb_first(&td->io_hist_tree)) != NULL) {
+ ipo = rb_entry(n, struct io_piece, rb_node);
+ rb_erase(n, &td->io_hist_tree);
free(ipo);
}
}
@@ -43,36 +43,34 @@
*/
void log_io_piece(struct thread_data *td, struct io_u *io_u)
{
- struct io_piece *ipo = malloc(sizeof(struct io_piece));
- struct list_head *entry;
+ struct rb_node **p = &td->io_hist_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct io_piece *ipo, *__ipo;
- INIT_LIST_HEAD(&ipo->list);
+ ipo = malloc(sizeof(struct io_piece));
+ memset(&ipo->rb_node, 0, sizeof(ipo->rb_node));
ipo->file = io_u->file;
ipo->offset = io_u->offset;
ipo->len = io_u->buflen;
/*
- * for random io where the writes extend the file, it will typically
- * be laid out with the block scattered as written. it's faster to
- * read them in in that order again, so don't sort
+ * Sort the entry into the verification tree, keyed on offset. An
+ * equal offset descends left rather than stopping: stopping would
+ * leave p on an occupied slot, and rb_link_node() would then
+ * overwrite that link and drop the node's subtree.
*/
- if (!td_random(td) || !td->o.overwrite) {
- list_add_tail(&ipo->list, &td->io_hist_list);
- return;
- }
+ while (*p) {
+ parent = *p;
- /*
- * for random io, sort the list so verify will run faster
- */
- entry = &td->io_hist_list;
- while ((entry = entry->prev) != &td->io_hist_list) {
- struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
-
- if (__ipo->offset < ipo->offset)
- break;
+ __ipo = rb_entry(parent, struct io_piece, rb_node);
+ if (ipo->offset <= __ipo->offset)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
}
- list_add(&ipo->list, entry);
+ rb_link_node(&ipo->rb_node, parent, p);
+ rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
}
void write_iolog_close(struct thread_data *td)
diff --git a/rbtree.c b/rbtree.c
new file mode 100644
index 0000000..cc4093a
--- /dev/null
+++ b/rbtree.c
@@ -0,0 +1,363 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+ (C) 2002 David Woodhouse <dwmw2@infradead.org>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/lib/rbtree.c
+*/
+
+#include "rbtree.h"
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *right = node->rb_right;
+
+ if ((node->rb_right = right->rb_left))
+ right->rb_left->rb_parent = node;
+ right->rb_left = node;
+
+ if ((right->rb_parent = node->rb_parent))
+ {
+ if (node == node->rb_parent->rb_left)
+ node->rb_parent->rb_left = right;
+ else
+ node->rb_parent->rb_right = right;
+ }
+ else
+ root->rb_node = right;
+ node->rb_parent = right;
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *left = node->rb_left;
+
+ if ((node->rb_left = left->rb_right))
+ left->rb_right->rb_parent = node;
+ left->rb_right = node;
+
+ if ((left->rb_parent = node->rb_parent))
+ {
+ if (node == node->rb_parent->rb_right)
+ node->rb_parent->rb_right = left;
+ else
+ node->rb_parent->rb_left = left;
+ }
+ else
+ root->rb_node = left;
+ node->rb_parent = left;
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *parent, *gparent;
+
+ while ((parent = node->rb_parent) && parent->rb_color == RB_RED)
+ {
+ gparent = parent->rb_parent;
+
+ if (parent == gparent->rb_left)
+ {
+ {
+ register struct rb_node *uncle = gparent->rb_right;
+ if (uncle && uncle->rb_color == RB_RED)
+ {
+ uncle->rb_color = RB_BLACK;
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->rb_right == node)
+ {
+ register struct rb_node *tmp;
+ __rb_rotate_left(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ __rb_rotate_right(gparent, root);
+ } else {
+ {
+ register struct rb_node *uncle = gparent->rb_left;
+ if (uncle && uncle->rb_color == RB_RED)
+ {
+ uncle->rb_color = RB_BLACK;
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->rb_left == node)
+ {
+ register struct rb_node *tmp;
+ __rb_rotate_right(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ __rb_rotate_left(gparent, root);
+ }
+ }
+
+ root->rb_node->rb_color = RB_BLACK;
+}
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+ struct rb_root *root)
+{
+ struct rb_node *other;
+
+ while ((!node || node->rb_color == RB_BLACK) && node != root->rb_node)
+ {
+ if (parent->rb_left == node)
+ {
+ other = parent->rb_right;
+ if (other->rb_color == RB_RED)
+ {
+ other->rb_color = RB_BLACK;
+ parent->rb_color = RB_RED;
+ __rb_rotate_left(parent, root);
+ other = parent->rb_right;
+ }
+ if ((!other->rb_left ||
+ other->rb_left->rb_color == RB_BLACK)
+ && (!other->rb_right ||
+ other->rb_right->rb_color == RB_BLACK))
+ {
+ other->rb_color = RB_RED;
+ node = parent;
+ parent = node->rb_parent;
+ }
+ else
+ {
+ if (!other->rb_right ||
+ other->rb_right->rb_color == RB_BLACK)
+ {
+ register struct rb_node *o_left;
+ if ((o_left = other->rb_left))
+ o_left->rb_color = RB_BLACK;
+ other->rb_color = RB_RED;
+ __rb_rotate_right(other, root);
+ other = parent->rb_right;
+ }
+ other->rb_color = parent->rb_color;
+ parent->rb_color = RB_BLACK;
+ if (other->rb_right)
+ other->rb_right->rb_color = RB_BLACK;
+ __rb_rotate_left(parent, root);
+ node = root->rb_node;
+ break;
+ }
+ }
+ else
+ {
+ other = parent->rb_left;
+ if (other->rb_color == RB_RED)
+ {
+ other->rb_color = RB_BLACK;
+ parent->rb_color = RB_RED;
+ __rb_rotate_right(parent, root);
+ other = parent->rb_left;
+ }
+ if ((!other->rb_left ||
+ other->rb_left->rb_color == RB_BLACK)
+ && (!other->rb_right ||
+ other->rb_right->rb_color == RB_BLACK))
+ {
+ other->rb_color = RB_RED;
+ node = parent;
+ parent = node->rb_parent;
+ }
+ else
+ {
+ if (!other->rb_left ||
+ other->rb_left->rb_color == RB_BLACK)
+ {
+ register struct rb_node *o_right;
+ if ((o_right = other->rb_right))
+ o_right->rb_color = RB_BLACK;
+ other->rb_color = RB_RED;
+ __rb_rotate_left(other, root);
+ other = parent->rb_left;
+ }
+ other->rb_color = parent->rb_color;
+ parent->rb_color = RB_BLACK;
+ if (other->rb_left)
+ other->rb_left->rb_color = RB_BLACK;
+ __rb_rotate_right(parent, root);
+ node = root->rb_node;
+ break;
+ }
+ }
+ }
+ if (node)
+ node->rb_color = RB_BLACK;
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *child, *parent;
+ int color;
+
+ if (!node->rb_left)
+ child = node->rb_right;
+ else if (!node->rb_right)
+ child = node->rb_left;
+ else
+ {
+ struct rb_node *old = node, *left;
+
+ node = node->rb_right;
+ while ((left = node->rb_left) != NULL)
+ node = left;
+ child = node->rb_right;
+ parent = node->rb_parent;
+ color = node->rb_color;
+
+ if (child)
+ child->rb_parent = parent;
+ if (parent)
+ {
+ if (parent->rb_left == node)
+ parent->rb_left = child;
+ else
+ parent->rb_right = child;
+ }
+ else
+ root->rb_node = child;
+
+ if (node->rb_parent == old)
+ parent = node;
+ node->rb_parent = old->rb_parent;
+ node->rb_color = old->rb_color;
+ node->rb_right = old->rb_right;
+ node->rb_left = old->rb_left;
+
+ if (old->rb_parent)
+ {
+ if (old->rb_parent->rb_left == old)
+ old->rb_parent->rb_left = node;
+ else
+ old->rb_parent->rb_right = node;
+ } else
+ root->rb_node = node;
+
+ old->rb_left->rb_parent = node;
+ if (old->rb_right)
+ old->rb_right->rb_parent = node;
+ goto color;
+ }
+
+ parent = node->rb_parent;
+ color = node->rb_color;
+
+ if (child)
+ child->rb_parent = parent;
+ if (parent)
+ {
+ if (parent->rb_left == node)
+ parent->rb_left = child;
+ else
+ parent->rb_right = child;
+ }
+ else
+ root->rb_node = child;
+
+ color:
+ if (color == RB_BLACK)
+ __rb_erase_color(child, parent, root);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_left)
+ n = n->rb_left;
+ return n;
+}
+
+struct rb_node *rb_last(struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_right)
+ n = n->rb_right;
+ return n;
+}
+
+struct rb_node *rb_next(struct rb_node *node)
+{
+ /* If we have a right-hand child, go down and then left as far
+ as we can. */
+ if (node->rb_right) {
+ node = node->rb_right;
+ while (node->rb_left)
+ node=node->rb_left;
+ return node;
+ }
+
+ /* No right-hand children. Everything down and left is
+ smaller than us, so any 'next' node must be in the general
+ direction of our parent. Go up the tree; any time the
+ ancestor is a right-hand child of its parent, keep going
+ up. First time it's a left-hand child of its parent, said
+ parent is our 'next' node. */
+ while (node->rb_parent && node == node->rb_parent->rb_right)
+ node = node->rb_parent;
+
+ return node->rb_parent;
+}
+
+struct rb_node *rb_prev(struct rb_node *node)
+{
+ /* If we have a left-hand child, go down and then right as far
+ as we can. */
+ if (node->rb_left) {
+ node = node->rb_left;
+ while (node->rb_right)
+ node=node->rb_right;
+ return node;
+ }
+
+ /* No left-hand children. Go up till we find an ancestor which
+ is a right-hand child of its parent */
+ while (node->rb_parent && node == node->rb_parent->rb_left)
+ node = node->rb_parent;
+
+ return node->rb_parent;
+}
diff --git a/rbtree.h b/rbtree.h
new file mode 100644
index 0000000..2cb9e37
--- /dev/null
+++ b/rbtree.h
@@ -0,0 +1,147 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/include/linux/rbtree.h
+
+ To use rbtrees you'll have to implement your own insert and search cores.
+ This spares us callbacks, which would dramatically reduce performance.
+ I know it's not the cleanest way, but in C (not in C++) it is how to get
+ both performance and genericity...
+
+ Some examples of insert and search follow here. The search is a plain
+ normal search over an ordered tree. The insert instead must be implemented
+ in two steps: first the code must insert the element in
+ order as a red leaf in the tree, then the support library function
+ rb_insert_color() must be called. That function will do the
+ non-trivial work of rebalancing the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+ unsigned long offset)
+{
+ struct rb_node * n = inode->i_rb_page_cache.rb_node;
+ struct page * page;
+
+ while (n)
+ {
+ page = rb_entry(n, struct page, rb_page_cache);
+
+ if (offset < page->offset)
+ n = n->rb_left;
+ else if (offset > page->offset)
+ n = n->rb_right;
+ else
+ return page;
+ }
+ return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+ unsigned long offset,
+ struct rb_node * node)
+{
+ struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
+ struct rb_node * parent = NULL;
+ struct page * page;
+
+ while (*p)
+ {
+ parent = *p;
+ page = rb_entry(parent, struct page, rb_page_cache);
+
+ if (offset < page->offset)
+ p = &(*p)->rb_left;
+ else if (offset > page->offset)
+ p = &(*p)->rb_right;
+ else
+ return page;
+ }
+
+ rb_link_node(node, parent, p);
+
+ return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+ unsigned long offset,
+ struct rb_node * node)
+{
+ struct page * ret;
+ if ((ret = __rb_insert_page_cache(inode, offset, node)))
+ goto out;
+ rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+ return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef _LINUX_RBTREE_H
+#define _LINUX_RBTREE_H
+
+#include <stdlib.h>
+
+struct rb_node
+{
+ struct rb_node *rb_parent;
+ int rb_color;
+#define RB_RED 0
+#define RB_BLACK 1
+ struct rb_node *rb_right;
+ struct rb_node *rb_left;
+};
+
+struct rb_root
+{
+ struct rb_node *rb_node;
+};
+
+#undef offsetof
+#ifdef __compiler_offsetof
+#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
+#else
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+#define RB_ROOT (struct rb_root) { NULL, }
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(struct rb_node *);
+extern struct rb_node *rb_prev(struct rb_node *);
+extern struct rb_node *rb_first(struct rb_root *);
+extern struct rb_node *rb_last(struct rb_root *);
+
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+ struct rb_node ** rb_link)
+{
+ node->rb_parent = parent;
+ node->rb_color = RB_RED;
+ node->rb_left = node->rb_right = NULL;
+
+ *rb_link = node;
+}
+
+#endif /* _LINUX_RBTREE_H */
diff --git a/verify.c b/verify.c
index f748065..7fbb2e6 100644
--- a/verify.c
+++ b/verify.c
@@ -145,6 +145,7 @@
int get_next_verify(struct thread_data *td, struct io_u *io_u)
{
struct io_piece *ipo;
+ struct rb_node *n;
/*
* this io_u is from a requeue, we already filled the offsets
@@ -152,10 +153,11 @@
if (io_u->file)
return 0;
- if (!list_empty(&td->io_hist_list)) {
- ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
+ n = rb_first(&td->io_hist_tree);
+ if (n) {
+ ipo = rb_entry(n, struct io_piece, rb_node);
- list_del(&ipo->list);
+ rb_erase(n, &td->io_hist_tree);
io_u->offset = ipo->offset;
io_u->buflen = ipo->len;