Fix io piece logging to not have O(n) runtime

Use an rbtree for that log instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/Makefile b/Makefile
index c79d36b..10b4ffc 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,8 @@
 PROGS	= fio
 SCRIPTS = fio_generate_plots
 OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
-	filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o
+	filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
+	rbtree.o
 
 OBJS += engines/cpu.o
 OBJS += engines/libaio.o
diff --git a/Makefile.FreeBSD b/Makefile.FreeBSD
index d5e675a..334395b 100644
--- a/Makefile.FreeBSD
+++ b/Makefile.FreeBSD
@@ -3,7 +3,8 @@
 PROGS	= fio
 SCRIPTS = fio_generate_plots
 OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
-	filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o
+	filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
+	rbtree.o
 
 OBJS += engines/cpu.o
 OBJS += engines/mmap.o
diff --git a/Makefile.solaris b/Makefile.solaris
index bdb8274..f386041 100644
--- a/Makefile.solaris
+++ b/Makefile.solaris
@@ -3,7 +3,8 @@
 PROGS	= fio
 SCRIPTS = fio_generate_plots
 OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
-	filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o
+	filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
+	rbtree.o
 
 OBJS += engines/cpu.o
 OBJS += engines/mmap.o
diff --git a/fio.c b/fio.c
index f6c3fc8..504b78d 100644
--- a/fio.c
+++ b/fio.c
@@ -735,7 +735,6 @@
 	INIT_LIST_HEAD(&td->io_u_freelist);
 	INIT_LIST_HEAD(&td->io_u_busylist);
 	INIT_LIST_HEAD(&td->io_u_requeues);
-	INIT_LIST_HEAD(&td->io_hist_list);
 	INIT_LIST_HEAD(&td->io_log_list);
 
 	if (init_io_u(td))
diff --git a/fio.h b/fio.h
index 736fefb..4111bff 100644
--- a/fio.h
+++ b/fio.h
@@ -14,6 +14,7 @@
 #include <getopt.h>
 
 #include "list.h"
+#include "rbtree.h"
 #include "md5.h"
 #include "crc32.h"
 #include "arch.h"
@@ -78,7 +79,10 @@
  * When logging io actions, this matches a single sent io_u
  */
 struct io_piece {
-	struct list_head list;
+	union {
+		struct rb_node rb_node;
+		struct list_head list;
+	};
 	struct fio_file *file;
 	unsigned long long offset;
 	unsigned long len;
@@ -510,7 +514,7 @@
 	/*
 	 * IO historic logs
 	 */
-	struct list_head io_hist_list;
+	struct rb_root io_hist_tree;
 	struct list_head io_log_list;
 
 	/*
diff --git a/log.c b/log.c
index dbca3cc..2b90f45 100644
--- a/log.c
+++ b/log.c
@@ -29,11 +29,11 @@
 void prune_io_piece_log(struct thread_data *td)
 {
 	struct io_piece *ipo;
+	struct rb_node *n;
 
-	while (!list_empty(&td->io_hist_list)) {
-		ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
-
-		list_del(&ipo->list);
+	while ((n = rb_first(&td->io_hist_tree)) != NULL) {
+		ipo = rb_entry(n, struct io_piece, rb_node);
+		rb_erase(n, &td->io_hist_tree);
 		free(ipo);
 	}
 }
@@ -43,36 +43,33 @@
  */
 void log_io_piece(struct thread_data *td, struct io_u *io_u)
 {
-	struct io_piece *ipo = malloc(sizeof(struct io_piece));
-	struct list_head *entry;
+	struct rb_node **p = &td->io_hist_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct io_piece *ipo, *__ipo;
 
-	INIT_LIST_HEAD(&ipo->list);
+	ipo = malloc(sizeof(struct io_piece));
+	memset(&ipo->rb_node, 0, sizeof(ipo->rb_node));
 	ipo->file = io_u->file;
 	ipo->offset = io_u->offset;
 	ipo->len = io_u->buflen;
 
 	/*
-	 * for random io where the writes extend the file, it will typically
-	 * be laid out with the block scattered as written. it's faster to
-	 * read them in in that order again, so don't sort
+	 * Sort the entry into the verification list
 	 */
-	if (!td_random(td) || !td->o.overwrite) {
-		list_add_tail(&ipo->list, &td->io_hist_list);
-		return;
-	}
+	while (*p) {
+		parent = *p;
 
-	/*
-	 * for random io, sort the list so verify will run faster
-	 */
-	entry = &td->io_hist_list;
-	while ((entry = entry->prev) != &td->io_hist_list) {
-		struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
-
-		if (__ipo->offset < ipo->offset)
+		__ipo = rb_entry(parent, struct io_piece, rb_node);
+		if (ipo->offset < __ipo->offset)
+			p = &(*p)->rb_left;
+		else if (ipo->offset > __ipo->offset)
+			p = &(*p)->rb_right;
+		else
 			break;
 	}
 
-	list_add(&ipo->list, entry);
+	rb_link_node(&ipo->rb_node, parent, p);
+	rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
 }
 
 void write_iolog_close(struct thread_data *td)
diff --git a/rbtree.c b/rbtree.c
new file mode 100644
index 0000000..cc4093a
--- /dev/null
+++ b/rbtree.c
@@ -0,0 +1,363 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include "rbtree.h"
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *right = node->rb_right;
+
+	if ((node->rb_right = right->rb_left))
+		right->rb_left->rb_parent = node;
+	right->rb_left = node;
+
+	if ((right->rb_parent = node->rb_parent))
+	{
+		if (node == node->rb_parent->rb_left)
+			node->rb_parent->rb_left = right;
+		else
+			node->rb_parent->rb_right = right;
+	}
+	else
+		root->rb_node = right;
+	node->rb_parent = right;
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *left = node->rb_left;
+
+	if ((node->rb_left = left->rb_right))
+		left->rb_right->rb_parent = node;
+	left->rb_right = node;
+
+	if ((left->rb_parent = node->rb_parent))
+	{
+		if (node == node->rb_parent->rb_right)
+			node->rb_parent->rb_right = left;
+		else
+			node->rb_parent->rb_left = left;
+	}
+	else
+		root->rb_node = left;
+	node->rb_parent = left;
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *parent, *gparent;
+
+	while ((parent = node->rb_parent) && parent->rb_color == RB_RED)
+	{
+		gparent = parent->rb_parent;
+
+		if (parent == gparent->rb_left)
+		{
+			{
+				register struct rb_node *uncle = gparent->rb_right;
+				if (uncle && uncle->rb_color == RB_RED)
+				{
+					uncle->rb_color = RB_BLACK;
+					parent->rb_color = RB_BLACK;
+					gparent->rb_color = RB_RED;
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_right == node)
+			{
+				register struct rb_node *tmp;
+				__rb_rotate_left(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			parent->rb_color = RB_BLACK;
+			gparent->rb_color = RB_RED;
+			__rb_rotate_right(gparent, root);
+		} else {
+			{
+				register struct rb_node *uncle = gparent->rb_left;
+				if (uncle && uncle->rb_color == RB_RED)
+				{
+					uncle->rb_color = RB_BLACK;
+					parent->rb_color = RB_BLACK;
+					gparent->rb_color = RB_RED;
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_left == node)
+			{
+				register struct rb_node *tmp;
+				__rb_rotate_right(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			parent->rb_color = RB_BLACK;
+			gparent->rb_color = RB_RED;
+			__rb_rotate_left(gparent, root);
+		}
+	}
+
+	root->rb_node->rb_color = RB_BLACK;
+}
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+			     struct rb_root *root)
+{
+	struct rb_node *other;
+
+	while ((!node || node->rb_color == RB_BLACK) && node != root->rb_node)
+	{
+		if (parent->rb_left == node)
+		{
+			other = parent->rb_right;
+			if (other->rb_color == RB_RED)
+			{
+				other->rb_color = RB_BLACK;
+				parent->rb_color = RB_RED;
+				__rb_rotate_left(parent, root);
+				other = parent->rb_right;
+			}
+			if ((!other->rb_left ||
+			     other->rb_left->rb_color == RB_BLACK)
+			    && (!other->rb_right ||
+				other->rb_right->rb_color == RB_BLACK))
+			{
+				other->rb_color = RB_RED;
+				node = parent;
+				parent = node->rb_parent;
+			}
+			else
+			{
+				if (!other->rb_right ||
+				    other->rb_right->rb_color == RB_BLACK)
+				{
+					register struct rb_node *o_left;
+					if ((o_left = other->rb_left))
+						o_left->rb_color = RB_BLACK;
+					other->rb_color = RB_RED;
+					__rb_rotate_right(other, root);
+					other = parent->rb_right;
+				}
+				other->rb_color = parent->rb_color;
+				parent->rb_color = RB_BLACK;
+				if (other->rb_right)
+					other->rb_right->rb_color = RB_BLACK;
+				__rb_rotate_left(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+		else
+		{
+			other = parent->rb_left;
+			if (other->rb_color == RB_RED)
+			{
+				other->rb_color = RB_BLACK;
+				parent->rb_color = RB_RED;
+				__rb_rotate_right(parent, root);
+				other = parent->rb_left;
+			}
+			if ((!other->rb_left ||
+			     other->rb_left->rb_color == RB_BLACK)
+			    && (!other->rb_right ||
+				other->rb_right->rb_color == RB_BLACK))
+			{
+				other->rb_color = RB_RED;
+				node = parent;
+				parent = node->rb_parent;
+			}
+			else
+			{
+				if (!other->rb_left ||
+				    other->rb_left->rb_color == RB_BLACK)
+				{
+					register struct rb_node *o_right;
+					if ((o_right = other->rb_right))
+						o_right->rb_color = RB_BLACK;
+					other->rb_color = RB_RED;
+					__rb_rotate_left(other, root);
+					other = parent->rb_left;
+				}
+				other->rb_color = parent->rb_color;
+				parent->rb_color = RB_BLACK;
+				if (other->rb_left)
+					other->rb_left->rb_color = RB_BLACK;
+				__rb_rotate_right(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+	}
+	if (node)
+		node->rb_color = RB_BLACK;
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *child, *parent;
+	int color;
+
+	if (!node->rb_left)
+		child = node->rb_right;
+	else if (!node->rb_right)
+		child = node->rb_left;
+	else
+	{
+		struct rb_node *old = node, *left;
+
+		node = node->rb_right;
+		while ((left = node->rb_left) != NULL)
+			node = left;
+		child = node->rb_right;
+		parent = node->rb_parent;
+		color = node->rb_color;
+
+		if (child)
+			child->rb_parent = parent;
+		if (parent)
+		{
+			if (parent->rb_left == node)
+				parent->rb_left = child;
+			else
+				parent->rb_right = child;
+		}
+		else
+			root->rb_node = child;
+
+		if (node->rb_parent == old)
+			parent = node;
+		node->rb_parent = old->rb_parent;
+		node->rb_color = old->rb_color;
+		node->rb_right = old->rb_right;
+		node->rb_left = old->rb_left;
+
+		if (old->rb_parent)
+		{
+			if (old->rb_parent->rb_left == old)
+				old->rb_parent->rb_left = node;
+			else
+				old->rb_parent->rb_right = node;
+		} else
+			root->rb_node = node;
+
+		old->rb_left->rb_parent = node;
+		if (old->rb_right)
+			old->rb_right->rb_parent = node;
+		goto color;
+	}
+
+	parent = node->rb_parent;
+	color = node->rb_color;
+
+	if (child)
+		child->rb_parent = parent;
+	if (parent)
+	{
+		if (parent->rb_left == node)
+			parent->rb_left = child;
+		else
+			parent->rb_right = child;
+	}
+	else
+		root->rb_node = child;
+
+ color:
+	if (color == RB_BLACK)
+		__rb_erase_color(child, parent, root);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_left)
+		n = n->rb_left;
+	return n;
+}
+
+struct rb_node *rb_last(struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_right)
+		n = n->rb_right;
+	return n;
+}
+
+struct rb_node *rb_next(struct rb_node *node)
+{
+	/* If we have a right-hand child, go down and then left as far
+	   as we can. */
+	if (node->rb_right) {
+		node = node->rb_right; 
+		while (node->rb_left)
+			node=node->rb_left;
+		return node;
+	}
+
+	/* No right-hand children.  Everything down and left is
+	   smaller than us, so any 'next' node must be in the general
+	   direction of our parent. Go up the tree; any time the
+	   ancestor is a right-hand child of its parent, keep going
+	   up. First time it's a left-hand child of its parent, said
+	   parent is our 'next' node. */
+	while (node->rb_parent && node == node->rb_parent->rb_right)
+		node = node->rb_parent;
+
+	return node->rb_parent;
+}
+
+struct rb_node *rb_prev(struct rb_node *node)
+{
+	/* If we have a left-hand child, go down and then right as far
+	   as we can. */
+	if (node->rb_left) {
+		node = node->rb_left; 
+		while (node->rb_right)
+			node=node->rb_right;
+		return node;
+	}
+
+	/* No left-hand children. Go up till we find an ancestor which
+	   is a right-hand child of its parent */
+	while (node->rb_parent && node == node->rb_parent->rb_left)
+		node = node->rb_parent;
+
+	return node->rb_parent;
+}
diff --git a/rbtree.h b/rbtree.h
new file mode 100644
index 0000000..2cb9e37
--- /dev/null
+++ b/rbtree.h
@@ -0,0 +1,147 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search cores.
+  This will avoid us to use callbacks and to drop drammatically performances.
+  I know it's not the cleaner way,  but in C (not in C++) to get
+  performances and genericity...
+
+  Some example of insert and search follows here. The search is a plain
+  normal search over an ordered tree. The insert instead must be implemented
+  int two steps: as first thing the code must insert the element in
+  order as a red leaf in the tree, then the support library function
+  rb_insert_color() must be called. Such function will do the
+  not trivial work to rebalance the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+						 unsigned long offset)
+{
+	struct rb_node * n = inode->i_rb_page_cache.rb_node;
+	struct page * page;
+
+	while (n)
+	{
+		page = rb_entry(n, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			n = n->rb_left;
+		else if (offset > page->offset)
+			n = n->rb_right;
+		else
+			return page;
+	}
+	return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+						   unsigned long offset,
+						   struct rb_node * node)
+{
+	struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
+	struct rb_node * parent = NULL;
+	struct page * page;
+
+	while (*p)
+	{
+		parent = *p;
+		page = rb_entry(parent, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			p = &(*p)->rb_left;
+		else if (offset > page->offset)
+			p = &(*p)->rb_right;
+		else
+			return page;
+	}
+
+	rb_link_node(node, parent, p);
+
+	return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+						 unsigned long offset,
+						 struct rb_node * node)
+{
+	struct page * ret;
+	if ((ret = __rb_insert_page_cache(inode, offset, node)))
+		goto out;
+	rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+	return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef	_LINUX_RBTREE_H
+#define	_LINUX_RBTREE_H
+
+#include <stdlib.h>
+
+struct rb_node
+{
+	struct rb_node *rb_parent;
+	int rb_color;
+#define	RB_RED		0
+#define	RB_BLACK	1
+	struct rb_node *rb_right;
+	struct rb_node *rb_left;
+};
+
+struct rb_root
+{
+	struct rb_node *rb_node;
+};
+
+#undef offsetof
+#ifdef __compiler_offsetof
+#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
+#else
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#define container_of(ptr, type, member) ({                      \
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+#define RB_ROOT	(struct rb_root) { NULL, }
+#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(struct rb_node *);
+extern struct rb_node *rb_prev(struct rb_node *);
+extern struct rb_node *rb_first(struct rb_root *);
+extern struct rb_node *rb_last(struct rb_root *);
+
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+				struct rb_node ** rb_link)
+{
+	node->rb_parent = parent;
+	node->rb_color = RB_RED;
+	node->rb_left = node->rb_right = NULL;
+
+	*rb_link = node;
+}
+
+#endif	/* _LINUX_RBTREE_H */
diff --git a/verify.c b/verify.c
index f748065..7fbb2e6 100644
--- a/verify.c
+++ b/verify.c
@@ -145,6 +145,7 @@
 int get_next_verify(struct thread_data *td, struct io_u *io_u)
 {
 	struct io_piece *ipo;
+	struct rb_node *n;
 
 	/*
 	 * this io_u is from a requeue, we already filled the offsets
@@ -152,10 +153,11 @@
 	if (io_u->file)
 		return 0;
 
-	if (!list_empty(&td->io_hist_list)) {
-		ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
+	n = rb_first(&td->io_hist_tree);
+	if (n) {
+		ipo = rb_entry(n, struct io_piece, rb_node);
 
-		list_del(&ipo->list);
+		rb_erase(n, &td->io_hist_tree);
 
 		io_u->offset = ipo->offset;
 		io_u->buflen = ipo->len;