graph: switch tooltip lookups to being range based in a prio tree

This cuts a lot of the CPU usage when browsing bigger graphs. Even for
normal graphs, CPU usage is typically cut in half.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/Makefile b/Makefile
index 063823d..ddf257f 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@
 		lib/num2str.c lib/ieee754.c $(wildcard crc/*.c) engines/cpu.c \
 		engines/mmap.c engines/sync.c engines/null.c engines/net.c \
 		memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \
-		cconv.c
+		cconv.c lib/prio_tree.c
 
 ifeq ($(UNAME), Linux)
   SOURCE += diskutil.c fifo.c blktrace.c helpers.c cgroup.c trim.c \
diff --git a/graph.c b/graph.c
index d8f1ba4..91ddc89 100644
--- a/graph.c
+++ b/graph.c
@@ -31,14 +31,24 @@
 
 #include "tickmarks.h"
 #include "graph.h"
+#include "flist.h"
+#include "lib/prio_tree.h"
+#include "gettime.h"
+struct thread_data;
+#include "time.h"
+
+/*
+ * Allowable difference to show tooltip
+ */
+#define TOOLTIP_DELTA	1.02
 
 struct xyvalue {
 	double x, y;
-	int gx, gy;
 };
 
 struct graph_value {
 	struct graph_value *next;
+	struct prio_tree_node node;
 	char *tooltip;
 	void *value;
 };
@@ -48,12 +58,18 @@
 	struct graph_value *tail;
 	struct graph_value *values;
 	struct graph_label *next;
+	struct prio_tree_root prio_tree;
 	double r, g, b;
 	int value_count;
 	unsigned int tooltip_count;
 	struct graph *parent;
 };
 
+struct tick_value {
+	unsigned int offset;
+	double value;
+};
+
 struct graph {
 	char *title;
 	char *xtitle;
@@ -71,6 +87,13 @@
 	double right_extra;	
 	double top_extra;	
 	double bottom_extra;	
+
+	double xtick_zero;
+	double xtick_delta;
+	double xtick_zero_val;
+	double ytick_zero;
+	double ytick_delta;
+	double ytick_zero_val;
 };
 
 void graph_set_size(struct graph *g, unsigned int xdim, unsigned int ydim)
@@ -342,6 +365,15 @@
 	for (i = 0; i < nticks; i++) {
 		tx = (((tm[i].value) - minx) / (maxx - minx)) * (x2 - x1) + x1;
 
+		/*
+		 * Update tick delta
+		 */
+		if (!i) {
+			g->xtick_zero = tx;
+			g->xtick_zero_val = tm[0].value;
+		} else if (i == 1)
+			g->xtick_delta = (tm[1].value - tm[0].value) / (tx - g->xtick_zero);
+
 		/* really tx < yx || tx > x2, but protect against rounding */
 		if (x1 - tx > 0.01 || tx - x2 > 0.01)
 			continue;
@@ -367,7 +399,6 @@
 		/* draw tickmark label */
 		draw_centered_text(g, cr, tx, y2 * 1.04, 12.0, tm[i].string);
 		cairo_stroke(cr);
-		
 	}
 }
 
@@ -395,6 +426,15 @@
 	for (i = 0; i < nticks; i++) {
 		ty = y2 - (((tm[i].value) - miny) / (maxy - miny)) * (y2 - y1);
 
+		/*
+		 * Update tick delta
+		 */
+		if (!i) {
+			g->ytick_zero = ty;
+			g->ytick_zero_val = tm[0].value;
+		} else if (i == 1)
+			g->ytick_delta = (tm[1].value - tm[0].value) / (ty - g->ytick_zero);
+
 		/* really ty < y1 || ty > y2, but protect against rounding */
 		if (y1 - ty > 0.01 || ty - y2 > 0.01)
 			continue;
@@ -574,10 +614,9 @@
 		first = 1;
 		if (i->r < 0) /* invisible data */
 			continue;
+
 		cairo_set_source_rgb(cr, i->r, i->g, i->b);
 		for (j = i->values; j; j = j->next) {
-			struct xyvalue *xy = j->value;
-
 			tx = ((getx(j) - gminx) / (gmaxx - gminx)) * (x2 - x1) + x1;
 			ty = y2 - ((gety(j) - gminy) / (gmaxy - gminy)) * (y2 - y1);
 			if (first) {
@@ -586,8 +625,6 @@
 			} else {
 				cairo_line_to(cr, tx, ty);
 			}
-			xy->gx = tx;
-			xy->gy = ty;
 		}
 		cairo_stroke(cr);
 	}
@@ -597,15 +634,9 @@
 
 }
 
-static void gfree(void *f)
-{
-	if (f)
-		free(f);
-}
-
 static void setstring(char **str, const char *value)
 {
-	gfree(*str);
+	free(*str);
 	*str = strdup(value);
 }
 
@@ -651,6 +682,7 @@
 	else
 		bg->tail->next = i;
 	bg->tail = i;
+	INIT_PRIO_TREE_ROOT(&i->prio_tree);
 }
 
 static void graph_label_add_value(struct graph_label *i, void *value,
@@ -672,8 +704,32 @@
 	}
 	i->tail = x;
 	i->value_count++;
+
+	if (x->tooltip) {
+		double yval = gety(x);
+		double miny = yval / TOOLTIP_DELTA;
+		double maxy = yval * TOOLTIP_DELTA;
+
+		/* the tree is keyed on the Y range, truncated to integers */
+		x->node.start = miny;
+		x->node.last = maxy;
+		if (x->node.last == x->node.start)
+			x->node.last++;
+
+		/*
+		 * prio_tree_insert() returns the node already in the tree
+		 * when this exact range is indexed, and does not link ours.
+		 * Such a value must never reach prio_tree_remove(), so drop
+		 * its tooltip: the trim code only unlinks values having one.
+		 */
+		if (prio_tree_insert(&i->prio_tree, &x->node) != &x->node) {
+			free(x->tooltip);
+			x->tooltip = NULL;
+		}
+	}
+
 	if (x->tooltip)
 		i->tooltip_count++;
 
 	if (i->parent->per_label_limit != -1 &&
 		i->value_count > i->parent->per_label_limit) {
@@ -693,6 +738,7 @@
 			i->values = i->values->next;
 			if (x->tooltip) {
 				free(x->tooltip);
+				prio_tree_remove(&i->prio_tree, &x->node);
 				i->tooltip_count--;
 			}
 			free(x->value);
@@ -741,8 +787,8 @@
 
 	for (i = values; i; i = next) {
 		next = i->next;
-		gfree(i->value);
-		gfree(i);
+		free(i->value);
+		free(i);
 	}	
 }
 
@@ -753,7 +799,7 @@
 	for (i = labels; i; i = next) {
 		next = i->next;
 		graph_free_values(i->values);
-		gfree(i);
+		free(i);
 	}	
 }
 
@@ -777,7 +823,7 @@
 		if (g > 1.0)
 			g = 1.0;
 		if (b > 1.0)
-			b =1.0;
+			b = 1.0;
 	}
 
 	for (i = gr->labels; i; i = i->next)
@@ -791,9 +837,9 @@
 
 void graph_free(struct graph *bg)
 {
-	gfree(bg->title);
-	gfree(bg->xtitle);
-	gfree(bg->ytitle);
+	free(bg->title);
+	free(bg->xtitle);
+	free(bg->ytitle);
 	graph_free_labels(bg->labels);
 }
 
@@ -846,41 +892,66 @@
 	return (x >= first_x && x <= last_x) && (y >= first_y && y <= last_y);
 }
 
-/*
- * Allowable difference to show tooltip
- */
-#define TOOLTIP_XDIFF	10
-#define TOOLTIP_YDIFF	10
-
-static int xy_match(struct xyvalue *xy, int x, int y)
+const char *graph_find_tooltip(struct graph *g, int ix, int iy)
 {
-	int xdiff = abs(xy->gx - x);
-	int ydiff = abs(xy->gy - y);
-
-	return xdiff <= TOOLTIP_XDIFF && ydiff <= TOOLTIP_YDIFF;
-}
-
-const char *graph_find_tooltip(struct graph *g, int x, int y)
-{
+	double x = ix, y = iy;
+	struct prio_tree_iter iter;
+	struct prio_tree_node *n;
 	struct graph_label *i;
-	struct graph_value *j;
+	struct graph_value *best = NULL;
+	double best_delta;
+	double maxx, minx;
 
-	for (i = g->labels; i; i = i->next) {
-		for (j = i->values; j; j = j->next) {
-			struct xyvalue *xy = j->value;
-			int graphx = x - g->xoffset;
+	x -= g->xoffset;
+	y -= g->yoffset;
+
+	x = g->xtick_zero_val + ((x - g->xtick_zero) * g->xtick_delta);
+	y = g->ytick_zero_val + ((y - g->ytick_zero) * g->ytick_delta);
+
+	maxx = x * TOOLTIP_DELTA;
+	minx = x / TOOLTIP_DELTA;
+	best_delta = UINT_MAX;
+	/* for, not do/while: never dereference an empty (NULL) label list */
+	for (i = g->labels; i; i = i->next) {
+		prio_tree_iter_init(&iter, &i->prio_tree, y, y);
+
+		n = prio_tree_next(&iter);
+		if (!n)
+			continue;
+
+		do {
+			struct graph_value *v;
+			double xval, xdiff;
+
+			v = container_of(n, struct graph_value, node);
+			xval = getx(v);
+
+			if (xval > x)
+				xdiff = xval - x;
+			else
+				xdiff = x - xval;
 
 			/*
-			 * Return match if close enough. Take advantage
-			 * of the X axis being monotonically increasing,
-			 * so we can break out if we exceed it.
+			 * zero delta, or within our match criteria, break
 			 */
-			if (xy_match(xy, graphx, y))
-				return j->tooltip;
-			else if (xy->gx - graphx > TOOLTIP_XDIFF)
-				break;
-		}
-	}
+			if (xdiff < best_delta) {
+				best_delta = xdiff;
+				if (!best_delta ||
+				    (xval >= minx && xval <= maxx)) {
+					best = v;
+					break;
+				}
+			}
+		} while ((n = prio_tree_next(&iter)) != NULL);
+
+		/*
+		 * If we got matches in one label, don't check others.
+		 */
+		break;
+	}
+
+	if (best)
+		return best->tooltip;
 
 	return NULL;
 }
diff --git a/lib/prio_tree.c b/lib/prio_tree.c
new file mode 100644
index 0000000..b0e935c
--- /dev/null
+++ b/lib/prio_tree.c
@@ -0,0 +1,465 @@
+/*
+ * lib/prio_tree.c - priority search tree
+ *
+ * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
+ *
+ * This file is released under the GPL v2.
+ *
+ * Based on the radix priority search tree proposed by Edward M. McCreight
+ * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
+ *
+ * 02Feb2004	Initial version
+ */
+
+#include <stdlib.h>
+#include <limits.h>
+#include "../fio.h"
+#include "prio_tree.h"
+
+/*
+ * A clever mix of heap and radix trees forms a radix priority search tree (PST)
+ * which is useful for storing intervals, e.g, we can consider a vma as a closed
+ * interval of file pages [offset_begin, offset_end], and store all vmas that
+ * map a file in a PST. Then, using the PST, we can answer a stabbing query,
+ * i.e., selecting a set of stored intervals (vmas) that overlap with (map) a
+ * given input interval X (a set of consecutive file pages), in "O(log n + m)"
+ * time where 'log n' is the height of the PST, and 'm' is the number of stored
+ * intervals (vmas) that overlap (map) with the input interval X (the set of
+ * consecutive file pages).
+ *
+ * In our implementation, we store closed intervals of the form [radix_index,
+ * heap_index]. We assume that always radix_index <= heap_index. McCreight's PST
+ * is designed for storing intervals with unique radix indices, i.e., each
+ * interval has a different radix_index. However, this limitation can be easily
+ * overcome by using the size, i.e., heap_index - radix_index, as part of the
+ * index, so we index the tree using [(radix_index,size), heap_index].
+ *
+ * When the above-mentioned indexing scheme is used, theoretically, in a 32 bit
+ * machine, the maximum height of a PST can be 64. We can use a balanced version
+ * of the priority search tree to optimize the tree height, but the balanced
+ * tree proposed by McCreight is too complex and memory-hungry for our purpose.
+ */
+
+static void get_index(const struct prio_tree_node *node,
+		      unsigned long *radix, unsigned long *heap)
+{
+	*radix = node->start;
+	*heap = node->last;
+}
+
+static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
+
+void fio_init prio_tree_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
+		index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
+	index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL;
+}
+
+/*
+ * Maximum heap_index that can be stored in a PST with index_bits bits
+ */
+static inline unsigned long prio_tree_maxindex(unsigned int bits)
+{
+	return index_bits_to_maxindex[bits - 1];
+}
+
+/*
+ * Extend a priority search tree so that it can store a node with heap_index
+ * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
+ * However, this function is used rarely and the common case performance is
+ * not bad.
+ */
+static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
+		struct prio_tree_node *node, unsigned long max_heap_index)
+{
+	struct prio_tree_node *first = NULL, *prev, *last = NULL;
+
+	if (max_heap_index > prio_tree_maxindex(root->index_bits))
+		root->index_bits++;
+
+	while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
+		root->index_bits++;
+
+		if (prio_tree_empty(root))
+			continue;
+
+		if (first == NULL) {
+			first = root->prio_tree_node;
+			prio_tree_remove(root, root->prio_tree_node);
+			INIT_PRIO_TREE_NODE(first);
+			last = first;
+		} else {
+			prev = last;
+			last = root->prio_tree_node;
+			prio_tree_remove(root, root->prio_tree_node);
+			INIT_PRIO_TREE_NODE(last);
+			prev->left = last;
+			last->parent = prev;
+		}
+	}
+
+	INIT_PRIO_TREE_NODE(node);
+
+	if (first) {
+		node->left = first;
+		first->parent = node;
+	} else
+		last = node;
+
+	if (!prio_tree_empty(root)) {
+		last->left = root->prio_tree_node;
+		last->left->parent = last;
+	}
+
+	root->prio_tree_node = node;
+	return node;
+}
+
+/*
+ * Replace a prio_tree_node with a new node and return the old node
+ */
+struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
+		struct prio_tree_node *old, struct prio_tree_node *node)
+{
+	INIT_PRIO_TREE_NODE(node);
+
+	if (prio_tree_root(old)) {
+		assert(root->prio_tree_node == old);
+		/*
+		 * We can reduce root->index_bits here. However, it is complex
+		 * and does not help much to improve performance (IMO).
+		 */
+		node->parent = node;
+		root->prio_tree_node = node;
+	} else {
+		node->parent = old->parent;
+		if (old->parent->left == old)
+			old->parent->left = node;
+		else
+			old->parent->right = node;
+	}
+
+	if (!prio_tree_left_empty(old)) {
+		node->left = old->left;
+		old->left->parent = node;
+	}
+
+	if (!prio_tree_right_empty(old)) {
+		node->right = old->right;
+		old->right->parent = node;
+	}
+
+	return old;
+}
+
+/*
+ * Insert a prio_tree_node @node into a radix priority search tree @root. The
+ * algorithm typically takes O(log n) time where 'log n' is the number of bits
+ * required to represent the maximum heap_index. In the worst case, the algo
+ * can take O((log n)^2) - check prio_tree_expand.
+ *
+ * If a prior node with same radix_index and heap_index is already found in
+ * the tree, then returns the address of the prior node. Otherwise, inserts
+ * @node into the tree and returns @node.
+ */
+struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
+		struct prio_tree_node *node)
+{
+	struct prio_tree_node *cur, *res = node;
+	unsigned long radix_index, heap_index;
+	unsigned long r_index, h_index, index, mask;
+	int size_flag = 0;
+
+	get_index(node, &radix_index, &heap_index);
+
+	if (prio_tree_empty(root) ||
+			heap_index > prio_tree_maxindex(root->index_bits))
+		return prio_tree_expand(root, node, heap_index);
+
+	cur = root->prio_tree_node;
+	mask = 1UL << (root->index_bits - 1);
+
+	while (mask) {
+		get_index(cur, &r_index, &h_index);
+
+		if (r_index == radix_index && h_index == heap_index)
+			return cur;
+
+                if (h_index < heap_index ||
+		    (h_index == heap_index && r_index > radix_index)) {
+			struct prio_tree_node *tmp = node;
+			node = prio_tree_replace(root, cur, node);
+			cur = tmp;
+			/* swap indices */
+			index = r_index;
+			r_index = radix_index;
+			radix_index = index;
+			index = h_index;
+			h_index = heap_index;
+			heap_index = index;
+		}
+
+		if (size_flag)
+			index = heap_index - radix_index;
+		else
+			index = radix_index;
+
+		if (index & mask) {
+			if (prio_tree_right_empty(cur)) {
+				INIT_PRIO_TREE_NODE(node);
+				cur->right = node;
+				node->parent = cur;
+				return res;
+			} else
+				cur = cur->right;
+		} else {
+			if (prio_tree_left_empty(cur)) {
+				INIT_PRIO_TREE_NODE(node);
+				cur->left = node;
+				node->parent = cur;
+				return res;
+			} else
+				cur = cur->left;
+		}
+
+		mask >>= 1;
+
+		if (!mask) {
+			mask = 1UL << (BITS_PER_LONG - 1);
+			size_flag = 1;
+		}
+	}
+	/* Should not reach here */
+	assert(0);
+	return NULL;
+}
+
+/*
+ * Remove a prio_tree_node @node from a radix priority search tree @root. The
+ * algorithm takes O(log n) time where 'log n' is the number of bits required
+ * to represent the maximum heap_index.
+ */
+void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node)
+{
+	struct prio_tree_node *cur;
+	unsigned long r_index, h_index_right, h_index_left;
+
+	cur = node;
+
+	while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) {
+		if (!prio_tree_left_empty(cur))
+			get_index(cur->left, &r_index, &h_index_left);
+		else {
+			cur = cur->right;
+			continue;
+		}
+
+		if (!prio_tree_right_empty(cur))
+			get_index(cur->right, &r_index, &h_index_right);
+		else {
+			cur = cur->left;
+			continue;
+		}
+
+		/* both h_index_left and h_index_right cannot be 0 */
+		if (h_index_left >= h_index_right)
+			cur = cur->left;
+		else
+			cur = cur->right;
+	}
+
+	if (prio_tree_root(cur)) {
+		assert(root->prio_tree_node == cur);
+		INIT_PRIO_TREE_ROOT(root);
+		return;
+	}
+
+	if (cur->parent->right == cur)
+		cur->parent->right = cur->parent;
+	else
+		cur->parent->left = cur->parent;
+
+	while (cur != node)
+		cur = prio_tree_replace(root, cur->parent, cur);
+}
+
+/*
+ * Following functions help to enumerate all prio_tree_nodes in the tree that
+ * overlap with the input interval X [radix_index, heap_index]. The enumeration
+ * takes O(log n + m) time where 'log n' is the height of the tree (which is
+ * proportional to # of bits required to represent the maximum heap_index) and
+ * 'm' is the number of prio_tree_nodes that overlap the interval X.
+ */
+
+static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter,
+		unsigned long *r_index, unsigned long *h_index)
+{
+	if (prio_tree_left_empty(iter->cur))
+		return NULL;
+
+	get_index(iter->cur->left, r_index, h_index);
+
+	if (iter->r_index <= *h_index) {
+		iter->cur = iter->cur->left;
+		iter->mask >>= 1;
+		if (iter->mask) {
+			if (iter->size_level)
+				iter->size_level++;
+		} else {
+			if (iter->size_level) {
+				assert(prio_tree_left_empty(iter->cur));
+				assert(prio_tree_right_empty(iter->cur));
+				iter->size_level++;
+				iter->mask = ULONG_MAX;
+			} else {
+				iter->size_level = 1;
+				iter->mask = 1UL << (BITS_PER_LONG - 1);
+			}
+		}
+		return iter->cur;
+	}
+
+	return NULL;
+}
+
+static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter,
+		unsigned long *r_index, unsigned long *h_index)
+{
+	unsigned long value;
+
+	if (prio_tree_right_empty(iter->cur))
+		return NULL;
+
+	if (iter->size_level)
+		value = iter->value;
+	else
+		value = iter->value | iter->mask;
+
+	if (iter->h_index < value)
+		return NULL;
+
+	get_index(iter->cur->right, r_index, h_index);
+
+	if (iter->r_index <= *h_index) {
+		iter->cur = iter->cur->right;
+		iter->mask >>= 1;
+		iter->value = value;
+		if (iter->mask) {
+			if (iter->size_level)
+				iter->size_level++;
+		} else {
+			if (iter->size_level) {
+				assert(prio_tree_left_empty(iter->cur));
+				assert(prio_tree_right_empty(iter->cur));
+				iter->size_level++;
+				iter->mask = ULONG_MAX;
+			} else {
+				iter->size_level = 1;
+				iter->mask = 1UL << (BITS_PER_LONG - 1);
+			}
+		}
+		return iter->cur;
+	}
+
+	return NULL;
+}
+
+static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter)
+{
+	iter->cur = iter->cur->parent;
+	if (iter->mask == ULONG_MAX)
+		iter->mask = 1UL;
+	else if (iter->size_level == 1)
+		iter->mask = 1UL;
+	else
+		iter->mask <<= 1;
+	if (iter->size_level)
+		iter->size_level--;
+	if (!iter->size_level && (iter->value & iter->mask))
+		iter->value ^= iter->mask;
+	return iter->cur;
+}
+
+static inline int overlap(struct prio_tree_iter *iter,
+		unsigned long r_index, unsigned long h_index)
+{
+	return iter->h_index >= r_index && iter->r_index <= h_index;
+}
+
+/*
+ * prio_tree_first:
+ *
+ * Get the first prio_tree_node that overlaps with the interval [radix_index,
+ * heap_index]. Note that always radix_index <= heap_index. We do a pre-order
+ * traversal of the tree.
+ */
+static struct prio_tree_node *prio_tree_first(struct prio_tree_iter *iter)
+{
+	struct prio_tree_root *root;
+	unsigned long r_index, h_index;
+
+	INIT_PRIO_TREE_ITER(iter);
+
+	root = iter->root;
+	if (prio_tree_empty(root))
+		return NULL;
+
+	get_index(root->prio_tree_node, &r_index, &h_index);
+
+	if (iter->r_index > h_index)
+		return NULL;
+
+	iter->mask = 1UL << (root->index_bits - 1);
+	iter->cur = root->prio_tree_node;
+
+	while (1) {
+		if (overlap(iter, r_index, h_index))
+			return iter->cur;
+
+		if (prio_tree_left(iter, &r_index, &h_index))
+			continue;
+
+		if (prio_tree_right(iter, &r_index, &h_index))
+			continue;
+
+		break;
+	}
+	return NULL;
+}
+
+/*
+ * prio_tree_next:
+ *
+ * Get the next prio_tree_node that overlaps with the input interval in iter
+ */
+struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter)
+{
+	unsigned long r_index, h_index;
+
+	if (iter->cur == NULL)
+		return prio_tree_first(iter);
+
+repeat:
+	while (prio_tree_left(iter, &r_index, &h_index))
+		if (overlap(iter, r_index, h_index))
+			return iter->cur;
+
+	while (!prio_tree_right(iter, &r_index, &h_index)) {
+	    	while (!prio_tree_root(iter->cur) &&
+				iter->cur->parent->right == iter->cur)
+			prio_tree_parent(iter);
+
+		if (prio_tree_root(iter->cur))
+			return NULL;
+
+		prio_tree_parent(iter);
+	}
+
+	if (overlap(iter, r_index, h_index))
+		return iter->cur;
+
+	goto repeat;
+}
diff --git a/lib/prio_tree.h b/lib/prio_tree.h
new file mode 100644
index 0000000..e1491db
--- /dev/null
+++ b/lib/prio_tree.h
@@ -0,0 +1,90 @@
+#ifndef _LINUX_PRIO_TREE_H
+#define _LINUX_PRIO_TREE_H
+
+#include <inttypes.h>
+#include "../hash.h"
+
+struct prio_tree_node {
+	struct prio_tree_node	*left;
+	struct prio_tree_node	*right;
+	struct prio_tree_node	*parent;
+	uint64_t		start;
+	uint64_t		last;	/* last location _in_ interval */
+};
+
+struct prio_tree_root {
+	struct prio_tree_node	*prio_tree_node;
+	unsigned short 		index_bits;
+};
+
+struct prio_tree_iter {
+	struct prio_tree_node	*cur;
+	unsigned long		mask;
+	unsigned long		value;
+	int			size_level;
+
+	struct prio_tree_root	*root;
+	uint64_t		r_index;
+	uint64_t		h_index;
+};
+
+static inline void prio_tree_iter_init(struct prio_tree_iter *iter,
+		struct prio_tree_root *root, uint64_t r_index, uint64_t h_index)
+{
+	iter->root = root;
+	iter->r_index = r_index;
+	iter->h_index = h_index;
+	iter->cur = NULL;
+}
+
+#define INIT_PRIO_TREE_ROOT(ptr)	\
+do {					\
+	(ptr)->prio_tree_node = NULL;	\
+	(ptr)->index_bits = 1;		\
+} while (0)
+
+#define INIT_PRIO_TREE_NODE(ptr)				\
+do {								\
+	(ptr)->left = (ptr)->right = (ptr)->parent = (ptr);	\
+} while (0)
+
+#define INIT_PRIO_TREE_ITER(ptr)	\
+do {					\
+	(ptr)->cur = NULL;		\
+	(ptr)->mask = 0UL;		\
+	(ptr)->value = 0UL;		\
+	(ptr)->size_level = 0;		\
+} while (0)
+
+#define prio_tree_entry(ptr, type, member) \
+       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+static inline int prio_tree_empty(const struct prio_tree_root *root)
+{
+	return root->prio_tree_node == NULL;
+}
+
+static inline int prio_tree_root(const struct prio_tree_node *node)
+{
+	return node->parent == node;
+}
+
+static inline int prio_tree_left_empty(const struct prio_tree_node *node)
+{
+	return node->left == node;
+}
+
+static inline int prio_tree_right_empty(const struct prio_tree_node *node)
+{
+	return node->right == node;
+}
+
+
+struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
+                struct prio_tree_node *old, struct prio_tree_node *node);
+struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
+                struct prio_tree_node *node);
+void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node);
+struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter);
+
+#endif /* _LINUX_PRIO_TREE_H */