ceph: use rbtree for snap_realms

Switch from radix tree to rbtree for snap realms.  This is much more
appropriate given that realm keys are few and far between.

Signed-off-by: Sage Weil <sage@newdream.net>
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 81840d6..02834ce 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2097,9 +2097,8 @@
 {
 	struct ceph_mds_session *session = NULL;
 	struct ceph_msg *reply;
+	struct rb_node *p;
 	int err;
-	int got;
-	u64 next_snap_ino = 0;
 	struct ceph_pagelist *pagelist;
 
 	pr_info("reconnect to recovering mds%d\n", mds);
@@ -2155,14 +2154,10 @@
 	 * parent for all of our realms.  If the mds has any newer info,
 	 * it will tell us.
 	 */
-	next_snap_ino = 0;
-	while (1) {
-		struct ceph_snap_realm *realm;
+	for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) {
+		struct ceph_snap_realm *realm =
+			rb_entry(p, struct ceph_snap_realm, node);
 		struct ceph_mds_snaprealm_reconnect sr_rec;
-		got = radix_tree_gang_lookup(&mdsc->snap_realms,
-					     (void **)&realm, next_snap_ino, 1);
-		if (!got)
-			break;
 
 		dout(" adding snap realm %llx seq %lld parent %llx\n",
 		     realm->ino, realm->seq, realm->parent_ino);
@@ -2172,7 +2167,6 @@
 		err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec));
 		if (err)
 			goto fail;
-		next_snap_ino = realm->ino + 1;
 	}
 
 send:
@@ -2603,7 +2597,7 @@
 	mdsc->max_sessions = 0;
 	mdsc->stopping = 0;
 	init_rwsem(&mdsc->snap_rwsem);
-	INIT_RADIX_TREE(&mdsc->snap_realms, GFP_NOFS);
+	mdsc->snap_realms = RB_ROOT;
 	INIT_LIST_HEAD(&mdsc->snap_empty);
 	spin_lock_init(&mdsc->snap_empty_lock);
 	mdsc->last_tid = 0;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 98f09cd..9d6b9017 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -5,7 +5,6 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
-#include <linux/radix-tree.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 
@@ -246,7 +245,7 @@
 	 * should be destroyed.
 	 */
 	struct rw_semaphore     snap_rwsem;
-	struct radix_tree_root  snap_realms;
+	struct rb_root          snap_realms;
 	struct list_head        snap_empty;
 	spinlock_t              snap_empty_lock;  /* protect snap_empty */
 
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index dcf18d9..49d0c4c 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1,6 +1,5 @@
 #include "ceph_debug.h"
 
-#include <linux/radix-tree.h>
 #include <linux/sort.h>
 
 #include "super.h"
@@ -77,6 +76,28 @@
 	atomic_inc(&realm->nref);
 }
 
+static void __insert_snap_realm(struct rb_root *root,
+				struct ceph_snap_realm *new)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct ceph_snap_realm *r = NULL;
+
+	while (*p) {
+		parent = *p;
+		r = rb_entry(parent, struct ceph_snap_realm, node);
+		if (new->ino < r->ino)
+			p = &(*p)->rb_left;
+		else if (new->ino > r->ino)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, root);
+}
+
 /*
  * create and get the realm rooted at @ino and bump its ref count.
  *
@@ -92,8 +113,6 @@
 	if (!realm)
 		return ERR_PTR(-ENOMEM);
 
-	radix_tree_insert(&mdsc->snap_realms, ino, realm);
-
 	atomic_set(&realm->nref, 0);    /* tree does not take a ref */
 	realm->ino = ino;
 	INIT_LIST_HEAD(&realm->children);
@@ -101,24 +120,34 @@
 	INIT_LIST_HEAD(&realm->empty_item);
 	INIT_LIST_HEAD(&realm->inodes_with_caps);
 	spin_lock_init(&realm->inodes_with_caps_lock);
+	__insert_snap_realm(&mdsc->snap_realms, realm);
 	dout("create_snap_realm %llx %p\n", realm->ino, realm);
 	return realm;
 }
 
 /*
- * find and get (if found) the realm rooted at @ino and bump its ref count.
+ * lookup the realm rooted at @ino.
  *
  * caller must hold snap_rwsem for write.
  */
 struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
 					       u64 ino)
 {
-	struct ceph_snap_realm *realm;
+	struct rb_node *n = mdsc->snap_realms.rb_node;
+	struct ceph_snap_realm *r;
 
-	realm = radix_tree_lookup(&mdsc->snap_realms, ino);
-	if (realm)
-		dout("lookup_snap_realm %llx %p\n", realm->ino, realm);
-	return realm;
+	while (n) {
+		r = rb_entry(n, struct ceph_snap_realm, node);
+		if (ino < r->ino)
+			n = n->rb_left;
+		else if (ino > r->ino)
+			n = n->rb_right;
+		else {
+			dout("lookup_snap_realm %llx %p\n", r->ino, r);
+			return r;
+		}
+	}
+	return NULL;
 }
 
 static void __put_snap_realm(struct ceph_mds_client *mdsc,
@@ -132,7 +161,7 @@
 {
 	dout("__destroy_snap_realm %p %llx\n", realm, realm->ino);
 
-	radix_tree_delete(&mdsc->snap_realms, realm->ino);
+	rb_erase(&realm->node, &mdsc->snap_realms);
 
 	if (realm->parent) {
 		list_del_init(&realm->child_item);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b2adfcc..1f39287 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -656,6 +656,8 @@
 struct ceph_snap_realm {
 	u64 ino;
 	atomic_t nref;
+	struct rb_node node;
+
 	u64 created, seq;
 	u64 parent_ino;
 	u64 parent_since;   /* snapid when our current parent became so */