Btrfs: add a radix back bit tree

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index be7d74c..5346f70 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -3,7 +3,8 @@
 
 obj-m  := btrfs.o
 btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
-	   hash.o file-item.o inode-item.o inode-map.o disk-io.o transaction.o
+	   hash.o file-item.o inode-item.o inode-map.o disk-io.o \
+	   transaction.o bit-radix.o
 
 #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
 #	  root-tree.o dir-item.o hash.o file-item.o inode-item.o \
diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c
new file mode 100644
index 0000000..43807cc
--- /dev/null
+++ b/fs/btrfs/bit-radix.c
@@ -0,0 +1,107 @@
+#include <linux/module.h>
+#include "bit-radix.h"
+
+#define BIT_ARRAY_BYTES 256
+#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8)
+
+int set_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+{
+	unsigned long *bits;
+	unsigned long slot;
+	int bit_slot;
+	int ret;
+
+	slot = bit / BIT_RADIX_BITS_PER_ARRAY;
+	bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
+
+	bits = radix_tree_lookup(radix, slot);
+	if (!bits) {
+		bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOIO);
+		if (!bits)
+			return -ENOMEM;
+		memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long));
+		bits[0] = slot;
+		ret = radix_tree_insert(radix, slot, bits);
+		if (ret)
+			return ret;
+	}
+	set_bit(bit_slot, bits + 1);
+	return 0;
+}
+
+int test_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+{
+	unsigned long *bits;
+	unsigned long slot;
+	int bit_slot;
+
+	slot = bit / BIT_RADIX_BITS_PER_ARRAY;
+	bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
+
+	bits = radix_tree_lookup(radix, slot);
+	if (!bits)
+		return 0;
+	return test_bit(bit_slot, bits + 1);
+}
+
+int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+{
+	unsigned long *bits;
+	unsigned long slot;
+	int bit_slot;
+	int i;
+	int empty = 1;
+
+	slot = bit / BIT_RADIX_BITS_PER_ARRAY;
+	bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
+
+	bits = radix_tree_lookup(radix, slot);
+	if (!bits)
+		return 0;
+	clear_bit(bit_slot, bits + 1);
+
+	for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) {
+		if (bits[i]) {
+			empty = 0;
+			break;
+		}
+	}
+
+	if (empty) {
+		bits = radix_tree_delete(radix, slot);
+		BUG_ON(!bits);
+	}
+	return 0;
+}
+
+int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
+			 int nr)
+{
+	unsigned long *bits;
+	unsigned long *gang[4];
+	int found;
+	int ret;
+	int i;
+	int total_found = 0;
+
+	ret = radix_tree_gang_lookup(radix, (void *)&gang, 0, ARRAY_SIZE(gang));
+	for (i = 0; i < ret && nr > 0; i++) {
+		found = 0;
+		bits = gang[i];
+		while(nr > 0) {
+			found = find_next_bit(bits + 1,
+					      BIT_RADIX_BITS_PER_ARRAY,
+					      found);
+			if (found < BIT_RADIX_BITS_PER_ARRAY) {
+				*retbits = bits[0] *
+					BIT_RADIX_BITS_PER_ARRAY + found;
+				retbits++;
+				nr--;
+				total_found++;
+				found++;
+			} else
+				break;
+		}
+	}
+	return total_found;
+}
diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h
new file mode 100644
index 0000000..56aad4c
--- /dev/null
+++ b/fs/btrfs/bit-radix.h
@@ -0,0 +1,15 @@
+#ifndef __BIT_RADIX__
+#define __BIT_RADIX__
+#include <linux/radix-tree.h>
+
+int set_radix_bit(struct radix_tree_root *radix, unsigned long bit);
+int test_radix_bit(struct radix_tree_root *radix, unsigned long bit);
+int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit);
+int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
+			 int nr);
+
+static inline void init_bit_radix(struct radix_tree_root *radix)
+{
+	INIT_RADIX_TREE(radix, GFP_NOFS);
+}
+#endif
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 68cafae..0aa1052 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1,8 +1,8 @@
 #ifndef __BTRFS__
 #define __BTRFS__
 
-#include <linux/radix-tree.h>
 #include <linux/fs.h>
+#include "bit-radix.h"
 
 struct btrfs_trans_handle;
 struct btrfs_transaction;
@@ -222,6 +222,7 @@
 	struct btrfs_root *inode_root;
 	struct btrfs_key current_insert;
 	struct btrfs_key last_insert;
+	struct radix_tree_root pending_del_radix;
 	struct radix_tree_root pinned_radix;
 	u64 last_inode_alloc;
 	u64 last_inode_alloc_dirid;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e32ddff..758a62a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -104,7 +104,8 @@
 	/* FIXME: don't be stupid */
 	if (!btrfs_super_root(disk_super))
 		return NULL;
-	INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL);
+	init_bit_radix(&fs_info->pinned_radix);
+	init_bit_radix(&fs_info->pending_del_radix);
 	fs_info->running_transaction = NULL;
 	fs_info->fs_root = root;
 	fs_info->tree_root = tree_root;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 369b960..b141042 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1,5 +1,4 @@
 #include <linux/module.h>
-#include <linux/radix-tree.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "print-tree.h"
@@ -12,15 +11,6 @@
 				 btrfs_root *extent_root);
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
 			       btrfs_root *extent_root);
-/*
- * pending extents are blocks that we're trying to allocate in the extent
- * map while trying to grow the map because of other allocations.  To avoid
- * recursing, they are tagged in the radix tree and cleaned up after
- * other allocations are done.  The pending tag is also used in the same
- * manner for deletes.
- */
-#define CTREE_EXTENT_PENDING_DEL 0
-#define CTREE_EXTENT_PINNED 1
 
 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
 			 *root, u64 blocknr)
@@ -104,24 +94,21 @@
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
 			       btrfs_root *root)
 {
-	struct buffer_head *gang[8];
+	unsigned long gang[8];
 	u64 first = 0;
 	int ret;
 	int i;
+	struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
 
 	while(1) {
-		ret = radix_tree_gang_lookup_tag(&root->fs_info->pinned_radix,
-					     (void **)gang, 0,
-					     ARRAY_SIZE(gang),
-					     CTREE_EXTENT_PINNED);
+		ret = find_first_radix_bit(pinned_radix, gang,
+					   ARRAY_SIZE(gang));
 		if (!ret)
 			break;
 		if (!first)
-			first = gang[0]->b_blocknr;
+			first = gang[0];
 		for (i = 0; i < ret; i++) {
-			radix_tree_delete(&root->fs_info->pinned_radix,
-					  gang[i]->b_blocknr);
-			brelse(gang[i]);
+			clear_radix_bit(pinned_radix, gang[i]);
 		}
 	}
 	if (root->fs_info->last_insert.objectid > first)
@@ -161,29 +148,27 @@
 	return 0;
 }
 
-static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag)
+static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
 {
 	int err;
-	struct buffer_head *bh = sb_getblk(root->fs_info->sb, blocknr);
 	struct btrfs_header *header;
-	BUG_ON(!bh);
+	struct buffer_head *bh;
 
-	header = btrfs_buffer_header(bh);
-	if (btrfs_header_generation(header) ==
-	    root->fs_info->running_transaction->transid) {
-		return 0;
-	}
-
-	err = radix_tree_insert(&root->fs_info->pinned_radix,
-				blocknr, bh);
-	if (err && err != -EEXIST) {
-		BUG();
-		return err;
-	}
-	if (err == -EEXIST)
+	bh = sb_find_get_block(root->fs_info->sb, blocknr);
+	if (bh) {
+		header = btrfs_buffer_header(bh);
+		if (btrfs_header_generation(header) ==
+		    root->fs_info->running_transaction->transid) {
+			brelse(bh);
+			return 0;
+		}
 		brelse(bh);
-	radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr,
-			   tag);
+	}
+	if (pending)
+		err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
+	else
+		err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
+	BUG_ON(err);
 	return 0;
 }
 
@@ -225,8 +210,7 @@
 		u64 super_blocks_used;
 
 		if (pin) {
-			ret = pin_down_block(root, blocknr,
-					     CTREE_EXTENT_PINNED);
+			ret = pin_down_block(root, blocknr, 0);
 			BUG_ON(ret);
 		}
 
@@ -255,25 +239,26 @@
 	int ret;
 	int wret;
 	int err = 0;
-	struct buffer_head *gang[4];
+	unsigned long gang[4];
 	int i;
-	struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix;
+	struct radix_tree_root *pending_radix;
+	struct radix_tree_root *pinned_radix;
+
+	pending_radix = &extent_root->fs_info->pending_del_radix;
+	pinned_radix = &extent_root->fs_info->pinned_radix;
 
 	while(1) {
-		ret = radix_tree_gang_lookup_tag(
-					&extent_root->fs_info->pinned_radix,
-					(void **)gang, 0,
-					ARRAY_SIZE(gang),
-					CTREE_EXTENT_PENDING_DEL);
+		ret = find_first_radix_bit(pending_radix, gang,
+					   ARRAY_SIZE(gang));
 		if (!ret)
 			break;
 		for (i = 0; i < ret; i++) {
-			radix_tree_tag_set(radix, gang[i]->b_blocknr,
-					   CTREE_EXTENT_PINNED);
-			radix_tree_tag_clear(radix, gang[i]->b_blocknr,
-					     CTREE_EXTENT_PENDING_DEL);
+			wret = set_radix_bit(pinned_radix, gang[i]);
+			BUG_ON(wret);
+			wret = clear_radix_bit(pending_radix, gang[i]);
+			BUG_ON(wret);
 			wret = __free_extent(trans, extent_root,
-					     gang[i]->b_blocknr, 1, 0);
+					     gang[i], 1, 0);
 			if (wret)
 				err = wret;
 		}
@@ -294,7 +279,7 @@
 
 	if (root == extent_root) {
 		t = find_tree_block(root, blocknr);
-		pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL);
+		pin_down_block(root, blocknr, 1);
 		return 0;
 	}
 	ret = __free_extent(trans, root, blocknr, num_blocks, pin);
@@ -393,7 +378,7 @@
 	BUG_ON(ins->objectid < search_start);
 	for (test_block = ins->objectid;
 	     test_block < ins->objectid + total_needed; test_block++) {
-		if (radix_tree_lookup(&root->fs_info->pinned_radix,
+		if (test_radix_bit(&root->fs_info->pinned_radix,
 				      test_block)) {
 			search_start = test_block + 1;
 			goto check_failed;