Btrfs: create a logical->physical block number mapping scheme

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index f31ac84..e2fc6f0 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -66,21 +66,21 @@
 	if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
 		WARN_ON(1);
 	memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
-	btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr);
+	btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
 	btrfs_set_header_generation(&cow_node->header, trans->transid);
 	btrfs_inc_ref(trans, root, buf);
 	if (buf == root->node) {
 		root->node = cow;
 		get_bh(cow);
 		if (buf != root->commit_root) {
-			btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
+			btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
 		}
 		btrfs_block_release(root, buf);
 	} else {
 		btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
-					cow->b_blocknr);
+					bh_blocknr(cow));
 		btrfs_mark_buffer_dirty(parent);
-		btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
+		btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
 	}
 	btrfs_block_release(root, buf);
 	mark_buffer_dirty(cow);
@@ -312,7 +312,7 @@
 	 */
 	if (!parent_buf) {
 		struct buffer_head *child;
-		u64 blocknr = mid_buf->b_blocknr;
+		u64 blocknr = bh_blocknr(mid_buf);
 
 		if (btrfs_header_nritems(&mid->header) != 1)
 			return 0;
@@ -361,7 +361,7 @@
 		if (wret < 0)
 			ret = wret;
 		if (btrfs_header_nritems(&right->header) == 0) {
-			u64 blocknr = right_buf->b_blocknr;
+			u64 blocknr = bh_blocknr(right_buf);
 			clean_tree_block(trans, root, right_buf);
 			wait_on_buffer(right_buf);
 			btrfs_block_release(root, right_buf);
@@ -400,7 +400,7 @@
 	}
 	if (btrfs_header_nritems(&mid->header) == 0) {
 		/* we've managed to empty the middle node, drop it */
-		u64 blocknr = mid_buf->b_blocknr;
+		u64 blocknr = bh_blocknr(mid_buf);
 		clean_tree_block(trans, root, mid_buf);
 		wait_on_buffer(mid_buf);
 		btrfs_block_release(root, mid_buf);
@@ -686,7 +686,7 @@
 	memset(c, 0, root->blocksize);
 	btrfs_set_header_nritems(&c->header, 1);
 	btrfs_set_header_level(&c->header, level);
-	btrfs_set_header_blocknr(&c->header, t->b_blocknr);
+	btrfs_set_header_blocknr(&c->header, bh_blocknr(t));
 	btrfs_set_header_generation(&c->header, trans->transid);
 	lower = btrfs_buffer_node(path->nodes[level-1]);
 	memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
@@ -697,7 +697,7 @@
 		lower_key = &lower->ptrs[0].key;
 	btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key,
 		     sizeof(struct btrfs_disk_key));
-	btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr);
+	btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1]));
 
 	btrfs_mark_buffer_dirty(t);
 
@@ -780,7 +780,7 @@
 	split = btrfs_buffer_node(split_buffer);
 	btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
 	btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
-	btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr);
+	btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer));
 	btrfs_set_header_generation(&split->header, trans->transid);
 	memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
 	       sizeof(split->header.fsid));
@@ -794,7 +794,7 @@
 	btrfs_mark_buffer_dirty(t);
 	btrfs_mark_buffer_dirty(split_buffer);
 	wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
-			  split_buffer->b_blocknr, path->slots[level + 1] + 1,
+			  bh_blocknr(split_buffer), path->slots[level + 1] + 1,
 			  level + 1);
 	if (wret)
 		ret = wret;
@@ -1138,7 +1138,7 @@
 	BUG_ON(!right_buffer);
 	right = btrfs_buffer_leaf(right_buffer);
 	memset(&right->header, 0, sizeof(right->header));
-	btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr);
+	btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
 	btrfs_set_header_generation(&right->header, trans->transid);
 	btrfs_set_header_level(&right->header, 0);
 	memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
@@ -1152,7 +1152,7 @@
 				btrfs_set_header_nritems(&right->header, 0);
 				wret = insert_ptr(trans, root, path,
 						  &disk_key,
-						  right_buffer->b_blocknr,
+						  bh_blocknr(right_buffer),
 						  path->slots[1] + 1, 1);
 				if (wret)
 					ret = wret;
@@ -1173,7 +1173,7 @@
 				btrfs_set_header_nritems(&right->header, 0);
 				wret = insert_ptr(trans, root, path,
 						  &disk_key,
-						  right_buffer->b_blocknr,
+						  bh_blocknr(right_buffer),
 						  path->slots[1] - 1, 1);
 				if (wret)
 					ret = wret;
@@ -1207,7 +1207,7 @@
 	btrfs_set_header_nritems(&l->header, mid);
 	ret = 0;
 	wret = insert_ptr(trans, root, path, &right->items[0].key,
-			  right_buffer->b_blocknr, path->slots[1] + 1, 1);
+			  bh_blocknr(right_buffer), path->slots[1] + 1, 1);
 	if (wret)
 		ret = wret;
 	btrfs_mark_buffer_dirty(right_buffer);
@@ -1228,7 +1228,7 @@
 	BUG_ON(!right_buffer);
 	right = btrfs_buffer_leaf(right_buffer);
 	memset(&right->header, 0, sizeof(right->header));
-	btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr);
+	btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
 	btrfs_set_header_generation(&right->header, trans->transid);
 	btrfs_set_header_level(&right->header, 0);
 	memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
@@ -1237,7 +1237,7 @@
 	btrfs_set_header_nritems(&right->header, 0);
 	wret = insert_ptr(trans, root, path,
 			  &disk_key,
-			  right_buffer->b_blocknr,
+			  bh_blocknr(right_buffer),
 			  path->slots[1], 1);
 	if (wret)
 		ret = wret;
@@ -1456,7 +1456,7 @@
 			if (wret)
 				ret = wret;
 			wret = btrfs_free_extent(trans, root,
-						 leaf_buf->b_blocknr, 1, 1);
+						 bh_blocknr(leaf_buf), 1, 1);
 			if (wret)
 				ret = wret;
 		}
@@ -1487,7 +1487,7 @@
 					ret = wret;
 			}
 			if (btrfs_header_nritems(&leaf->header) == 0) {
-				u64 blocknr = leaf_buf->b_blocknr;
+				u64 blocknr = bh_blocknr(leaf_buf);
 				clean_tree_block(trans, root, leaf_buf);
 				wait_on_buffer(leaf_buf);
 				wret = del_ptr(trans, root, path, 1, slot);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 01310de..454eb88 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -242,6 +242,7 @@
 	struct radix_tree_root fs_roots_radix;
 	struct radix_tree_root pending_del_radix;
 	struct radix_tree_root pinned_radix;
+	struct radix_tree_root dev_radix;
 	u64 generation;
 	struct btrfs_transaction *running_transaction;
 	struct btrfs_super_block *disk_super;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e092332..c872a7e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -10,10 +10,30 @@
 #include "transaction.h"
 #include "btrfs_inode.h"
 
+struct dev_lookup {		/* one logical block range and the device backing it */
+	u64 block_start;	/* first logical block covered by this range */
+	u64 num_blocks;		/* number of blocks in the range */
+	struct block_device *bdev;	/* device assigned to buffers mapped in this range */
+};
+
+/* Logical block number of bh, computed from its page index and in-page offset. */
+u64 bh_blocknr(struct buffer_head *bh)
+{
+	int blkbits = bh->b_page->mapping->host->i_blkbits;
+	u64 blocknr = (u64)bh->b_page->index << (PAGE_CACHE_SHIFT - blkbits);
+	unsigned long offset;	/* byte offset of the buffer inside its page */
+
+	if (PageHighMem(bh->b_page))
+		offset = (unsigned long)bh->b_data;	/* b_data is used as the offset itself */
+	else
+		offset = bh->b_data - (char *)page_address(bh->b_page);
+	return blocknr + (offset >> blkbits);	/* bytes -> blocks */
+}
+
 static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
 {
 	struct btrfs_node *node = btrfs_buffer_node(buf);
-	if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) {
+	if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) {
 		BUG();
 	}
 	return 0;
@@ -40,7 +60,7 @@
 	head = page_buffers(page);
 	bh = head;
 	do {
-		if (buffer_mapped(bh) && bh->b_blocknr == blocknr) {
+		if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) {
 			ret = bh;
 			get_bh(bh);
 			goto out_unlock;
@@ -56,6 +76,33 @@
 	return ret;
 }
 
+/*
+ * Map bh to the device range holding 'logical': sets b_bdev, a device-relative
+ * b_blocknr and the mapped bit.  Returns 0, or -ENOENT if no range covers it.
+ */
+static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh,
+			     u64 logical)
+{
+	struct dev_lookup *lookup[2];
+	char b[BDEVNAME_SIZE];
+	int ret;
+
+	root = root->fs_info->dev_root;
+	/* keys are exclusive range ends; only the first hit is examined */
+	ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, (void **)lookup,
+				     (unsigned long)logical, ARRAY_SIZE(lookup));
+	if (ret == 0 || lookup[0]->block_start > logical ||
+	    lookup[0]->block_start + lookup[0]->num_blocks <= logical)
+		return -ENOENT;
+	bh->b_bdev = lookup[0]->bdev;
+	bh->b_blocknr = logical - lookup[0]->block_start;
+	printk(KERN_DEBUG "logical mapping %Lu to %Lu bdev  %s\n",
+	       (unsigned long long)logical, (unsigned long long)bh->b_blocknr,
+	       bdevname(bh->b_bdev, b));
+	set_buffer_mapped(bh);
+	return 0;
+}
+
 struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root,
 						 u64 blocknr)
 {
@@ -66,6 +113,7 @@
 	struct buffer_head *bh;
 	struct buffer_head *head;
 	struct buffer_head *ret = NULL;
+	int err;
 	u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits);
 
 	page = grab_cache_page(mapping, index);
@@ -78,11 +126,10 @@
 	bh = head;
 	do {
 		if (!buffer_mapped(bh)) {
-			bh->b_bdev = root->fs_info->sb->s_bdev;
-			bh->b_blocknr = first_block;
-			set_buffer_mapped(bh);
+			err = map_bh_to_logical(root, bh, first_block);
+			BUG_ON(err);
 		}
-		if (bh->b_blocknr == blocknr) {
+		if (bh_blocknr(bh) == blocknr) {
 			ret = bh;
 			get_bh(bh);
 			goto out_unlock;
@@ -98,38 +145,13 @@
 	return ret;
 }
 
-static sector_t max_block(struct block_device *bdev)
-{
-	sector_t retval = ~((sector_t)0);
-	loff_t sz = i_size_read(bdev->bd_inode);
-
-	if (sz) {
-		unsigned int size = block_size(bdev);
-		unsigned int sizebits = blksize_bits(size);
-		retval = (sz >> sizebits);
-	}
-	return retval;
-}
-
 static int btree_get_block(struct inode *inode, sector_t iblock,
 			   struct buffer_head *bh, int create)
 {
-	if (iblock >= max_block(inode->i_sb->s_bdev)) {
-		if (create)
-			return -EIO;
-
-		/*
-		 * for reads, we're just trying to fill a partial page.
-		 * return a hole, they will have to call get_block again
-		 * before they can fill it, and they will get -EIO at that
-		 * time
-		 */
-		return 0;
-	}
-	bh->b_bdev = inode->i_sb->s_bdev;
-	bh->b_blocknr = iblock;
-	set_buffer_mapped(bh);
-	return 0;
+	int err;	/* result of map_bh_to_logical(), returned unchanged */
+	struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root;
+	err = map_bh_to_logical(root, bh, iblock);	/* iblock is passed as the logical block */
+	return err;
 }
 
 int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
@@ -164,8 +186,8 @@
 		return ret;
 	if (verify) {
 		if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
-			printk("checksum verify failed on %lu\n",
-			       bh->b_blocknr);
+			printk("checksum verify failed on %Lu\n",
+			       bh_blocknr(bh));
 			return 1;
 		}
 	} else {
@@ -386,10 +408,12 @@
 						GFP_NOFS);
 	int ret;
 	struct btrfs_super_block *disk_super;
+	struct dev_lookup *dev_lookup;
 
 	init_bit_radix(&fs_info->pinned_radix);
 	init_bit_radix(&fs_info->pending_del_radix);
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
+	INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS);
 	sb_set_blocksize(sb, 4096);
 	fs_info->running_transaction = NULL;
 	fs_info->tree_root = tree_root;
@@ -422,6 +446,13 @@
 
 	__setup_root(sb->s_blocksize, tree_root,
 		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
+
+	dev_lookup = kmalloc(sizeof(*dev_lookup), GFP_NOFS);
+	dev_lookup->block_start = 0;
+	dev_lookup->num_blocks = (u32)-2;
+	dev_lookup->bdev = sb->s_bdev;
+	ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup);
+	BUG_ON(ret);
 	fs_info->sb_buffer = read_tree_block(tree_root,
 					     BTRFS_SUPER_INFO_OFFSET /
 					     sb->s_blocksize);
@@ -432,6 +463,14 @@
 	if (!btrfs_super_root(disk_super))
 		return NULL;
 
+	radix_tree_delete(&fs_info->dev_radix, (u32)-2);
+	dev_lookup->block_start = btrfs_super_device_block_start(disk_super);
+	dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super);
+	ret = radix_tree_insert(&fs_info->dev_radix,
+				dev_lookup->block_start +
+				dev_lookup->num_blocks, dev_lookup);
+	BUG_ON(ret);
+
 	fs_info->disk_super = disk_super;
 	dev_root->node = read_tree_block(tree_root,
 					  btrfs_super_device_root(disk_super));
@@ -459,7 +498,7 @@
 	struct buffer_head *bh = root->fs_info->sb_buffer;
 
 	btrfs_set_super_root(root->fs_info->disk_super,
-			     root->fs_info->tree_root->node->b_blocknr);
+			     bh_blocknr(root->fs_info->tree_root->node));
 	lock_buffer(bh);
 	WARN_ON(atomic_read(&bh->b_count) < 1);
 	clear_buffer_dirty(bh);
@@ -506,6 +545,29 @@
 	}
 	return 0;
 }
+/* Empty dev_radix: close every bdev except the superblock's, free each entry. */
+static int free_dev_radix(struct btrfs_fs_info *fs_info)
+{
+	struct radix_tree_root *radix = &fs_info->dev_radix;
+	struct block_device *sb_bdev = fs_info->sb->s_bdev;
+	struct dev_lookup *batch[8];
+	struct dev_lookup *dev;
+	int found;
+	int i;
+
+	/* always restart at index 0: each pass deletes everything it found */
+	while ((found = radix_tree_gang_lookup(radix, (void **)batch, 0,
+					       ARRAY_SIZE(batch))) != 0) {
+		for (i = 0; i < found; i++) {
+			dev = batch[i];
+			if (dev->bdev != sb_bdev)
+				close_bdev_excl(dev->bdev);
+			radix_tree_delete(radix, dev->block_start + dev->num_blocks);
+			kfree(dev);
+		}
+	}
+	return 0;
+}
 
 int close_ctree(struct btrfs_root *root)
 {
@@ -537,6 +599,8 @@
 	crypto_free_hash(fs_info->hash_tfm);
 	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
 	iput(fs_info->btree_inode);
+
+	free_dev_radix(fs_info);
 	del_fs_roots(fs_info);
 	kfree(fs_info->extent_root);
 	kfree(fs_info->tree_root);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 0ef6e6f..46cc5dc 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -41,4 +41,5 @@
 		    char *result);
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
 				      struct btrfs_key *location);
+u64 bh_blocknr(struct buffer_head *bh);
 #endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index be9630d..d560831 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -80,7 +80,7 @@
 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root)
 {
-	return inc_block_ref(trans, root, root->node->b_blocknr, 1);
+	return inc_block_ref(trans, root, bh_blocknr(root->node), 1);
 }
 
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -563,7 +563,7 @@
 
 	WARN_ON(*level < 0);
 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
-	ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr,
+	ret = lookup_block_ref(trans, root, bh_blocknr(path->nodes[*level]),
 			       1, &refs);
 	BUG_ON(ret);
 	if (refs > 1)
@@ -607,7 +607,7 @@
 	WARN_ON(*level < 0);
 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
 	ret = btrfs_free_extent(trans, root,
-				path->nodes[*level]->b_blocknr, 1, 1);
+				bh_blocknr(path->nodes[*level]), 1, 1);
 	btrfs_block_release(root, path->nodes[*level]);
 	path->nodes[*level] = NULL;
 	*level += 1;
@@ -635,7 +635,7 @@
 			return 0;
 		} else {
 			ret = btrfs_free_extent(trans, root,
-						path->nodes[*level]->b_blocknr,
+						bh_blocknr(path->nodes[*level]),
 						1, 1);
 			BUG_ON(ret);
 			btrfs_block_release(root, path->nodes[*level]);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2972aaa..4ff0cc1 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1700,7 +1700,7 @@
 	leaf = btrfs_buffer_leaf(subvol);
 	btrfs_set_header_nritems(&leaf->header, 0);
 	btrfs_set_header_level(&leaf->header, 0);
-	btrfs_set_header_blocknr(&leaf->header, subvol->b_blocknr);
+	btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
 	btrfs_set_header_generation(&leaf->header, trans->transid);
 	memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
 	       sizeof(leaf->header.fsid));
@@ -1713,7 +1713,7 @@
 	btrfs_set_inode_nblocks(inode_item, 1);
 	btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
 
-	btrfs_set_root_blocknr(&root_item, subvol->b_blocknr);
+	btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
 	btrfs_set_root_refs(&root_item, 1);
 
 	mark_buffer_dirty(subvol);
@@ -1803,7 +1803,7 @@
 	key.offset = 1;
 	key.flags = 0;
 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
-	btrfs_set_root_blocknr(&new_root_item, root->node->b_blocknr);
+	btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
 
 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
 				&new_root_item);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e15a072..3a15943 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -119,10 +119,10 @@
 
 	while(1) {
 		old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
-		if (old_extent_block == extent_root->node->b_blocknr)
+		if (old_extent_block == bh_blocknr(extent_root->node))
 			break;
 		btrfs_set_root_blocknr(&extent_root->root_item,
-				       extent_root->node->b_blocknr);
+				       bh_blocknr(extent_root->node));
 		ret = btrfs_update_root(trans, tree_root,
 					&extent_root->root_key,
 					&extent_root->root_item);
@@ -176,7 +176,7 @@
 				     (unsigned long)root->root_key.objectid,
 				     BTRFS_ROOT_TRANS_TAG);
 			if (root->commit_root == root->node) {
-				WARN_ON(root->node->b_blocknr !=
+				WARN_ON(bh_blocknr(root->node) !=
 					btrfs_root_blocknr(&root->root_item));
 				brelse(root->commit_root);
 				root->commit_root = NULL;
@@ -191,7 +191,7 @@
 			dirty->root = root;
 			root->root_key.offset = root->fs_info->generation;
 			btrfs_set_root_blocknr(&root->root_item,
-					       root->node->b_blocknr);
+					       bh_blocknr(root->node));
 			err = btrfs_insert_root(trans, root->fs_info->tree_root,
 						&root->root_key,
 						&root->root_item);