Btrfs: create a logical->physical block number mapping scheme
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index f31ac84..e2fc6f0 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -66,21 +66,21 @@
if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
WARN_ON(1);
memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
- btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr);
+ btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
btrfs_set_header_generation(&cow_node->header, trans->transid);
btrfs_inc_ref(trans, root, buf);
if (buf == root->node) {
root->node = cow;
get_bh(cow);
if (buf != root->commit_root) {
- btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
+ btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
}
btrfs_block_release(root, buf);
} else {
btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
- cow->b_blocknr);
+ bh_blocknr(cow));
btrfs_mark_buffer_dirty(parent);
- btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
+ btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
}
btrfs_block_release(root, buf);
mark_buffer_dirty(cow);
@@ -312,7 +312,7 @@
*/
if (!parent_buf) {
struct buffer_head *child;
- u64 blocknr = mid_buf->b_blocknr;
+ u64 blocknr = bh_blocknr(mid_buf);
if (btrfs_header_nritems(&mid->header) != 1)
return 0;
@@ -361,7 +361,7 @@
if (wret < 0)
ret = wret;
if (btrfs_header_nritems(&right->header) == 0) {
- u64 blocknr = right_buf->b_blocknr;
+ u64 blocknr = bh_blocknr(right_buf);
clean_tree_block(trans, root, right_buf);
wait_on_buffer(right_buf);
btrfs_block_release(root, right_buf);
@@ -400,7 +400,7 @@
}
if (btrfs_header_nritems(&mid->header) == 0) {
/* we've managed to empty the middle node, drop it */
- u64 blocknr = mid_buf->b_blocknr;
+ u64 blocknr = bh_blocknr(mid_buf);
clean_tree_block(trans, root, mid_buf);
wait_on_buffer(mid_buf);
btrfs_block_release(root, mid_buf);
@@ -686,7 +686,7 @@
memset(c, 0, root->blocksize);
btrfs_set_header_nritems(&c->header, 1);
btrfs_set_header_level(&c->header, level);
- btrfs_set_header_blocknr(&c->header, t->b_blocknr);
+ btrfs_set_header_blocknr(&c->header, bh_blocknr(t));
btrfs_set_header_generation(&c->header, trans->transid);
lower = btrfs_buffer_node(path->nodes[level-1]);
memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
@@ -697,7 +697,7 @@
lower_key = &lower->ptrs[0].key;
btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key,
sizeof(struct btrfs_disk_key));
- btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr);
+ btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1]));
btrfs_mark_buffer_dirty(t);
@@ -780,7 +780,7 @@
split = btrfs_buffer_node(split_buffer);
btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
- btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr);
+ btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer));
btrfs_set_header_generation(&split->header, trans->transid);
memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
sizeof(split->header.fsid));
@@ -794,7 +794,7 @@
btrfs_mark_buffer_dirty(t);
btrfs_mark_buffer_dirty(split_buffer);
wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
- split_buffer->b_blocknr, path->slots[level + 1] + 1,
+ bh_blocknr(split_buffer), path->slots[level + 1] + 1,
level + 1);
if (wret)
ret = wret;
@@ -1138,7 +1138,7 @@
BUG_ON(!right_buffer);
right = btrfs_buffer_leaf(right_buffer);
memset(&right->header, 0, sizeof(right->header));
- btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr);
+ btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
btrfs_set_header_generation(&right->header, trans->transid);
btrfs_set_header_level(&right->header, 0);
memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
@@ -1152,7 +1152,7 @@
btrfs_set_header_nritems(&right->header, 0);
wret = insert_ptr(trans, root, path,
&disk_key,
- right_buffer->b_blocknr,
+ bh_blocknr(right_buffer),
path->slots[1] + 1, 1);
if (wret)
ret = wret;
@@ -1173,7 +1173,7 @@
btrfs_set_header_nritems(&right->header, 0);
wret = insert_ptr(trans, root, path,
&disk_key,
- right_buffer->b_blocknr,
+ bh_blocknr(right_buffer),
path->slots[1] - 1, 1);
if (wret)
ret = wret;
@@ -1207,7 +1207,7 @@
btrfs_set_header_nritems(&l->header, mid);
ret = 0;
wret = insert_ptr(trans, root, path, &right->items[0].key,
- right_buffer->b_blocknr, path->slots[1] + 1, 1);
+ bh_blocknr(right_buffer), path->slots[1] + 1, 1);
if (wret)
ret = wret;
btrfs_mark_buffer_dirty(right_buffer);
@@ -1228,7 +1228,7 @@
BUG_ON(!right_buffer);
right = btrfs_buffer_leaf(right_buffer);
memset(&right->header, 0, sizeof(right->header));
- btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr);
+ btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
btrfs_set_header_generation(&right->header, trans->transid);
btrfs_set_header_level(&right->header, 0);
memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
@@ -1237,7 +1237,7 @@
btrfs_set_header_nritems(&right->header, 0);
wret = insert_ptr(trans, root, path,
&disk_key,
- right_buffer->b_blocknr,
+ bh_blocknr(right_buffer),
path->slots[1], 1);
if (wret)
ret = wret;
@@ -1456,7 +1456,7 @@
if (wret)
ret = wret;
wret = btrfs_free_extent(trans, root,
- leaf_buf->b_blocknr, 1, 1);
+ bh_blocknr(leaf_buf), 1, 1);
if (wret)
ret = wret;
}
@@ -1487,7 +1487,7 @@
ret = wret;
}
if (btrfs_header_nritems(&leaf->header) == 0) {
- u64 blocknr = leaf_buf->b_blocknr;
+ u64 blocknr = bh_blocknr(leaf_buf);
clean_tree_block(trans, root, leaf_buf);
wait_on_buffer(leaf_buf);
wret = del_ptr(trans, root, path, 1, slot);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 01310de..454eb88 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -242,6 +242,7 @@
struct radix_tree_root fs_roots_radix;
struct radix_tree_root pending_del_radix;
struct radix_tree_root pinned_radix;
+ struct radix_tree_root dev_radix;
u64 generation;
struct btrfs_transaction *running_transaction;
struct btrfs_super_block *disk_super;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e092332..c872a7e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -10,10 +10,30 @@
#include "transaction.h"
#include "btrfs_inode.h"
+struct dev_lookup { /* one contiguous range of logical blocks and the block device backing it */
+ u64 block_start; /* first logical block of the range */
+ u64 num_blocks; /* length in blocks; block_start + num_blocks is the exclusive end and the dev_radix key */
+ struct block_device *bdev; /* device that buffer heads inside the range are pointed at */
+};
+
+/*
+ * Return the logical block number that @bh caches.
+ *
+ * bh->b_blocknr holds the device-relative block filled in by
+ * map_bh_to_logical(), so the logical number is rebuilt from where the
+ * buffer sits in its mapping: the index of its page converted to
+ * blocks, plus the block offset of b_data inside that page.
+ */
+u64 bh_blocknr(struct buffer_head *bh)
+{
+	int blkbits = bh->b_page->mapping->host->i_blkbits;
+	/* widen before shifting so the page index cannot overflow a 32-bit long */
+	u64 blocknr = (u64)bh->b_page->index << (PAGE_CACHE_SHIFT - blkbits);
+	unsigned long offset;
+
+	/*
+	 * For a highmem page b_data is taken directly as the byte offset;
+	 * otherwise it is an address inside the page and the page's base
+	 * address is subtracted.
+	 */
+	if (PageHighMem(bh->b_page))
+		offset = (unsigned long)bh->b_data;
+	else
+		offset = bh->b_data - (char *)page_address(bh->b_page);
+	/* bytes -> blocks: shift by the block-size bits, not blocks-per-page bits */
+	blocknr += offset >> blkbits;
+	return blocknr;
+}
+
static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
{
struct btrfs_node *node = btrfs_buffer_node(buf);
- if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) {
+ if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) {
BUG();
}
return 0;
@@ -40,7 +60,7 @@
head = page_buffers(page);
bh = head;
do {
- if (buffer_mapped(bh) && bh->b_blocknr == blocknr) {
+ if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) {
ret = bh;
get_bh(bh);
goto out_unlock;
@@ -56,6 +76,33 @@
return ret;
}
+/*
+ * Point @bh at the device block that backs logical block @logical.
+ *
+ * fs_info->dev_radix holds struct dev_lookup entries keyed by the
+ * exclusive end of their range (block_start + num_blocks), so a gang
+ * lookup starting at @logical returns the candidates whose range may
+ * contain it.  On a match b_bdev and the device-relative b_blocknr are
+ * filled in and the buffer is marked mapped.
+ *
+ * Returns 0 on success, -ENOENT when no range covers @logical.
+ *
+ * NOTE(review): the radix index is an unsigned long, so @logical is
+ * truncated on 32-bit builds -- confirm that is acceptable.
+ */
+static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh,
+			     u64 logical)
+{
+	struct dev_lookup *lookup[2];
+	int found;
+	int i;
+
+	/* dev_radix lives in fs_info, so it is reached directly from @root */
+	found = radix_tree_gang_lookup(&root->fs_info->dev_radix,
+				       (void **)lookup,
+				       (unsigned long)logical,
+				       ARRAY_SIZE(lookup));
+	/*
+	 * An entry whose key equals @logical is returned first but does not
+	 * cover it (the key is an exclusive end), so every returned
+	 * candidate is checked rather than only the first.
+	 */
+	for (i = 0; i < found; i++) {
+		if (lookup[i]->block_start <= logical &&
+		    logical < lookup[i]->block_start + lookup[i]->num_blocks) {
+			bh->b_bdev = lookup[i]->bdev;
+			bh->b_blocknr = logical - lookup[i]->block_start;
+			set_buffer_mapped(bh);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 blocknr)
{
@@ -66,6 +113,7 @@
struct buffer_head *bh;
struct buffer_head *head;
struct buffer_head *ret = NULL;
+ int err;
u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits);
page = grab_cache_page(mapping, index);
@@ -78,11 +126,10 @@
bh = head;
do {
if (!buffer_mapped(bh)) {
- bh->b_bdev = root->fs_info->sb->s_bdev;
- bh->b_blocknr = first_block;
- set_buffer_mapped(bh);
+ err = map_bh_to_logical(root, bh, first_block);
+ BUG_ON(err);
}
- if (bh->b_blocknr == blocknr) {
+ if (bh_blocknr(bh) == blocknr) {
ret = bh;
get_bh(bh);
goto out_unlock;
@@ -98,38 +145,13 @@
return ret;
}
-static sector_t max_block(struct block_device *bdev)
-{
- sector_t retval = ~((sector_t)0);
- loff_t sz = i_size_read(bdev->bd_inode);
-
- if (sz) {
- unsigned int size = block_size(bdev);
- unsigned int sizebits = blksize_bits(size);
- retval = (sz >> sizebits);
- }
- return retval;
-}
-
static int btree_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- if (iblock >= max_block(inode->i_sb->s_bdev)) {
- if (create)
- return -EIO;
-
- /*
- * for reads, we're just trying to fill a partial page.
- * return a hole, they will have to call get_block again
- * before they can fill it, and they will get -EIO at that
- * time
- */
- return 0;
- }
- bh->b_bdev = inode->i_sb->s_bdev;
- bh->b_blocknr = iblock;
- set_buffer_mapped(bh);
- return 0;
+ int err;
+ struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; /* root is taken from the inode owning bh's page, not from the 'inode' argument */
+ err = map_bh_to_logical(root, bh, iblock); /* treats iblock as a logical block; fills b_bdev/b_blocknr and marks bh mapped, or fails with -ENOENT */
+ return err; /* 'create' is not consulted: reads and writes get the same result */
}
int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
@@ -164,8 +186,8 @@
return ret;
if (verify) {
if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
- printk("checksum verify failed on %lu\n",
- bh->b_blocknr);
+ printk("checksum verify failed on %Lu\n",
+ bh_blocknr(bh));
return 1;
}
} else {
@@ -386,10 +408,12 @@
GFP_NOFS);
int ret;
struct btrfs_super_block *disk_super;
+ struct dev_lookup *dev_lookup;
init_bit_radix(&fs_info->pinned_radix);
init_bit_radix(&fs_info->pending_del_radix);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
+ INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS);
sb_set_blocksize(sb, 4096);
fs_info->running_transaction = NULL;
fs_info->tree_root = tree_root;
@@ -422,6 +446,13 @@
__setup_root(sb->s_blocksize, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
+
+ dev_lookup = kmalloc(sizeof(*dev_lookup), GFP_NOFS);
+ dev_lookup->block_start = 0;
+ dev_lookup->num_blocks = (u32)-2;
+ dev_lookup->bdev = sb->s_bdev;
+ ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup);
+ BUG_ON(ret);
fs_info->sb_buffer = read_tree_block(tree_root,
BTRFS_SUPER_INFO_OFFSET /
sb->s_blocksize);
@@ -432,6 +463,14 @@
if (!btrfs_super_root(disk_super))
return NULL;
+ radix_tree_delete(&fs_info->dev_radix, (u32)-2);
+ dev_lookup->block_start = btrfs_super_device_block_start(disk_super);
+ dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super);
+ ret = radix_tree_insert(&fs_info->dev_radix,
+ dev_lookup->block_start +
+ dev_lookup->num_blocks, dev_lookup);
+ BUG_ON(ret);
+
fs_info->disk_super = disk_super;
dev_root->node = read_tree_block(tree_root,
btrfs_super_device_root(disk_super));
@@ -459,7 +498,7 @@
struct buffer_head *bh = root->fs_info->sb_buffer;
btrfs_set_super_root(root->fs_info->disk_super,
- root->fs_info->tree_root->node->b_blocknr);
+ bh_blocknr(root->fs_info->tree_root->node));
lock_buffer(bh);
WARN_ON(atomic_read(&bh->b_count) < 1);
clear_buffer_dirty(bh);
@@ -506,6 +545,29 @@
}
return 0;
}
+/*
+ * Empty fs_info->dev_radix: look entries up in batches starting from
+ * index 0, close every block device other than the superblock's own,
+ * remove the entry under its block_start + num_blocks key and free it.
+ * Always returns 0.
+ */
+static int free_dev_radix(struct btrfs_fs_info *fs_info)
+{
+	struct block_device *sb_bdev = fs_info->sb->s_bdev;
+	struct dev_lookup *batch[8];
+	struct dev_lookup *dev;
+	int found;
+	int n;
+
+	for (;;) {
+		found = radix_tree_gang_lookup(&fs_info->dev_radix,
+					       (void **)batch, 0,
+					       ARRAY_SIZE(batch));
+		if (found == 0)
+			return 0;
+		for (n = 0; n < found; n++) {
+			dev = batch[n];
+			/* the superblock's device is not closed here */
+			if (dev->bdev != sb_bdev)
+				close_bdev_excl(dev->bdev);
+			radix_tree_delete(&fs_info->dev_radix,
+					  dev->block_start + dev->num_blocks);
+			kfree(dev);
+		}
+	}
+}
int close_ctree(struct btrfs_root *root)
{
@@ -537,6 +599,8 @@
crypto_free_hash(fs_info->hash_tfm);
truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
iput(fs_info->btree_inode);
+
+ free_dev_radix(fs_info);
del_fs_roots(fs_info);
kfree(fs_info->extent_root);
kfree(fs_info->tree_root);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 0ef6e6f..46cc5dc 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -41,4 +41,5 @@
char *result);
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *location);
+u64 bh_blocknr(struct buffer_head *bh);
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index be9630d..d560831 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -80,7 +80,7 @@
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- return inc_block_ref(trans, root, root->node->b_blocknr, 1);
+ return inc_block_ref(trans, root, bh_blocknr(root->node), 1);
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -563,7 +563,7 @@
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
- ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr,
+ ret = lookup_block_ref(trans, root, bh_blocknr(path->nodes[*level]),
1, &refs);
BUG_ON(ret);
if (refs > 1)
@@ -607,7 +607,7 @@
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
ret = btrfs_free_extent(trans, root,
- path->nodes[*level]->b_blocknr, 1, 1);
+ bh_blocknr(path->nodes[*level]), 1, 1);
btrfs_block_release(root, path->nodes[*level]);
path->nodes[*level] = NULL;
*level += 1;
@@ -635,7 +635,7 @@
return 0;
} else {
ret = btrfs_free_extent(trans, root,
- path->nodes[*level]->b_blocknr,
+ bh_blocknr(path->nodes[*level]),
1, 1);
BUG_ON(ret);
btrfs_block_release(root, path->nodes[*level]);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2972aaa..4ff0cc1 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1700,7 +1700,7 @@
leaf = btrfs_buffer_leaf(subvol);
btrfs_set_header_nritems(&leaf->header, 0);
btrfs_set_header_level(&leaf->header, 0);
- btrfs_set_header_blocknr(&leaf->header, subvol->b_blocknr);
+ btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
btrfs_set_header_generation(&leaf->header, trans->transid);
memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
sizeof(leaf->header.fsid));
@@ -1713,7 +1713,7 @@
btrfs_set_inode_nblocks(inode_item, 1);
btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
- btrfs_set_root_blocknr(&root_item, subvol->b_blocknr);
+ btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
btrfs_set_root_refs(&root_item, 1);
mark_buffer_dirty(subvol);
@@ -1803,7 +1803,7 @@
key.offset = 1;
key.flags = 0;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
- btrfs_set_root_blocknr(&new_root_item, root->node->b_blocknr);
+ btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
&new_root_item);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e15a072..3a15943 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -119,10 +119,10 @@
while(1) {
old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
- if (old_extent_block == extent_root->node->b_blocknr)
+ if (old_extent_block == bh_blocknr(extent_root->node))
break;
btrfs_set_root_blocknr(&extent_root->root_item,
- extent_root->node->b_blocknr);
+ bh_blocknr(extent_root->node));
ret = btrfs_update_root(trans, tree_root,
&extent_root->root_key,
&extent_root->root_item);
@@ -176,7 +176,7 @@
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
if (root->commit_root == root->node) {
- WARN_ON(root->node->b_blocknr !=
+ WARN_ON(bh_blocknr(root->node) !=
btrfs_root_blocknr(&root->root_item));
brelse(root->commit_root);
root->commit_root = NULL;
@@ -191,7 +191,7 @@
dirty->root = root;
root->root_key.offset = root->fs_info->generation;
btrfs_set_root_blocknr(&root->root_item,
- root->node->b_blocknr);
+ bh_blocknr(root->node));
err = btrfs_insert_root(trans, root->fs_info->tree_root,
&root->root_key,
&root->root_item);