Btrfs: Add chunk uuids and update multi-device back references
Block headers now store the chunk tree uuid
Chunk items record the device uuid for each stripe
Device extent items record better back refs to the chunk tree
Block groups record better back refs to the chunk tree
The chunk tree format has also changed. The objectid of BTRFS_CHUNK_ITEM_KEY
used to be the logical offset of the chunk. Now it is a chunk tree id,
with the logical offset being stored in the offset field of the key.
This allows a single chunk tree to record multiple logical address spaces,
upping the number of bytes indexed by a chunk tree from 2^64 to
2^128.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f81519f..23ebd95 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -180,7 +180,7 @@
list_for_each(cur, head) {
device = list_entry(cur, struct btrfs_device, dev_list);
bdev = open_bdev_excl(device->name, flags, holder);
-printk("opening %s devid %Lu\n", device->name, device->devid);
+
if (IS_ERR(bdev)) {
printk("open %s failed\n", device->name);
ret = PTR_ERR(bdev);
@@ -190,7 +190,6 @@
fs_devices->latest_bdev = bdev;
if (device->devid == fs_devices->lowest_devid) {
fs_devices->lowest_bdev = bdev;
-printk("lowest bdev %s\n", device->name);
}
device->bdev = bdev;
}
@@ -372,7 +371,9 @@
int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device,
- u64 owner, u64 num_bytes, u64 *start)
+ u64 chunk_tree, u64 chunk_objectid,
+ u64 chunk_offset,
+ u64 num_bytes, u64 *start)
{
int ret;
struct btrfs_path *path;
@@ -400,7 +401,14 @@
leaf = path->nodes[0];
extent = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_dev_extent);
- btrfs_set_dev_extent_owner(leaf, extent, owner);
+ btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
+ btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
+ btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
+
+ write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
+ (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
+ BTRFS_UUID_SIZE);
+
btrfs_set_dev_extent_length(leaf, extent, num_bytes);
btrfs_mark_buffer_dirty(leaf);
err:
@@ -408,17 +416,18 @@
return ret;
}
-static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
+static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
{
struct btrfs_path *path;
int ret;
struct btrfs_key key;
+ struct btrfs_chunk *chunk;
struct btrfs_key found_key;
path = btrfs_alloc_path();
BUG_ON(!path);
- key.objectid = (u64)-1;
+ key.objectid = objectid;
key.offset = (u64)-1;
key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -430,11 +439,18 @@
ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
if (ret) {
- *objectid = 0;
+ *offset = 0;
} else {
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
- *objectid = found_key.objectid + found_key.offset;
+ if (found_key.objectid != objectid)
+ *offset = 0;
+ else {
+ chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_chunk);
+ *offset = found_key.offset +
+ btrfs_chunk_length(path->nodes[0], chunk);
+ }
}
ret = 0;
error:
@@ -520,9 +536,12 @@
btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
+ btrfs_set_device_group(leaf, dev_item, 0);
+ btrfs_set_device_seek_speed(leaf, dev_item, 0);
+ btrfs_set_device_bandwidth(leaf, dev_item, 0);
ptr = (unsigned long)btrfs_device_uuid(dev_item);
- write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
+ write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf);
ret = 0;
@@ -674,7 +693,10 @@
return -ENOSPC;
}
- ret = find_next_chunk(chunk_root, &key.objectid);
+ key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+ key.type = BTRFS_CHUNK_ITEM_KEY;
+ ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+ &key.offset);
if (ret)
return ret;
@@ -696,8 +718,9 @@
*num_bytes = calc_size * num_stripes;
index = 0;
-printk("new chunk type %Lu start %Lu size %Lu\n", type, key.objectid, *num_bytes);
+printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes);
while(index < num_stripes) {
+ struct btrfs_stripe *stripe;
BUG_ON(list_empty(&private_devs));
cur = private_devs.next;
device = list_entry(cur, struct btrfs_device, dev_list);
@@ -708,26 +731,28 @@
list_move_tail(&device->dev_list, dev_list);
ret = btrfs_alloc_dev_extent(trans, device,
- key.objectid,
- calc_size, &dev_offset);
+ info->chunk_root->root_key.objectid,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
+ calc_size, &dev_offset);
BUG_ON(ret);
-printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, calc_size, device->devid, type);
+printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, calc_size, device->devid, type);
device->bytes_used += calc_size;
ret = btrfs_update_device(trans, device);
BUG_ON(ret);
map->stripes[index].dev = device;
map->stripes[index].physical = dev_offset;
- btrfs_set_stack_stripe_devid(stripes + index, device->devid);
- btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
+ stripe = stripes + index;
+ btrfs_set_stack_stripe_devid(stripe, device->devid);
+ btrfs_set_stack_stripe_offset(stripe, dev_offset);
+ memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
physical = dev_offset;
index++;
}
BUG_ON(!list_empty(&private_devs));
- /* key.objectid was set above */
- key.offset = *num_bytes;
- key.type = BTRFS_CHUNK_ITEM_KEY;
+ /* key was set above */
+ btrfs_set_stack_chunk_length(chunk, *num_bytes);
btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
btrfs_set_stack_chunk_type(chunk, type);
@@ -745,14 +770,14 @@
ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
btrfs_chunk_item_size(num_stripes));
BUG_ON(ret);
- *start = key.objectid;
+ *start = key.offset;;
em = alloc_extent_map(GFP_NOFS);
if (!em)
return -ENOMEM;
em->bdev = (struct block_device *)map;
- em->start = key.objectid;
- em->len = key.offset;
+ em->start = key.offset;
+ em->len = *num_bytes;
em->block_start = 0;
kfree(chunk);
@@ -1056,8 +1081,8 @@
int ret;
int i;
- logical = key->objectid;
- length = key->offset;
+ logical = key->offset;
+ length = btrfs_chunk_length(leaf, chunk);
spin_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
spin_unlock(&map_tree->map_tree.lock);
@@ -1131,7 +1156,7 @@
device->sector_size = btrfs_device_sector_size(leaf, dev_item);
ptr = (unsigned long)btrfs_device_uuid(dev_item);
- read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
+ read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
return 0;
}
@@ -1143,7 +1168,6 @@
struct btrfs_device *device;
u64 devid;
int ret;
-
devid = btrfs_device_id(leaf, dev_item);
device = btrfs_find_device(root, devid);
if (!device) {