Btrfs: progress on file_write
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index ec68999..6a56416 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1089,7 +1089,6 @@
extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
extent_start = extent_start >> inode->i_blkbits;
- extent_start += btrfs_file_extent_offset(item);
extent_end = extent_start + btrfs_file_extent_num_blocks(item);
if (iblock >= extent_start && iblock < extent_end) {
err = 0;
@@ -1103,6 +1102,7 @@
err = 0;
goto out;
}
+#if 0
ret = btrfs_alloc_file_extent(trans, root, objectid,
iblock << inode->i_blkbits,
1, extent_end, &blocknr);
@@ -1115,9 +1115,11 @@
map_bh(result, inode->i_sb, blocknr);
btrfs_map_bh_to_logical(root, result, blocknr);
+#endif
out:
btrfs_release_path(root, path);
btrfs_free_path(path);
+printk("mapping iblock %lu to %lu\n", iblock, result->b_blocknr);
if (trans)
btrfs_end_transaction(trans, root);
return err;
@@ -1273,8 +1275,244 @@
return err;
}
-static int prepare_pages(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+/*
+ * Drop the checksum items of @inode that cover the range [@start, @end).
+ *
+ * The range is walked one csum item at a time:
+ *  - an item whose key offset is below @start is truncated so that only the
+ *    checksums in front of @start remain; any other item is deleted;
+ *  - an item that runs past @end has its contents copied aside first, and
+ *    the checksums from @end onwards are re-inserted as a new item keyed at
+ *    @end.  The walk stops after that item.
+ * Offsets for which the csum lookup fails are skipped one block at a time.
+ *
+ * Returns 0 on success, or -ENOMEM when the path or the scratch copy of an
+ * item cannot be allocated.  Failures while modifying the tree trip BUG_ON().
+ */
+static int drop_csums(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root,
+		      struct inode *inode,
+		      u64 start, u64 end)
+{
+	struct btrfs_path *path;
+	struct btrfs_leaf *leaf;
+	struct btrfs_key key;
+	int slot;
+	struct btrfs_csum_item *item;
+	char *old_block = NULL;
+	u64 cur = start;
+	u64 found_end;
+	u64 num_csums;
+	u64 item_size;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	while (cur < end) {
+		item = btrfs_lookup_csum(trans, root, path,
+					 inode->i_ino, cur, 1);
+		if (IS_ERR(item)) {
+			/*
+			 * Nothing to drop at this offset.  Release whatever
+			 * the failed lookup may have left in the path before
+			 * it is reused, as the success branch does below.
+			 */
+			btrfs_release_path(root, path);
+			cur += root->blocksize;
+			continue;
+		}
+		leaf = btrfs_buffer_leaf(path->nodes[0]);
+		slot = path->slots[0];
+		btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
+		item_size = btrfs_item_size(leaf->items + slot);
+		num_csums = item_size / sizeof(struct btrfs_csum_item);
+		found_end = key.offset + (num_csums << inode->i_blkbits);
+		cur = found_end;
+
+		if (found_end > end) {
+			char *src;
+
+			/*
+			 * The item extends past the range: save its contents
+			 * now, the tail is re-inserted once the item has been
+			 * truncated or deleted.
+			 */
+			old_block = kmalloc(root->blocksize, GFP_NOFS);
+			if (!old_block) {
+				err = -ENOMEM;
+				break;
+			}
+			src = btrfs_item_ptr(leaf, slot, char);
+			memcpy(old_block, src, item_size);
+		}
+		if (key.offset < start) {
+			/* keep only the checksums in front of start */
+			u64 new_size = (start - key.offset) >>
+					inode->i_blkbits;
+			new_size *= sizeof(struct btrfs_csum_item);
+			ret = btrfs_truncate_item(trans, root, path, new_size);
+			BUG_ON(ret);
+		} else {
+			ret = btrfs_del_item(trans, root, path);
+			BUG_ON(ret);
+		}
+		btrfs_release_path(root, path);
+		if (found_end > end) {
+			char *dst;
+			int i;
+			int new_size;
+
+			/* re-create the checksums from end onwards */
+			num_csums = (found_end - end) >> inode->i_blkbits;
+			new_size = num_csums * sizeof(struct btrfs_csum_item);
+			key.offset = end;
+			ret = btrfs_insert_empty_item(trans, root, path,
+						      &key, new_size);
+			BUG_ON(ret);
+			dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
+					     path->slots[0], char);
+			/* the tail is the last new_size bytes of the old item */
+			memcpy(dst, old_block + item_size - new_size,
+			       new_size);
+			item = (struct btrfs_csum_item *)dst;
+			for (i = 0; i < num_csums; i++) {
+				btrfs_set_csum_extent_offset(item, end);
+				item++;
+			}
+			mark_buffer_dirty(path->nodes[0]);
+			kfree(old_block);
+			break;
+		}
+	}
+	btrfs_free_path(path);
+	return err;
+}
+/*
+ * drop_extents: remove the file extent items of @inode that fall inside
+ * [@start, @end), then drop the matching checksums with drop_csums().
+ *
+ * Starting from a lookup at @start, items are handled one at a time:
+ *  - the walk stops at the first key with offset >= @end or with a
+ *    different objectid; keys that are not BTRFS_EXTENT_DATA_KEY are skipped;
+ *  - an item that begins before @start is kept, with its block count cut
+ *    down so it ends at @start;
+ *  - any other item is deleted and its disk extent freed, after which the
+ *    lookup is restarted at the old end of that item;
+ *  - when @end lands inside an item, a new "bookend" item keyed at @end is
+ *    inserted that points at the same disk extent, with its file extent
+ *    offset advanced and its block count reduced by the blocks in front of
+ *    @end.
+ *
+ * Returns -ENOMEM if no path can be allocated, otherwise 0.  Tree update
+ * failures trip BUG_ON().
+ * NOTE(review): a nonzero result from btrfs_lookup_file_extent() is not
+ * returned to the caller; ret is overwritten by drop_csums() at "out".
+ */
+static int drop_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode,
+ u64 start, u64 end)
+{
+ int ret;
+ struct btrfs_key key;
+ struct btrfs_leaf *leaf;
+ int slot;
+ struct btrfs_file_extent_item *extent;
+ u64 extent_end; /* key.offset plus the length of the current extent */
+ int keep; /* set when the current item is shortened instead of deleted */
+ struct btrfs_file_extent_item old; /* copy of the item that end falls inside */
+ struct btrfs_path *path;
+ u64 search_start = start; /* where the next lookup begins */
+ int bookend; /* set when a bookend item has to be inserted at end */
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+search_again:
+printk("drop extent inode %lu start %Lu end %Lu\n", inode->i_ino, start, end);
+ ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+ search_start, -1);
+ if (ret != 0) { /* lookup did not return 0: go straight to the csum cleanup */
+printk("lookup failed\n");
+ goto out;
+ }
+ while(1) {
+ keep = 0;
+ bookend = 0;
+ leaf = btrfs_buffer_leaf(path->nodes[0]);
+ slot = path->slots[0];
+ btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
+
+printk("found key %Lu %Lu %u\n", key.objectid, key.offset, key.flags);
+
+ extent = btrfs_item_ptr(leaf, slot,
+ struct btrfs_file_extent_item);
+ extent_end = key.offset +
+ (btrfs_file_extent_num_blocks(extent) <<
+ inode->i_blkbits);
+printk("extent end is %Lu\n", extent_end);
+ if (key.offset >= end || key.objectid != inode->i_ino) { /* past the range or past this inode: done */
+ ret = 0;
+ goto out;
+ }
+ if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) /* not a file extent item: skip it */
+ goto next_leaf;
+
+ if (end < extent_end && end >= key.offset) { /* end lands inside this extent */
+ memcpy(&old, extent, sizeof(old)); /* saved: the bookend is built from this copy */
+ ret = btrfs_inc_extent_ref(trans, root, /* NOTE(review): presumably the reference held by the bookend item - confirm */
+ btrfs_file_extent_disk_blocknr(&old),
+ btrfs_file_extent_disk_num_blocks(&old));
+ BUG_ON(ret);
+ bookend = 1;
+ }
+
+ if (start > key.offset) { /* extent begins before the range: keep its head */
+ u64 new_num;
+ /* truncate existing extent */
+ keep = 1;
+ WARN_ON(start & (root->blocksize - 1));
+ new_num = (start - key.offset) >> inode->i_blkbits; /* blocks between the extent start and start */
+printk("truncating existing extent, was %Lu ", btrfs_file_extent_num_blocks(extent));
+ btrfs_set_file_extent_num_blocks(extent, new_num);
+printk("now %Lu\n", btrfs_file_extent_num_blocks(extent));
+
+ mark_buffer_dirty(path->nodes[0]);
+ }
+ if (!keep) { /* extent begins at or after start: delete the item and free its disk extent */
+ u64 disk_blocknr;
+ u64 disk_num_blocks;
+printk("del old\n");
+ disk_blocknr = btrfs_file_extent_disk_blocknr(extent); /* read before the item is deleted */
+ disk_num_blocks =
+ btrfs_file_extent_disk_num_blocks(extent);
+ search_start = key.offset + /* resume after the extent being removed */
+ (btrfs_file_extent_num_blocks(extent) <<
+ inode->i_blkbits);
+ ret = btrfs_del_item(trans, root, path);
+ BUG_ON(ret);
+ btrfs_release_path(root, path);
+
+ ret = btrfs_free_extent(trans, root, disk_blocknr,
+ disk_num_blocks, 0);
+
+ BUG_ON(ret);
+ if (!bookend && search_start >= end) { /* the removed extent reached end: nothing left to drop */
+ ret = 0;
+ goto out;
+ }
+ if (!bookend) /* the path was released above, so look up again from search_start */
+ goto search_again;
+ }
+ if (bookend) {
+ /* create bookend */
+ struct btrfs_key ins;
+printk("bookend! extent end %Lu\n", extent_end);
+ ins.objectid = inode->i_ino;
+ ins.offset = end; /* the bookend item is keyed at end */
+ ins.flags = 0;
+ btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
+
+ btrfs_release_path(root, path);
+ ret = drop_csums(trans, root, inode, start, end); /* csums are dropped here because this path leaves through out_nocsum */
+ BUG_ON(ret);
+ ret = btrfs_insert_empty_item(trans, root, path, &ins,
+ sizeof(*extent));
+ BUG_ON(ret);
+ extent = btrfs_item_ptr(
+ btrfs_buffer_leaf(path->nodes[0]),
+ path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_disk_blocknr(extent, /* same disk extent as the old item */
+ btrfs_file_extent_disk_blocknr(&old));
+ btrfs_set_file_extent_disk_num_blocks(extent,
+ btrfs_file_extent_disk_num_blocks(&old));
+
+ btrfs_set_file_extent_offset(extent, /* old offset advanced by the blocks in front of end */
+ btrfs_file_extent_offset(&old) +
+ ((end - key.offset) >> inode->i_blkbits));
+ WARN_ON(btrfs_file_extent_num_blocks(&old) <
+ (end - key.offset) >> inode->i_blkbits);
+ btrfs_set_file_extent_num_blocks(extent, /* old block count minus the blocks in front of end */
+ btrfs_file_extent_num_blocks(&old) -
+ ((end - key.offset) >> inode->i_blkbits));
+
+ btrfs_set_file_extent_generation(extent,
+ btrfs_file_extent_generation(&old));
+printk("new bookend at offset %Lu, file_extent_offset %Lu, file_extent_num_blocks %Lu\n", end, btrfs_file_extent_offset(extent), btrfs_file_extent_num_blocks(extent));
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+ ret = 0;
+ goto out_nocsum;
+ }
+next_leaf:
+ if (slot >= btrfs_header_nritems(&leaf->header) - 1) { /* last slot of this leaf: move to the next leaf */
+ ret = btrfs_next_leaf(root, path);
+ if (ret) { /* a nonzero btrfs_next_leaf() result ends the walk with ret = 0 */
+ ret = 0;
+ goto out;
+ }
+ } else {
+ path->slots[0]++;
+ }
+ }
+
+out:
+ ret = drop_csums(trans, root, inode, start, end); /* overwrites whatever ret held on entry to this label */
+ BUG_ON(ret);
+
+out_nocsum:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int prepare_pages(struct btrfs_root *root,
struct file *file,
struct page **pages,
size_t num_pages,
@@ -1289,7 +1527,6 @@
struct inode *inode = file->f_path.dentry->d_inode;
int offset;
int err = 0;
- int ret;
int this_write;
struct buffer_head *bh;
struct buffer_head *head;
@@ -1305,18 +1542,21 @@
}
offset = pos & (PAGE_CACHE_SIZE -1);
this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
- if (!PageUptodate(pages[i]) &&
- (pages[i]->index == first_index ||
- pages[i]->index == last_index) && pos < isize) {
+#if 0
+ if ((pages[i]->index == first_index ||
+ pages[i]->index == last_index) && pos < isize &&
+ !PageUptodate(pages[i])) {
ret = mpage_readpage(pages[i], btrfs_get_block);
BUG_ON(ret);
lock_page(pages[i]);
}
+#endif
create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
(1 << BH_Uptodate));
head = page_buffers(pages[i]);
bh = head;
do {
+printk("mapping page %lu to block %Lu\n", pages[i]->index, alloc_extent_start);
err = btrfs_map_bh_to_logical(root, bh,
alloc_extent_start);
BUG_ON(err);
@@ -1351,7 +1591,7 @@
int ret = 0;
struct inode *inode = file->f_path.dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page *pages[1];
+ struct page *pages[8];
unsigned long first_index;
unsigned long last_index;
u64 start_pos;
@@ -1359,6 +1599,7 @@
u64 alloc_extent_start;
u64 orig_extent_start;
struct btrfs_trans_handle *trans;
+ struct btrfs_key ins;
if (file->f_flags & O_DIRECT)
return -EINVAL;
@@ -1390,16 +1631,24 @@
trans = btrfs_start_transaction(root, 1);
if (!trans) {
err = -ENOMEM;
+ mutex_unlock(&root->fs_info->fs_mutex);
goto out_unlock;
}
- ret = btrfs_alloc_file_extent(trans, root, inode->i_ino,
- start_pos, num_blocks, 1,
- &alloc_extent_start);
- BUG_ON(ret);
-
+ if (start_pos < inode->i_size) {
+ ret = drop_extents(trans, root, inode,
+ start_pos,
+ (pos + count + root->blocksize -1) &
+ ~(root->blocksize - 1));
+ }
orig_extent_start = start_pos;
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_alloc_extent(trans, root, num_blocks, 1,
+ (u64)-1, &ins);
BUG_ON(ret);
+ ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
+ start_pos, ins.objectid, ins.offset);
+ BUG_ON(ret);
+ alloc_extent_start = ins.objectid;
+ ret = btrfs_end_transaction(trans, root);
mutex_unlock(&root->fs_info->fs_mutex);
while(count > 0) {
@@ -1407,16 +1656,21 @@
size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
- ret = prepare_pages(NULL, root, file, pages, num_pages,
+printk("num_pages is %lu\n", num_pages);
+
+ memset(pages, 0, sizeof(pages));
+ ret = prepare_pages(root, file, pages, num_pages,
pos, first_index, last_index,
write_bytes, alloc_extent_start);
BUG_ON(ret);
+
/* FIXME blocks != pagesize */
alloc_extent_start += num_pages;
ret = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, buf);
BUG_ON(ret);
+printk("2num_pages is %lu\n", num_pages);
ret = dirty_and_release_pages(NULL, root, file, pages,
num_pages, orig_extent_start,
pos, write_bytes);