Btrfs: working file_write, reorganized key flags

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5c160c2..4efcd1b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -893,7 +893,11 @@
 	}
 
 	left_nritems = btrfs_header_nritems(&left->header);
-	for (i = left_nritems - 1; i >= 0; i--) {
+	if (left_nritems == 0) {
+		btrfs_block_release(root, right_buf);
+		return 1;
+	}
+	for (i = left_nritems - 1; i >= 1; i--) {
 		item = left->items + i;
 		if (path->slots[0] == i)
 			push_space += data_size + sizeof(*item);
@@ -907,6 +911,8 @@
 		btrfs_block_release(root, right_buf);
 		return 1;
 	}
+	if (push_items == left_nritems)
+		WARN_ON(1);
 	right_nritems = btrfs_header_nritems(&right->header);
 	/* push left to right */
 	push_space = btrfs_item_end(left->items + left_nritems - push_items);
@@ -943,6 +949,7 @@
 
 	btrfs_mark_buffer_dirty(left_buf);
 	btrfs_mark_buffer_dirty(right_buf);
+
 	btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key,
 		&right->items[0].key, sizeof(struct btrfs_disk_key));
 	btrfs_mark_buffer_dirty(upper);
@@ -1004,7 +1011,12 @@
 		return 1;
 	}
 
-	for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
+	if (btrfs_header_nritems(&right->header) == 0) {
+		btrfs_block_release(root, t);
+		return 1;
+	}
+
+	for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) {
 		item = right->items + i;
 		if (path->slots[0] == i)
 			push_space += data_size + sizeof(*item);
@@ -1018,6 +1030,8 @@
 		btrfs_block_release(root, t);
 		return 1;
 	}
+	if (push_items == btrfs_header_nritems(&right->header))
+		WARN_ON(1);
 	/* push data from right to left */
 	btrfs_memcpy(root, left, left->items +
 		     btrfs_header_nritems(&left->header),
@@ -1064,7 +1078,6 @@
 
 	btrfs_mark_buffer_dirty(t);
 	btrfs_mark_buffer_dirty(right_buf);
-
 	wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
 	if (wret)
 		ret = wret;
@@ -1181,6 +1194,12 @@
 				path->nodes[0] = right_buffer;
 				path->slots[0] = 0;
 				path->slots[1] -= 1;
+				if (path->slots[1] == 0) {
+					wret = fixup_low_keys(trans, root,
+					           path, &disk_key, 1);
+					if (wret)
+						ret = wret;
+				}
 				return ret;
 			}
 			mid = slot;
@@ -1241,6 +1260,11 @@
 			  path->slots[1], 1);
 	if (wret)
 		ret = wret;
+	if (path->slots[1] == 0) {
+		wret = fixup_low_keys(trans, root, path, &disk_key, 1);
+		if (wret)
+			ret = wret;
+	}
 	btrfs_block_release(root, path->nodes[0]);
 	path->nodes[0] = right_buffer;
 	path->slots[0] = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d75a4d5..8a329d3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -282,11 +282,12 @@
 
-/* the lower bits in the key flags defines the item type */
+/* the upper bits in the key flags define the item type */
 #define BTRFS_KEY_TYPE_MAX	256
-#define BTRFS_KEY_TYPE_MASK	(BTRFS_KEY_TYPE_MAX - 1)
+#define BTRFS_KEY_TYPE_SHIFT	24
+#define BTRFS_KEY_TYPE_MASK	(((u32)BTRFS_KEY_TYPE_MAX - 1) << \
+				  BTRFS_KEY_TYPE_SHIFT)
 
 #define BTRFS_KEY_OVERFLOW_MAX 128
-#define BTRFS_KEY_OVERFLOW_SHIFT 8
-#define BTRFS_KEY_OVERFLOW_MASK (0x7FULL << BTRFS_KEY_OVERFLOW_SHIFT)
+#define BTRFS_KEY_OVERFLOW_MASK ((u32)BTRFS_KEY_OVERFLOW_MAX - 1)
 
 /*
  * inode items have the data typically returned from stat and store other
@@ -586,56 +587,68 @@
 	disk->flags = cpu_to_le32(val);
 }
 
+/*
+ * Item type of an on-disk key: the bits of the little-endian flags word
+ * above BTRFS_KEY_TYPE_SHIFT, returned in CPU byte order.
+ */
+static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key)
+{
+	return le32_to_cpu(key->flags) >> BTRFS_KEY_TYPE_SHIFT;
+}
+
+/*
+ * Store item type @val in an on-disk key, leaving the flag bits outside
+ * BTRFS_KEY_TYPE_MASK untouched.  BUG()s if @val >= BTRFS_KEY_TYPE_MAX.
+ */
+static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key,
+					       u32 val)
+{
+	u32 flags = btrfs_disk_key_flags(key);
+	BUG_ON(val >= BTRFS_KEY_TYPE_MAX);
+	val = val << BTRFS_KEY_TYPE_SHIFT;
+	flags = (flags & ~BTRFS_KEY_TYPE_MASK) | val;
+	btrfs_set_disk_key_flags(key, flags);
+}
+
+/* Item type of an in-memory key: key->flags >> BTRFS_KEY_TYPE_SHIFT. */
+static inline u32 btrfs_key_type(struct btrfs_key *key)
+{
+	return key->flags >> BTRFS_KEY_TYPE_SHIFT;
+}
+
+/*
+ * Store item type @val in an in-memory key, leaving the flag bits outside
+ * BTRFS_KEY_TYPE_MASK untouched.  BUG()s if @val >= BTRFS_KEY_TYPE_MAX.
+ */
+static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val)
+{
+	BUG_ON(val >= BTRFS_KEY_TYPE_MAX);
+	val = val << BTRFS_KEY_TYPE_SHIFT;
+	key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val;
+}
+
 static inline u32 btrfs_key_overflow(struct btrfs_key *key)
 {
-	u32 over = key->flags & BTRFS_KEY_OVERFLOW_MASK;
-	return over >> BTRFS_KEY_OVERFLOW_SHIFT;
+	return key->flags & BTRFS_KEY_OVERFLOW_MASK;
 }
 
 static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over)
 {
 	BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX);
-	over = over << BTRFS_KEY_OVERFLOW_SHIFT;
-	key->flags = (key->flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over;
-}
-
-static inline u32 btrfs_key_type(struct btrfs_key *key)
-{
-	return key->flags & BTRFS_KEY_TYPE_MASK;
-}
-
-static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key)
-{
-	return le32_to_cpu(key->flags) & BTRFS_KEY_TYPE_MASK;
-}
-
-static inline void btrfs_set_key_type(struct btrfs_key *key, u32 type)
-{
-	BUG_ON(type >= BTRFS_KEY_TYPE_MAX);
-	key->flags = (key->flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type;
-}
-
-static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, u32 type)
-{
-	u32 flags = btrfs_disk_key_flags(key);
-	BUG_ON(type >= BTRFS_KEY_TYPE_MAX);
-	flags = (flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type;
-	btrfs_set_disk_key_flags(key, flags);
+	key->flags = (key->flags & ~BTRFS_KEY_OVERFLOW_MASK) | over;
 }
 
 static inline u32 btrfs_disk_key_overflow(struct btrfs_disk_key *key)
 {
-	u32 over = le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK;
-	return over >> BTRFS_KEY_OVERFLOW_SHIFT;
+	return le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK;
 }
 
-static inline void btrfs_set_disK_key_overflow(struct btrfs_disk_key *key,
+static inline void btrfs_set_disk_key_overflow(struct btrfs_disk_key *key,
 					       u32 over)
 {
 	u32 flags = btrfs_disk_key_flags(key);
 	BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX);
-	over = over << BTRFS_KEY_OVERFLOW_SHIFT;
-	flags = (flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over;
+	flags = (flags & ~BTRFS_KEY_OVERFLOW_MASK) | over;
 	btrfs_set_disk_key_flags(key, flags);
 }
 
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index cd4137a..a43deb7 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -58,30 +58,6 @@
 
 	btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len);
 	btrfs_mark_buffer_dirty(path->nodes[0]);
-
-	/* FIXME, use some real flag for selecting the extra index */
-	if (root == root->fs_info->tree_root)
-		goto out;
-
-	btrfs_release_path(root, path);
-
-	btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
-	key.offset = location->objectid;
-	ret = insert_with_overflow(trans, root, path, &key, data_size);
-	// FIXME clear the dirindex bit
-	if (ret)
-		goto out;
-
-	dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
-				  path->slots[0],
-				  struct btrfs_dir_item);
-	btrfs_cpu_key_to_disk(&dir_item->location, location);
-	btrfs_set_dir_type(dir_item, type);
-	btrfs_set_dir_flags(dir_item, 0);
-	btrfs_set_dir_name_len(dir_item, name_len);
-	name_ptr = (char *)(dir_item + 1);
-	btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len);
-	btrfs_mark_buffer_dirty(path->nodes[0]);
 out:
 	btrfs_release_path(root, path);
 	btrfs_free_path(path);
@@ -135,52 +111,6 @@
 	return 1;
 }
 
-int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_path *path, u64 dir,
-				u64 objectid, int mod)
-{
-	int ret;
-	struct btrfs_key key;
-	int ins_len = mod < 0 ? -1 : 0;
-	int cow = mod != 0;
-	struct btrfs_disk_key *found_key;
-	struct btrfs_leaf *leaf;
-	int overflow = 0;
-
-	key.objectid = dir;
-	key.flags = 0;
-	btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
-	key.offset = objectid;
-
-	while(1) {
-		btrfs_set_key_overflow(&key, overflow);
-		ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
-		if (ret < 0)
-			return ret;
-		if (ret > 0) {
-			if (overflow >= BTRFS_KEY_OVERFLOW_MAX)
-				return 1;
-			overflow++;
-			btrfs_set_key_overflow(&key, overflow);
-			btrfs_release_path(root, path);
-			continue;
-		} else {
-			/* found */
-			break;
-		}
-	}
-	leaf = btrfs_buffer_leaf(path->nodes[0]);
-	found_key = &leaf->items[path->slots[0]].key;
-
-	if (btrfs_disk_key_objectid(found_key) != dir ||
-	    btrfs_disk_key_type(found_key) != BTRFS_DIR_INDEX_KEY)
-		return 1;
-	if (btrfs_disk_key_offset(found_key) == objectid)
-		return 0;
-	return 1;
-}
-
 int btrfs_match_dir_item_name(struct btrfs_root *root,
 			      struct btrfs_path *path,
 			      const char *name, int name_len)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2cee9df..cb04a70 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,8 +35,10 @@
 	key.offset = num_blocks;
 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
 				0, 1);
-	if (ret != 0)
+	if (ret != 0) {
+printk("can't find block %Lu %Lu\n", blocknr, num_blocks);
 		BUG();
+	}
 	BUG_ON(ret != 0);
 	l = btrfs_buffer_leaf(path->nodes[0]);
 	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index ff8f333..8cc3c1d 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -4,7 +4,7 @@
 #include "transaction.h"
 
 #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \
-				 sizeof(struct btrfs_item)) / \
+				 sizeof(struct btrfs_item) * 2) / \
 				sizeof(struct btrfs_csum_item)) - 1))
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
@@ -19,11 +19,6 @@
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 	btrfs_init_path(path);
-	/*
-	ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block,
-				 (u64)-1, &ins);
-				 */
-	BUG_ON(ret);
 	file_key.objectid = objectid;
 	file_key.offset = pos;
 	file_key.flags = 0;
@@ -40,6 +35,7 @@
 	btrfs_set_file_extent_num_blocks(item, num_blocks);
 	btrfs_set_file_extent_generation(item, trans->transid);
 	btrfs_mark_buffer_dirty(path->nodes[0]);
+
 	btrfs_release_path(root, path);
 	btrfs_free_path(path);
 	return 0;
@@ -57,6 +53,7 @@
 	struct btrfs_csum_item *item;
 	struct btrfs_leaf *leaf;
 	u64 csum_offset = 0;
+	int csums_in_item;
 
 	file_key.objectid = objectid;
 	file_key.offset = offset;
@@ -79,9 +76,11 @@
 		}
 		csum_offset = (offset - found_key.offset) >>
 				root->fs_info->sb->s_blocksize_bits;
-		if (csum_offset >=
-		    btrfs_item_size(leaf->items + path->slots[0]) /
-		    sizeof(struct btrfs_csum_item)) {
+		csums_in_item = btrfs_item_size(leaf->items + path->slots[0]);
+		csums_in_item /= sizeof(struct btrfs_csum_item);
+
+		if (csum_offset >= csums_in_item) {
+			ret = -EFBIG;
 			goto fail;
 		}
 	}
@@ -128,16 +127,36 @@
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
-	btrfs_init_path(path);
 
-	item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0);
-	if (!IS_ERR(item))
-		goto found;
-	btrfs_release_path(root, path);
 	file_key.objectid = objectid;
 	file_key.offset = offset;
 	file_key.flags = 0;
 	btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+
+	item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1);
+	if (!IS_ERR(item))
+		goto found;
+	ret = PTR_ERR(item);
+	if (ret == -EFBIG) {
+		u32 item_size;
+		/* we found one, but it isn't big enough yet */
+		leaf = btrfs_buffer_leaf(path->nodes[0]);
+		item_size = btrfs_item_size(leaf->items + path->slots[0]);
+		if ((item_size / sizeof(struct btrfs_csum_item)) >=
+		    MAX_CSUM_ITEMS(root)) {
+			/* already at max size, make a new one */
+			goto insert;
+		}
+	} else {
+		/* we didn't find a csum item, insert one */
+		goto insert;
+	}
+
+	/*
+	 * at this point, we know the tree has an item, but it isn't big
+	 * enough yet to put our csum in.  Grow it
+	 */
+	btrfs_release_path(root, path);
 	ret = btrfs_search_slot(trans, root, &file_key, path,
 				sizeof(struct btrfs_csum_item), 1);
 	if (ret < 0)
@@ -146,7 +165,6 @@
 		BUG();
 	}
 	if (path->slots[0] == 0) {
-		btrfs_release_path(root, path);
 		goto insert;
 	}
 	path->slots[0]--;
@@ -157,29 +175,36 @@
 	if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
 	    found_key.objectid != objectid ||
 	    csum_offset >= MAX_CSUM_ITEMS(root)) {
-		btrfs_release_path(root, path);
+		WARN_ON(1);
 		goto insert;
 	}
 	if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) /
 	    sizeof(struct btrfs_csum_item)) {
-		ret = btrfs_extend_item(trans, root, path,
-					sizeof(struct btrfs_csum_item));
+		u32 diff = (csum_offset + 1) * sizeof(struct btrfs_csum_item);
+		diff = diff - btrfs_item_size(leaf->items + path->slots[0]);
+		WARN_ON(diff != sizeof(struct btrfs_csum_item));
+		ret = btrfs_extend_item(trans, root, path, diff);
 		BUG_ON(ret);
 		goto csum;
 	}
 
 insert:
+	btrfs_release_path(root, path);
 	csum_offset = 0;
 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
 				      sizeof(struct btrfs_csum_item));
-	if (ret != 0 && ret != -EEXIST)
+	if (ret != 0) {
+		printk("at insert for %Lu %u %Lu ret is %d\n", file_key.objectid, file_key.flags, file_key.offset, ret);
+		WARN_ON(1);
 		goto fail;
+	}
 csum:
 	item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
 			      struct btrfs_csum_item);
 	ret = 0;
 	item += csum_offset;
 found:
+	btrfs_check_bounds(item->csum, BTRFS_CSUM_SIZE, path->nodes[0]->b_data, root->fs_info->sb->s_blocksize);
 	ret = btrfs_csum_data(root, data, len, item->csum);
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 fail:
@@ -210,6 +235,9 @@
 	item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0);
 	if (IS_ERR(item)) {
 		ret = PTR_ERR(item);
+		/* a csum that isn't present is a preallocated region. */
+		if (ret == -ENOENT || ret == -EFBIG)
+			ret = 1;
 		goto fail;
 	}
 
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0732a2f..1e7038b 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -19,7 +19,7 @@
 	for (i = 0 ; i < nr ; i++) {
 		item = l->items + i;
 		type = btrfs_disk_key_type(&item->key);
-		printk("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n",
+		printk("\titem %d key (%Lu %x %Lu) itemoff %d itemsize %d\n",
 			i,
 			btrfs_disk_key_objectid(&item->key),
 			btrfs_disk_key_flags(&item->key),
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b2a2220..583cd87 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -150,11 +150,6 @@
 	BUG_ON(ret);
 
 	btrfs_release_path(root, path);
-	ret = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
-					  objectid, -1);
-	BUG_ON(ret);
-	ret = btrfs_del_item(trans, root, path);
-	BUG_ON(ret);
 	dentry->d_inode->i_ctime = dir->i_ctime;
 err:
 	btrfs_release_path(root, path);
@@ -329,8 +324,9 @@
 			extent_start = btrfs_file_extent_disk_blocknr(fi);
 			extent_num_blocks =
 				btrfs_file_extent_disk_num_blocks(fi);
+			/* FIXME: the << 3 below assumes blocksize == 4096 */
 			inode->i_blocks -=
-				btrfs_file_extent_num_blocks(fi) >> 9;
+				btrfs_file_extent_num_blocks(fi) << 3;
 			found_extent = 1;
 		} else {
 			found_extent = 0;
@@ -562,7 +558,7 @@
 		if (btrfs_disk_key_objectid(&item->key) != key.objectid)
 			break;
 		if (btrfs_disk_key_type(&item->key) != key_type)
-			continue;
+			break;
 		if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
 			continue;
 		filp->f_pos = btrfs_disk_key_offset(&item->key);
@@ -1285,29 +1281,27 @@
 	struct btrfs_path *path;
 	u64 search_start = start;
 	int bookend;
-
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-search_again:
-	ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
-				       search_start, -1);
-	if (ret < 0)
-		goto out;
-	if (ret > 0) {
-		if (path->slots[0] == 0) {
-			ret = -ENOENT;
-			goto out;
-		}
-		path->slots[0]--;
-	}
 	while(1) {
+		btrfs_release_path(root, path);
+		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+					       search_start, -1);
+		if (ret < 0)
+			goto out;
+		if (ret > 0) {
+			if (path->slots[0] == 0) {
+				ret = -ENOENT;
+				goto out;
+			}
+			path->slots[0]--;
+		}
 		keep = 0;
 		bookend = 0;
 		leaf = btrfs_buffer_leaf(path->nodes[0]);
 		slot = path->slots[0];
 		btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
-
 		extent = btrfs_item_ptr(leaf, slot,
 					struct btrfs_file_extent_item);
 		extent_end = key.offset +
@@ -1318,7 +1312,10 @@
 			goto out;
 		}
 		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-			goto next_leaf;
+			goto out;
+		if (search_start >= extent_end)
+			goto out;
+		search_start = extent_end;
 
 		if (end < extent_end && end >= key.offset) {
 			memcpy(&old, extent, sizeof(old));
@@ -1331,10 +1328,13 @@
 
 		if (start > key.offset) {
 			u64 new_num;
+			u64 old_num;
 			/* truncate existing extent */
 			keep = 1;
 			WARN_ON(start & (root->blocksize - 1));
 			new_num = (start - key.offset) >> inode->i_blkbits;
+			old_num = btrfs_file_extent_num_blocks(extent);
+			inode->i_blocks -= (old_num - new_num) << 3;
 			btrfs_set_file_extent_num_blocks(extent, new_num);
 			mark_buffer_dirty(path->nodes[0]);
 		}
@@ -1344,13 +1344,11 @@
 			disk_blocknr = btrfs_file_extent_disk_blocknr(extent);
 			disk_num_blocks =
 				btrfs_file_extent_disk_num_blocks(extent);
-			search_start = key.offset +
-				(btrfs_file_extent_num_blocks(extent) <<
-				inode->i_blkbits);
+			inode->i_blocks -= /* read before item deletion */
+				btrfs_file_extent_num_blocks(extent) << 3;
 			ret = btrfs_del_item(trans, root, path);
 			BUG_ON(ret);
 			btrfs_release_path(root, path);
-
 			ret = btrfs_free_extent(trans, root, disk_blocknr,
 						disk_num_blocks, 0);
 
@@ -1360,7 +1358,7 @@
 				goto out;
 			}
 			if (!bookend)
-				goto search_again;
+				continue;
 		}
 		if (bookend) {
 			/* create bookend */
@@ -1395,21 +1393,12 @@
 			btrfs_set_file_extent_generation(extent,
 				    btrfs_file_extent_generation(&old));
 			btrfs_mark_buffer_dirty(path->nodes[0]);
+			inode->i_blocks +=
+				btrfs_file_extent_num_blocks(extent) << 3;
 			ret = 0;
 			goto out;
 		}
-next_leaf:
-		if (slot >= btrfs_header_nritems(&leaf->header) - 1) {
-			ret = btrfs_next_leaf(root, path);
-			if (ret) {
-				ret = 0;
-				goto out;
-			}
-		} else {
-			path->slots[0]++;
-		}
 	}
-
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -1445,15 +1434,6 @@
 		}
 		offset = pos & (PAGE_CACHE_SIZE -1);
 		this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
-#if 0
-		if ((pages[i]->index == first_index ||
-		    pages[i]->index == last_index) && pos < isize &&
-		    !PageUptodate(pages[i])) {
-			ret = mpage_readpage(pages[i], btrfs_get_block);
-			BUG_ON(ret);
-			lock_page(pages[i]);
-		}
-#endif
 		create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
 				     (1 << BH_Uptodate));
 		head = page_buffers(pages[i]);
@@ -1494,6 +1474,7 @@
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct page *pages[8];
+	struct page *pinned[2] = { NULL, NULL };
 	unsigned long first_index;
 	unsigned long last_index;
 	u64 start_pos;
@@ -1505,14 +1486,6 @@
 	if (file->f_flags & O_DIRECT)
 		return -EINVAL;
 	pos = *ppos;
-
-	start_pos = pos & ~(root->blocksize - 1);
-	/* FIXME */
-	if (start_pos != pos)
-		return -EINVAL;
-	num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
-			inode->i_blkbits;
-
 	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
@@ -1524,10 +1497,37 @@
 	if (err)
 		goto out;
 	file_update_time(file);
+
+	start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
+	num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
+			inode->i_blkbits;
+
 	mutex_lock(&inode->i_mutex);
 	first_index = pos >> PAGE_CACHE_SHIFT;
 	last_index = (pos + count) >> PAGE_CACHE_SHIFT;
 
+	if (((u64)first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
+	    (pos & (PAGE_CACHE_SIZE - 1))) { /* write starts mid-page */
+		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
+		if (!PageUptodate(pinned[0])) {
+			ret = mpage_readpage(pinned[0], btrfs_get_block);
+			BUG_ON(ret);
+		} else {
+			unlock_page(pinned[0]);
+		}
+	}
+	if (first_index != last_index &&
+	    ((u64)last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
+	    ((pos + count) & (PAGE_CACHE_SIZE - 1))) { /* ends mid-page */
+		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
+		if (!PageUptodate(pinned[1])) {
+			ret = mpage_readpage(pinned[1], btrfs_get_block);
+			BUG_ON(ret);
+		} else {
+			unlock_page(pinned[1]);
+		}
+	}
+
 	mutex_lock(&root->fs_info->fs_mutex);
 	trans = btrfs_start_transaction(root, 1);
 	if (!trans) {
@@ -1535,11 +1535,14 @@
 		mutex_unlock(&root->fs_info->fs_mutex);
 		goto out_unlock;
 	}
+	/* FIXME: the << 3 below assumes blocksize == 4096 */
+	inode->i_blocks += num_blocks << 3;
 	if (start_pos < inode->i_size) {
+		/* FIXME blocksize != pagesize */
 		ret = drop_extents(trans, root, inode,
 				   start_pos,
 				   (pos + count + root->blocksize -1) &
-				   ~(root->blocksize - 1));
+				   ~((u64)root->blocksize - 1));
 	}
 	ret = btrfs_alloc_extent(trans, root, num_blocks, 1,
 				 (u64)-1, &ins);
@@ -1585,8 +1588,13 @@
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
 out:
+	if (pinned[0])
+		page_cache_release(pinned[0]);
+	if (pinned[1])
+		page_cache_release(pinned[1]);
 	*ppos = pos;
 	current->backing_dev_info = NULL;
+	mark_inode_dirty(inode);
 	return num_written ? num_written : err;
 }