Btrfs: reference counts on data extents

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO
index 2ca301b..7aec75e 100644
--- a/fs/btrfs/TODO
+++ b/fs/btrfs/TODO
@@ -8,7 +8,10 @@
 * Add block mapping tree (simple dm layer)
 * Add simple tree locking (semaphore per tree)
 * Make allocator smarter
+* Make level a field in header
+* Add a block group to struct inode
 * Make directory hashing work on 32 bit
+* Make sure nobh stuff is working properly for cows
 * Do actual block accounting
 * Check compat and incompat flags on the inode
 * Add virtual filesystems, mountable snapshots
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7b7120d..1a98952 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -9,10 +9,11 @@
 
 #define BTRFS_MAGIC "_BtRfS_M"
 
-#define BTRFS_ROOT_TREE_OBJECTID 1
-#define BTRFS_EXTENT_TREE_OBJECTID 2
-#define BTRFS_INODE_MAP_OBJECTID 3
-#define BTRFS_FS_TREE_OBJECTID 4
+#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+#define BTRFS_INODE_MAP_OBJECTID 3ULL
+#define BTRFS_FS_TREE_OBJECTID 4ULL
+#define BTRFS_FIRST_FREE_OBJECTID 5ULL
 
 /*
  * we can actually store much bigger names, but lets not confuse the rest
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 82f6e9e..4d4fc48 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -13,7 +13,7 @@
 			       btrfs_root *extent_root);
 
 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
-			 *root, u64 blocknr)
+			 *root, u64 blocknr, u64 num_blocks)
 {
 	struct btrfs_path path;
 	int ret;
@@ -29,7 +29,7 @@
 	key.objectid = blocknr;
 	key.flags = 0;
 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
-	key.offset = 1;
+	key.offset = num_blocks;
 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path,
 				0, 1);
 	if (ret != 0)
@@ -48,7 +48,7 @@
 }
 
 static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
-			    *root, u64 blocknr, u32 *refs)
+			    *root, u64 blocknr, u64 num_blocks, u32 *refs)
 {
 	struct btrfs_path path;
 	int ret;
@@ -57,7 +57,7 @@
 	struct btrfs_extent_item *item;
 	btrfs_init_path(&path);
 	key.objectid = blocknr;
-	key.offset = 1;
+	key.offset = num_blocks;
 	key.flags = 0;
 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path,
@@ -76,17 +76,34 @@
 {
 	u64 blocknr;
 	struct btrfs_node *buf_node;
+	struct btrfs_leaf *buf_leaf;
+	struct btrfs_disk_key *key;
+	struct btrfs_file_extent_item *fi;
 	int i;
+	int leaf;
+	int ret;
 
 	if (!root->ref_cows)
 		return 0;
 	buf_node = btrfs_buffer_node(buf);
-	if (btrfs_is_leaf(buf_node))
-		return 0;
-
+	leaf = btrfs_is_leaf(buf_node);
+	buf_leaf = btrfs_buffer_leaf(buf);
 	for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
-		blocknr = btrfs_node_blockptr(buf_node, i);
-		inc_block_ref(trans, root, blocknr);
+		if (leaf) {
+			key = &buf_leaf->items[i].key;
+			if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
+				continue;
+			fi = btrfs_item_ptr(buf_leaf, i,
+					    struct btrfs_file_extent_item);
+			ret = inc_block_ref(trans, root,
+				    btrfs_file_extent_disk_blocknr(fi),
+				    btrfs_file_extent_disk_num_blocks(fi));
+			BUG_ON(ret);
+		} else {
+			blocknr = btrfs_node_blockptr(buf_node, i);
+			ret = inc_block_ref(trans, root, blocknr, 1);
+			BUG_ON(ret);
+		}
 	}
 	return 0;
 }
@@ -469,6 +486,37 @@
 	return buf;
 }
 
+static int drop_leaf_ref(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root, struct buffer_head *cur)
+{
+	struct btrfs_disk_key *key;
+	struct btrfs_leaf *leaf;
+	struct btrfs_file_extent_item *fi;
+	int i;
+	int nritems;
+	int ret;
+	/* for each file extent item in leaf cur, free the extent it points to */
+	BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur))); /* leaves only */
+	leaf = btrfs_buffer_leaf(cur);
+	nritems = btrfs_header_nritems(&leaf->header);
+	for (i = 0; i < nritems; i++) {
+		key = &leaf->items[i].key;
+		if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
+			continue; /* only file extent items are processed */
+		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+		/*
+		 * free the disk blocks this file extent item refers to.  FIXME:
+		 * insert a trans record that repeats the snapshot del on crash
+		 */
+		ret = btrfs_free_extent(trans, root,
+					btrfs_file_extent_disk_blocknr(fi),
+					btrfs_file_extent_disk_num_blocks(fi),
+					0);
+		BUG_ON(ret);
+	}
+	return 0; /* any failure hits BUG_ON above */
+}
+
 /*
  * helper function for drop_snapshot, this walks down the tree dropping ref
  * counts as it goes.
@@ -483,28 +531,33 @@
 	u32 refs;
 
 	ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr,
-			       &refs);
+			       1, &refs);
 	BUG_ON(ret);
 	if (refs > 1)
 		goto out;
 	/*
 	 * walk down to the last node level and free all the leaves
 	 */
-	while(*level > 0) {
+	while(*level >= 0) {
 		cur = path->nodes[*level];
 		if (path->slots[*level] >=
 		    btrfs_header_nritems(btrfs_buffer_header(cur)))
 			break;
+		if (*level == 0) {
+			ret = drop_leaf_ref(trans, root, cur);
+			BUG_ON(ret);
+			break;
+		}
 		blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
 					      path->slots[*level]);
-		ret = lookup_block_ref(trans, root, blocknr, &refs);
-		if (refs != 1 || *level == 1) {
+		ret = lookup_block_ref(trans, root, blocknr, 1, &refs);
+		BUG_ON(ret);
+		if (refs != 1) {
 			path->slots[*level]++;
 			ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
 			BUG_ON(ret);
 			continue;
 		}
-		BUG_ON(ret);
 		next = read_tree_block(root, blocknr);
 		if (path->nodes[*level-1])
 			btrfs_block_release(root, path->nodes[*level-1]);
@@ -513,8 +566,8 @@
 		path->slots[*level] = 0;
 	}
 out:
-	ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr,
-				1, 1);
+	ret = btrfs_free_extent(trans, root,
+				path->nodes[*level]->b_blocknr, 1, 1);
 	btrfs_block_release(root, path->nodes[*level]);
 	path->nodes[*level] = NULL;
 	*level += 1;
@@ -544,10 +597,10 @@
 			ret = btrfs_free_extent(trans, root,
 						path->nodes[*level]->b_blocknr,
 						1, 1);
+			BUG_ON(ret);
 			btrfs_block_release(root, path->nodes[*level]);
 			path->nodes[*level] = NULL;
 			*level = i + 1;
-			BUG_ON(ret);
 		}
 	}
 	return 1;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index ad2d375..1b2c5e0 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -25,6 +25,7 @@
 	if (fs_root->fs_info->last_inode_alloc_dirid == dirid)
 		search_start = fs_root->fs_info->last_inode_alloc;
 
+	search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID);
 	search_key.objectid = search_start;
 	search_key.flags = 0;
 	btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2c2883f..487da5a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -700,7 +700,7 @@
 static int btrfs_prepare_write(struct file *file, struct page *page,
 			       unsigned from, unsigned to)
 {
-	return block_prepare_write(page, from, to, btrfs_get_block);
+	return nobh_prepare_write(page, from, to, btrfs_get_block);
 }
 
 static void btrfs_write_super(struct super_block *sb)
@@ -721,7 +721,7 @@
 
 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	return block_write_full_page(page, btrfs_get_block, wbc);
+	return nobh_writepage(page, btrfs_get_block, wbc);
 }
 
 static int btrfs_get_sb(struct file_system_type *fs_type,
@@ -768,7 +768,7 @@
 	.writepage	= btrfs_writepage,
 	.sync_page	= block_sync_page,
 	.prepare_write	= btrfs_prepare_write,
-	.commit_write	= generic_commit_write,
+	.commit_write	= nobh_commit_write,
 };
 
 static struct inode_operations btrfs_file_inode_operations = {