Btrfs: still corruption hunting

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c
index 8454223..a50a1c4 100644
--- a/fs/btrfs/bit-radix.c
+++ b/fs/btrfs/bit-radix.c
@@ -4,6 +4,7 @@
 #define BIT_ARRAY_BYTES 256
 #define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8)
 
+extern struct kmem_cache *btrfs_bit_radix_cachep;
 int set_radix_bit(struct radix_tree_root *radix, unsigned long bit)
 {
 	unsigned long *bits;
@@ -16,7 +17,7 @@
 
 	bits = radix_tree_lookup(radix, slot);
 	if (!bits) {
-		bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOFS);
+		bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS);
 		if (!bits)
 			return -ENOMEM;
 		memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long));
@@ -68,12 +69,10 @@
 			break;
 		}
 	}
-
 	if (empty) {
 		bits = radix_tree_delete(radix, slot);
-		synchronize_rcu();
 		BUG_ON(!bits);
-		kfree(bits);
+		kmem_cache_free(btrfs_bit_radix_cachep, bits);
 	}
 #endif
 	return 0;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
new file mode 100644
index 0000000..e159841
--- /dev/null
+++ b/fs/btrfs/btrfs_inode.h
@@ -0,0 +1,14 @@
+#ifndef __BTRFS_I__
+#define __BTRFS_I__
+
+struct btrfs_inode {	/* VFS inode bracketed by two guard words */
+	u32 magic;	/* leading guard; btrfs_alloc_inode stores 0xDEADBEEF */
+	struct inode vfs_inode;	/* the inode the VFS sees; BTRFS_I() maps it back */
+	u32 magic2;	/* trailing guard; btrfs_alloc_inode stores 0xDEADBEAF */
+};
+static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
+{	/* map an embedded vfs_inode back to its enclosing btrfs_inode */
+	return container_of(inode, struct btrfs_inode, vfs_inode);
+}
+
+#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a0dfa2d..453ce83 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -16,6 +16,16 @@
 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int level, int slot);
 
+struct btrfs_path *btrfs_alloc_path(void)
+{	/* raw GFP_NOFS allocation; the path is not initialized here */
+	return kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
+}
+
+void btrfs_free_path(struct btrfs_path *p)
+{	/* only returns p to btrfs_path_cachep; nothing held by p is released */
+	kmem_cache_free(btrfs_path_cachep, p);
+}
+
 inline void btrfs_init_path(struct btrfs_path *p)
 {
 	memset(p, 0, sizeof(*p));
@@ -47,17 +57,18 @@
 	}
 	cow = btrfs_alloc_free_block(trans, root);
 	cow_node = btrfs_buffer_node(cow);
+	if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
+		WARN_ON(1);
 	memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
 	btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr);
 	btrfs_set_header_generation(&cow_node->header, trans->transid);
-	*cow_ret = cow;
-	btrfs_mark_buffer_dirty(cow);
 	btrfs_inc_ref(trans, root, buf);
 	if (buf == root->node) {
 		root->node = cow;
 		get_bh(cow);
-		if (buf != root->commit_root)
+		if (buf != root->commit_root) {
 			btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
+		}
 		btrfs_block_release(root, buf);
 	} else {
 		btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
@@ -66,6 +77,7 @@
 		btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
 	}
 	btrfs_block_release(root, buf);
+	*cow_ret = cow;
 	return 0;
 }
 
@@ -477,9 +489,12 @@
 					       p->slots[level + 1],
 					       &cow_buf);
 			b = cow_buf;
+			c = btrfs_buffer_node(b);
 		}
 		BUG_ON(!cow && ins_len);
-		c = btrfs_buffer_node(b);
+		if (level != btrfs_header_level(&c->header))
+			WARN_ON(1);
+		level = btrfs_header_level(&c->header);
 		p->nodes[level] = b;
 		ret = check_block(root, p, level);
 		if (ret)
@@ -1257,19 +1272,22 @@
 		      data_size)
 {
 	int ret = 0;
-	struct btrfs_path path;
+	struct btrfs_path *path;
 	u8 *ptr;
 
-	btrfs_init_path(&path);
-	ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size);
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	btrfs_init_path(path);
+	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
 	if (!ret) {
-		ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
-				     path.slots[0], u8);
-		btrfs_memcpy(root, path.nodes[0]->b_data,
+		ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
+				     path->slots[0], u8);
+		btrfs_memcpy(root, path->nodes[0]->b_data,
 			     ptr, data, data_size);
-		btrfs_mark_buffer_dirty(path.nodes[0]);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
 	}
-	btrfs_release_path(root, &path);
+	btrfs_release_path(root, path);
+	btrfs_free_path(path);
 	return ret;
 }
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9ec0d65..d8e03bd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -7,6 +7,7 @@
 
 struct btrfs_trans_handle;
 struct btrfs_transaction;
+extern struct kmem_cache *btrfs_path_cachep;
 
 #define BTRFS_MAGIC "_BtRfS_M"
 
@@ -888,6 +889,8 @@
 		      *root, struct btrfs_key *key, struct btrfs_path *p, int
 		      ins_len, int cow);
 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
+struct btrfs_path *btrfs_alloc_path(void);
+void btrfs_free_path(struct btrfs_path *p);
 void btrfs_init_path(struct btrfs_path *p);
 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index bb13310..2dbd550 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -8,18 +8,6 @@
 #include "disk-io.h"
 #include "transaction.h"
 
-#define PATTERN 0xDEADBEEFUL
-static inline void check_pattern(struct buffer_head *buf)
-{
-	if (buf->b_private != (void *)PATTERN)
-		WARN_ON(1);
-}
-
-static inline void set_pattern(struct buffer_head *buf)
-{
-	buf->b_private = (void *)PATTERN;
-}
-
 static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
 {
 	struct btrfs_node *node = btrfs_buffer_node(buf);
@@ -35,6 +23,8 @@
 
 struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr)
 {
+	return sb_find_get_block(root->fs_info->sb, blocknr);
+#if 0
 	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
 	int blockbits = root->fs_info->sb->s_blocksize_bits;
 	unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
@@ -43,6 +33,7 @@
 	struct buffer_head *head;
 	struct buffer_head *ret = NULL;
 
+
 	page = find_lock_page(mapping, index);
 	if (!page)
 		return NULL;
@@ -64,15 +55,17 @@
 	unlock_page(page);
 	if (ret) {
 		touch_buffer(ret);
-		check_pattern(ret);
 	}
 	page_cache_release(page);
 	return ret;
+#endif
 }
 
 struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root,
 						 u64 blocknr)
 {
+	return sb_getblk(root->fs_info->sb, blocknr);
+#if 0
 	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
 	int blockbits = root->fs_info->sb->s_blocksize_bits;
 	unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
@@ -95,7 +88,6 @@
 			bh->b_bdev = root->fs_info->sb->s_bdev;
 			bh->b_blocknr = first_block;
 			set_buffer_mapped(bh);
-			set_pattern(bh);
 		}
 		if (bh->b_blocknr == blocknr) {
 			ret = bh;
@@ -111,6 +103,7 @@
 		touch_buffer(ret);
 	page_cache_release(page);
 	return ret;
+#endif
 }
 
 static sector_t max_block(struct block_device *bdev)
@@ -225,6 +218,8 @@
 
 struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr)
 {
+	return sb_bread(root->fs_info->sb, blocknr);
+#if 0
 	struct buffer_head *bh = NULL;
 
 	bh = btrfs_find_create_tree_block(root, blocknr);
@@ -239,7 +234,6 @@
 		if (!buffer_uptodate(bh))
 			goto fail;
 		csum_tree_block(root, bh, 1);
-		set_pattern(bh);
 	} else {
 		unlock_buffer(bh);
 	}
@@ -250,6 +244,7 @@
 	brelse(bh);
 	return NULL;
 
+#endif
 }
 
 int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -268,14 +263,14 @@
 	return 0;
 }
 
-static int __setup_root(struct btrfs_super_block *super,
+static int __setup_root(int blocksize,
 			struct btrfs_root *root,
 			struct btrfs_fs_info *fs_info,
 			u64 objectid)
 {
 	root->node = NULL;
 	root->commit_root = NULL;
-	root->blocksize = btrfs_super_blocksize(super);
+	root->blocksize = blocksize;
 	root->ref_cows = 0;
 	root->fs_info = fs_info;
 	memset(&root->root_key, 0, sizeof(root->root_key));
@@ -283,7 +278,7 @@
 	return 0;
 }
 
-static int find_and_setup_root(struct btrfs_super_block *super,
+static int find_and_setup_root(int blocksize,
 			       struct btrfs_root *tree_root,
 			       struct btrfs_fs_info *fs_info,
 			       u64 objectid,
@@ -291,7 +286,7 @@
 {
 	int ret;
 
-	__setup_root(super, root, fs_info, objectid);
+	__setup_root(blocksize, root, fs_info, objectid);
 	ret = btrfs_find_last_root(tree_root, objectid,
 				   &root->root_item, &root->root_key);
 	BUG_ON(ret);
@@ -302,9 +297,7 @@
 	return 0;
 }
 
-struct btrfs_root *open_ctree(struct super_block *sb,
-			      struct buffer_head *sb_buffer,
-			      struct btrfs_super_block *disk_super)
+struct btrfs_root *open_ctree(struct super_block *sb)
 {
 	struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root),
 					  GFP_NOFS);
@@ -317,13 +310,11 @@
 	struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
 						GFP_NOFS);
 	int ret;
+	struct btrfs_super_block *disk_super;
 
-	if (!btrfs_super_root(disk_super)) {
-		return NULL;
-	}
 	init_bit_radix(&fs_info->pinned_radix);
 	init_bit_radix(&fs_info->pending_del_radix);
-	sb_set_blocksize(sb, sb_buffer->b_size);
+	sb_set_blocksize(sb, 4096);
 	fs_info->running_transaction = NULL;
 	fs_info->fs_root = root;
 	fs_info->tree_root = tree_root;
@@ -331,55 +322,59 @@
 	fs_info->inode_root = inode_root;
 	fs_info->last_inode_alloc = 0;
 	fs_info->last_inode_alloc_dirid = 0;
-	fs_info->disk_super = disk_super;
 	fs_info->sb = sb;
+	fs_info->btree_inode = NULL;
+#if 0
 	fs_info->btree_inode = new_inode(sb);
 	fs_info->btree_inode->i_ino = 1;
+	fs_info->btree_inode->i_nlink = 1;
 	fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
 	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
 	insert_inode_hash(fs_info->btree_inode);
-
 	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
+#endif
 	fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
 	spin_lock_init(&fs_info->hash_lock);
-
 	if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) {
 		printk("failed to allocate sha256 hash\n");
 		return NULL;
 	}
-
 	mutex_init(&fs_info->trans_mutex);
 	mutex_init(&fs_info->fs_mutex);
 	memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
 	memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
 
-	__setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
-
-	fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr);
+	__setup_root(sb->s_blocksize, tree_root,
+		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
+	fs_info->sb_buffer = read_tree_block(tree_root,
+					     BTRFS_SUPER_INFO_OFFSET /
+					     sb->s_blocksize);
 
 	if (!fs_info->sb_buffer) {
 printk("failed2\n");
 		return NULL;
 	}
-	brelse(sb_buffer);
-	sb_buffer = NULL;
 	disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data;
+	if (!btrfs_super_root(disk_super)) {
+		return NULL;
+	}
 	fs_info->disk_super = disk_super;
-
 	tree_root->node = read_tree_block(tree_root,
 					  btrfs_super_root(disk_super));
 	BUG_ON(!tree_root->node);
 
-	ret = find_and_setup_root(disk_super, tree_root, fs_info,
+	mutex_lock(&fs_info->fs_mutex);
+	ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
 				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
 	BUG_ON(ret);
 
-	ret = find_and_setup_root(disk_super, tree_root, fs_info,
+	ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
 				  BTRFS_INODE_MAP_OBJECTID, inode_root);
 	BUG_ON(ret);
 
-	ret = find_and_setup_root(disk_super, tree_root, fs_info,
+	ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
 				  BTRFS_FS_TREE_OBJECTID, root);
+	mutex_unlock(&fs_info->fs_mutex);
 	BUG_ON(ret);
 	root->commit_root = root->node;
 	get_bh(root->node);
@@ -392,9 +387,11 @@
 		      *root)
 {
 	struct buffer_head *bh = root->fs_info->sb_buffer;
+
 	btrfs_set_super_root(root->fs_info->disk_super,
 			     root->fs_info->tree_root->node->b_blocknr);
 	lock_buffer(bh);
+	WARN_ON(atomic_read(&bh->b_count) < 1);
 	clear_buffer_dirty(bh);
 	csum_tree_block(root, bh, 0);
 	bh->b_end_io = end_buffer_write_sync;
@@ -413,6 +410,7 @@
 	int ret;
 	struct btrfs_trans_handle *trans;
 
+	mutex_lock(&root->fs_info->fs_mutex);
 	trans = btrfs_start_transaction(root, 1);
 	btrfs_commit_transaction(trans, root);
 	/* run commit again to  drop the original snapshot */
@@ -421,6 +419,7 @@
 	ret = btrfs_write_and_wait_transaction(NULL, root);
 	BUG_ON(ret);
 	write_ctree_super(NULL, root);
+	mutex_unlock(&root->fs_info->fs_mutex);
 
 	if (root->node)
 		btrfs_block_release(root, root->node);
@@ -436,8 +435,8 @@
 	btrfs_block_release(root, root->commit_root);
 	btrfs_block_release(root, root->fs_info->sb_buffer);
 	crypto_free_hash(root->fs_info->hash_tfm);
-	truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0);
-	iput(root->fs_info->btree_inode);
+	// truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0);
+	// iput(root->fs_info->btree_inode);
 	kfree(root->fs_info->extent_root);
 	kfree(root->fs_info->inode_root);
 	kfree(root->fs_info->tree_root);
@@ -448,7 +447,6 @@
 
 void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf)
 {
-	check_pattern(buf);
-	brelse(buf);
+	// brelse(buf);  NOTE(review): while disabled this function is a no-op and no buffer reference is dropped
 }
 
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index f6998e2..ac6764b 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -31,9 +31,7 @@
 		     struct btrfs_root *root, struct buffer_head *buf);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root);
-struct btrfs_root *open_ctree(struct super_block *sb,
-			      struct buffer_head *sb_buffer,
-			      struct btrfs_super_block *disk_super);
+struct btrfs_root *open_ctree(struct super_block *sb);
 int close_ctree(struct btrfs_root *root);
 void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf);
 int write_ctree_super(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c4194da..37b87e2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -173,12 +173,16 @@
 
 	if (!pending) {
 		bh = btrfs_find_tree_block(root, blocknr);
-		if (bh && buffer_uptodate(bh)) {
-			header = btrfs_buffer_header(bh);
-			if (btrfs_header_generation(header) ==
-			    root->fs_info->running_transaction->transid) {
-				btrfs_block_release(root, bh);
-				return 0;
+		if (bh) {
+			if (buffer_uptodate(bh)) {
+				u64 transid =
+				    root->fs_info->running_transaction->transid;
+				header = btrfs_buffer_header(bh);
+				if (btrfs_header_generation(header) ==
+				    transid) {
+					btrfs_block_release(root, bh);
+					return 0;
+				}
 			}
 			btrfs_block_release(root, bh);
 		}
@@ -539,6 +543,8 @@
 	 */
 	while(*level >= 0) {
 		cur = path->nodes[*level];
+		if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
+			WARN_ON(1);
 		if (path->slots[*level] >=
 		    btrfs_header_nritems(btrfs_buffer_header(cur)))
 			break;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 894a70b..6969b67 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -14,6 +14,7 @@
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
+#include "btrfs_inode.h"
 
 #define BTRFS_SUPER_MAGIC 0x9123682E
 
@@ -24,6 +25,14 @@
 static struct address_space_operations btrfs_aops;
 static struct file_operations btrfs_file_operations;
 
+static int check_inode(struct inode *inode)
+{
+	/* warn if either guard word around the VFS inode changed; returns 0 */
+	WARN_ON(BTRFS_I(inode)->magic != 0xDEADBEEF);
+	WARN_ON(BTRFS_I(inode)->magic2 != 0xDEADBEAF);
+	return 0;
+}
+
 static void btrfs_read_locked_inode(struct inode *inode)
 {
 	struct btrfs_path path;
@@ -34,6 +43,7 @@
 	btrfs_init_path(&path);
 	mutex_lock(&root->fs_info->fs_mutex);
 
+	check_inode(inode);
 	ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0);
 	if (ret) {
 		btrfs_release_path(root, &path);
@@ -41,6 +51,7 @@
 		make_bad_inode(inode);
 		return;
 	}
+	check_inode(inode);
 	inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
 				  path.slots[0],
 				  struct btrfs_inode_item);
@@ -60,6 +71,7 @@
 	inode->i_generation = btrfs_inode_generation(inode_item);
 	btrfs_release_path(root, &path);
 	mutex_unlock(&root->fs_info->fs_mutex);
+	check_inode(inode);
 	switch (inode->i_mode & S_IFMT) {
 #if 0
 	default:
@@ -80,6 +92,7 @@
 		// inode->i_op = &page_symlink_inode_operations;
 		break;
 	}
+	check_inode(inode);
 	return;
 }
 
@@ -347,6 +360,7 @@
 				    namelen, 0);
 	if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) {
 		*ino = 0;
+		ret = 0;
 		goto out;
 	}
 	di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
@@ -354,6 +368,7 @@
 	*ino = btrfs_dir_objectid(di);
 out:
 	btrfs_release_path(root, &path);
+	check_inode(dir);
 	return ret;
 }
 
@@ -367,7 +382,6 @@
 
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
-
 	mutex_lock(&root->fs_info->fs_mutex);
 	ret = btrfs_inode_by_name(dir, dentry, &ino);
 	mutex_unlock(&root->fs_info->fs_mutex);
@@ -378,7 +392,9 @@
 		inode = iget(dir->i_sb, ino);
 		if (!inode)
 			return ERR_PTR(-EACCES);
+		check_inode(inode);
 	}
+	check_inode(dir);
 	return d_splice_alias(inode, dentry);
 }
 
@@ -471,23 +487,14 @@
 	struct inode * inode;
 	struct dentry * root_dentry;
 	struct btrfs_super_block *disk_super;
-	struct buffer_head *bh;
 	struct btrfs_root *root;
 
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = BTRFS_SUPER_MAGIC;
 	sb->s_op = &btrfs_super_ops;
 	sb->s_time_gran = 1;
 
-	bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize);
-	if (!bh) {
-		printk("btrfs: unable to read on disk super\n");
-		return -EIO;
-	}
-	disk_super = (struct btrfs_super_block *)bh->b_data;
-	root = open_ctree(sb, bh, disk_super);
+	root = open_ctree(sb);
 
 	if (!root) {
 		printk("btrfs: open_ctree failed\n");
@@ -533,6 +540,7 @@
 	btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
 	btrfs_set_inode_nblocks(item, inode->i_blocks);
 	btrfs_set_inode_generation(item, inode->i_generation);
+	check_inode(inode);
 }
 
 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
@@ -560,6 +568,7 @@
 	btrfs_mark_buffer_dirty(path.nodes[0]);
 failed:
 	btrfs_release_path(root, &path);
+	check_inode(inode);
 	return 0;
 }
 
@@ -577,6 +586,7 @@
 	else
 		btrfs_end_transaction(trans, root);
 	mutex_unlock(&root->fs_info->fs_mutex);
+	check_inode(inode);
 	return ret;
 }
 
@@ -594,6 +604,7 @@
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
+	check_inode(inode);
 	ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
 	BUG_ON(ret);
 
@@ -616,6 +627,8 @@
 	BUG_ON(ret);
 
 	insert_inode_hash(inode);
+	check_inode(inode);
+	check_inode(dir);
 	return inode;
 }
 
@@ -632,7 +645,8 @@
 		ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb),
 					 dentry->d_parent->d_inode);
 	}
-
+	check_inode(inode);
+	check_inode(dentry->d_parent->d_inode);
 	return ret;
 }
 
@@ -644,6 +658,9 @@
 		d_instantiate(dentry, inode);
 		return 0;
 	}
+	if (err > 0)
+		err = -EEXIST;
+	check_inode(inode);
 	return err;
 }
 
@@ -675,6 +692,9 @@
 out_unlock:
 	btrfs_end_transaction(trans, root);
 	mutex_unlock(&root->fs_info->fs_mutex);
+	check_inode(inode);
+	check_inode(dir);
+
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
@@ -755,11 +775,11 @@
 
 	sb->s_dirt = 0;
 	if (!wait) {
-		filemap_flush(root->fs_info->btree_inode->i_mapping);
+		// filemap_flush(root->fs_info->btree_inode->i_mapping);
+		filemap_flush(root->fs_info->sb->s_bdev->bd_inode->i_mapping);
 		return 0;
 	}
-	filemap_write_and_wait(root->fs_info->btree_inode->i_mapping);
-
+	filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping);
 	mutex_lock(&root->fs_info->fs_mutex);
 	trans = btrfs_start_transaction(root, 1);
 	ret = btrfs_commit_transaction(trans, root);
@@ -1242,6 +1262,95 @@
 	return retval;
 }
 
+static struct kmem_cache *btrfs_inode_cachep;	/* struct btrfs_inode objects */
+struct kmem_cache *btrfs_trans_handle_cachep;	/* extern in transaction.c */
+struct kmem_cache *btrfs_transaction_cachep;	/* extern in transaction.c */
+struct kmem_cache *btrfs_bit_radix_cachep;	/* extern in bit-radix.c */
+struct kmem_cache *btrfs_path_cachep;		/* extern in ctree.h */
+
+/*
+ * Allocate a btrfs_inode, stamp both guard words and hand back the embedded
+ * VFS inode (NULL on failure).  Called inside transaction, so use GFP_NOFS.
+ */
+static struct inode *btrfs_alloc_inode(struct super_block *sb)
+{
+	struct btrfs_inode *wrapper =
+		kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
+	if (wrapper == NULL)
+		return NULL;
+	wrapper->magic2 = 0xDEADBEAF;
+	wrapper->magic = 0xDEADBEEF;
+	return &wrapper->vfs_inode;
+}
+
+static void btrfs_destroy_inode(struct inode *inode)
+{	/* sanity-check the dying inode, clear its guards, free the wrapper */
+	struct btrfs_inode *wrapper = BTRFS_I(inode);
+	WARN_ON(wrapper->magic != 0xDEADBEEF);
+	WARN_ON(wrapper->magic2 != 0xDEADBEAF);
+	WARN_ON(!list_empty(&inode->i_dentry));
+	WARN_ON(inode->i_ino == 1);
+	WARN_ON(inode->i_data.nrpages);
+
+	wrapper->magic2 = 0;
+	wrapper->magic = 0;
+	kmem_cache_free(btrfs_inode_cachep, wrapper);
+}
+
+static void init_once(void * foo, struct kmem_cache * cachep,
+		      unsigned long flags)
+{	/* slab constructor: one-time VFS init of the embedded inode */
+	const unsigned long ctor_bits = SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR;
+	struct btrfs_inode *wrapper = foo;
+
+	if ((flags & ctor_bits) != SLAB_CTOR_CONSTRUCTOR)
+		return;	/* need CONSTRUCTOR set and VERIFY clear */
+	inode_init_once(&wrapper->vfs_inode);
+}
+
+/* Create the btrfs slab caches; returns 0, or -ENOMEM if any one failed. */
+static int init_inodecache(void)
+{
+	btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
+			sizeof(struct btrfs_inode), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+			init_once, NULL);
+	btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
+			sizeof(struct btrfs_trans_handle), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+			NULL, NULL);
+	btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
+			sizeof(struct btrfs_transaction), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+			NULL, NULL);
+	/* objects are handed out by btrfs_alloc_path() as struct btrfs_path */
+	btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
+			sizeof(struct btrfs_path), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+			NULL, NULL);
+	/* 256 must match BIT_ARRAY_BYTES in bit-radix.c */
+	btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
+			256, 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
+			SLAB_DESTROY_BY_RCU,
+			NULL, NULL);
+	/* every cache is required, the path cache included */
+	if (!btrfs_inode_cachep || !btrfs_trans_handle_cachep ||
+	    !btrfs_transaction_cachep || !btrfs_path_cachep ||
+	    !btrfs_bit_radix_cachep)
+		return -ENOMEM;
+	return 0;
+}
+
+static void destroy_inodecache(void)
+{	/* destroys each of the five caches created by init_inodecache() */
+	kmem_cache_destroy(btrfs_inode_cachep);
+	kmem_cache_destroy(btrfs_trans_handle_cachep);
+	kmem_cache_destroy(btrfs_transaction_cachep);
+	kmem_cache_destroy(btrfs_bit_radix_cachep);
+	kmem_cache_destroy(btrfs_path_cachep);
+}
+
 static int btrfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
@@ -1265,6 +1374,8 @@
 	.write_super	= btrfs_write_super,
 	.sync_fs	= btrfs_sync_fs,
 	.write_inode	= btrfs_write_inode,
+	.alloc_inode	= btrfs_alloc_inode,
+	.destroy_inode	= btrfs_destroy_inode,
 };
 
 static struct inode_operations btrfs_dir_inode_operations = {
@@ -1305,12 +1416,17 @@
 
 static int __init init_btrfs_fs(void)
 {
+	int err;	/* 0, or the error code of the first step that failed */
 	printk("btrfs loaded!\n");
-	return register_filesystem(&btrfs_fs_type);
+	err = init_inodecache();
+	if (!err && (err = register_filesystem(&btrfs_fs_type)) != 0)
+		destroy_inodecache();	/* registration failed: free the caches */
+	return err;
 }
 
 static void __exit exit_btrfs_fs(void)
 {
 	unregister_filesystem(&btrfs_fs_type);
+	destroy_inodecache();	/* caches go last: reverse of init_btrfs_fs() */
 	printk("btrfs unloaded\n");
 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 84c4e27..72b52e1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -5,13 +5,20 @@
 #include "transaction.h"
 
 static int total_trans = 0;
+extern struct kmem_cache *btrfs_trans_handle_cachep;
+extern struct kmem_cache *btrfs_transaction_cachep;
+
+#define TRANS_MAGIC 0xE1E10E
 static void put_transaction(struct btrfs_transaction *transaction)
 {
+	WARN_ON(transaction->use_count == 0);	/* the decrement below would go negative */
 	transaction->use_count--;
+	WARN_ON(transaction->magic != TRANS_MAGIC);	/* magic is set when the transaction is created */
 	if (transaction->use_count == 0) {
 		WARN_ON(total_trans == 0);
 		total_trans--;
-		kfree(transaction);
+		memset(transaction, 0, sizeof(*transaction));	/* also clears magic before the free */
+		kmem_cache_free(btrfs_transaction_cachep, transaction);
 	}
 }
 
@@ -20,7 +27,8 @@
 	struct btrfs_transaction *cur_trans;
 	cur_trans = root->fs_info->running_transaction;
 	if (!cur_trans) {
-		cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS);
+		cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
+					     GFP_NOFS);
 		total_trans++;
 		BUG_ON(!cur_trans);
 		root->fs_info->running_transaction = cur_trans;
@@ -28,6 +36,7 @@
 		cur_trans->transid = root->root_key.offset + 1;
 		init_waitqueue_head(&cur_trans->writer_wait);
 		init_waitqueue_head(&cur_trans->commit_wait);
+		cur_trans->magic = TRANS_MAGIC;
 		cur_trans->in_commit = 0;
 		cur_trans->use_count = 1;
 		cur_trans->commit_done = 0;
@@ -39,7 +48,8 @@
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 						   int num_blocks)
 {
-	struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS);
+	struct btrfs_trans_handle *h =
+		kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
 	int ret;
 
 	mutex_lock(&root->fs_info->trans_mutex);
@@ -51,6 +61,7 @@
 	h->blocks_used = 0;
 	root->fs_info->running_transaction->use_count++;
 	mutex_unlock(&root->fs_info->trans_mutex);
+	h->magic = h->magic2 = TRANS_MAGIC;
 	return h;
 }
 
@@ -58,6 +69,8 @@
 			  struct btrfs_root *root)
 {
 	struct btrfs_transaction *cur_trans;
+	WARN_ON(trans->magic != TRANS_MAGIC);
+	WARN_ON(trans->magic2 != TRANS_MAGIC);
 	mutex_lock(&root->fs_info->trans_mutex);
 	cur_trans = root->fs_info->running_transaction;
 	WARN_ON(cur_trans->num_writers < 1);
@@ -67,7 +80,7 @@
 	put_transaction(cur_trans);
 	mutex_unlock(&root->fs_info->trans_mutex);
 	memset(trans, 0, sizeof(*trans));
-	kfree(trans);
+	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 	return 0;
 }
 
@@ -75,7 +88,7 @@
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root)
 {
-	filemap_write_and_wait(root->fs_info->btree_inode->i_mapping);
+	filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping);
 	return 0;
 }
 
@@ -137,6 +150,7 @@
 
 	mutex_lock(&root->fs_info->trans_mutex);
 	if (trans->transaction->in_commit) {
+printk("already in commit!, waiting\n");
 		cur_trans = trans->transaction;
 		trans->transaction->use_count++;
 		btrfs_end_transaction(trans, root);
@@ -146,7 +160,10 @@
 		mutex_unlock(&root->fs_info->trans_mutex);
 		return 0;
 	}
+	cur_trans = trans->transaction;
+	trans->transaction->in_commit = 1;
 	while (trans->transaction->num_writers > 1) {
+		WARN_ON(cur_trans != trans->transaction);
 		prepare_to_wait(&trans->transaction->writer_wait, &wait,
 				TASK_UNINTERRUPTIBLE);
 		if (trans->transaction->num_writers <= 1)
@@ -154,15 +171,15 @@
 		mutex_unlock(&root->fs_info->trans_mutex);
 		schedule();
 		mutex_lock(&root->fs_info->trans_mutex);
+		finish_wait(&trans->transaction->writer_wait, &wait);
 	}
 	finish_wait(&trans->transaction->writer_wait, &wait);
-
+	WARN_ON(cur_trans != trans->transaction);
 	if (root->node != root->commit_root) {
 		memcpy(&snap_key, &root->root_key, sizeof(snap_key));
 		root->root_key.offset++;
 	}
 
-
 	if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) {
 		btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr);
 		ret = btrfs_insert_root(trans, root->fs_info->tree_root,
@@ -172,22 +189,21 @@
 
 	ret = btrfs_commit_tree_roots(trans, root);
 	BUG_ON(ret);
-
 	cur_trans = root->fs_info->running_transaction;
 	root->fs_info->running_transaction = NULL;
 	mutex_unlock(&root->fs_info->trans_mutex);
-
 	ret = btrfs_write_and_wait_transaction(trans, root);
 	BUG_ON(ret);
 
 	write_ctree_super(trans, root);
 	btrfs_finish_extent_commit(trans, root);
 	mutex_lock(&root->fs_info->trans_mutex);
+	cur_trans->commit_done = 1;
+	wake_up(&cur_trans->commit_wait);
 	put_transaction(cur_trans);
 	put_transaction(cur_trans);
 	mutex_unlock(&root->fs_info->trans_mutex);
-	kfree(trans);
-
+	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 	if (root->node != root->commit_root) {
 		trans = btrfs_start_transaction(root, 1);
 		snap = root->commit_root;
@@ -203,7 +219,6 @@
 		ret = btrfs_end_transaction(trans, root);
 		BUG_ON(ret);
 	}
-
 	return ret;
 }
 
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7e6c08a0..4f1496a 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -7,15 +7,18 @@
 	int in_commit;
 	int use_count;
 	int commit_done;
+	int magic;
 	wait_queue_head_t writer_wait;
 	wait_queue_head_t commit_wait;
 };
 
 struct btrfs_trans_handle {
+	int magic;	/* leading guard; btrfs_start_transaction sets TRANS_MAGIC */
 	u64 transid;
 	unsigned long blocks_reserved;
 	unsigned long blocks_used;
 	struct btrfs_transaction *transaction;
+	int magic2;	/* trailing guard; btrfs_start_transaction sets TRANS_MAGIC */
 };