Btrfs: transaction rework

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 1a0fb74..be7d74c 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -3,7 +3,7 @@
 
 obj-m  := btrfs.o
 btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
-	   hash.o file-item.o inode-item.o inode-map.o disk-io.o
+	   hash.o file-item.o inode-item.o inode-map.o disk-io.o transaction.o
 
 #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
 #	  root-tree.o dir-item.o hash.o file-item.o inode-item.o \
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7748eec..4c19a3f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -5,6 +5,7 @@
 #include <linux/fs.h>
 
 struct btrfs_trans_handle;
+struct btrfs_transaction;
 
 #define BTRFS_MAGIC "_BtRfS_M"
 
@@ -224,10 +225,11 @@
 	u64 last_inode_alloc;
 	u64 last_inode_alloc_dirid;
 	u64 generation;
-	struct btrfs_trans_handle *running_transaction;
+	struct btrfs_transaction *running_transaction;
 	struct btrfs_super_block *disk_super;
 	struct buffer_head *sb_buffer;
 	struct super_block *sb;
+	struct mutex trans_mutex;
 };
 
 /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index df2061a..9cacca0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -50,89 +50,6 @@
 	return 0;
 }
 
-int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		     struct buffer_head *buf)
-{
-	mark_buffer_dirty(buf);
-	return 0;
-}
-
-static int __commit_transaction(struct btrfs_trans_handle *trans, struct
-				btrfs_root *root)
-{
-	filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping);
-	return 0;
-}
-
-static int commit_tree_roots(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info)
-{
-	int ret;
-	u64 old_extent_block;
-	struct btrfs_root *tree_root = fs_info->tree_root;
-	struct btrfs_root *extent_root = fs_info->extent_root;
-	struct btrfs_root *inode_root = fs_info->inode_root;
-
-	btrfs_set_root_blocknr(&inode_root->root_item,
-			       inode_root->node->b_blocknr);
-	ret = btrfs_update_root(trans, tree_root,
-				&inode_root->root_key,
-				&inode_root->root_item);
-	BUG_ON(ret);
-	while(1) {
-		old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
-		if (old_extent_block == extent_root->node->b_blocknr)
-			break;
-		btrfs_set_root_blocknr(&extent_root->root_item,
-				       extent_root->node->b_blocknr);
-		ret = btrfs_update_root(trans, tree_root,
-					&extent_root->root_key,
-					&extent_root->root_item);
-		BUG_ON(ret);
-	}
-	return 0;
-}
-
-int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct
-			     btrfs_root *root, struct btrfs_super_block *s)
-{
-	int ret = 0;
-	struct buffer_head *snap = root->commit_root;
-	struct btrfs_key snap_key;
-
-	if (root->commit_root == root->node)
-		return 0;
-
-	memcpy(&snap_key, &root->root_key, sizeof(snap_key));
-	root->root_key.offset++;
-
-	btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr);
-	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
-				&root->root_key, &root->root_item);
-	BUG_ON(ret);
-
-	ret = commit_tree_roots(trans, root->fs_info);
-	BUG_ON(ret);
-
-	ret = __commit_transaction(trans, root);
-	BUG_ON(ret);
-
-	write_ctree_super(trans, root, s);
-	btrfs_finish_extent_commit(trans, root->fs_info->extent_root);
-	btrfs_finish_extent_commit(trans, root->fs_info->tree_root);
-
-	root->commit_root = root->node;
-	get_bh(root->node);
-	ret = btrfs_drop_snapshot(trans, root, snap);
-	BUG_ON(ret);
-
-	ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key);
-	BUG_ON(ret);
-	root->fs_info->generation = root->root_key.offset + 1;
-
-	return ret;
-}
-
 static int __setup_root(struct btrfs_super_block *super,
 			struct btrfs_root *root,
 			struct btrfs_fs_info *fs_info,
@@ -197,6 +114,7 @@
 	fs_info->disk_super = disk_super;
 	fs_info->sb_buffer = sb_buffer;
 	fs_info->sb = sb;
+	mutex_init(&fs_info->trans_mutex);
 	memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
 	memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
 
@@ -225,7 +143,7 @@
 }
 
 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
-		      *root, struct btrfs_super_block *s)
+		      *root)
 {
 	return 0;
 #if 0
@@ -242,34 +160,19 @@
 #endif
 }
 
-static int drop_cache(struct btrfs_root *root)
-{
-	return 0;
-#if 0
-	while(!list_empty(&root->fs_info->cache)) {
-		struct buffer_head *b = list_entry(root->fs_info->cache.next,
-						    struct buffer_head,
-						    cache);
-		list_del_init(&b->cache);
-		btrfs_block_release(root, b);
-	}
-	return 0;
-#endif
-}
-
 int close_ctree(struct btrfs_root *root)
 {
 	int ret;
 	struct btrfs_trans_handle *trans;
 
-	trans = root->fs_info->running_transaction;
-	btrfs_commit_transaction(trans, root, root->fs_info->disk_super);
-	ret = commit_tree_roots(trans, root->fs_info);
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_commit_transaction(trans, root);
+	/* run commit again to drop the original snapshot */
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_commit_transaction(trans, root);
+	ret = btrfs_write_and_wait_transaction(NULL, root);
 	BUG_ON(ret);
-	ret = __commit_transaction(trans, root);
-	BUG_ON(ret);
-	write_ctree_super(trans, root, root->fs_info->disk_super);
-	drop_cache(root);
+	write_ctree_super(NULL, root);
 
 	if (root->node)
 		btrfs_block_release(root, root->node);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 7f4bb72..099f7ee 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -28,14 +28,14 @@
 		     struct buffer_head *buf);
 int clean_tree_block(struct btrfs_trans_handle *trans,
 		     struct btrfs_root *root, struct buffer_head *buf);
-int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root
-			     *root, struct btrfs_super_block *s);
+int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root);
 struct btrfs_root *open_ctree(struct super_block *sb,
 			      struct buffer_head *sb_buffer,
 			      struct btrfs_super_block *disk_super);
 int close_ctree(struct btrfs_root *root);
 void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf);
-int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		      struct btrfs_super_block *s);
+int write_ctree_super(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root);
 int mkfs(int fd, u64 num_blocks, u32 blocksize);
 #endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index ccc056a..6204308 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -141,7 +141,6 @@
 	struct btrfs_inode_item *inode_item;
 	struct btrfs_root *root = btrfs_sb(inode->i_sb);
 	int ret;
-printk("read locked inode %lu\n", inode->i_ino);
 	btrfs_init_path(&path);
 	ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0);
 	if (ret) {
@@ -152,7 +151,6 @@
 				  path.slots[0],
 				  struct btrfs_inode_item);
 
-printk("found locked inode %lu\n", inode->i_ino);
 	inode->i_mode = btrfs_inode_mode(inode_item);
 	inode->i_nlink = btrfs_inode_nlink(inode_item);
 	inode->i_uid = btrfs_inode_uid(inode_item);
@@ -166,7 +164,6 @@
 	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
 	inode->i_blocks = btrfs_inode_nblocks(inode_item);
 	inode->i_generation = btrfs_inode_generation(inode_item);
-printk("about to release\n");
 	btrfs_release_path(root, &path);
 	switch (inode->i_mode & S_IFMT) {
 #if 0
@@ -176,19 +173,15 @@
 		break;
 #endif
 	case S_IFREG:
-printk("inode %lu now a file\n", inode->i_ino);
 		break;
 	case S_IFDIR:
-printk("inode %lu now a directory\n", inode->i_ino);
 		inode->i_op = &btrfs_dir_inode_operations;
 		inode->i_fop = &btrfs_dir_file_operations;
 		break;
 	case S_IFLNK:
-printk("inode %lu now a link\n", inode->i_ino);
 		// inode->i_op = &page_symlink_inode_operations;
 		break;
 	}
-printk("returning!\n");
 	return;
 }
 
@@ -232,7 +225,6 @@
 		return ERR_PTR(ret);
 	inode = NULL;
 	if (ino) {
-printk("lookup on %.*s returns %lu\n", dentry->d_name.len, dentry->d_name.name, ino);
 		inode = iget(dir->i_sb, ino);
 		if (!inode)
 			return ERR_PTR(-EACCES);
@@ -257,7 +249,6 @@
 	int over;
 
 	key.objectid = inode->i_ino;
-printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos);
 	key.flags = 0;
 	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
 	key.offset = filp->f_pos;
@@ -266,15 +257,12 @@
 	if (ret < 0) {
 		goto err;
 	}
-printk("first ret %d\n", ret);
 	advance = filp->f_pos > 0 && ret != 0;
 	while(1) {
 		leaf = btrfs_buffer_leaf(path.nodes[0]);
 		nritems = btrfs_header_nritems(&leaf->header);
 		slot = path.slots[0];
-printk("leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot);
 		if (advance) {
-printk("advancing!\n");
 			if (slot == nritems -1) {
 				ret = btrfs_next_leaf(root, &path);
 				if (ret)
@@ -282,7 +270,6 @@
 				leaf = btrfs_buffer_leaf(path.nodes[0]);
 				nritems = btrfs_header_nritems(&leaf->header);
 				slot = path.slots[0];
-printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot);
 			} else {
 				slot++;
 				path.slots[0]++;
@@ -290,15 +277,11 @@
 		}
 		advance = 1;
 		item = leaf->items + slot;
-printk("item key %Lu %u %Lu\n", btrfs_disk_key_objectid(&item->key),
-       btrfs_disk_key_flags(&item->key), btrfs_disk_key_offset(&item->key));
 		if (btrfs_disk_key_objectid(&item->key) != key.objectid)
 			break;
 		if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY)
 			continue;
 		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
-printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di),
-       (const char *)(di + 1), btrfs_dir_objectid(di));
 		over = filldir(dirent, (const char *)(di + 1),
 			       btrfs_dir_name_len(di),
 			       btrfs_disk_key_offset(&item->key),
@@ -307,7 +290,6 @@
 			break;
 		filp->f_pos = btrfs_disk_key_offset(&item->key) + 1;
 	}
-printk("filldir all done\n");
 	ret = 0;
 err:
 	btrfs_release_path(root, &path);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
new file mode 100644
index 0000000..8dc1c17
--- /dev/null
+++ b/fs/btrfs/transaction.c
@@ -0,0 +1,279 @@
+#include <linux/module.h>
+#include <linux/fs.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+
+/*
+ * Reference counting rules for struct btrfs_transaction:
+ *  - fs_info->running_transaction owns one reference for as long as it
+ *    points at the transaction
+ *  - every btrfs_trans_handle owns one reference
+ * use_count and num_writers are plain integers, so they are only touched
+ * with fs_info->trans_mutex held.
+ */
+
+/* Drop one reference; the transaction is freed when the last one goes. */
+static void put_transaction(struct btrfs_transaction *transaction)
+{
+	WARN_ON(transaction->use_count <= 0);
+	transaction->use_count--;
+	if (transaction->use_count == 0)
+		kfree(transaction);
+}
+
+/*
+ * Become a writer on the running transaction, allocating a new one if
+ * none is running.  Called with trans_mutex held.  Always returns 0; an
+ * allocation failure is a BUG.
+ */
+static int join_transaction(struct btrfs_root *root)
+{
+	struct btrfs_transaction *cur_trans;
+
+	cur_trans = root->fs_info->running_transaction;
+	if (!cur_trans) {
+		cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS);
+		BUG_ON(!cur_trans);
+		root->fs_info->running_transaction = cur_trans;
+		cur_trans->num_writers = 0;
+		cur_trans->transid = root->root_key.offset + 1;
+		init_waitqueue_head(&cur_trans->writer_wait);
+		init_waitqueue_head(&cur_trans->commit_wait);
+		cur_trans->in_commit = 0;
+		/* the reference owned by fs_info->running_transaction */
+		cur_trans->use_count = 1;
+		cur_trans->commit_done = 0;
+	}
+	cur_trans->num_writers++;
+	return 0;
+}
+
+/*
+ * Allocate a handle and attach it to the running transaction, starting
+ * one if needed.  The handle is released by btrfs_end_transaction() or
+ * btrfs_commit_transaction().
+ */
+struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
+						   int num_blocks)
+{
+	struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS);
+	int ret;
+
+	/* same policy as join_transaction(): no recovery from ENOMEM yet */
+	BUG_ON(!h);
+	mutex_lock(&root->fs_info->trans_mutex);
+	ret = join_transaction(root);
+	BUG_ON(ret);
+	h->transaction = root->fs_info->running_transaction;
+	h->transid = h->transaction->transid;
+	h->blocks_reserved = num_blocks;
+	h->blocks_used = 0;
+	h->transaction->use_count++;
+	mutex_unlock(&root->fs_info->trans_mutex);
+	return h;
+}
+
+/*
+ * Give up one writer slot and one reference on cur_trans, waking a
+ * committer that may be waiting for the writers to drain.  Called with
+ * trans_mutex held.
+ */
+static void end_transaction_locked(struct btrfs_transaction *cur_trans)
+{
+	WARN_ON(cur_trans->num_writers < 1);
+	cur_trans->num_writers--;
+	if (waitqueue_active(&cur_trans->writer_wait))
+		wake_up(&cur_trans->writer_wait);
+	put_transaction(cur_trans);
+}
+
+/*
+ * Detach a handle from its transaction without committing, then free
+ * the handle.  Always returns 0.
+ */
+int btrfs_end_transaction(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root)
+{
+	mutex_lock(&root->fs_info->trans_mutex);
+	/* use the handle's transaction, not whatever is running right now */
+	end_transaction_locked(trans->transaction);
+	mutex_unlock(&root->fs_info->trans_mutex);
+	kfree(trans);
+	return 0;
+}
+
+/*
+ * Write back the block device's page cache mapping and wait for the
+ * writes to finish.  trans is not used and the result of
+ * filemap_write_and_wait() is ignored; always returns 0.
+ */
+int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root)
+{
+	filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping);
+	return 0;
+}
+
+/*
+ * Record the current root blocks of the inode tree and the extent tree
+ * in the tree of tree roots.  Always returns 0; failures are BUGs.
+ */
+int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root)
+{
+	int ret;
+	u64 old_extent_block;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_root *inode_root = fs_info->inode_root;
+
+	btrfs_set_root_blocknr(&inode_root->root_item,
+			       inode_root->node->b_blocknr);
+	ret = btrfs_update_root(trans, tree_root,
+				&inode_root->root_key,
+				&inode_root->root_item);
+	BUG_ON(ret);
+	/*
+	 * Repeat until the block number stored in the root item matches
+	 * the extent root's current node.
+	 * NOTE(review): presumably btrfs_update_root() can itself move the
+	 * extent root, which is why one pass is not enough -- confirm.
+	 */
+	while(1) {
+		old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
+		if (old_extent_block == extent_root->node->b_blocknr)
+			break;
+		btrfs_set_root_blocknr(&extent_root->root_item,
+				       extent_root->node->b_blocknr);
+		ret = btrfs_update_root(trans, tree_root,
+					&extent_root->root_key,
+					&extent_root->root_item);
+		BUG_ON(ret);
+	}
+	return 0;
+}
+
+/*
+ * Sleep until commit->commit_done is set.  Called with trans_mutex held
+ * and returns with it held; the mutex is dropped around schedule().
+ * The caller must own a reference on commit for the whole call.
+ * Always returns 0.
+ */
+static int wait_for_commit(struct btrfs_root *root,
+			   struct btrfs_transaction *commit)
+{
+	DEFINE_WAIT(wait);
+
+	while (!commit->commit_done) {
+		prepare_to_wait(&commit->commit_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (commit->commit_done)
+			break;
+		mutex_unlock(&root->fs_info->trans_mutex);
+		schedule();
+		mutex_lock(&root->fs_info->trans_mutex);
+	}
+	finish_wait(&commit->commit_wait, &wait);
+	return 0;
+}
+
+/*
+ * Commit the transaction that trans belongs to and free the handle.
+ *
+ * If another task is already committing it, give up our writer slot and
+ * wait for that commit to finish.  Otherwise mark the transaction as
+ * committing, wait for the other writers to end, write everything out
+ * and, if the root moved, drop the previously committed snapshot.
+ */
+int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root)
+{
+	int ret = 0;
+	struct buffer_head *snap = root->commit_root;
+	struct btrfs_key snap_key;
+	struct btrfs_transaction *cur_trans;
+	DEFINE_WAIT(wait);
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	cur_trans = trans->transaction;
+	if (cur_trans->in_commit) {
+		/* pin the transaction so it outlives our handle */
+		cur_trans->use_count++;
+		/*
+		 * trans_mutex is already held here, so calling
+		 * btrfs_end_transaction() would deadlock
+		 */
+		end_transaction_locked(cur_trans);
+		kfree(trans);
+		ret = wait_for_commit(root, cur_trans);
+		BUG_ON(ret);
+		put_transaction(cur_trans);
+		mutex_unlock(&root->fs_info->trans_mutex);
+		return 0;
+	}
+	/* from now on other committers wait for us instead of racing */
+	cur_trans->in_commit = 1;
+	while (cur_trans->num_writers > 1) {
+		prepare_to_wait(&cur_trans->writer_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (cur_trans->num_writers <= 1)
+			break;
+		mutex_unlock(&root->fs_info->trans_mutex);
+		schedule();
+		mutex_lock(&root->fs_info->trans_mutex);
+	}
+	finish_wait(&cur_trans->writer_wait, &wait);
+
+	/* later btrfs_start_transaction() calls open a new transaction */
+	WARN_ON(root->fs_info->running_transaction != cur_trans);
+	root->fs_info->running_transaction = NULL;
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	memcpy(&snap_key, &root->root_key, sizeof(snap_key));
+	root->root_key.offset++;
+
+	if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) {
+		btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr);
+		ret = btrfs_insert_root(trans, root->fs_info->tree_root,
+					&root->root_key, &root->root_item);
+		BUG_ON(ret);
+	}
+
+	ret = btrfs_commit_tree_roots(trans, root);
+	BUG_ON(ret);
+
+	ret = btrfs_write_and_wait_transaction(trans, root);
+	BUG_ON(ret);
+
+	write_ctree_super(trans, root);
+	btrfs_finish_extent_commit(trans, root->fs_info->extent_root);
+	btrfs_finish_extent_commit(trans, root->fs_info->tree_root);
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	cur_trans->commit_done = 1;
+	wake_up(&cur_trans->commit_wait);
+	/* one reference from our handle, one from running_transaction */
+	put_transaction(cur_trans);
+	put_transaction(cur_trans);
+	mutex_unlock(&root->fs_info->trans_mutex);
+	kfree(trans);
+
+	if (root->node != root->commit_root) {
+		trans = btrfs_start_transaction(root, 1);
+		root->commit_root = root->node;
+		get_bh(root->node);
+		ret = btrfs_drop_snapshot(trans, root, snap);
+		BUG_ON(ret);
+
+		ret = btrfs_del_root(trans, root->fs_info->tree_root,
+				     &snap_key);
+		BUG_ON(ret);
+		root->fs_info->generation = root->root_key.offset + 1;
+		ret = btrfs_end_transaction(trans, root);
+		BUG_ON(ret);
+	}
+
+	return ret;
+}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 9ab27b7..7e6c08a 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -1,27 +1,30 @@
 #ifndef __TRANSACTION__
 #define __TRANSACTION__
 
+struct btrfs_transaction {
+	u64 transid;			/* root key offset + 1 when created */
+	unsigned long num_writers;	/* ++ on join, -- on end_transaction */
+	int in_commit;			/* tested by btrfs_commit_transaction() */
+	int use_count;			/* refcount; kfree()d when it hits 0 */
+	int commit_done;		/* wait_for_commit() sleeps until set */
+	wait_queue_head_t writer_wait;	/* committer waits for writers here */
+	wait_queue_head_t commit_wait;	/* wait_for_commit() sleeps here */
+};
+
 struct btrfs_trans_handle {
 	u64 transid;
 	unsigned long blocks_reserved;
 	unsigned long blocks_used;
+	struct btrfs_transaction *transaction;	/* transaction joined at start */
 };
 
-static inline struct btrfs_trans_handle *
-btrfs_start_transaction(struct btrfs_root *root, int num_blocks)
-{
-	struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS);
-	h->transid = root->root_key.offset;
-	h->blocks_reserved = num_blocks;
-	h->blocks_used = 0;
-	return h;
-}
 
-static inline void btrfs_free_transaction(struct btrfs_root *root,
-					  struct btrfs_trans_handle *handle)
-{
-	memset(handle, 0, sizeof(*handle));
-	kfree(handle);
-}
-
+int btrfs_end_transaction(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root);
+struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
+						   int num_blocks);
+int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root);
+int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root);
 #endif