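Btrfs: Fix nodatacow for the new data=ordered mode

Ranges that can be written in place are now tracked with an ordered
extent flagged BTRFS_ORDERED_NOCOW.  To support that:

 * btrfs_cross_ref_exists() takes the caller's transaction handle
   instead of starting and ending its own transaction.
 * btrfs_add_ordered_extent() gains a nocow argument that sets
   BTRFS_ORDERED_NOCOW on the new entry.
 * Ordered extent completion skips the section between lock_extent()
   and unlock_extent() for NOCOW extents and goes straight to
   add_pending_csums().
 * The bio submit hook jumps to mapit instead of calling
   btrfs_wq_submit_bio() when NODATASUM is set on the mount or inode.
 * btrfs_wait_ordered_extents() gains a nocow_only argument; entries it
   skips are spliced back onto fs_info->ordered_extents.
 * The commit wait loop calls btrfs_wait_ordered_extents(root, 1) when
   the transaction has pending snapshots.
 * root->dirty_root is reset to NULL wherever root->commit_root is.

Signed-off-by: Chris Mason <chris.mason@oracle.com>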
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 116aee2..f90e5a7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1403,7 +1403,8 @@
 }
 
 /* extent-tree.c */
-int btrfs_cross_ref_exists(struct btrfs_root *root,
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
 			   struct btrfs_key *key, u64 bytenr);
 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 33cb2ac..fff219e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -893,10 +893,10 @@
 	return ret;
 }
 
-int btrfs_cross_ref_exists(struct btrfs_root *root,
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
 			   struct btrfs_key *key, u64 bytenr)
 {
-	struct btrfs_trans_handle *trans;
 	struct btrfs_root *old_root;
 	struct btrfs_path *path = NULL;
 	struct extent_buffer *eb;
@@ -908,6 +908,7 @@
 	int level;
 	int ret;
 
+	BUG_ON(trans == NULL);
 	BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
 	ret = get_reference_status(root, bytenr, 0, key->objectid,
 				   &min_generation, &ref_count);
@@ -917,7 +918,6 @@
 	if (ref_count != 1)
 		return 1;
 
-	trans = btrfs_start_transaction(root, 0);
 	old_root = root->dirty_root->root;
 	ref_generation = old_root->root_key.offset;
 
@@ -973,7 +973,6 @@
 out:
 	if (path)
 		btrfs_free_path(path);
-	btrfs_end_transaction(trans, root);
 	return ret;
 }
 
@@ -3320,7 +3319,7 @@
 	mutex_unlock(&root->fs_info->alloc_mutex);
 
 	btrfs_start_delalloc_inodes(root);
-	btrfs_wait_ordered_extents(tree_root);
+	btrfs_wait_ordered_extents(tree_root, 0);
 
 	mutex_lock(&root->fs_info->alloc_mutex);
 
@@ -3407,7 +3406,7 @@
 		btrfs_clean_old_snapshots(tree_root);
 
 		btrfs_start_delalloc_inodes(root);
-		btrfs_wait_ordered_extents(tree_root);
+		btrfs_wait_ordered_extents(tree_root, 0);
 
 		trans = btrfs_start_transaction(tree_root, 1);
 		btrfs_commit_transaction(trans, tree_root);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4d8ffc0..c33053b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -166,7 +166,7 @@
 
 		cur_alloc_size = ins.offset;
 		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
-					       ins.offset);
+					       ins.offset, 0);
 		BUG_ON(ret);
 		if (num_bytes < cur_alloc_size) {
 			printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
@@ -187,31 +187,32 @@
 	u64 extent_start;
 	u64 extent_end;
 	u64 bytenr;
-	u64 cow_end;
 	u64 loops = 0;
 	u64 total_fs_bytes;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_block_group_cache *block_group;
+	struct btrfs_trans_handle *trans;
 	struct extent_buffer *leaf;
 	int found_type;
 	struct btrfs_path *path;
 	struct btrfs_file_extent_item *item;
 	int ret;
-	int err;
+	int err = 0;
 	struct btrfs_key found_key;
 
 	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
+	trans = btrfs_join_transaction(root, 1);
+	BUG_ON(!trans);
 again:
 	ret = btrfs_lookup_file_extent(NULL, root, path,
 				       inode->i_ino, start, 0);
 	if (ret < 0) {
-		btrfs_free_path(path);
-		return ret;
+		err = ret;
+		goto out;
 	}
 
-	cow_end = end;
 	if (ret != 0) {
 		if (path->slots[0] == 0)
 			goto not_found;
@@ -244,12 +245,11 @@
 		if (start < extent_start || start >= extent_end)
 			goto not_found;
 
-		cow_end = min(end, extent_end - 1);
 		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
 		if (bytenr == 0)
 			goto not_found;
 
-		if (btrfs_cross_ref_exists(root, &found_key, bytenr))
+		if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr))
 			goto not_found;
 		/*
 		 * we may be called by the resizer, make sure we're inside
@@ -260,24 +260,32 @@
 		if (!block_group || block_group->ro)
 			goto not_found;
 
-		start = extent_end;
-	} else {
-		goto not_found;
-	}
-loop:
-	if (start > end) {
-		btrfs_free_path(path);
-		return 0;
-	}
-	btrfs_release_path(root, path);
-	loops++;
-	goto again;
+		bytenr += btrfs_file_extent_offset(leaf, item);
+		extent_num_bytes = min(end + 1, extent_end) - start;
+		ret = btrfs_add_ordered_extent(inode, start, bytenr,
+						extent_num_bytes, 1);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
 
+		btrfs_release_path(root, path);
+		start = extent_end;
+		if (start <= end) {
+			loops++;
+			goto again;
+		}
+	} else {
 not_found:
-	btrfs_release_path(root, path);
-	cow_file_range(inode, start, end);
-	start = end + 1;
-	goto loop;
+		btrfs_end_transaction(trans, root);
+		btrfs_free_path(path);
+		return cow_file_range(inode, start, end);
+	}
+out:
+	WARN_ON(err);
+	btrfs_end_transaction(trans, root);
+	btrfs_free_path(path);
+	return err;
 }
 
 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
@@ -385,6 +393,11 @@
 		goto mapit;
 	}
 
+	if (btrfs_test_opt(root, NODATASUM) ||
+	    btrfs_test_flag(inode, NODATASUM)) {
+		goto mapit;
+	}
+
 	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
 				   inode, rw, bio, mirror_num,
 				   __btrfs_submit_bio_hook);
@@ -527,6 +540,8 @@
 
 	ordered_extent = btrfs_lookup_ordered_extent(inode, start);
 	BUG_ON(!ordered_extent);
+	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
+		goto nocow;
 
 	lock_extent(io_tree, ordered_extent->file_offset,
 		    ordered_extent->file_offset + ordered_extent->len - 1,
@@ -567,6 +582,7 @@
 	unlock_extent(io_tree, ordered_extent->file_offset,
 		    ordered_extent->file_offset + ordered_extent->len - 1,
 		    GFP_NOFS);
+nocow:
 	add_pending_csums(trans, inode, ordered_extent->file_offset,
 			  &ordered_extent->list);
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3932c7c..59b64c7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -36,6 +36,7 @@
 #include <linux/bit_spinlock.h>
 #include <linux/version.h>
 #include <linux/xattr.h>
+#include <linux/vmalloc.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index db200e6..da6d43e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -152,7 +152,7 @@
  * inserted.
  */
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-			     u64 start, u64 len)
+			     u64 start, u64 len, int nocow)
 {
 	struct btrfs_ordered_inode_tree *tree;
 	struct rb_node *node;
@@ -168,6 +168,8 @@
 	entry->start = start;
 	entry->len = len;
 	entry->inode = inode;
+	if (nocow)
+		set_bit(BTRFS_ORDERED_NOCOW, &entry->flags);
 
 	/* one ref for the tree */
 	atomic_set(&entry->refs, 1);
@@ -303,10 +305,11 @@
 	return 0;
 }
 
-int btrfs_wait_ordered_extents(struct btrfs_root *root)
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
 {
 	struct list_head splice;
 	struct list_head *cur;
+	struct list_head *tmp;
 	struct btrfs_ordered_extent *ordered;
 	struct inode *inode;
 
@@ -314,10 +317,16 @@
 
 	spin_lock(&root->fs_info->ordered_extent_lock);
 	list_splice_init(&root->fs_info->ordered_extents, &splice);
-	while(!list_empty(&splice)) {
+	list_for_each_safe(cur, tmp, &splice) {
 		cur = splice.next;
 		ordered = list_entry(cur, struct btrfs_ordered_extent,
 				     root_extent_list);
+		if (nocow_only &&
+		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+			cond_resched_lock(&root->fs_info->ordered_extent_lock);
+			continue;
+		}
+
 		list_del_init(&ordered->root_extent_list);
 		atomic_inc(&ordered->refs);
 		inode = ordered->inode;
@@ -338,6 +347,7 @@
 
 		spin_lock(&root->fs_info->ordered_extent_lock);
 	}
+	list_splice_init(&splice, &root->fs_info->ordered_extents);
 	spin_unlock(&root->fs_info->ordered_extent_lock);
 	return 0;
 }
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 5efe6b6..fd45519 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -64,6 +64,8 @@
 
 #define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
 
+#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
+
 struct btrfs_ordered_extent {
 	/* logical offset in the file */
 	u64 file_offset;
@@ -125,7 +127,7 @@
 int btrfs_dec_test_ordered_pending(struct inode *inode,
 				       u64 file_offset, u64 io_size);
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-			     u64 start, u64 len);
+			     u64 start, u64 len, int nocow);
 int btrfs_add_ordered_sum(struct inode *inode,
 			  struct btrfs_ordered_extent *entry,
 			  struct btrfs_ordered_sum *sum);
@@ -143,5 +145,5 @@
 				       pgoff_t start, pgoff_t end);
 int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
 			   loff_t end, int sync_mode);
-int btrfs_wait_ordered_extents(struct btrfs_root *root);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
 #endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ebf5362..9d3d08e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -438,6 +438,7 @@
 
 				free_extent_buffer(root->commit_root);
 				root->commit_root = NULL;
+				root->dirty_root = NULL;
 
 				spin_lock(&root->list_lock);
 				list_del_init(&dirty->root->dead_list);
@@ -461,6 +462,7 @@
 			       sizeof(struct btrfs_disk_key));
 			root->root_item.drop_level = 0;
 			root->commit_root = NULL;
+			root->dirty_root = NULL;
 			root->root_key.offset = root->fs_info->generation;
 			btrfs_set_root_bytenr(&root->root_item,
 					      root->node->start);
@@ -762,7 +764,11 @@
 	}
 
 	do {
+		int snap_pending = 0;
 		joined = cur_trans->num_joined;
+		if (!list_empty(&trans->transaction->pending_snapshots))
+			snap_pending = 1;
+
 		WARN_ON(cur_trans != trans->transaction);
 		prepare_to_wait(&cur_trans->writer_wait, &wait,
 				TASK_UNINTERRUPTIBLE);
@@ -774,6 +780,11 @@
 
 		mutex_unlock(&root->fs_info->trans_mutex);
 
+		if (snap_pending) {
+			ret = btrfs_wait_ordered_extents(root, 1);
+			BUG_ON(ret);
+		}
+
 		schedule_timeout(timeout);
 
 		mutex_lock(&root->fs_info->trans_mutex);