Btrfs: write barriers on commit, balance level before split

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4efcd1b..744fd72 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -151,6 +151,11 @@
 	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
 		struct btrfs_key cpukey;
 		btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
+if (comp_keys(&node->ptrs[i].key, &cpukey) >= 0) {
+	struct btrfs_key bad;
+	btrfs_disk_key_to_cpu(&bad, &node->ptrs[i].key);
+printk("check_node level %d i is %d bad comp %Lu %u %Lu, %Lu %u %Lu\n",level, i, bad.objectid, bad.flags, bad.offset, cpukey.objectid, cpukey.flags, cpukey.offset);
+}
 		BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
 	}
 	return 0;
@@ -448,6 +453,111 @@
 	return ret;
 }
 
+/* returns zero if the push worked, non-zero otherwise */
+static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_path *path, int level)
+{
+	struct buffer_head *right_buf;
+	struct buffer_head *mid_buf;
+	struct buffer_head *left_buf;
+	struct buffer_head *parent_buf = NULL;
+	struct btrfs_node *right = NULL;
+	struct btrfs_node *mid;
+	struct btrfs_node *left = NULL;
+	struct btrfs_node *parent = NULL;
+	int ret = 0;
+	int wret;
+	int pslot;
+	int orig_slot = path->slots[level];
+	u64 orig_ptr;
+
+	if (level == 0)
+		return 1;
+
+	mid_buf = path->nodes[level];
+	mid = btrfs_buffer_node(mid_buf);
+	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
+
+	if (level < BTRFS_MAX_LEVEL - 1)
+		parent_buf = path->nodes[level + 1];
+	pslot = path->slots[level + 1];
+
+	if (!parent_buf)
+		return 1;
+	parent = btrfs_buffer_node(parent_buf);
+
+	left_buf = read_node_slot(root, parent_buf, pslot - 1);
+
+	/* first, try to make some room in the middle buffer */
+	if (left_buf) {
+		u32 left_nr;
+		btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
+				&left_buf);
+		left = btrfs_buffer_node(left_buf);
+		left_nr = btrfs_header_nritems(&left->header);
+		wret = push_node_left(trans, root, left_buf, mid_buf);
+		if (wret < 0)
+			ret = wret;
+		if (wret == 0) {
+			orig_slot += left_nr;
+			btrfs_memcpy(root, parent,
+				     &parent->ptrs[pslot].key,
+				     &mid->ptrs[0].key,
+				     sizeof(struct btrfs_disk_key));
+			btrfs_mark_buffer_dirty(parent_buf);
+			if (btrfs_header_nritems(&left->header) > orig_slot) {
+				path->nodes[level] = left_buf;
+				path->slots[level + 1] -= 1;
+				path->slots[level] = orig_slot;
+				btrfs_block_release(root, mid_buf);
+			} else {
+				orig_slot -=
+					btrfs_header_nritems(&left->header);
+				path->slots[level] = orig_slot;
+				btrfs_block_release(root, left_buf);
+			}
+			check_node(root, path, level);
+			return 0;
+		}
+		btrfs_block_release(root, left_buf);
+	}
+	right_buf = read_node_slot(root, parent_buf, pslot + 1);
+
+	/*
+	 * then try to empty the right most buffer into the middle
+	 */
+	if (right_buf) {
+		btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
+				&right_buf);
+		right = btrfs_buffer_node(right_buf);
+		wret = balance_node_right(trans, root, right_buf, mid_buf);
+		if (wret < 0)
+			ret = wret;
+		if (wret == 0) {
+			btrfs_memcpy(root, parent,
+				     &parent->ptrs[pslot + 1].key,
+				     &right->ptrs[0].key,
+				     sizeof(struct btrfs_disk_key));
+			btrfs_mark_buffer_dirty(parent_buf);
+			if (btrfs_header_nritems(&mid->header) <= orig_slot) {
+				path->nodes[level] = right_buf;
+				path->slots[level + 1] += 1;
+				path->slots[level] = orig_slot -
+					btrfs_header_nritems(&mid->header);
+				btrfs_block_release(root, mid_buf);
+			} else {
+				btrfs_block_release(root, right_buf);
+			}
+			check_node(root, path, level);
+			return 0;
+		}
+		btrfs_block_release(root, right_buf);
+	}
+	check_node(root, path, level);
+	return 1;
+}
+
 /*
  * look for key in the tree.  path is filled in with nodes along the way
  * if key is found, we return zero and you can find the item in the leaf
@@ -774,7 +884,16 @@
 		ret = insert_new_root(trans, root, path, level + 1);
 		if (ret)
 			return ret;
+	} else {
+		ret = push_nodes_for_insert(trans, root, path, level);
+		t = path->nodes[level];
+		c = btrfs_buffer_node(t);
+		if (!ret &&
+		    btrfs_header_nritems(&c->header) <
+		    BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
+			return 0;
 	}
+
 	c_nritems = btrfs_header_nritems(&c->header);
 	split_buffer = btrfs_alloc_free_block(trans, root);
 	split = btrfs_buffer_node(split_buffer);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 047a847..681b239 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -259,6 +259,7 @@
 	struct list_head trans_list;
 	struct crypto_hash *hash_tfm;
 	spinlock_t hash_lock;
+	int do_barriers;
 	struct kobject kobj;
 };
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b666c35..1304629 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -540,6 +540,7 @@
 	fs_info->btree_inode->i_nlink = 1;
 	fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
 	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
+	fs_info->do_barriers = 1;
 	BTRFS_I(fs_info->btree_inode)->root = tree_root;
 	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
 	       sizeof(struct btrfs_key));
@@ -622,6 +623,7 @@
 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root)
 {
+	int ret;
 	struct buffer_head *bh = root->fs_info->sb_buffer;
 
 	btrfs_set_super_root(root->fs_info->disk_super,
@@ -632,7 +634,15 @@
 	csum_tree_block(root, bh, 0);
 	bh->b_end_io = end_buffer_write_sync;
 	get_bh(bh);
-	submit_bh(WRITE, bh);
+	if (root->fs_info->do_barriers)
+		ret = submit_bh(WRITE_BARRIER, bh);
+	else
+		ret = submit_bh(WRITE, bh);
+	if (ret == -EOPNOTSUPP) {
+		set_buffer_uptodate(bh);
+		root->fs_info->do_barriers = 0;
+		ret = submit_bh(WRITE, bh);
+	}
 	wait_on_buffer(bh);
 	if (!buffer_uptodate(bh)) {
 		WARN_ON(1);