Btrfs: Keep extent mappings in RAM until pending ordered extents are done

It was possible for stale mappings read from disk to be used instead of
the mapping for a new pending ordered extent.  This adds a flag to the
extent map struct that keeps the mapping pinned in memory until the
pending ordered extent is actually on disk.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3153b4f..d4a63ae 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2000,7 +2000,7 @@
 	struct block_device *bdev;
 	int ret;
 	int nr = 0;
-	size_t page_offset = 0;
+	size_t pg_offset = 0;
 	size_t blocksize;
 	loff_t i_size = i_size_read(inode);
 	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -2008,9 +2008,9 @@
 	u64 delalloc_end;
 
 	WARN_ON(!PageLocked(page));
-	page_offset = i_size & (PAGE_CACHE_SIZE - 1);
+	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
 	if (page->index > end_index ||
-	   (page->index == end_index && !page_offset)) {
+	   (page->index == end_index && !pg_offset)) {
 		page->mapping->a_ops->invalidatepage(page, 0);
 		unlock_page(page);
 		return 0;
@@ -2020,12 +2020,12 @@
 		char *userpage;
 
 		userpage = kmap_atomic(page, KM_USER0);
-		memset(userpage + page_offset, 0,
-		       PAGE_CACHE_SIZE - page_offset);
+		memset(userpage + pg_offset, 0,
+		       PAGE_CACHE_SIZE - pg_offset);
 		kunmap_atomic(userpage, KM_USER0);
 		flush_dcache_page(page);
 	}
-	page_offset = 0;
+	pg_offset = 0;
 
 	set_page_extent_mapped(page);
 
@@ -2088,7 +2088,7 @@
 			unlock_start = page_end + 1;
 			break;
 		}
-		em = epd->get_extent(inode, page, page_offset, cur,
+		em = epd->get_extent(inode, page, pg_offset, cur,
 				     end - cur + 1, 1);
 		if (IS_ERR(em) || !em) {
 			SetPageError(page);
@@ -2113,12 +2113,13 @@
 
 			unlock_extent(tree, unlock_start, cur + iosize -1,
 				      GFP_NOFS);
+
 			if (tree->ops && tree->ops->writepage_end_io_hook)
 				tree->ops->writepage_end_io_hook(page, cur,
 							 cur + iosize - 1,
 							 NULL, 1);
 			cur = cur + iosize;
-			page_offset += iosize;
+			pg_offset += iosize;
 			unlock_start = cur;
 			continue;
 		}
@@ -2127,7 +2128,7 @@
 		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
 				   EXTENT_DIRTY, 0)) {
 			cur = cur + iosize;
-			page_offset += iosize;
+			pg_offset += iosize;
 			continue;
 		}
 		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
@@ -2141,6 +2142,7 @@
 			SetPageError(page);
 		} else {
 			unsigned long max_nr = end_index + 1;
+
 			set_range_writeback(tree, cur, cur + iosize - 1);
 			if (!PageWriteback(page)) {
 				printk("warning page %lu not writeback, "
@@ -2150,14 +2152,14 @@
 			}
 
 			ret = submit_extent_page(WRITE, tree, page, sector,
-						 iosize, page_offset, bdev,
+						 iosize, pg_offset, bdev,
 						 &epd->bio, max_nr,
 						 end_bio_extent_writepage, 0);
 			if (ret)
 				SetPageError(page);
 		}
 		cur = cur + iosize;
-		page_offset += iosize;
+		pg_offset += iosize;
 		nr++;
 	}
 done:
@@ -2579,7 +2581,8 @@
 				spin_unlock(&map->lock);
 				break;
 			}
-			if (em->start != start) {
+			if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
+			    em->start != start) {
 				spin_unlock(&map->lock);
 				free_extent_map(em);
 				break;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 8112327..71b1ac1 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -173,6 +173,9 @@
 
 static int mergable_maps(struct extent_map *prev, struct extent_map *next)
 {
+	if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
+		return 0;
+
 	if (extent_map_end(prev) == next->start &&
 	    prev->flags == next->flags &&
 	    prev->bdev == next->bdev &&
@@ -320,6 +323,7 @@
 {
 	int ret = 0;
 
+	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
 	BUG_ON(spin_trylock(&tree->lock));
 	rb_erase(&em->rb_node, &tree->map);
 	em->in_tree = 0;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 56314217..a3978ec 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -8,6 +8,9 @@
 #define EXTENT_MAP_INLINE (u64)-2
 #define EXTENT_MAP_DELALLOC (u64)-1
 
+/* bits for the flags field */
+#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
+
 struct extent_map {
 	struct rb_node rb_node;
 
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index e02f1e5..d9c69e1 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -192,7 +192,6 @@
 				 (char *)&sector_sum->sum);
 		sector_sum->offset = page_offset(bvec->bv_page) +
 			bvec->bv_offset;
-
 		sector_sum++;
 		bio_index++;
 		total_bytes += bvec->bv_len;
@@ -201,9 +200,6 @@
 	}
 	btrfs_add_ordered_sum(inode, ordered, sums);
 	btrfs_put_ordered_extent(ordered);
-	if (total_bytes != bio->bi_size) {
-printk("warning, total bytes %lu bio size %u\n", total_bytes, bio->bi_size);
-	}
 	return 0;
 }
 
@@ -372,6 +368,7 @@
 		write_extent_buffer(leaf, &sector_sum->sum,
 				    (unsigned long)item, BTRFS_CRC32_SIZE);
 	}
+
 	total_bytes += root->sectorsize;
 	sector_sum++;
 	if (total_bytes < sums->len) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 40ad1b2..eccdb95 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -358,9 +358,7 @@
 	struct extent_map *split = NULL;
 	struct extent_map *split2 = NULL;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-	struct extent_map *tmp;
 	u64 len = end - start + 1;
-	u64 next_start;
 	int ret;
 	int testend = 1;
 
@@ -381,8 +379,16 @@
 			spin_unlock(&em_tree->lock);
 			break;
 		}
-		tmp = rb_entry(&em->rb_node, struct extent_map, rb_node);
-		next_start = tmp->start;
+		if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
+			start = em->start + em->len;
+			free_extent_map(em);
+			spin_unlock(&em_tree->lock);
+			if (start < end) {
+				len = end - start + 1;
+				continue;
+			}
+			break;
+		}
 		remove_extent_mapping(em_tree, em);
 
 		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8803abc..08dbe73 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -144,6 +144,7 @@
 		em->len = ins.offset;
 		em->block_start = ins.objectid;
 		em->bdev = root->fs_info->fs_devices->latest_bdev;
+		set_bit(EXTENT_FLAG_PINNED, &em->flags);
 		while(1) {
 			spin_lock(&em_tree->lock);
 			ret = add_extent_mapping(em_tree, em);
@@ -483,6 +484,8 @@
 	struct btrfs_trans_handle *trans;
 	struct btrfs_ordered_extent *ordered_extent;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *em;
 	u64 alloc_hint = 0;
 	struct list_head list;
 	struct btrfs_key ins;
@@ -524,6 +527,17 @@
 				       ordered_extent->len,
 				       ordered_extent->len, 0);
 	BUG_ON(ret);
+
+
+	spin_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, ordered_extent->file_offset,
+			       ordered_extent->len);
+	if (em) {
+		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+		free_extent_map(em);
+	}
+	spin_unlock(&em_tree->lock);
+
 	btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
 				ordered_extent->file_offset +
 				ordered_extent->len - 1);
@@ -538,6 +552,7 @@
 
 	btrfs_ordered_update_i_size(inode, ordered_extent);
 	btrfs_remove_ordered_extent(inode, ordered_extent);
+
 	/* once for us */
 	btrfs_put_ordered_extent(ordered_extent);
 	/* once for the tree */