Btrfs: Add file data csums back in via hooks in the extent map code

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a9c7419..6053f9d 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -57,7 +57,7 @@
 {
 	tree->map.rb_node = NULL;
 	tree->state.rb_node = NULL;
-	tree->fill_delalloc = NULL;
+	tree->ops = NULL;
 	rwlock_init(&tree->lock);
 	tree->mapping = mapping;
 }
@@ -93,6 +93,7 @@
 		return state;
 	state->state = 0;
 	state->in_tree = 0;
+	state->private = 0;
 	atomic_set(&state->refs, 1);
 	init_waitqueue_head(&state->wq);
 	return state;
@@ -1034,6 +1035,61 @@
 }
 EXPORT_SYMBOL(unlock_range);
 
+int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
+{
+	struct rb_node *node;
+	struct extent_state *state;
+	int ret = 0;
+
+	write_lock_irq(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(&tree->state, start);
+	if (!node || IS_ERR(node)) {
+		ret = -ENOENT;
+		goto out;
+	}
+	state = rb_entry(node, struct extent_state, rb_node);
+	if (state->start != start) {
+		ret = -ENOENT;
+		goto out;
+	}
+	state->private = private;
+out:
+	write_unlock_irq(&tree->lock);
+	return ret;
+
+}
+
+int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
+{
+	struct rb_node *node;
+	struct extent_state *state;
+	int ret = 0;
+
+	read_lock_irq(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(&tree->state, start);
+	if (!node || IS_ERR(node)) {
+		ret = -ENOENT;
+		goto out;
+	}
+	state = rb_entry(node, struct extent_state, rb_node);
+	if (state->start != start) {
+		ret = -ENOENT;
+		goto out;
+	}
+	*private = state->private;
+out:
+	read_unlock_irq(&tree->lock);
+	return ret;
+}
+
 /*
  * searches a range in the state tree for a given mask.
  * If 'filled' == 1, this returns 1 only if ever extent in the tree
@@ -1185,12 +1241,13 @@
 static int end_bio_extent_readpage(struct bio *bio,
 				   unsigned int bytes_done, int err)
 {
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct extent_map_tree *tree = bio->bi_private;
 	u64 start;
 	u64 end;
 	int whole_page;
+	int ret;
 
 	if (bio->bi_size)
 		return 1;
@@ -1208,6 +1265,11 @@
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
 
+		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
+			ret = tree->ops->readpage_end_io_hook(page, start, end);
+			if (ret)
+				uptodate = 0;
+		}
 		if (uptodate) {
 			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
 			if (whole_page)
@@ -1388,9 +1450,16 @@
 			continue;
 		}
 
-		ret = submit_extent_page(READ, tree, page,
-					 sector, iosize, page_offset, bdev,
-					 end_bio_extent_readpage);
+		ret = 0;
+		if (tree->ops && tree->ops->readpage_io_hook) {
+			ret = tree->ops->readpage_io_hook(page, cur,
+							  cur + iosize - 1);
+		}
+		if (!ret) {
+			ret = submit_extent_page(READ, tree, page,
+						 sector, iosize, page_offset,
+						 bdev, end_bio_extent_readpage);
+		}
 		if (ret)
 			SetPageError(page);
 		cur = cur + iosize;
@@ -1462,7 +1531,7 @@
 					       &delalloc_end,
 					       128 * 1024 * 1024);
 	if (nr_delalloc) {
-		tree->fill_delalloc(inode, start, delalloc_end);
+		tree->ops->fill_delalloc(inode, start, delalloc_end);
 		if (delalloc_end >= page_end + 1) {
 			clear_extent_bit(tree, page_end + 1, delalloc_end,
 					 EXTENT_LOCKED | EXTENT_DELALLOC,
@@ -1528,12 +1597,17 @@
 			continue;
 		}
 		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
-		set_range_writeback(tree, cur, cur + iosize - 1);
-		ret = submit_extent_page(WRITE, tree, page,
-					 sector, iosize, page_offset, bdev,
-					 end_bio_extent_writepage);
+		ret = (tree->ops && tree->ops->writepage_io_hook) ? tree->ops->writepage_io_hook(page, cur, cur + iosize - 1) : 0;
 		if (ret)
 			SetPageError(page);
+		else {
+			set_range_writeback(tree, cur, cur + iosize - 1);
+			ret = submit_extent_page(WRITE, tree, page, sector,
+						 iosize, page_offset, bdev,
+						 end_bio_extent_writepage);
+			if (ret)
+				SetPageError(page);
+		}
 		cur = cur + iosize;
 		page_offset += iosize;
 		nr++;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index e91a2e9..0eae6c4 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -6,12 +6,19 @@
 #define EXTENT_MAP_INLINE (u64)-2
 #define EXTENT_MAP_DELALLOC (u64)-1
 
+struct extent_map_ops {
+	int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
+	int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
+	int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
+	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end);
+};
+
 struct extent_map_tree {
 	struct rb_root map;
 	struct rb_root state;
 	struct address_space *mapping;
 	rwlock_t lock;
-	int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
+	struct extent_map_ops *ops;
 };
 
 /* note, this must start with the same fields as fs/extent_map.c:tree_entry */
@@ -36,6 +43,10 @@
 	wait_queue_head_t wq;
 	atomic_t refs;
 	unsigned long state;
+
+	/* for use by the FS */
+	u64 private;
+
 	struct list_head list;
 };
 
@@ -89,4 +100,6 @@
 			struct inode *inode, struct page *page,
 			unsigned from, unsigned to);
 int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end);
+int set_state_private(struct extent_map_tree *tree, u64 start, u64 private);
+int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private);
 #endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 483eea6..3ee6b2f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -52,6 +52,7 @@
 static struct address_space_operations btrfs_aops;
 static struct address_space_operations btrfs_symlink_aops;
 static struct file_operations btrfs_dir_file_operations;
+static struct extent_map_ops btrfs_extent_map_ops;
 
 static struct kmem_cache *btrfs_inode_cachep;
 struct kmem_cache *btrfs_trans_handle_cachep;
@@ -103,6 +104,90 @@
 	return ret;
 }
 
+int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end)
+{
+	struct inode *inode = page->mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+	char *kaddr;
+	int ret;
+	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+	size_t offset = start - page_start;
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, inode);
+	kaddr = kmap(page);
+	btrfs_csum_file_block(trans, root, inode->i_ino,
+			      start, kaddr + offset, end - start + 1);
+	kunmap(page);
+	ret = btrfs_end_transaction(trans, root);
+	BUG_ON(ret);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	return ret;
+}
+
+int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
+{
+	int ret = 0;
+	struct inode *inode = page->mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct btrfs_csum_item *item;
+	struct btrfs_path *path = NULL;
+	u64 private;
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	path = btrfs_alloc_path();
+	item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
+	if (IS_ERR(item)) {
+		ret = PTR_ERR(item);
+		/* a csum that isn't present is a preallocated region. */
+		if (ret == -ENOENT || ret == -EFBIG)
+			ret = 0;
+		private = 0;
+		goto out;
+	}
+	memcpy((char *)&private, &item->csum, BTRFS_CRC32_SIZE);
+	set_state_private(em_tree, start, private);
+out:
+	if (path)
+		btrfs_free_path(path);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	return ret;
+}
+
+int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
+{
+	char csum[BTRFS_CRC32_SIZE];
+	size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
+	struct inode *inode = page->mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	char *kaddr;
+	u64 private;
+	int ret;
+
+	ret = get_state_private(em_tree, start, &private);
+	kaddr = kmap_atomic(page, KM_IRQ0);
+	if (ret) {
+		goto zeroit;
+	}
+	ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum);
+	BUG_ON(ret);
+	if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) {
+		goto zeroit;
+	}
+	kunmap_atomic(kaddr, KM_IRQ0);
+	return 0;
+
+zeroit:
+	printk("btrfs csum failed ino %lu off %llu\n",
+	       page->mapping->host->i_ino, (unsigned long long)start);
+	memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page);
+	kunmap_atomic(kaddr, KM_IRQ0);
+	return 0;
+}
 
 void btrfs_read_locked_inode(struct inode *inode)
 {
@@ -155,7 +240,7 @@
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFREG:
 		inode->i_mapping->a_ops = &btrfs_aops;
-		BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range;
+		BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
 		inode->i_fop = &btrfs_file_operations;
 		inode->i_op = &btrfs_file_inode_operations;
 		break;
@@ -1148,7 +1233,7 @@
 		inode->i_op = &btrfs_file_inode_operations;
 		extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
 				     inode->i_mapping, GFP_NOFS);
-		BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range;
+		BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
 	}
 	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
@@ -2286,7 +2371,7 @@
 		inode->i_op = &btrfs_file_inode_operations;
 		extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
 				     inode->i_mapping, GFP_NOFS);
-		BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range;
+		BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
 	}
 	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
@@ -2362,6 +2447,13 @@
 #endif
 };
 
+static struct extent_map_ops btrfs_extent_map_ops = {
+	.fill_delalloc = run_delalloc_range,
+	.writepage_io_hook = btrfs_writepage_io_hook,
+	.readpage_io_hook = btrfs_readpage_io_hook,
+	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
+};
+
 static struct address_space_operations btrfs_aops = {
 	.readpage	= btrfs_readpage,
 	.writepage	= btrfs_writepage,