Btrfs: Add back metadata checksumming

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d1c6f02..1af0a96 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -310,9 +310,11 @@
 	struct extent_buffer *sb_buffer;
 	struct super_block *sb;
 	struct inode *btree_inode;
+	spinlock_t hash_lock;
 	struct mutex trans_mutex;
 	struct mutex fs_mutex;
 	struct list_head trans_list;
+	struct list_head hashers;
 	struct list_head dead_roots;
 	struct delayed_work trans_work;
 	struct kobject super_kobj;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 16f0260..1176e54 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -60,7 +60,7 @@
 	struct extent_buffer *eb;
 
 	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
-				 bytenr, blocksize, GFP_NOFS);
+				 bytenr, blocksize, NULL, GFP_NOFS);
 	return eb;
 }
 
@@ -99,10 +99,102 @@
 	return em;
 }
 
+u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
+{
+	return crc32c(seed, data, len); /* root is currently unused */
+}
+
+void btrfs_csum_final(u32 crc, char *result)
+{
+	*(__le32 *)result = ~cpu_to_le32(crc); /* inverted, little-endian */
+}
+
+/*
+ * Checksum everything in 'buf' past the first BTRFS_CSUM_SIZE bytes.  With
+ * 'verify' set the result is compared with the first BTRFS_CRC32_SIZE bytes
+ * of the buffer, otherwise it is written there.  Returns 0 on success and
+ * 1 if the buffer cannot be mapped or the checksum does not match.
+ */
+static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+			   int verify)
+{
+	char result[BTRFS_CRC32_SIZE];
+	unsigned long offset = BTRFS_CSUM_SIZE;
+	unsigned long len = buf->len - offset;
+	unsigned long cur_len, map_start, map_len;
+	char *map_token = NULL;
+	char *kaddr;
+	u32 crc = ~(u32)0;
+
+	while (len > 0) {
+		if (map_private_extent_buffer(buf, offset, 32, &map_token,
+					      &kaddr, &map_start, &map_len,
+					      KM_USER0)) {
+			printk("failed to map extent buffer! %lu\n", offset);
+			return 1;
+		}
+		cur_len = min(len, map_len - (offset - map_start));
+		crc = btrfs_csum_data(root, kaddr + offset - map_start,
+				      crc, cur_len);
+		len -= cur_len;
+		offset += cur_len;
+		unmap_extent_buffer(buf, map_token, KM_USER0);
+	}
+	btrfs_csum_final(crc, result);
+
+	if (!verify) {
+		write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
+		return 0;
+	}
+	if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
+		printk("btrfs: %s checksum verify failed on %llu\n",
+		       root->fs_info->sb->s_id,
+		       (unsigned long long)buf->start);
+		return 1;
+	}
+	return 0;
+}
+
+
+/*
+ * Writepage hook: when 'page' is the first page of an extent buffer
+ * (page->private carries the buffer length << 2), refresh the stored csum.
+ */
+int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
+{
+	struct extent_map_tree *tree;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT; /* widen first */
+	u64 found_start;
+	unsigned long len;
+	struct extent_buffer *eb;
+
+	tree = &BTRFS_I(page->mapping->host)->extent_tree;
+	if (!page->private || page->private == EXTENT_PAGE_PRIVATE)
+		return 0;
+	len = page->private >> 2;
+	WARN_ON(len == 0);
+	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+	if (WARN_ON(!eb))
+		return 0;
+	read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1);
+	found_start = btrfs_header_bytenr(eb);
+	if (found_start != start) {
+		printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
+		       (unsigned long long)start,
+		       (unsigned long long)found_start, len);
+	}
+	csum_tree_block(root, eb, 0);
+	free_extent_buffer(eb);
+	return 0;
+}
+
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct extent_map_tree *tree;
+	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
+	/* refresh the block checksum if this is its first page */
+	csum_dirty_buffer(root, page);
 	return extent_write_full_page(tree, page, btree_get_extent, wbc);
 }
 int btree_readpage(struct file *file, struct page *page)
@@ -117,7 +209,6 @@
 	struct extent_map_tree *tree;
 	int ret;
 
-	BUG_ON(page->private != 1);
 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
 	ret = try_release_extent_mapping(tree, page);
 	if (ret == 1) {
@@ -136,46 +227,6 @@
 	btree_releasepage(page, GFP_NOFS);
 }
 
-int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
-		    char *result)
-{
-	return 0;
-#if 0
-	u32 crc;
-	crc = crc32c(0, data, len);
-	memcpy(result, &crc, BTRFS_CRC32_SIZE);
-	return 0;
-#endif
-}
-
-#if 0
-static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
-			   int verify)
-{
-	return 0;
-	char result[BTRFS_CRC32_SIZE];
-	int ret;
-	struct btrfs_node *node;
-
-	ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE,
-			      bh->b_size - BTRFS_CSUM_SIZE, result);
-	if (ret)
-		return ret;
-	if (verify) {
-		if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) {
-			printk("btrfs: %s checksum verify failed on %llu\n",
-			       root->fs_info->sb->s_id,
-			       (unsigned long long)bh_blocknr(bh));
-			return 1;
-		}
-	} else {
-		node = btrfs_buffer_node(bh);
-		memcpy(node->header.csum, result, BTRFS_CRC32_SIZE);
-	}
-	return 0;
-}
-#endif
-
 #if 0
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
@@ -215,7 +266,7 @@
 	if (!buf)
 		return 0;
 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
-				 buf, 0);
+				 buf, 0, 0);
 	free_extent_buffer(buf);
 	return ret;
 }
@@ -225,12 +276,29 @@
 {
 	struct extent_buffer *buf = NULL;
 	struct inode *btree_inode = root->fs_info->btree_inode;
+	struct extent_map_tree *extent_tree;
+	int ret;
+
+	extent_tree = &BTRFS_I(btree_inode)->extent_tree;
 
 	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
 	if (!buf)
 		return NULL;
 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
-				 buf, 1);
+				 buf, 0, 1);
+	if (buf->flags & EXTENT_CSUM) {
+		return buf;
+	}
+	if (test_range_bit(extent_tree, buf->start, buf->start + buf->len - 1,
+			   EXTENT_CSUM, 1)) {
+		buf->flags |= EXTENT_CSUM;
+		return buf;
+	}
+	ret = csum_tree_block(root, buf, 1);
+	set_extent_bits(extent_tree, buf->start,
+			buf->start + buf->len - 1,
+			EXTENT_CSUM, GFP_NOFS);
+	buf->flags |= EXTENT_CSUM;
 	return buf;
 }
 
@@ -251,13 +319,6 @@
 	return 0;
 }
 
-int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf)
-{
-	struct inode *btree_inode = root->fs_info->btree_inode;
-	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf);
-	return 0;
-}
-
 static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 			struct btrfs_root *root,
 			struct btrfs_fs_info *fs_info,
@@ -416,7 +477,24 @@
 
 	return root;
 }
+#if 0
+/* Allocate a crypto hash transform for 'type' and add it to info->hashers. */
+static int add_hasher(struct btrfs_fs_info *info, char *type) {
+	struct btrfs_hasher *hasher = kmalloc(sizeof(*hasher), GFP_NOFS);
 
+	if (!hasher)
+		return -ENOMEM;
+	hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hasher->hash_tfm)) {
+		kfree(hasher);
+		return -EINVAL;
+	}
+	spin_lock(&info->hash_lock);
+	list_add(&hasher->list, &info->hashers);
+	spin_unlock(&info->hash_lock);
+	return 0;
+}
+#endif
 struct btrfs_root *open_ctree(struct super_block *sb)
 {
 	u32 sectorsize;
@@ -440,6 +518,9 @@
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
 	INIT_LIST_HEAD(&fs_info->trans_list);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
+	INIT_LIST_HEAD(&fs_info->hashers);
+	spin_lock_init(&fs_info->hash_lock);
+
 	memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
 	init_completion(&fs_info->kobj_unregister);
 	sb_set_blocksize(sb, 4096);
@@ -479,6 +560,14 @@
 	mutex_init(&fs_info->trans_mutex);
 	mutex_init(&fs_info->fs_mutex);
 
+#if 0
+	ret = add_hasher(fs_info, "crc32c");
+	if (ret) {
+		printk("btrfs: failed hash setup, modprobe cryptomgr?\n");
+		err = -ENOMEM;
+		goto fail_iput;
+	}
+#endif
 	__setup_root(512, 512, 512, tree_root,
 		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
@@ -509,25 +598,21 @@
 	i_size_write(fs_info->btree_inode,
 		     btrfs_super_total_bytes(disk_super));
 
-
 	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
 		    sizeof(disk_super->magic))) {
 		printk("btrfs: valid FS not found on %s\n", sb->s_id);
 		goto fail_sb_buffer;
 	}
+
 	blocksize = btrfs_level_size(tree_root,
 				     btrfs_super_root_level(disk_super));
+
 	tree_root->node = read_tree_block(tree_root,
 					  btrfs_super_root(disk_super),
 					  blocksize);
 	if (!tree_root->node)
 		goto fail_sb_buffer;
 
-#if 0
-	btrfs_print_leaf(tree_root, tree_root->node);
-	err = -EIO;
-	goto fail_tree_root;
-#endif
 	mutex_lock(&fs_info->fs_mutex);
 
 	ret = find_and_setup_root(tree_root, fs_info,
@@ -634,9 +719,19 @@
 
 	btrfs_free_block_groups(root->fs_info);
 	del_fs_roots(fs_info);
-	extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree);
+	extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree);
 	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
 	iput(fs_info->btree_inode);
+#if 0
+	while(!list_empty(&fs_info->hashers)) {
+		struct btrfs_hasher *hasher;
+		hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
+				    hashers);
+		list_del(&hasher->hashers);
+		crypto_free_hash(&fs_info->hash_tfm);
+		kfree(hasher);
+	}
+#endif
 	kfree(fs_info->extent_root);
 	kfree(fs_info->tree_root);
 	return 0;
@@ -733,5 +828,5 @@
 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
-					buf, 1);
+					buf, 0, 1);
 }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e4e68ea..6b2f2b4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1178,6 +1178,10 @@
 	btrfs_set_buffer_uptodate(buf);
 	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
 			 buf->start + buf->len - 1, GFP_NOFS);
+	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
+			buf->start, buf->start + buf->len - 1,
+			EXTENT_CSUM, GFP_NOFS);
+	buf->flags |= EXTENT_CSUM;
 	btrfs_set_buffer_defrag(buf);
 	trans->blocks_used++;
 	return buf;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f8aaba8..2a8bc4b 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -81,7 +81,7 @@
 }
 EXPORT_SYMBOL(extent_map_tree_init);
 
-void extent_map_tree_cleanup(struct extent_map_tree *tree)
+void extent_map_tree_empty_lru(struct extent_map_tree *tree)
 {
 	struct extent_buffer *eb;
 	while(!list_empty(&tree->buffer_lru)) {
@@ -91,7 +91,7 @@
 		free_extent_buffer(eb);
 	}
 }
-EXPORT_SYMBOL(extent_map_tree_cleanup);
+EXPORT_SYMBOL(extent_map_tree_empty_lru);
 
 struct extent_map *alloc_extent_map(gfp_t mask)
 {
@@ -1464,7 +1464,7 @@
 	if (!PagePrivate(page)) {
 		SetPagePrivate(page);
 		WARN_ON(!page->mapping->a_ops->invalidatepage);
-		set_page_private(page, 1);
+		set_page_private(page, EXTENT_PAGE_PRIVATE);
 		page_cache_get(page);
 	}
 }
@@ -1979,8 +1979,9 @@
 
 	spin_lock(&tree->lru_lock);
 	eb = find_lru(tree, start, len);
-	if (eb)
+	if (eb) {
 		goto lru_add;
+	}
 	spin_unlock(&tree->lru_lock);
 
 	if (eb) {
@@ -2007,6 +2008,7 @@
 
 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
 					  u64 start, unsigned long len,
+					  struct page *page0,
 					  gfp_t mask)
 {
 	unsigned long num_pages = num_extent_pages(start, len);
@@ -2024,7 +2026,18 @@
 	if (eb->flags & EXTENT_BUFFER_FILLED)
 		return eb;
 
-	for (i = 0; i < num_pages; i++, index++) {
+	if (page0) {
+		eb->first_page = page0;
+		i = 1;
+		index++;
+		page_cache_get(page0);
+		set_page_extent_mapped(page0);
+		set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
+				 len << 2);
+	} else {
+		i = 0;
+	}
+	for (; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
 		if (!p) {
 			WARN_ON(1);
@@ -2036,8 +2049,13 @@
 			goto fail;
 		}
 		set_page_extent_mapped(p);
-		if (i == 0)
+		if (i == 0) {
 			eb->first_page = p;
+			set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
+					 len << 2);
+		} else {
+			set_page_private(p, EXTENT_PAGE_PRIVATE);
+		}
 		if (!PageUptodate(p))
 			uptodate = 0;
 		unlock_page(p);
@@ -2057,8 +2075,7 @@
 					  gfp_t mask)
 {
 	unsigned long num_pages = num_extent_pages(start, len);
-	unsigned long i;
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT;
 	struct extent_buffer *eb;
 	struct page *p;
 	struct address_space *mapping = tree->mapping;
@@ -2082,8 +2099,15 @@
 			goto fail;
 		}
 		set_page_extent_mapped(p);
-		if (i == 0)
+
+		if (i == 0) {
 			eb->first_page = p;
+			set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
+					 len << 2);
+		} else {
+			set_page_private(p, EXTENT_PAGE_PRIVATE);
+		}
+
 		if (!PageUptodate(p))
 			uptodate = 0;
 		unlock_page(p);
@@ -2174,7 +2198,21 @@
 
 	num_pages = num_extent_pages(eb->start, eb->len);
 	for (i = 0; i < num_pages; i++) {
+		struct page *page = extent_buffer_page(eb, i);
+		/* writepage may need to do something special for the
+		 * first page, we have to make sure page->private is
+		 * properly set.  releasepage may drop page->private
+		 * on us if the page isn't already dirty.
+		 */
+		if (i == 0) {
+			lock_page(page);
+			set_page_private(page,
+					 EXTENT_PAGE_PRIVATE_FIRST_PAGE |
+					 eb->len << 2);
+		}
 		__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
+		if (i == 0)
+			unlock_page(page);
 	}
 	return set_extent_dirty(tree, eb->start,
 				eb->start + eb->len - 1, GFP_NOFS);
@@ -2217,9 +2255,12 @@
 EXPORT_SYMBOL(extent_buffer_uptodate);
 
 int read_extent_buffer_pages(struct extent_map_tree *tree,
-			     struct extent_buffer *eb, int wait)
+			     struct extent_buffer *eb,
+			     u64 start,
+			     int wait)
 {
 	unsigned long i;
+	unsigned long start_i;
 	struct page *page;
 	int err;
 	int ret = 0;
@@ -2232,9 +2273,16 @@
 			   EXTENT_UPTODATE, 1)) {
 		return 0;
 	}
+	if (start) {
+		WARN_ON(start < eb->start);
+		start_i = (start >> PAGE_CACHE_SHIFT) -
+			(eb->start >> PAGE_CACHE_SHIFT);
+	} else {
+		start_i = 0;
+	}
 
 	num_pages = num_extent_pages(eb->start, eb->len);
-	for (i = 0; i < num_pages; i++) {
+	for (i = start_i; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if (PageUptodate(page)) {
 			continue;
@@ -2260,7 +2308,7 @@
 		return ret;
 	}
 
-	for (i = 0; i < num_pages; i++) {
+	for (i = start_i; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		wait_on_page_locked(page);
 		if (!PageUptodate(page)) {
@@ -2314,7 +2362,7 @@
 }
 EXPORT_SYMBOL(read_extent_buffer);
 
-static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start,
+int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 			       unsigned long min_len, char **token, char **map,
 			       unsigned long *map_start,
 			       unsigned long *map_len, int km)
@@ -2337,6 +2385,10 @@
 		offset = 0;
 		*map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
 	}
+	if (start + min_len >= eb->len) {
+printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
+		WARN_ON(1);
+	}
 
 	p = extent_buffer_page(eb, i);
 	WARN_ON(!PageUptodate(p));
@@ -2346,6 +2398,7 @@
 	*map_len = PAGE_CACHE_SIZE - offset;
 	return 0;
 }
+EXPORT_SYMBOL(map_private_extent_buffer);
 
 int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
 		      unsigned long min_len,
@@ -2360,8 +2413,8 @@
 		eb->map_token = NULL;
 		save = 1;
 	}
-	err = __map_extent_buffer(eb, start, min_len, token, map,
-				   map_start, map_len, km);
+	err = map_private_extent_buffer(eb, start, min_len, token, map,
+				       map_start, map_len, km);
 	if (!err && save) {
 		eb->map_token = *token;
 		eb->kaddr = *map;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index f1dc28d..39d78d3 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -17,8 +17,17 @@
 #define EXTENT_DEFRAG (1 << 6)
 #define EXTENT_DEFRAG_DONE (1 << 7)
 #define EXTENT_BUFFER_FILLED (1 << 8)
+#define EXTENT_CSUM (1 << 9)
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 
+/*
+ * page->private values.  Pages controlled by the extent map hold 1; the
+ * first page of an extent buffer holds FIRST_PAGE | (buffer length << 2).
+ */
+
+#define EXTENT_PAGE_PRIVATE 1
+#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
+
 
 struct extent_map_ops {
 	int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
@@ -89,7 +98,7 @@
 
 void extent_map_tree_init(struct extent_map_tree *tree,
 			  struct address_space *mapping, gfp_t mask);
-void extent_map_tree_cleanup(struct extent_map_tree *tree);
+void extent_map_tree_empty_lru(struct extent_map_tree *tree);
 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
 					 u64 start, u64 end);
 int add_extent_mapping(struct extent_map_tree *tree,
@@ -143,13 +152,14 @@
 
 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
 					  u64 start, unsigned long len,
+					  struct page *page0,
 					  gfp_t mask);
 struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
 					 u64 start, unsigned long len,
 					  gfp_t mask);
 void free_extent_buffer(struct extent_buffer *eb);
 int read_extent_buffer_pages(struct extent_map_tree *tree,
-			     struct extent_buffer *eb, int wait);
+			     struct extent_buffer *eb, u64 start, int wait);
 
 static inline void extent_buffer_get(struct extent_buffer *eb)
 {
@@ -187,5 +197,9 @@
 		      unsigned long min_len, char **token, char **map,
 		      unsigned long *map_start,
 		      unsigned long *map_len, int km);
+int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
+		      unsigned long min_len, char **token, char **map,
+		      unsigned long *map_start,
+		      unsigned long *map_len, int km);
 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
 #endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 84f496c..80f84b4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1713,10 +1713,6 @@
 	struct extent_map_tree *tree;
 	int ret;
 
-	if (page->private != 1) {
-		WARN_ON(1);
-		return try_to_free_buffers(page);
-	}
 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
 	ret = try_release_extent_mapping(tree, page);
 	if (ret == 1) {