Btrfs: Add an extent buffer LRU to reduce radix tree hits

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 08ddf18..bef61ee 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -88,8 +88,6 @@
 	if (IS_ERR(cow))
 		return PTR_ERR(cow);
 
-	cow->alloc_addr = (unsigned long)__builtin_return_address(0);
-
 	copy_extent_buffer(cow, buf, 0, 0, cow->len);
 	btrfs_set_header_bytenr(cow, cow->start);
 	btrfs_set_header_generation(cow, trans->transid);
@@ -151,7 +149,6 @@
 	search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
 	ret = __btrfs_cow_block(trans, root, buf, parent,
 				 parent_slot, cow_ret, search_start, 0);
-	(*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0);
 	return ret;
 }
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8e606e6..fd7e6c1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -50,8 +50,6 @@
 	struct extent_buffer *eb;
 	eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
 				bytenr, blocksize, GFP_NOFS);
-	if (eb)
-		eb->alloc_addr = (unsigned long)__builtin_return_address(0);
 	return eb;
 }
 
@@ -63,7 +61,6 @@
 
 	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
 				 bytenr, blocksize, GFP_NOFS);
-	eb->alloc_addr = (unsigned long)__builtin_return_address(0);
 	return eb;
 }
 
@@ -234,7 +231,6 @@
 		return NULL;
 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
 				 buf, 1);
-	buf->alloc_addr = (unsigned long)__builtin_return_address(0);
 	return buf;
 }
 
@@ -638,6 +634,7 @@
 
 	btrfs_free_block_groups(root->fs_info);
 	del_fs_roots(fs_info);
+	extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree);
 	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
 	iput(fs_info->btree_inode);
 	kfree(fs_info->extent_root);
@@ -647,20 +644,20 @@
 
 int btrfs_buffer_uptodate(struct extent_buffer *buf)
 {
-	struct inode *btree_inode = buf->pages[0]->mapping->host;
+	struct inode *btree_inode = buf->last_page->mapping->host;
 	return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf);
 }
 
 int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
 {
-	struct inode *btree_inode = buf->pages[0]->mapping->host;
+	struct inode *btree_inode = buf->last_page->mapping->host;
 	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree,
 					  buf);
 }
 
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 {
-	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+	struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
 	u64 transid = btrfs_header_generation(buf);
 	struct inode *btree_inode = root->fs_info->btree_inode;
 
@@ -681,7 +678,7 @@
 
 void btrfs_set_buffer_defrag(struct extent_buffer *buf)
 {
-	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+	struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
 			buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
@@ -689,7 +686,7 @@
 
 void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
 {
-	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+	struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
 			buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
@@ -698,7 +695,7 @@
 
 int btrfs_buffer_defrag(struct extent_buffer *buf)
 {
-	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+	struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
 		     buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
@@ -706,7 +703,7 @@
 
 int btrfs_buffer_defrag_done(struct extent_buffer *buf)
 {
-	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+	struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
 		     buf->start, buf->start + buf->len - 1,
@@ -715,7 +712,7 @@
 
 int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
 {
-	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+	struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
 		     buf->start, buf->start + buf->len - 1,
@@ -724,7 +721,7 @@
 
 int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
 {
-	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+	struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
 		     buf->start, buf->start + buf->len - 1,
@@ -733,7 +730,7 @@
 
 int btrfs_read_buffer(struct extent_buffer *buf)
 {
-	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+	struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
 					buf, 1);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72e6b27..525fa84 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1176,7 +1176,6 @@
 		return ERR_PTR(-ENOMEM);
 	}
 	btrfs_set_buffer_uptodate(buf);
-	buf->alloc_addr = (unsigned long)__builtin_return_address(0);
 	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
 			 buf->start + buf->len - 1, GFP_NOFS);
 	btrfs_set_buffer_defrag(buf);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index e241699..85b28a6 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
+#include <linux/swap.h>
 #include "extent_map.h"
 
 /* temporary define until extent_map moves out of btrfs */
@@ -20,14 +21,11 @@
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
 
-static LIST_HEAD(extent_buffers);
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
-static spinlock_t extent_buffers_lock;
 static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
-static int nr_extent_buffers;
-#define MAX_EXTENT_BUFFER_CACHE 128
+#define BUFFER_LRU_MAX 64
 
 struct tree_entry {
 	u64 start;
@@ -47,20 +45,12 @@
 	extent_buffer_cache = btrfs_cache_create("extent_buffers",
 					    sizeof(struct extent_buffer), 0,
 					    NULL);
-	spin_lock_init(&extent_buffers_lock);
 }
 
 void __exit extent_map_exit(void)
 {
-	struct extent_buffer *eb;
 	struct extent_state *state;
 
-	while (!list_empty(&extent_buffers)) {
-		eb = list_entry(extent_buffers.next,
-				struct extent_buffer, list);
-		list_del(&eb->list);
-		kmem_cache_free(extent_buffer_cache, eb);
-	}
 	while (!list_empty(&states)) {
 		state = list_entry(states.next, struct extent_state, list);
 		printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
@@ -68,14 +58,6 @@
 		kmem_cache_free(extent_state_cache, state);
 
 	}
-	while (!list_empty(&buffers)) {
-		eb = list_entry(buffers.next,
-				struct extent_buffer, leak_list);
-		printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr);
-		list_del(&eb->leak_list);
-		kmem_cache_free(extent_buffer_cache, eb);
-	}
-
 
 	if (extent_map_cache)
 		kmem_cache_destroy(extent_map_cache);
@@ -92,10 +74,25 @@
 	tree->state.rb_node = NULL;
 	tree->ops = NULL;
 	rwlock_init(&tree->lock);
+	spin_lock_init(&tree->lru_lock);
 	tree->mapping = mapping;
+	INIT_LIST_HEAD(&tree->buffer_lru);
+	tree->lru_size = 0;
 }
 EXPORT_SYMBOL(extent_map_tree_init);
 
+void extent_map_tree_cleanup(struct extent_map_tree *tree)
+{
+	struct extent_buffer *eb;
+	while(!list_empty(&tree->buffer_lru)) {
+		eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
+				lru);
+		list_del(&eb->lru);
+		free_extent_buffer(eb);
+	}
+}
+EXPORT_SYMBOL(extent_map_tree_cleanup);
+
 struct extent_map *alloc_extent_map(gfp_t mask)
 {
 	struct extent_map *em;
@@ -1915,59 +1912,43 @@
 	return (em->block_start + start - em->start) >> inode->i_blkbits;
 }
 
-static struct extent_buffer *__alloc_extent_buffer(gfp_t mask)
+static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
 {
-	struct extent_buffer *eb = NULL;
-
-	spin_lock(&extent_buffers_lock);
-	if (!list_empty(&extent_buffers)) {
-		eb = list_entry(extent_buffers.next, struct extent_buffer,
-				list);
-		list_del(&eb->list);
-		WARN_ON(nr_extent_buffers == 0);
-		nr_extent_buffers--;
-	}
-	spin_unlock(&extent_buffers_lock);
-
-	if (eb) {
-		memset(eb, 0, sizeof(*eb));
-	} else {
-		eb = kmem_cache_zalloc(extent_buffer_cache, mask);
-	}
-	spin_lock(&extent_buffers_lock);
-	list_add(&eb->leak_list, &buffers);
-	spin_unlock(&extent_buffers_lock);
-
-	return eb;
+	if (list_empty(&eb->lru)) {
+		extent_buffer_get(eb);
+		list_add(&eb->lru, &tree->buffer_lru);
+		tree->lru_size++;
+		if (tree->lru_size >= BUFFER_LRU_MAX) {
+			struct extent_buffer *rm;
+			rm = list_entry(tree->buffer_lru.prev,
+					struct extent_buffer, lru);
+			tree->lru_size--;
+			list_del(&rm->lru);
+			free_extent_buffer(rm);
+		}
+	} else
+		list_move(&eb->lru, &tree->buffer_lru);
+	return 0;
 }
-
-static void __free_extent_buffer(struct extent_buffer *eb)
+static struct extent_buffer *find_lru(struct extent_map_tree *tree,
+				      u64 start, unsigned long len)
 {
+	struct list_head *lru = &tree->buffer_lru;
+	struct list_head *cur = lru->next;
+	struct extent_buffer *eb;
 
-	spin_lock(&extent_buffers_lock);
-	list_del_init(&eb->leak_list);
-	spin_unlock(&extent_buffers_lock);
+	if (list_empty(lru))
+		return NULL;
 
-	if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) {
-		kmem_cache_free(extent_buffer_cache, eb);
-	} else {
-		spin_lock(&extent_buffers_lock);
-		list_add(&eb->list, &extent_buffers);
-		nr_extent_buffers++;
-		spin_unlock(&extent_buffers_lock);
-	}
-}
-
-static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i)
-{
-	struct page *p;
-
-	if (i < EXTENT_INLINE_PAGES)
-		return eb->pages[i];
-	i += eb->start >> PAGE_CACHE_SHIFT;
-	p = find_get_page(eb->pages[0]->mapping, i);
-	page_cache_release(p);
-	return p;
+	do {
+		eb = list_entry(cur, struct extent_buffer, lru);
+		if (eb->start == start && eb->len == len) {
+			extent_buffer_get(eb);
+			return eb;
+		}
+		cur = cur->next;
+	} while (cur != lru);
+	return NULL;
 }
 
 static inline unsigned long num_extent_pages(u64 start, u64 len)
@@ -1975,6 +1956,55 @@
 	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
 		(start >> PAGE_CACHE_SHIFT);
 }
+
+static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+					      unsigned long i)
+{
+	struct page *p;
+
+	if (i == 0)
+		return eb->last_page;
+	i += eb->start >> PAGE_CACHE_SHIFT;
+	p = find_get_page(eb->last_page->mapping, i);
+	page_cache_release(p);
+	return p;
+}
+
+static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
+						   u64 start,
+						   unsigned long len,
+						   gfp_t mask)
+{
+	struct extent_buffer *eb = NULL;
+
+	spin_lock(&tree->lru_lock);
+	eb = find_lru(tree, start, len);
+	if (eb)
+		goto lru_add;
+	spin_unlock(&tree->lru_lock);
+
+	if (eb) {
+		memset(eb, 0, sizeof(*eb));
+	} else {
+		eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+	}
+	INIT_LIST_HEAD(&eb->lru);
+	eb->start = start;
+	eb->len = len;
+	atomic_set(&eb->refs, 1);
+
+	spin_lock(&tree->lru_lock);
+lru_add:
+	add_lru(tree, eb);
+	spin_unlock(&tree->lru_lock);
+	return eb;
+}
+
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+	kmem_cache_free(extent_buffer_cache, eb);
+}
+
 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
 					  u64 start, unsigned long len,
 					  gfp_t mask)
@@ -1987,14 +2017,12 @@
 	struct address_space *mapping = tree->mapping;
 	int uptodate = 0;
 
-	eb = __alloc_extent_buffer(mask);
+	eb = __alloc_extent_buffer(tree, start, len, mask);
 	if (!eb || IS_ERR(eb))
 		return NULL;
 
-	eb->alloc_addr = (unsigned long)__builtin_return_address(0);
-	eb->start = start;
-	eb->len = len;
-	atomic_set(&eb->refs, 1);
+	if (eb->flags & EXTENT_BUFFER_FILLED)
+		return eb;
 
 	for (i = 0; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
@@ -2008,14 +2036,15 @@
 			goto fail;
 		}
 		set_page_extent_mapped(p);
-		if (i < EXTENT_INLINE_PAGES)
-			eb->pages[i] = p;
+		if (i == 0)
+			eb->last_page = p;
 		if (!PageUptodate(p))
 			uptodate = 0;
 		unlock_page(p);
 	}
 	if (uptodate)
 		eb->flags |= EXTENT_UPTODATE;
+	eb->flags |= EXTENT_BUFFER_FILLED;
 	return eb;
 fail:
 	free_extent_buffer(eb);
@@ -2035,14 +2064,12 @@
 	struct address_space *mapping = tree->mapping;
 	int uptodate = 1;
 
-	eb = __alloc_extent_buffer(mask);
+	eb = __alloc_extent_buffer(tree, start, len, mask);
 	if (!eb || IS_ERR(eb))
 		return NULL;
 
-	eb->alloc_addr = (unsigned long)__builtin_return_address(0);
-	eb->start = start;
-	eb->len = len;
-	atomic_set(&eb->refs, 1);
+	if (eb->flags & EXTENT_BUFFER_FILLED)
+		return eb;
 
 	for (i = 0; i < num_pages; i++, index++) {
 		p = find_lock_page(mapping, index);
@@ -2055,14 +2082,15 @@
 			goto fail;
 		}
 		set_page_extent_mapped(p);
-		if (i < EXTENT_INLINE_PAGES)
-			eb->pages[i] = p;
+		if (i == 0)
+			eb->last_page = p;
 		if (!PageUptodate(p))
 			uptodate = 0;
 		unlock_page(p);
 	}
 	if (uptodate)
 		eb->flags |= EXTENT_UPTODATE;
+	eb->flags |= EXTENT_BUFFER_FILLED;
 	return eb;
 fail:
 	free_extent_buffer(eb);
@@ -2231,7 +2259,8 @@
 			ret = -EIO;
 		}
 	}
-	eb->flags |= EXTENT_UPTODATE;
+	if (!ret)
+		eb->flags |= EXTENT_UPTODATE;
 	return ret;
 }
 EXPORT_SYMBOL(read_extent_buffer_pages);
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 8409b5c..52a8b93 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -16,6 +16,7 @@
 #define EXTENT_DELALLOC (1 << 5)
 #define EXTENT_DEFRAG (1 << 6)
 #define EXTENT_DEFRAG_DONE (1 << 7)
+#define EXTENT_BUFFER_FILLED (1 << 8)
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 
 
@@ -33,6 +34,9 @@
 	struct address_space *mapping;
 	rwlock_t lock;
 	struct extent_map_ops *ops;
+	spinlock_t lru_lock;
+	struct list_head buffer_lru;
+	int lru_size;
 };
 
 /* note, this must start with the same fields as fs/extent_map.c:tree_entry */
@@ -64,20 +68,17 @@
 	struct list_head list;
 };
 
-#define EXTENT_INLINE_PAGES 32
 struct extent_buffer {
 	u64 start;
 	unsigned long len;
-	atomic_t refs;
-	int flags;
-	struct list_head list;
-	struct list_head leak_list;
-	unsigned long alloc_addr;
 	char *map_token;
 	char *kaddr;
 	unsigned long map_start;
 	unsigned long map_len;
-	struct page *pages[EXTENT_INLINE_PAGES];
+	struct page *last_page;
+	struct list_head lru;
+	atomic_t refs;
+	int flags;
 };
 
 typedef struct extent_map *(get_extent_t)(struct inode *inode,
@@ -88,6 +89,7 @@
 
 void extent_map_tree_init(struct extent_map_tree *tree,
 			  struct address_space *mapping, gfp_t mask);
+void extent_map_tree_cleanup(struct extent_map_tree *tree);
 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
 					 u64 start, u64 end);
 int add_extent_mapping(struct extent_map_tree *tree,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 87456ab..67e4aca 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -443,8 +443,7 @@
 			BUG_ON(ret);
 			mutex_unlock(&tree_root->fs_info->fs_mutex);
 			btrfs_btree_balance_dirty(tree_root, nr);
-			schedule();
-
+			cond_resched();
 			mutex_lock(&tree_root->fs_info->fs_mutex);
 		}
 		BUG_ON(ret);
@@ -471,7 +470,7 @@
 		mutex_unlock(&tree_root->fs_info->fs_mutex);
 
 		btrfs_btree_balance_dirty(tree_root, nr);
-		schedule();
+		cond_resched();
 	}
 	return ret;
 }