Btrfs: Cache free inode numbers in memory
Currently btrfs stores the highest objectid of the fs tree, and it always
returns (highest+1) inode number when we create a file, so inode numbers
won't be reclaimed when we delete files, so we'll run out of inode numbers
as we keep create/delete files in 32bits machines.
This fixes it, and it works similarly to how we cache free space in block
cgroups.
We start a kernel thread to read the file tree. By scanning inode items,
we know which chunks of inode numbers are free, and we cache them in
an rb-tree.
Because we are searching the commit root, we have to carefully handle the
cross-transaction case.
The rb-tree is a hybrid extent+bitmap tree, so if we have too many small
chunks of inode numbers, we'll use bitmaps. Initially we allow 16K ram
of extents, and a bitmap will be used if we exceed this threshold. The
extents threshold is adjusted in runtime.
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d4fb4f0..2ce89bf 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -25,6 +25,7 @@
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
+#include "inode-map.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
@@ -105,7 +106,7 @@
u64 objectid;
int ret;
- ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
+ ret = btrfs_find_free_objectid(root, &objectid);
if (ret < 0)
return ret;
@@ -1496,10 +1497,9 @@
return merged;
}
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes)
+int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
+ u64 offset, u64 bytes)
{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
int ret = 0;
@@ -1751,11 +1751,29 @@
return 0;
}
+void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
+{
+ struct btrfs_free_space *info;
+ struct rb_node *node;
+
+ spin_lock(&ctl->tree_lock);
+ while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
+ info = rb_entry(node, struct btrfs_free_space, offset_index);
+ unlink_free_space(ctl, info);
+ kfree(info->bitmap);
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ if (need_resched()) {
+ spin_unlock(&ctl->tree_lock);
+ cond_resched();
+ spin_lock(&ctl->tree_lock);
+ }
+ }
+ spin_unlock(&ctl->tree_lock);
+}
+
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *info;
- struct rb_node *node;
struct btrfs_free_cluster *cluster;
struct list_head *head;
@@ -1773,21 +1791,9 @@
spin_lock(&ctl->tree_lock);
}
}
-
- while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
- info = rb_entry(node, struct btrfs_free_space, offset_index);
- unlink_free_space(ctl, info);
- if (info->bitmap)
- kfree(info->bitmap);
- kmem_cache_free(btrfs_free_space_cachep, info);
- if (need_resched()) {
- spin_unlock(&ctl->tree_lock);
- cond_resched();
- spin_lock(&ctl->tree_lock);
- }
- }
-
spin_unlock(&ctl->tree_lock);
+
+ __btrfs_remove_free_space_cache(ctl);
}
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
@@ -2352,3 +2358,53 @@
return ret;
}
+
+/*
+ * Find the left-most item in the cache tree, and then return the
+ * smallest inode number in the item.
+ *
+ * Note: the returned inode number may not be the smallest one in
+ * the tree, if the left-most item is a bitmap.
+ */
+u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
+{
+ struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
+ struct btrfs_free_space *entry = NULL;
+ u64 ino = 0;
+
+ spin_lock(&ctl->tree_lock);
+
+ if (RB_EMPTY_ROOT(&ctl->free_space_offset))
+ goto out;
+
+ entry = rb_entry(rb_first(&ctl->free_space_offset),
+ struct btrfs_free_space, offset_index);
+
+ if (!entry->bitmap) {
+ ino = entry->offset;
+
+ unlink_free_space(ctl, entry);
+ entry->offset++;
+ entry->bytes--;
+ if (!entry->bytes)
+ kmem_cache_free(btrfs_free_space_cachep, entry);
+ else
+ link_free_space(ctl, entry);
+ } else {
+ u64 offset = 0;
+ u64 count = 1;
+ int ret;
+
+ ret = search_bitmap(ctl, entry, &offset, &count);
+ BUG_ON(ret);
+
+ ino = offset;
+ bitmap_clear_bits(ctl, entry, offset, 1);
+ if (entry->bytes == 0)
+ free_bitmap(ctl, entry);
+ }
+out:
+ spin_unlock(&ctl->tree_lock);
+
+ return ino;
+}