Btrfs: switch to early splits
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2891b58..1b4e82d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -5,7 +5,12 @@
#include "ctree.h"
#include "disk-io.h"
+#define SEARCH_READ 0
+#define SEARCH_WRITE 1
+
static int refill_alloc_extent(struct ctree_root *root);
+int split_node(struct ctree_root *root, struct ctree_path *path, int level);
+int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
static inline void init_path(struct ctree_path *p)
{
@@ -125,14 +130,14 @@
* If the key isn't found, the path points to the slot where it should
* be inserted.
*/
-int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
+int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len)
{
struct tree_buffer *b = root->node;
struct node *c;
-
int slot;
int ret;
int level;
+
b->count++;
while (b) {
c = &b->node;
@@ -143,10 +148,26 @@
if (ret && slot > 0)
slot -= 1;
p->slots[level] = slot;
+ if (ins_len && c->header.nritems == NODEPTRS_PER_BLOCK) {
+ int sret = split_node(root, p, level);
+ BUG_ON(sret > 0);
+ if (sret)
+ return sret;
+ b = p->nodes[level];
+ c = &b->node;
+ slot = p->slots[level];
+ }
b = read_tree_block(root, c->blockptrs[slot]);
continue;
} else {
+ struct leaf *l = (struct leaf *)c;
p->slots[level] = slot;
+ if (ins_len && leaf_free_space(l) < sizeof(struct item) + ins_len) {
+ int sret = split_leaf(root, p, ins_len);
+ BUG_ON(sret > 0);
+ if (sret)
+ return sret;
+ }
return ret;
}
}
@@ -331,50 +352,54 @@
return 0;
}
+static int insert_new_root(struct ctree_root *root, struct ctree_path *path, int level)
+{
+ struct tree_buffer *t;
+ struct node *lower;
+ struct node *c;
+ struct key *lower_key;
+
+ BUG_ON(path->nodes[level]);
+ BUG_ON(path->nodes[level-1] != root->node);
+
+ t = alloc_free_block(root);
+ c = &t->node;
+ memset(c, 0, sizeof(c));
+ c->header.nritems = 1;
+ c->header.flags = node_level(level);
+ c->header.blocknr = t->blocknr;
+ c->header.parentid = root->node->node.header.parentid;
+ lower = &path->nodes[level-1]->node;
+ if (is_leaf(lower->header.flags))
+ lower_key = &((struct leaf *)lower)->items[0].key;
+ else
+ lower_key = lower->keys;
+ memcpy(c->keys, lower_key, sizeof(struct key));
+ c->blockptrs[0] = path->nodes[level-1]->blocknr;
+ /* the super has an extra ref to root->node */
+ tree_block_release(root, root->node);
+ root->node = t;
+ t->count++;
+ write_tree_block(root, t);
+ path->nodes[level] = t;
+ path->slots[level] = 0;
+ return 0;
+}
+
/*
* worker function to insert a single pointer in a node.
* the node should have enough room for the pointer already
* slot and level indicate where you want the key to go, and
* blocknr is the block the key points to.
*/
-int __insert_ptr(struct ctree_root *root,
+int insert_ptr(struct ctree_root *root,
struct ctree_path *path, struct key *key,
u64 blocknr, int slot, int level)
{
- struct node *c;
struct node *lower;
- struct key *lower_key;
int nritems;
- /* need a new root */
- if (!path->nodes[level]) {
- struct tree_buffer *t;
- t = alloc_free_block(root);
- c = &t->node;
- memset(c, 0, sizeof(c));
- c->header.nritems = 2;
- c->header.flags = node_level(level);
- c->header.blocknr = t->blocknr;
- c->header.parentid = root->node->node.header.parentid;
- lower = &path->nodes[level-1]->node;
- if (is_leaf(lower->header.flags))
- lower_key = &((struct leaf *)lower)->items[0].key;
- else
- lower_key = lower->keys;
- memcpy(c->keys, lower_key, sizeof(struct key));
- memcpy(c->keys + 1, key, sizeof(struct key));
- c->blockptrs[0] = path->nodes[level-1]->blocknr;
- c->blockptrs[1] = blocknr;
- /* the super has an extra ref to root->node */
- tree_block_release(root, root->node);
- root->node = t;
- t->count++;
- write_tree_block(root, t);
- path->nodes[level] = t;
- path->slots[level] = 0;
- if (c->keys[1].objectid == 0)
- BUG();
- return 0;
- }
+
+ BUG_ON(!path->nodes[level]);
lower = &path->nodes[level]->node;
nritems = lower->header.nritems;
if (slot > nritems)
@@ -396,93 +421,54 @@
return 0;
}
-
-/*
- * insert a key,blocknr pair into the tree at a given level
- * If the node at that level in the path doesn't have room,
- * it is split or shifted as appropriate.
- */
-int insert_ptr(struct ctree_root *root,
- struct ctree_path *path, struct key *key,
- u64 blocknr, int level)
+int split_node(struct ctree_root *root, struct ctree_path *path, int level)
{
- struct tree_buffer *t = path->nodes[level];
- struct node *c = &path->nodes[level]->node;
- struct node *b;
- struct tree_buffer *b_buffer;
- struct tree_buffer *bal[MAX_LEVEL];
- int bal_level = level;
+ struct tree_buffer *t;
+ struct node *c;
+ struct tree_buffer *split_buffer;
+ struct node *split;
int mid;
- int bal_start = -1;
+ int ret;
- /*
- * check to see if we need to make room in the node for this
- * pointer. If we do, keep walking the tree, making sure there
- * is enough room in each level for the required insertions.
- *
- * The bal array is filled in with any nodes to be inserted
- * due to splitting. Once we've done all the splitting required
- * do the inserts based on the data in the bal array.
- */
- memset(bal, 0, sizeof(bal));
- while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
- c = &t->node;
- if (push_node_left(root, path,
- node_level(c->header.flags)) == 0)
- break;
- if (push_node_right(root, path,
- node_level(c->header.flags)) == 0)
- break;
- bal_start = bal_level;
- if (bal_level == MAX_LEVEL - 1)
- BUG();
- b_buffer = alloc_free_block(root);
- b = &b_buffer->node;
- b->header.flags = c->header.flags;
- b->header.blocknr = b_buffer->blocknr;
- b->header.parentid = root->node->node.header.parentid;
- mid = (c->header.nritems + 1) / 2;
- memcpy(b->keys, c->keys + mid,
- (c->header.nritems - mid) * sizeof(struct key));
- memcpy(b->blockptrs, c->blockptrs + mid,
- (c->header.nritems - mid) * sizeof(u64));
- b->header.nritems = c->header.nritems - mid;
- c->header.nritems = mid;
-
- write_tree_block(root, t);
- write_tree_block(root, b_buffer);
-
- bal[bal_level] = b_buffer;
- if (bal_level == MAX_LEVEL - 1)
- break;
- bal_level += 1;
- t = path->nodes[bal_level];
+ ret = push_node_left(root, path, level);
+ if (!ret)
+ return 0;
+ ret = push_node_right(root, path, level);
+ if (!ret)
+ return 0;
+ t = path->nodes[level];
+ c = &t->node;
+ if (t == root->node) {
+ /* trying to split the root, lets make a new one */
+ ret = insert_new_root(root, path, level + 1);
+ if (ret)
+ return ret;
}
- /*
- * bal_start tells us the first level in the tree that needed to
- * be split. Go through the bal array inserting the new nodes
- * as needed. The path is fixed as we go.
- */
- while(bal_start > 0) {
- b_buffer = bal[bal_start];
- c = &path->nodes[bal_start]->node;
- __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
- path->slots[bal_start + 1] + 1, bal_start + 1);
- if (path->slots[bal_start] >= c->header.nritems) {
- path->slots[bal_start] -= c->header.nritems;
- tree_block_release(root, path->nodes[bal_start]);
- path->nodes[bal_start] = b_buffer;
- path->slots[bal_start + 1] += 1;
- } else {
- tree_block_release(root, b_buffer);
- }
- bal_start--;
- if (!bal[bal_start])
- break;
+ split_buffer = alloc_free_block(root);
+ split = &split_buffer->node;
+ split->header.flags = c->header.flags;
+ split->header.blocknr = split_buffer->blocknr;
+ split->header.parentid = root->node->node.header.parentid;
+ mid = (c->header.nritems + 1) / 2;
+ memcpy(split->keys, c->keys + mid,
+ (c->header.nritems - mid) * sizeof(struct key));
+ memcpy(split->blockptrs, c->blockptrs + mid,
+ (c->header.nritems - mid) * sizeof(u64));
+ split->header.nritems = c->header.nritems - mid;
+ c->header.nritems = mid;
+ write_tree_block(root, t);
+ write_tree_block(root, split_buffer);
+ insert_ptr(root, path, split->keys, split_buffer->blocknr,
+ path->slots[level + 1] + 1, level + 1);
+ if (path->slots[level] > mid) {
+ path->slots[level] -= mid;
+ tree_block_release(root, t);
+ path->nodes[level] = split_buffer;
+ path->slots[level + 1] += 1;
+ } else {
+ tree_block_release(root, split_buffer);
}
- /* Now that the tree has room, insert the requested pointer */
- return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1,
- level);
+ return 0;
}
/*
@@ -623,6 +609,11 @@
if (leaf_free_space(l) >= sizeof(struct item) + data_size)
return 0;
}
+ if (!path->nodes[1]) {
+ ret = insert_new_root(root, path, 1);
+ if (ret)
+ return ret;
+ }
slot = path->slots[0];
nritems = l->header.nritems;
mid = (nritems + 1)/ 2;
@@ -659,8 +650,7 @@
l->header.nritems = mid;
ret = insert_ptr(root, path, &right->items[0].key,
- right_buffer->blocknr, 1);
-
+ right_buffer->blocknr, path->slots[1] + 1, 1);
write_tree_block(root, right_buffer);
write_tree_block(root, l_buf);
@@ -695,21 +685,10 @@
refill_alloc_extent(root);
/* create a root if there isn't one */
- if (!root->node) {
+ if (!root->node)
BUG();
-#if 0
- struct tree_buffer *t;
- t = alloc_free_block(root);
- BUG_ON(!t);
- t->node.header.nritems = 0;
- t->node.header.flags = node_level(0);
- t->node.header.blocknr = t->blocknr;
- root->node = t;
- write_tree_block(root, t);
-#endif
- }
init_path(&path);
- ret = search_slot(root, key, &path);
+ ret = search_slot(root, key, &path, data_size);
if (ret == 0) {
release_path(root, &path);
return -EEXIST;
@@ -719,12 +698,6 @@
leaf_buf = path.nodes[0];
leaf = &leaf_buf->leaf;
- /* make room if needed */
- if (leaf_free_space(leaf) < sizeof(struct item) + data_size) {
- split_leaf(root, &path, data_size);
- leaf_buf = path.nodes[0];
- leaf = &path.nodes[0]->leaf;
- }
nritems = leaf->header.nritems;
data_end = leaf_data_end(leaf);
@@ -950,7 +923,7 @@
ins->offset = 0;
ins->flags = 0;
- ret = search_slot(root, ins, &path);
+ ret = search_slot(root, ins, &path, sizeof(struct extent_item));
while (1) {
l = &path.nodes[0]->leaf;
slot = path.slots[0];
@@ -1097,8 +1070,8 @@
/* for testing only */
int next_key(int i, int max_key) {
- return rand() % max_key;
- // return i;
+ // return rand() % max_key;
+ return i;
}
int main() {
@@ -1154,7 +1127,7 @@
num = next_key(i, max_key);
ins.objectid = num;
init_path(&path);
- ret = search_slot(root, &ins, &path);
+ ret = search_slot(root, &ins, &path, 0);
if (ret) {
print_tree(root, root->node);
printf("unable to find %d\n", num);
@@ -1176,7 +1149,7 @@
num = next_key(i, max_key);
ins.objectid = num;
init_path(&path);
- ret = search_slot(root, &ins, &path);
+ ret = search_slot(root, &ins, &path, 0);
if (ret)
continue;
ret = del_item(root, &path);
@@ -1204,7 +1177,7 @@
num = next_key(i, max_key);
ins.objectid = num;
init_path(&path);
- ret = search_slot(root, &ins, &path);
+ ret = search_slot(root, &ins, &path, 0);
if (ret) {
print_tree(root, root->node);
printf("unable to find %d\n", num);
@@ -1218,7 +1191,7 @@
int slot;
ins.objectid = (u64)-1;
init_path(&path);
- ret = search_slot(root, &ins, &path);
+ ret = search_slot(root, &ins, &path, 0);
if (ret == 0)
BUG();