blob: 260ca86a5cf8d03ed20266994c9ffa7714a8d6c9 [file] [log] [blame]
Chris Mason6cbd5572007-06-12 09:07:21 -04001/*
Chris Masond352ac62008-09-29 15:18:18 -04002 * Copyright (C) 2007,2008 Oracle. All rights reserved.
Chris Mason6cbd5572007-06-12 09:07:21 -04003 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
Chris Masona6b6e752007-10-15 16:22:39 -040019#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090020#include <linux/slab.h>
Jan Schmidtbd989ba2012-05-16 17:18:50 +020021#include <linux/rbtree.h>
David Sterbaadf02122017-05-31 19:44:31 +020022#include <linux/mm.h>
Chris Masoneb60cea2007-02-02 09:18:22 -050023#include "ctree.h"
24#include "disk-io.h"
Chris Mason7f5c1512007-03-23 15:56:19 -040025#include "transaction.h"
Chris Mason5f39d392007-10-15 16:14:19 -040026#include "print-tree.h"
Chris Mason925baed2008-06-25 16:01:30 -040027#include "locking.h"
Chris Mason9a8dd152007-02-23 08:38:36 -050028
/*
 * Forward declarations for static helpers that are used before their
 * definitions further down in this file.
 */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path,
		      int level);
static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      const struct btrfs_key *ins_key,
		      struct btrfs_path *path,
		      int data_size,
		      int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info,
			  struct extent_buffer *dst,
			  struct extent_buffer *src,
			  int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root,
		    struct btrfs_path *path,
		    int level,
		    int slot);
Chris Masond97e63b2007-02-20 16:40:44 -050044
Chris Mason2c90e5d2007-04-02 10:50:19 -040045struct btrfs_path *btrfs_alloc_path(void)
46{
Masahiro Yamadae2c89902016-09-13 04:35:52 +090047 return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
Chris Mason2c90e5d2007-04-02 10:50:19 -040048}
49
Chris Masonb4ce94d2009-02-04 09:25:08 -050050/*
51 * set all locked nodes in the path to blocking locks. This should
52 * be done before scheduling
53 */
54noinline void btrfs_set_path_blocking(struct btrfs_path *p)
55{
56 int i;
57 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbd681512011-07-16 15:23:14 -040058 if (!p->nodes[i] || !p->locks[i])
59 continue;
60 btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
61 if (p->locks[i] == BTRFS_READ_LOCK)
62 p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
63 else if (p->locks[i] == BTRFS_WRITE_LOCK)
64 p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
Chris Masonb4ce94d2009-02-04 09:25:08 -050065 }
66}
67
68/*
69 * reset all the locked nodes in the patch to spinning locks.
Chris Mason4008c042009-02-12 14:09:45 -050070 *
71 * held is used to keep lockdep happy, when lockdep is enabled
72 * we set held to a blocking lock before we go around and
73 * retake all the spinlocks in the path. You can safely use NULL
74 * for held
Chris Masonb4ce94d2009-02-04 09:25:08 -050075 */
Chris Mason4008c042009-02-12 14:09:45 -050076noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -040077 struct extent_buffer *held, int held_rw)
Chris Masonb4ce94d2009-02-04 09:25:08 -050078{
79 int i;
Chris Mason4008c042009-02-12 14:09:45 -050080
Chris Masonbd681512011-07-16 15:23:14 -040081 if (held) {
82 btrfs_set_lock_blocking_rw(held, held_rw);
83 if (held_rw == BTRFS_WRITE_LOCK)
84 held_rw = BTRFS_WRITE_LOCK_BLOCKING;
85 else if (held_rw == BTRFS_READ_LOCK)
86 held_rw = BTRFS_READ_LOCK_BLOCKING;
87 }
Chris Mason4008c042009-02-12 14:09:45 -050088 btrfs_set_path_blocking(p);
Chris Mason4008c042009-02-12 14:09:45 -050089
90 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
Chris Masonbd681512011-07-16 15:23:14 -040091 if (p->nodes[i] && p->locks[i]) {
92 btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
93 if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
94 p->locks[i] = BTRFS_WRITE_LOCK;
95 else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
96 p->locks[i] = BTRFS_READ_LOCK;
97 }
Chris Masonb4ce94d2009-02-04 09:25:08 -050098 }
Chris Mason4008c042009-02-12 14:09:45 -050099
Chris Mason4008c042009-02-12 14:09:45 -0500100 if (held)
Chris Masonbd681512011-07-16 15:23:14 -0400101 btrfs_clear_lock_blocking_rw(held, held_rw);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500102}
103
Chris Masond352ac62008-09-29 15:18:18 -0400104/* this also releases the path */
Chris Mason2c90e5d2007-04-02 10:50:19 -0400105void btrfs_free_path(struct btrfs_path *p)
106{
Jesper Juhlff175d52010-12-25 21:22:30 +0000107 if (!p)
108 return;
David Sterbab3b4aa72011-04-21 01:20:15 +0200109 btrfs_release_path(p);
Chris Mason2c90e5d2007-04-02 10:50:19 -0400110 kmem_cache_free(btrfs_path_cachep, p);
111}
112
Chris Masond352ac62008-09-29 15:18:18 -0400113/*
114 * path release drops references on the extent buffers in the path
115 * and it drops any locks held by this path
116 *
117 * It is safe to call this on paths that no locks or extent buffers held.
118 */
David Sterbab3b4aa72011-04-21 01:20:15 +0200119noinline void btrfs_release_path(struct btrfs_path *p)
Chris Masoneb60cea2007-02-02 09:18:22 -0500120{
121 int i;
Chris Masona2135012008-06-25 16:01:30 -0400122
Chris Mason234b63a2007-03-13 10:46:10 -0400123 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Mason3f157a22008-06-25 16:01:31 -0400124 p->slots[i] = 0;
Chris Masoneb60cea2007-02-02 09:18:22 -0500125 if (!p->nodes[i])
Chris Mason925baed2008-06-25 16:01:30 -0400126 continue;
127 if (p->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -0400128 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -0400129 p->locks[i] = 0;
130 }
Chris Mason5f39d392007-10-15 16:14:19 -0400131 free_extent_buffer(p->nodes[i]);
Chris Mason3f157a22008-06-25 16:01:31 -0400132 p->nodes[i] = NULL;
Chris Masoneb60cea2007-02-02 09:18:22 -0500133 }
134}
135
Chris Masond352ac62008-09-29 15:18:18 -0400136/*
137 * safely gets a reference on the root node of a tree. A lock
138 * is not taken, so a concurrent writer may put a different node
139 * at the root of the tree. See btrfs_lock_root_node for the
140 * looping required.
141 *
142 * The extent buffer returned by this has a reference taken, so
143 * it won't disappear. It may stop being the root of the tree
144 * at any time because there are no locks held.
145 */
Chris Mason925baed2008-06-25 16:01:30 -0400146struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
147{
148 struct extent_buffer *eb;
Chris Mason240f62c2011-03-23 14:54:42 -0400149
Josef Bacik3083ee22012-03-09 16:01:49 -0500150 while (1) {
151 rcu_read_lock();
152 eb = rcu_dereference(root->node);
153
154 /*
155 * RCU really hurts here, we could free up the root node because
Nicholas D Steeves01327612016-05-19 21:18:45 -0400156 * it was COWed but we may not get the new root node yet so do
Josef Bacik3083ee22012-03-09 16:01:49 -0500157 * the inc_not_zero dance and if it doesn't work then
158 * synchronize_rcu and try again.
159 */
160 if (atomic_inc_not_zero(&eb->refs)) {
161 rcu_read_unlock();
162 break;
163 }
164 rcu_read_unlock();
165 synchronize_rcu();
166 }
Chris Mason925baed2008-06-25 16:01:30 -0400167 return eb;
168}
169
Chris Masond352ac62008-09-29 15:18:18 -0400170/* loop around taking references on and locking the root node of the
171 * tree until you end up with a lock on the root. A locked buffer
172 * is returned, with a reference held.
173 */
Chris Mason925baed2008-06-25 16:01:30 -0400174struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
175{
176 struct extent_buffer *eb;
177
Chris Masond3977122009-01-05 21:25:51 -0500178 while (1) {
Chris Mason925baed2008-06-25 16:01:30 -0400179 eb = btrfs_root_node(root);
180 btrfs_tree_lock(eb);
Chris Mason240f62c2011-03-23 14:54:42 -0400181 if (eb == root->node)
Chris Mason925baed2008-06-25 16:01:30 -0400182 break;
Chris Mason925baed2008-06-25 16:01:30 -0400183 btrfs_tree_unlock(eb);
184 free_extent_buffer(eb);
185 }
186 return eb;
187}
188
Chris Masonbd681512011-07-16 15:23:14 -0400189/* loop around taking references on and locking the root node of the
190 * tree until you end up with a lock on the root. A locked buffer
191 * is returned, with a reference held.
192 */
Josef Bacik84f7d8e2017-09-29 15:43:49 -0400193struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
Chris Masonbd681512011-07-16 15:23:14 -0400194{
195 struct extent_buffer *eb;
196
197 while (1) {
198 eb = btrfs_root_node(root);
199 btrfs_tree_read_lock(eb);
200 if (eb == root->node)
201 break;
202 btrfs_tree_read_unlock(eb);
203 free_extent_buffer(eb);
204 }
205 return eb;
206}
207
Chris Masond352ac62008-09-29 15:18:18 -0400208/* cowonly root (everything not a reference counted cow subvolume), just get
209 * put onto a simple dirty list. transaction.c walks this to make sure they
210 * get properly updated on disk.
211 */
Chris Mason0b86a832008-03-24 15:01:56 -0400212static void add_root_to_dirty_list(struct btrfs_root *root)
213{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400214 struct btrfs_fs_info *fs_info = root->fs_info;
215
Josef Bacike7070be2014-12-16 08:54:43 -0800216 if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
217 !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
218 return;
219
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400220 spin_lock(&fs_info->trans_lock);
Josef Bacike7070be2014-12-16 08:54:43 -0800221 if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
222 /* Want the extent tree to be the last on the list */
223 if (root->objectid == BTRFS_EXTENT_TREE_OBJECTID)
224 list_move_tail(&root->dirty_list,
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400225 &fs_info->dirty_cowonly_roots);
Josef Bacike7070be2014-12-16 08:54:43 -0800226 else
227 list_move(&root->dirty_list,
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400228 &fs_info->dirty_cowonly_roots);
Chris Mason0b86a832008-03-24 15:01:56 -0400229 }
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400230 spin_unlock(&fs_info->trans_lock);
Chris Mason0b86a832008-03-24 15:01:56 -0400231}
232
Chris Masond352ac62008-09-29 15:18:18 -0400233/*
234 * used by snapshot creation to make a copy of a root for a tree with
235 * a given objectid. The buffer with the new root node is returned in
236 * cow_ret, and this func returns zero on success or a negative error code.
237 */
Chris Masonbe20aa92007-12-17 20:14:01 -0500238int btrfs_copy_root(struct btrfs_trans_handle *trans,
239 struct btrfs_root *root,
240 struct extent_buffer *buf,
241 struct extent_buffer **cow_ret, u64 new_root_objectid)
242{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400243 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonbe20aa92007-12-17 20:14:01 -0500244 struct extent_buffer *cow;
Chris Masonbe20aa92007-12-17 20:14:01 -0500245 int ret = 0;
246 int level;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400247 struct btrfs_disk_key disk_key;
Chris Masonbe20aa92007-12-17 20:14:01 -0500248
Miao Xie27cdeb72014-04-02 19:51:05 +0800249 WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400250 trans->transid != fs_info->running_transaction->transid);
Miao Xie27cdeb72014-04-02 19:51:05 +0800251 WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
252 trans->transid != root->last_trans);
Chris Masonbe20aa92007-12-17 20:14:01 -0500253
254 level = btrfs_header_level(buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400255 if (level == 0)
256 btrfs_item_key(buf, &disk_key, 0);
257 else
258 btrfs_node_key(buf, &disk_key, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400259
David Sterba4d75f8a2014-06-15 01:54:12 +0200260 cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
261 &disk_key, level, buf->start, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400262 if (IS_ERR(cow))
Chris Masonbe20aa92007-12-17 20:14:01 -0500263 return PTR_ERR(cow);
264
David Sterba58e80122016-11-08 18:30:31 +0100265 copy_extent_buffer_full(cow, buf);
Chris Masonbe20aa92007-12-17 20:14:01 -0500266 btrfs_set_header_bytenr(cow, cow->start);
267 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400268 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
269 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
270 BTRFS_HEADER_FLAG_RELOC);
271 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
272 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
273 else
274 btrfs_set_header_owner(cow, new_root_objectid);
Chris Masonbe20aa92007-12-17 20:14:01 -0500275
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400276 write_extent_buffer_fsid(cow, fs_info->fsid);
Yan Zheng2b820322008-11-17 21:11:30 -0500277
Chris Masonbe20aa92007-12-17 20:14:01 -0500278 WARN_ON(btrfs_header_generation(buf) > trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400279 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -0700280 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400281 else
Josef Bacike339a6b2014-07-02 10:54:25 -0700282 ret = btrfs_inc_ref(trans, root, cow, 0);
Chris Mason4aec2b52007-12-18 16:25:45 -0500283
Chris Masonbe20aa92007-12-17 20:14:01 -0500284 if (ret)
285 return ret;
286
287 btrfs_mark_buffer_dirty(cow);
288 *cow_ret = cow;
289 return 0;
290}
291
/*
 * Kinds of operations recorded in the tree mod log (see struct
 * tree_mod_elem).  The enumerator order is kept as is.
 */
enum mod_log_op {
	/* single key operations on a node slot */
	MOD_LOG_KEY_REPLACE,
	MOD_LOG_KEY_ADD,
	MOD_LOG_KEY_REMOVE,
	/* logged for each slot of an old root that is being replaced */
	MOD_LOG_KEY_REMOVE_WHILE_FREEING,
	/* logged for each slot that gets overwritten by a move */
	MOD_LOG_KEY_REMOVE_WHILE_MOVING,
	/* a run of keys was moved within one buffer */
	MOD_LOG_MOVE_KEYS,
	/* the root node of a tree was replaced */
	MOD_LOG_ROOT_REPLACE,
};
301
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200302struct tree_mod_root {
303 u64 logical;
304 u8 level;
305};
306
307struct tree_mod_elem {
308 struct rb_node node;
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530309 u64 logical;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200310 u64 seq;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200311 enum mod_log_op op;
312
313 /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
314 int slot;
315
316 /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
317 u64 generation;
318
319 /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
320 struct btrfs_disk_key key;
321 u64 blockptr;
322
323 /* this is used for op == MOD_LOG_MOVE_KEYS */
David Sterbab6dfa352018-03-05 15:31:18 +0100324 struct {
325 int dst_slot;
326 int nr_items;
327 } move;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200328
329 /* this is used for op == MOD_LOG_ROOT_REPLACE */
330 struct tree_mod_root old_root;
331};
332
Jan Schmidt097b8a72012-06-21 11:08:04 +0200333/*
Josef Bacikfcebe452014-05-13 17:30:47 -0700334 * Pull a new tree mod seq number for our operation.
Jan Schmidtfc36ed7e2013-04-24 16:57:33 +0000335 */
Josef Bacikfcebe452014-05-13 17:30:47 -0700336static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
Jan Schmidtfc36ed7e2013-04-24 16:57:33 +0000337{
338 return atomic64_inc_return(&fs_info->tree_mod_seq);
339}
340
341/*
Jan Schmidt097b8a72012-06-21 11:08:04 +0200342 * This adds a new blocker to the tree mod log's blocker list if the @elem
343 * passed does not already have a sequence number set. So when a caller expects
344 * to record tree modifications, it should ensure to set elem->seq to zero
345 * before calling btrfs_get_tree_mod_seq.
346 * Returns a fresh, unused tree log modification sequence number, even if no new
347 * blocker was added.
348 */
349u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
350 struct seq_list *elem)
351{
David Sterbab1a09f12018-03-05 15:43:41 +0100352 write_lock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200353 spin_lock(&fs_info->tree_mod_seq_lock);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200354 if (!elem->seq) {
Josef Bacikfcebe452014-05-13 17:30:47 -0700355 elem->seq = btrfs_inc_tree_mod_seq(fs_info);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200356 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
357 }
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200358 spin_unlock(&fs_info->tree_mod_seq_lock);
David Sterbab1a09f12018-03-05 15:43:41 +0100359 write_unlock(&fs_info->tree_mod_log_lock);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200360
Josef Bacikfcebe452014-05-13 17:30:47 -0700361 return elem->seq;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200362}
363
364void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
365 struct seq_list *elem)
366{
367 struct rb_root *tm_root;
368 struct rb_node *node;
369 struct rb_node *next;
370 struct seq_list *cur_elem;
371 struct tree_mod_elem *tm;
372 u64 min_seq = (u64)-1;
373 u64 seq_putting = elem->seq;
374
375 if (!seq_putting)
376 return;
377
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200378 spin_lock(&fs_info->tree_mod_seq_lock);
379 list_del(&elem->list);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200380 elem->seq = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200381
382 list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
Jan Schmidt097b8a72012-06-21 11:08:04 +0200383 if (cur_elem->seq < min_seq) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200384 if (seq_putting > cur_elem->seq) {
385 /*
386 * blocker with lower sequence number exists, we
387 * cannot remove anything from the log
388 */
Jan Schmidt097b8a72012-06-21 11:08:04 +0200389 spin_unlock(&fs_info->tree_mod_seq_lock);
390 return;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200391 }
392 min_seq = cur_elem->seq;
393 }
394 }
Jan Schmidt097b8a72012-06-21 11:08:04 +0200395 spin_unlock(&fs_info->tree_mod_seq_lock);
396
397 /*
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200398 * anything that's lower than the lowest existing (read: blocked)
399 * sequence number can be removed from the tree.
400 */
David Sterbab1a09f12018-03-05 15:43:41 +0100401 write_lock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200402 tm_root = &fs_info->tree_mod_log;
403 for (node = rb_first(tm_root); node; node = next) {
404 next = rb_next(node);
Geliang Tang6b4df8b2016-12-19 22:53:41 +0800405 tm = rb_entry(node, struct tree_mod_elem, node);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200406 if (tm->seq > min_seq)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200407 continue;
408 rb_erase(node, tm_root);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200409 kfree(tm);
410 }
David Sterbab1a09f12018-03-05 15:43:41 +0100411 write_unlock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200412}
413
414/*
415 * key order of the log:
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530416 * node/leaf start address -> sequence
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200417 *
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530418 * The 'start address' is the logical address of the *new* root node
419 * for root replace operations, or the logical address of the affected
420 * block for all other operations.
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000421 *
David Sterbab1a09f12018-03-05 15:43:41 +0100422 * Note: must be called with write lock for fs_info::tree_mod_log_lock.
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200423 */
424static noinline int
425__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
426{
427 struct rb_root *tm_root;
428 struct rb_node **new;
429 struct rb_node *parent = NULL;
430 struct tree_mod_elem *cur;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200431
Josef Bacikfcebe452014-05-13 17:30:47 -0700432 tm->seq = btrfs_inc_tree_mod_seq(fs_info);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200433
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200434 tm_root = &fs_info->tree_mod_log;
435 new = &tm_root->rb_node;
436 while (*new) {
Geliang Tang6b4df8b2016-12-19 22:53:41 +0800437 cur = rb_entry(*new, struct tree_mod_elem, node);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200438 parent = *new;
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530439 if (cur->logical < tm->logical)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200440 new = &((*new)->rb_left);
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530441 else if (cur->logical > tm->logical)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200442 new = &((*new)->rb_right);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200443 else if (cur->seq < tm->seq)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200444 new = &((*new)->rb_left);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200445 else if (cur->seq > tm->seq)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200446 new = &((*new)->rb_right);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000447 else
448 return -EEXIST;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200449 }
450
451 rb_link_node(&tm->node, parent, new);
452 rb_insert_color(&tm->node, tm_root);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000453 return 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200454}
455
Jan Schmidt097b8a72012-06-21 11:08:04 +0200456/*
457 * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
458 * returns zero with the tree_mod_log_lock acquired. The caller must hold
459 * this until all tree mod log insertions are recorded in the rb tree and then
David Sterbab1a09f12018-03-05 15:43:41 +0100460 * write unlock fs_info::tree_mod_log_lock.
Jan Schmidt097b8a72012-06-21 11:08:04 +0200461 */
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200462static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
463 struct extent_buffer *eb) {
464 smp_mb();
465 if (list_empty(&(fs_info)->tree_mod_seq_list))
466 return 1;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200467 if (eb && btrfs_header_level(eb) == 0)
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200468 return 1;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000469
David Sterbab1a09f12018-03-05 15:43:41 +0100470 write_lock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000471 if (list_empty(&(fs_info)->tree_mod_seq_list)) {
David Sterbab1a09f12018-03-05 15:43:41 +0100472 write_unlock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000473 return 1;
474 }
475
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200476 return 0;
477}
478
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000479/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
480static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
481 struct extent_buffer *eb)
482{
483 smp_mb();
484 if (list_empty(&(fs_info)->tree_mod_seq_list))
485 return 0;
486 if (eb && btrfs_header_level(eb) == 0)
487 return 0;
488
489 return 1;
490}
491
492static struct tree_mod_elem *
493alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
494 enum mod_log_op op, gfp_t flags)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200495{
Jan Schmidt097b8a72012-06-21 11:08:04 +0200496 struct tree_mod_elem *tm;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200497
Josef Bacikc8cc6342013-07-01 16:18:19 -0400498 tm = kzalloc(sizeof(*tm), flags);
499 if (!tm)
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000500 return NULL;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200501
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530502 tm->logical = eb->start;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200503 if (op != MOD_LOG_KEY_ADD) {
504 btrfs_node_key(eb, &tm->key, slot);
505 tm->blockptr = btrfs_node_blockptr(eb, slot);
506 }
507 tm->op = op;
508 tm->slot = slot;
509 tm->generation = btrfs_node_ptr_generation(eb, slot);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000510 RB_CLEAR_NODE(&tm->node);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200511
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000512 return tm;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200513}
514
David Sterbae09c2ef2018-03-05 15:09:03 +0100515static noinline int tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
516 enum mod_log_op op, gfp_t flags)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200517{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000518 struct tree_mod_elem *tm;
519 int ret;
520
David Sterbae09c2ef2018-03-05 15:09:03 +0100521 if (!tree_mod_need_log(eb->fs_info, eb))
Jan Schmidt097b8a72012-06-21 11:08:04 +0200522 return 0;
523
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000524 tm = alloc_tree_mod_elem(eb, slot, op, flags);
525 if (!tm)
526 return -ENOMEM;
527
David Sterbae09c2ef2018-03-05 15:09:03 +0100528 if (tree_mod_dont_log(eb->fs_info, eb)) {
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000529 kfree(tm);
530 return 0;
531 }
532
David Sterbae09c2ef2018-03-05 15:09:03 +0100533 ret = __tree_mod_log_insert(eb->fs_info, tm);
David Sterbab1a09f12018-03-05 15:43:41 +0100534 write_unlock(&eb->fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000535 if (ret)
536 kfree(tm);
537
538 return ret;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200539}
540
David Sterba6074d452018-03-05 15:03:52 +0100541static noinline int tree_mod_log_insert_move(struct extent_buffer *eb,
542 int dst_slot, int src_slot, int nr_items)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200543{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000544 struct tree_mod_elem *tm = NULL;
545 struct tree_mod_elem **tm_list = NULL;
546 int ret = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200547 int i;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000548 int locked = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200549
David Sterba6074d452018-03-05 15:03:52 +0100550 if (!tree_mod_need_log(eb->fs_info, eb))
Jan Schmidtf3956942012-05-31 15:02:32 +0200551 return 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200552
David Sterba176ef8f2017-03-28 14:35:01 +0200553 tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000554 if (!tm_list)
555 return -ENOMEM;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200556
David Sterba176ef8f2017-03-28 14:35:01 +0200557 tm = kzalloc(sizeof(*tm), GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000558 if (!tm) {
559 ret = -ENOMEM;
560 goto free_tms;
561 }
Jan Schmidtf3956942012-05-31 15:02:32 +0200562
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530563 tm->logical = eb->start;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200564 tm->slot = src_slot;
565 tm->move.dst_slot = dst_slot;
566 tm->move.nr_items = nr_items;
567 tm->op = MOD_LOG_MOVE_KEYS;
568
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000569 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
570 tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
David Sterba176ef8f2017-03-28 14:35:01 +0200571 MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000572 if (!tm_list[i]) {
573 ret = -ENOMEM;
574 goto free_tms;
575 }
576 }
577
David Sterba6074d452018-03-05 15:03:52 +0100578 if (tree_mod_dont_log(eb->fs_info, eb))
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000579 goto free_tms;
580 locked = 1;
581
582 /*
583 * When we override something during the move, we log these removals.
584 * This can only happen when we move towards the beginning of the
585 * buffer, i.e. dst_slot < src_slot.
586 */
587 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
David Sterba6074d452018-03-05 15:03:52 +0100588 ret = __tree_mod_log_insert(eb->fs_info, tm_list[i]);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000589 if (ret)
590 goto free_tms;
591 }
592
David Sterba6074d452018-03-05 15:03:52 +0100593 ret = __tree_mod_log_insert(eb->fs_info, tm);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000594 if (ret)
595 goto free_tms;
David Sterbab1a09f12018-03-05 15:43:41 +0100596 write_unlock(&eb->fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000597 kfree(tm_list);
598
599 return 0;
600free_tms:
601 for (i = 0; i < nr_items; i++) {
602 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
David Sterba6074d452018-03-05 15:03:52 +0100603 rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000604 kfree(tm_list[i]);
605 }
606 if (locked)
David Sterbab1a09f12018-03-05 15:43:41 +0100607 write_unlock(&eb->fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000608 kfree(tm_list);
609 kfree(tm);
610
611 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200612}
613
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000614static inline int
615__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
616 struct tree_mod_elem **tm_list,
617 int nritems)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200618{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000619 int i, j;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200620 int ret;
621
Jan Schmidt097b8a72012-06-21 11:08:04 +0200622 for (i = nritems - 1; i >= 0; i--) {
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000623 ret = __tree_mod_log_insert(fs_info, tm_list[i]);
624 if (ret) {
625 for (j = nritems - 1; j > i; j--)
626 rb_erase(&tm_list[j]->node,
627 &fs_info->tree_mod_log);
628 return ret;
629 }
Jan Schmidt097b8a72012-06-21 11:08:04 +0200630 }
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000631
632 return 0;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200633}
634
David Sterba95b757c2018-03-05 15:22:30 +0100635static noinline int tree_mod_log_insert_root(struct extent_buffer *old_root,
636 struct extent_buffer *new_root, int log_removal)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200637{
David Sterba95b757c2018-03-05 15:22:30 +0100638 struct btrfs_fs_info *fs_info = old_root->fs_info;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000639 struct tree_mod_elem *tm = NULL;
640 struct tree_mod_elem **tm_list = NULL;
641 int nritems = 0;
642 int ret = 0;
643 int i;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200644
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000645 if (!tree_mod_need_log(fs_info, NULL))
Jan Schmidt097b8a72012-06-21 11:08:04 +0200646 return 0;
647
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000648 if (log_removal && btrfs_header_level(old_root) > 0) {
649 nritems = btrfs_header_nritems(old_root);
David Sterba31e818f2015-02-20 18:00:26 +0100650 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
David Sterbabcc8e072017-03-28 14:35:42 +0200651 GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000652 if (!tm_list) {
653 ret = -ENOMEM;
654 goto free_tms;
655 }
656 for (i = 0; i < nritems; i++) {
657 tm_list[i] = alloc_tree_mod_elem(old_root, i,
David Sterbabcc8e072017-03-28 14:35:42 +0200658 MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000659 if (!tm_list[i]) {
660 ret = -ENOMEM;
661 goto free_tms;
662 }
663 }
664 }
Jan Schmidtd9abbf12013-03-20 13:49:48 +0000665
David Sterbabcc8e072017-03-28 14:35:42 +0200666 tm = kzalloc(sizeof(*tm), GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000667 if (!tm) {
668 ret = -ENOMEM;
669 goto free_tms;
670 }
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200671
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530672 tm->logical = new_root->start;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200673 tm->old_root.logical = old_root->start;
674 tm->old_root.level = btrfs_header_level(old_root);
675 tm->generation = btrfs_header_generation(old_root);
676 tm->op = MOD_LOG_ROOT_REPLACE;
677
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000678 if (tree_mod_dont_log(fs_info, NULL))
679 goto free_tms;
680
681 if (tm_list)
682 ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
683 if (!ret)
684 ret = __tree_mod_log_insert(fs_info, tm);
685
David Sterbab1a09f12018-03-05 15:43:41 +0100686 write_unlock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000687 if (ret)
688 goto free_tms;
689 kfree(tm_list);
690
691 return ret;
692
693free_tms:
694 if (tm_list) {
695 for (i = 0; i < nritems; i++)
696 kfree(tm_list[i]);
697 kfree(tm_list);
698 }
699 kfree(tm);
700
701 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200702}
703
704static struct tree_mod_elem *
705__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
706 int smallest)
707{
708 struct rb_root *tm_root;
709 struct rb_node *node;
710 struct tree_mod_elem *cur = NULL;
711 struct tree_mod_elem *found = NULL;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200712
David Sterbab1a09f12018-03-05 15:43:41 +0100713 read_lock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200714 tm_root = &fs_info->tree_mod_log;
715 node = tm_root->rb_node;
716 while (node) {
Geliang Tang6b4df8b2016-12-19 22:53:41 +0800717 cur = rb_entry(node, struct tree_mod_elem, node);
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530718 if (cur->logical < start) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200719 node = node->rb_left;
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530720 } else if (cur->logical > start) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200721 node = node->rb_right;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200722 } else if (cur->seq < min_seq) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200723 node = node->rb_left;
724 } else if (!smallest) {
725 /* we want the node with the highest seq */
726 if (found)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200727 BUG_ON(found->seq > cur->seq);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200728 found = cur;
729 node = node->rb_left;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200730 } else if (cur->seq > min_seq) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200731 /* we want the node with the smallest seq */
732 if (found)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200733 BUG_ON(found->seq < cur->seq);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200734 found = cur;
735 node = node->rb_right;
736 } else {
737 found = cur;
738 break;
739 }
740 }
David Sterbab1a09f12018-03-05 15:43:41 +0100741 read_unlock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200742
743 return found;
744}
745
746/*
747 * this returns the element from the log with the smallest time sequence
748 * value that's in the log (the oldest log item). any element with a time
749 * sequence lower than min_seq will be ignored.
750 */
751static struct tree_mod_elem *
752tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
753 u64 min_seq)
754{
755 return __tree_mod_log_search(fs_info, start, min_seq, 1);
756}
757
758/*
759 * this returns the element from the log with the largest time sequence
760 * value that's in the log (the most recent log item). any element with
761 * a time sequence lower than min_seq will be ignored.
762 */
763static struct tree_mod_elem *
764tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
765{
766 return __tree_mod_log_search(fs_info, start, min_seq, 0);
767}
768
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000769static noinline int
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200770tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
771 struct extent_buffer *src, unsigned long dst_offset,
Jan Schmidt90f8d622013-04-13 13:19:53 +0000772 unsigned long src_offset, int nr_items)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200773{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000774 int ret = 0;
775 struct tree_mod_elem **tm_list = NULL;
776 struct tree_mod_elem **tm_list_add, **tm_list_rem;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200777 int i;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000778 int locked = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200779
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000780 if (!tree_mod_need_log(fs_info, NULL))
781 return 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200782
Josef Bacikc8cc6342013-07-01 16:18:19 -0400783 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000784 return 0;
785
David Sterba31e818f2015-02-20 18:00:26 +0100786 tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000787 GFP_NOFS);
788 if (!tm_list)
789 return -ENOMEM;
790
791 tm_list_add = tm_list;
792 tm_list_rem = tm_list + nr_items;
793 for (i = 0; i < nr_items; i++) {
794 tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
795 MOD_LOG_KEY_REMOVE, GFP_NOFS);
796 if (!tm_list_rem[i]) {
797 ret = -ENOMEM;
798 goto free_tms;
799 }
800
801 tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
802 MOD_LOG_KEY_ADD, GFP_NOFS);
803 if (!tm_list_add[i]) {
804 ret = -ENOMEM;
805 goto free_tms;
806 }
807 }
808
809 if (tree_mod_dont_log(fs_info, NULL))
810 goto free_tms;
811 locked = 1;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200812
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200813 for (i = 0; i < nr_items; i++) {
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000814 ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
815 if (ret)
816 goto free_tms;
817 ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
818 if (ret)
819 goto free_tms;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200820 }
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000821
David Sterbab1a09f12018-03-05 15:43:41 +0100822 write_unlock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000823 kfree(tm_list);
824
825 return 0;
826
827free_tms:
828 for (i = 0; i < nr_items * 2; i++) {
829 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
830 rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
831 kfree(tm_list[i]);
832 }
833 if (locked)
David Sterbab1a09f12018-03-05 15:43:41 +0100834 write_unlock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000835 kfree(tm_list);
836
837 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200838}
839
David Sterbadb7279a2018-03-05 15:14:25 +0100840static noinline int tree_mod_log_free_eb(struct extent_buffer *eb)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200841{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000842 struct tree_mod_elem **tm_list = NULL;
843 int nritems = 0;
844 int i;
845 int ret = 0;
846
847 if (btrfs_header_level(eb) == 0)
848 return 0;
849
David Sterbadb7279a2018-03-05 15:14:25 +0100850 if (!tree_mod_need_log(eb->fs_info, NULL))
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000851 return 0;
852
853 nritems = btrfs_header_nritems(eb);
David Sterba31e818f2015-02-20 18:00:26 +0100854 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000855 if (!tm_list)
856 return -ENOMEM;
857
858 for (i = 0; i < nritems; i++) {
859 tm_list[i] = alloc_tree_mod_elem(eb, i,
860 MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
861 if (!tm_list[i]) {
862 ret = -ENOMEM;
863 goto free_tms;
864 }
865 }
866
David Sterbadb7279a2018-03-05 15:14:25 +0100867 if (tree_mod_dont_log(eb->fs_info, eb))
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000868 goto free_tms;
869
David Sterbadb7279a2018-03-05 15:14:25 +0100870 ret = __tree_mod_log_free_eb(eb->fs_info, tm_list, nritems);
David Sterbab1a09f12018-03-05 15:43:41 +0100871 write_unlock(&eb->fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000872 if (ret)
873 goto free_tms;
874 kfree(tm_list);
875
876 return 0;
877
878free_tms:
879 for (i = 0; i < nritems; i++)
880 kfree(tm_list[i]);
881 kfree(tm_list);
882
883 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200884}
885
Jan Schmidt097b8a72012-06-21 11:08:04 +0200886static noinline void
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200887tree_mod_log_set_root_pointer(struct btrfs_root *root,
Jan Schmidt90f8d622013-04-13 13:19:53 +0000888 struct extent_buffer *new_root_node,
889 int log_removal)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200890{
891 int ret;
David Sterba95b757c2018-03-05 15:22:30 +0100892 ret = tree_mod_log_insert_root(root->node, new_root_node, log_removal);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200893 BUG_ON(ret < 0);
894}
895
Chris Masond352ac62008-09-29 15:18:18 -0400896/*
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400897 * check if the tree block can be shared by multiple trees
898 */
899int btrfs_block_can_be_shared(struct btrfs_root *root,
900 struct extent_buffer *buf)
901{
902 /*
Nicholas D Steeves01327612016-05-19 21:18:45 -0400903 * Tree blocks not in reference counted trees and tree roots
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400904 * are never shared. If a block was allocated after the last
905 * snapshot and the block was not allocated by tree relocation,
906 * we know the block is not shared.
907 */
Miao Xie27cdeb72014-04-02 19:51:05 +0800908 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400909 buf != root->node && buf != root->commit_root &&
910 (btrfs_header_generation(buf) <=
911 btrfs_root_last_snapshot(&root->root_item) ||
912 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
913 return 1;
914#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
Miao Xie27cdeb72014-04-02 19:51:05 +0800915 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400916 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
917 return 1;
918#endif
919 return 0;
920}
921
922static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
923 struct btrfs_root *root,
924 struct extent_buffer *buf,
Yan, Zhengf0486c62010-05-16 10:46:25 -0400925 struct extent_buffer *cow,
926 int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400927{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400928 struct btrfs_fs_info *fs_info = root->fs_info;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400929 u64 refs;
930 u64 owner;
931 u64 flags;
932 u64 new_flags = 0;
933 int ret;
934
935 /*
936 * Backrefs update rules:
937 *
938 * Always use full backrefs for extent pointers in tree block
939 * allocated by tree relocation.
940 *
941 * If a shared tree block is no longer referenced by its owner
942 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
943 * use full backrefs for extent pointers in tree block.
944 *
945 * If a tree block is been relocating
946 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
947 * use full backrefs for extent pointers in tree block.
948 * The reason for this is some operations (such as drop tree)
949 * are only allowed for blocks use full backrefs.
950 */
951
952 if (btrfs_block_can_be_shared(root, buf)) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400953 ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
Josef Bacik3173a182013-03-07 14:22:04 -0500954 btrfs_header_level(buf), 1,
955 &refs, &flags);
Mark Fashehbe1a5562011-08-08 13:20:18 -0700956 if (ret)
957 return ret;
Mark Fashehe5df9572011-08-29 14:17:04 -0700958 if (refs == 0) {
959 ret = -EROFS;
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400960 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fashehe5df9572011-08-29 14:17:04 -0700961 return ret;
962 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400963 } else {
964 refs = 1;
965 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
966 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
967 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
968 else
969 flags = 0;
970 }
971
972 owner = btrfs_header_owner(buf);
973 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
974 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
975
976 if (refs > 1) {
977 if ((owner == root->root_key.objectid ||
978 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
979 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
Josef Bacike339a6b2014-07-02 10:54:25 -0700980 ret = btrfs_inc_ref(trans, root, buf, 1);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500981 if (ret)
982 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400983
984 if (root->root_key.objectid ==
985 BTRFS_TREE_RELOC_OBJECTID) {
Josef Bacike339a6b2014-07-02 10:54:25 -0700986 ret = btrfs_dec_ref(trans, root, buf, 0);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500987 if (ret)
988 return ret;
Josef Bacike339a6b2014-07-02 10:54:25 -0700989 ret = btrfs_inc_ref(trans, root, cow, 1);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500990 if (ret)
991 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400992 }
993 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
994 } else {
995
996 if (root->root_key.objectid ==
997 BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -0700998 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400999 else
Josef Bacike339a6b2014-07-02 10:54:25 -07001000 ret = btrfs_inc_ref(trans, root, cow, 0);
Jeff Mahoney692826b2017-11-21 13:58:49 -05001001 if (ret)
1002 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001003 }
1004 if (new_flags != 0) {
Josef Bacikb1c79e02013-05-09 13:49:30 -04001005 int level = btrfs_header_level(buf);
1006
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001007 ret = btrfs_set_disk_extent_flags(trans, fs_info,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001008 buf->start,
1009 buf->len,
Josef Bacikb1c79e02013-05-09 13:49:30 -04001010 new_flags, level, 0);
Mark Fashehbe1a5562011-08-08 13:20:18 -07001011 if (ret)
1012 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001013 }
1014 } else {
1015 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
1016 if (root->root_key.objectid ==
1017 BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -07001018 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001019 else
Josef Bacike339a6b2014-07-02 10:54:25 -07001020 ret = btrfs_inc_ref(trans, root, cow, 0);
Jeff Mahoney692826b2017-11-21 13:58:49 -05001021 if (ret)
1022 return ret;
Josef Bacike339a6b2014-07-02 10:54:25 -07001023 ret = btrfs_dec_ref(trans, root, buf, 1);
Jeff Mahoney692826b2017-11-21 13:58:49 -05001024 if (ret)
1025 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001026 }
David Sterba7c302b42017-02-10 18:47:57 +01001027 clean_tree_block(fs_info, buf);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001028 *last_ref = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001029 }
1030 return 0;
1031}
1032
1033/*
Chris Masond3977122009-01-05 21:25:51 -05001034 * does the dirty work in cow of a single block. The parent block (if
1035 * supplied) is updated to point to the new cow copy. The new buffer is marked
1036 * dirty and returned locked. If you modify the block it needs to be marked
1037 * dirty again.
Chris Masond352ac62008-09-29 15:18:18 -04001038 *
1039 * search_start -- an allocation hint for the new block
1040 *
Chris Masond3977122009-01-05 21:25:51 -05001041 * empty_size -- a hint that you plan on doing more cow. This is the size in
1042 * bytes the allocator should try to find free next to the block it returns.
1043 * This is just a hint and may be ignored by the allocator.
Chris Masond352ac62008-09-29 15:18:18 -04001044 */
Chris Masond3977122009-01-05 21:25:51 -05001045static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001046 struct btrfs_root *root,
1047 struct extent_buffer *buf,
1048 struct extent_buffer *parent, int parent_slot,
1049 struct extent_buffer **cow_ret,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001050 u64 search_start, u64 empty_size)
Chris Mason6702ed42007-08-07 16:15:09 -04001051{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001052 struct btrfs_fs_info *fs_info = root->fs_info;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001053 struct btrfs_disk_key disk_key;
Chris Mason5f39d392007-10-15 16:14:19 -04001054 struct extent_buffer *cow;
Mark Fashehbe1a5562011-08-08 13:20:18 -07001055 int level, ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -04001056 int last_ref = 0;
Chris Mason925baed2008-06-25 16:01:30 -04001057 int unlock_orig = 0;
Goldwyn Rodrigues0f5053e2016-09-22 14:11:34 -05001058 u64 parent_start = 0;
Chris Mason6702ed42007-08-07 16:15:09 -04001059
Chris Mason925baed2008-06-25 16:01:30 -04001060 if (*cow_ret == buf)
1061 unlock_orig = 1;
1062
Chris Masonb9447ef2009-03-09 11:45:38 -04001063 btrfs_assert_tree_locked(buf);
Chris Mason925baed2008-06-25 16:01:30 -04001064
Miao Xie27cdeb72014-04-02 19:51:05 +08001065 WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001066 trans->transid != fs_info->running_transaction->transid);
Miao Xie27cdeb72014-04-02 19:51:05 +08001067 WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
1068 trans->transid != root->last_trans);
Chris Mason5f39d392007-10-15 16:14:19 -04001069
Chris Mason7bb86312007-12-11 09:25:06 -05001070 level = btrfs_header_level(buf);
Zheng Yan31840ae2008-09-23 13:14:14 -04001071
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001072 if (level == 0)
1073 btrfs_item_key(buf, &disk_key, 0);
1074 else
1075 btrfs_node_key(buf, &disk_key, 0);
1076
Goldwyn Rodrigues0f5053e2016-09-22 14:11:34 -05001077 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
1078 parent_start = parent->start;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001079
David Sterba4d75f8a2014-06-15 01:54:12 +02001080 cow = btrfs_alloc_tree_block(trans, root, parent_start,
1081 root->root_key.objectid, &disk_key, level,
1082 search_start, empty_size);
Chris Mason6702ed42007-08-07 16:15:09 -04001083 if (IS_ERR(cow))
1084 return PTR_ERR(cow);
1085
Chris Masonb4ce94d2009-02-04 09:25:08 -05001086 /* cow is set to blocking by btrfs_init_new_buffer */
1087
David Sterba58e80122016-11-08 18:30:31 +01001088 copy_extent_buffer_full(cow, buf);
Chris Masondb945352007-10-15 16:15:53 -04001089 btrfs_set_header_bytenr(cow, cow->start);
Chris Mason5f39d392007-10-15 16:14:19 -04001090 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001091 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
1092 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
1093 BTRFS_HEADER_FLAG_RELOC);
1094 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1095 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
1096 else
1097 btrfs_set_header_owner(cow, root->root_key.objectid);
Chris Mason6702ed42007-08-07 16:15:09 -04001098
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001099 write_extent_buffer_fsid(cow, fs_info->fsid);
Yan Zheng2b820322008-11-17 21:11:30 -05001100
Mark Fashehbe1a5562011-08-08 13:20:18 -07001101 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
Mark Fashehb68dc2a2011-08-29 14:30:39 -07001102 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04001103 btrfs_abort_transaction(trans, ret);
Mark Fashehb68dc2a2011-08-29 14:30:39 -07001104 return ret;
1105 }
Zheng Yan1a40e232008-09-26 10:09:34 -04001106
Miao Xie27cdeb72014-04-02 19:51:05 +08001107 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
Josef Bacik83d4cfd2013-08-30 15:09:51 -04001108 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
Zhaolei93314e32015-08-06 21:56:58 +08001109 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04001110 btrfs_abort_transaction(trans, ret);
Josef Bacik83d4cfd2013-08-30 15:09:51 -04001111 return ret;
Zhaolei93314e32015-08-06 21:56:58 +08001112 }
Josef Bacik83d4cfd2013-08-30 15:09:51 -04001113 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001114
Chris Mason6702ed42007-08-07 16:15:09 -04001115 if (buf == root->node) {
Chris Mason925baed2008-06-25 16:01:30 -04001116 WARN_ON(parent && parent != buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001117 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
1118 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
1119 parent_start = buf->start;
Chris Mason925baed2008-06-25 16:01:30 -04001120
Chris Mason5f39d392007-10-15 16:14:19 -04001121 extent_buffer_get(cow);
Jan Schmidt90f8d622013-04-13 13:19:53 +00001122 tree_mod_log_set_root_pointer(root, cow, 1);
Chris Mason240f62c2011-03-23 14:54:42 -04001123 rcu_assign_pointer(root->node, cow);
Chris Mason925baed2008-06-25 16:01:30 -04001124
Yan, Zhengf0486c62010-05-16 10:46:25 -04001125 btrfs_free_tree_block(trans, root, buf, parent_start,
Jan Schmidt5581a512012-05-16 17:04:52 +02001126 last_ref);
Chris Mason5f39d392007-10-15 16:14:19 -04001127 free_extent_buffer(buf);
Chris Mason0b86a832008-03-24 15:01:56 -04001128 add_root_to_dirty_list(root);
Chris Mason6702ed42007-08-07 16:15:09 -04001129 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001130 WARN_ON(trans->transid != btrfs_header_generation(parent));
David Sterbae09c2ef2018-03-05 15:09:03 +01001131 tree_mod_log_insert_key(parent, parent_slot,
Josef Bacikc8cc6342013-07-01 16:18:19 -04001132 MOD_LOG_KEY_REPLACE, GFP_NOFS);
Chris Mason5f39d392007-10-15 16:14:19 -04001133 btrfs_set_node_blockptr(parent, parent_slot,
Chris Masondb945352007-10-15 16:15:53 -04001134 cow->start);
Chris Mason74493f72007-12-11 09:25:06 -05001135 btrfs_set_node_ptr_generation(parent, parent_slot,
1136 trans->transid);
Chris Mason6702ed42007-08-07 16:15:09 -04001137 btrfs_mark_buffer_dirty(parent);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00001138 if (last_ref) {
David Sterbadb7279a2018-03-05 15:14:25 +01001139 ret = tree_mod_log_free_eb(buf);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00001140 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04001141 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00001142 return ret;
1143 }
1144 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04001145 btrfs_free_tree_block(trans, root, buf, parent_start,
Jan Schmidt5581a512012-05-16 17:04:52 +02001146 last_ref);
Chris Mason6702ed42007-08-07 16:15:09 -04001147 }
Chris Mason925baed2008-06-25 16:01:30 -04001148 if (unlock_orig)
1149 btrfs_tree_unlock(buf);
Josef Bacik3083ee22012-03-09 16:01:49 -05001150 free_extent_buffer_stale(buf);
Chris Mason6702ed42007-08-07 16:15:09 -04001151 btrfs_mark_buffer_dirty(cow);
1152 *cow_ret = cow;
1153 return 0;
1154}
1155
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001156/*
1157 * returns the logical address of the oldest predecessor of the given root.
1158 * entries older than time_seq are ignored.
1159 */
David Sterbabcd24da2018-03-05 15:33:18 +01001160static struct tree_mod_elem *__tree_mod_log_oldest_root(
1161 struct extent_buffer *eb_root, u64 time_seq)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001162{
1163 struct tree_mod_elem *tm;
1164 struct tree_mod_elem *found = NULL;
Jan Schmidt30b04632013-04-13 13:19:54 +00001165 u64 root_logical = eb_root->start;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001166 int looped = 0;
1167
1168 if (!time_seq)
Stefan Behrens35a36212013-08-14 18:12:25 +02001169 return NULL;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001170
1171 /*
Chandan Rajendra298cfd32016-01-21 15:55:59 +05301172 * the very last operation that's logged for a root is the
1173 * replacement operation (if it is replaced at all). this has
1174 * the logical address of the *new* root, making it the very
1175 * first operation that's logged for this root.
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001176 */
1177 while (1) {
David Sterbabcd24da2018-03-05 15:33:18 +01001178 tm = tree_mod_log_search_oldest(eb_root->fs_info, root_logical,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001179 time_seq);
1180 if (!looped && !tm)
Stefan Behrens35a36212013-08-14 18:12:25 +02001181 return NULL;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001182 /*
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001183 * if there are no tree operation for the oldest root, we simply
1184 * return it. this should only happen if that (old) root is at
1185 * level 0.
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001186 */
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001187 if (!tm)
1188 break;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001189
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001190 /*
1191 * if there's an operation that's not a root replacement, we
1192 * found the oldest version of our root. normally, we'll find a
1193 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
1194 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001195 if (tm->op != MOD_LOG_ROOT_REPLACE)
1196 break;
1197
1198 found = tm;
1199 root_logical = tm->old_root.logical;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001200 looped = 1;
1201 }
1202
Jan Schmidta95236d2012-06-05 16:41:24 +02001203 /* if there's no old root to return, return what we found instead */
1204 if (!found)
1205 found = tm;
1206
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001207 return found;
1208}
1209
1210/*
1211 * tm is a pointer to the first operation to rewind within eb. then, all
Nicholas D Steeves01327612016-05-19 21:18:45 -04001212 * previous operations will be rewound (until we reach something older than
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001213 * time_seq).
1214 */
1215static void
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001216__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1217 u64 time_seq, struct tree_mod_elem *first_tm)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001218{
1219 u32 n;
1220 struct rb_node *next;
1221 struct tree_mod_elem *tm = first_tm;
1222 unsigned long o_dst;
1223 unsigned long o_src;
1224 unsigned long p_size = sizeof(struct btrfs_key_ptr);
1225
1226 n = btrfs_header_nritems(eb);
David Sterbab1a09f12018-03-05 15:43:41 +01001227 read_lock(&fs_info->tree_mod_log_lock);
Jan Schmidt097b8a72012-06-21 11:08:04 +02001228 while (tm && tm->seq >= time_seq) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001229 /*
1230 * all the operations are recorded with the operator used for
1231 * the modification. as we're going backwards, we do the
1232 * opposite of each operation here.
1233 */
1234 switch (tm->op) {
1235 case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
1236 BUG_ON(tm->slot < n);
Eric Sandeen1c697d42013-01-31 00:54:56 +00001237 /* Fallthrough */
Liu Bo95c80bb2012-10-19 09:50:52 +00001238 case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
Chris Mason4c3e6962012-12-18 15:43:18 -05001239 case MOD_LOG_KEY_REMOVE:
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001240 btrfs_set_node_key(eb, &tm->key, tm->slot);
1241 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1242 btrfs_set_node_ptr_generation(eb, tm->slot,
1243 tm->generation);
Chris Mason4c3e6962012-12-18 15:43:18 -05001244 n++;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001245 break;
1246 case MOD_LOG_KEY_REPLACE:
1247 BUG_ON(tm->slot >= n);
1248 btrfs_set_node_key(eb, &tm->key, tm->slot);
1249 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1250 btrfs_set_node_ptr_generation(eb, tm->slot,
1251 tm->generation);
1252 break;
1253 case MOD_LOG_KEY_ADD:
Jan Schmidt19956c72012-06-22 14:52:13 +02001254 /* if a move operation is needed it's in the log */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001255 n--;
1256 break;
1257 case MOD_LOG_MOVE_KEYS:
Jan Schmidtc3193102012-05-31 19:24:36 +02001258 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1259 o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
1260 memmove_extent_buffer(eb, o_dst, o_src,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001261 tm->move.nr_items * p_size);
1262 break;
1263 case MOD_LOG_ROOT_REPLACE:
1264 /*
1265 * this operation is special. for roots, this must be
1266 * handled explicitly before rewinding.
1267 * for non-roots, this operation may exist if the node
1268 * was a root: root A -> child B; then A gets empty and
1269 * B is promoted to the new root. in the mod log, we'll
1270 * have a root-replace operation for B, a tree block
1271 * that is no root. we simply ignore that operation.
1272 */
1273 break;
1274 }
1275 next = rb_next(&tm->node);
1276 if (!next)
1277 break;
Geliang Tang6b4df8b2016-12-19 22:53:41 +08001278 tm = rb_entry(next, struct tree_mod_elem, node);
Chandan Rajendra298cfd32016-01-21 15:55:59 +05301279 if (tm->logical != first_tm->logical)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001280 break;
1281 }
David Sterbab1a09f12018-03-05 15:43:41 +01001282 read_unlock(&fs_info->tree_mod_log_lock);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001283 btrfs_set_header_nritems(eb, n);
1284}
1285
Jan Schmidt47fb0912013-04-13 13:19:55 +00001286/*
Nicholas D Steeves01327612016-05-19 21:18:45 -04001287 * Called with eb read locked. If the buffer cannot be rewound, the same buffer
Jan Schmidt47fb0912013-04-13 13:19:55 +00001288 * is returned. If rewind operations happen, a fresh buffer is returned. The
1289 * returned buffer is always read-locked. If the returned buffer is not the
1290 * input buffer, the lock on the input buffer is released and the input buffer
1291 * is freed (its refcount is decremented).
1292 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001293static struct extent_buffer *
Josef Bacik9ec72672013-08-07 16:57:23 -04001294tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1295 struct extent_buffer *eb, u64 time_seq)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001296{
1297 struct extent_buffer *eb_rewin;
1298 struct tree_mod_elem *tm;
1299
1300 if (!time_seq)
1301 return eb;
1302
1303 if (btrfs_header_level(eb) == 0)
1304 return eb;
1305
1306 tm = tree_mod_log_search(fs_info, eb->start, time_seq);
1307 if (!tm)
1308 return eb;
1309
Josef Bacik9ec72672013-08-07 16:57:23 -04001310 btrfs_set_path_blocking(path);
1311 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1312
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001313 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1314 BUG_ON(tm->slot != 0);
Jeff Mahoneyda170662016-06-15 09:22:56 -04001315 eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001316 if (!eb_rewin) {
Josef Bacik9ec72672013-08-07 16:57:23 -04001317 btrfs_tree_read_unlock_blocking(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001318 free_extent_buffer(eb);
1319 return NULL;
1320 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001321 btrfs_set_header_bytenr(eb_rewin, eb->start);
1322 btrfs_set_header_backref_rev(eb_rewin,
1323 btrfs_header_backref_rev(eb));
1324 btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
Jan Schmidtc3193102012-05-31 19:24:36 +02001325 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001326 } else {
1327 eb_rewin = btrfs_clone_extent_buffer(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001328 if (!eb_rewin) {
Josef Bacik9ec72672013-08-07 16:57:23 -04001329 btrfs_tree_read_unlock_blocking(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001330 free_extent_buffer(eb);
1331 return NULL;
1332 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001333 }
1334
Josef Bacik9ec72672013-08-07 16:57:23 -04001335 btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
1336 btrfs_tree_read_unlock_blocking(eb);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001337 free_extent_buffer(eb);
1338
Jan Schmidt47fb0912013-04-13 13:19:55 +00001339 extent_buffer_get(eb_rewin);
1340 btrfs_tree_read_lock(eb_rewin);
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001341 __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
Jan Schmidt57911b82012-10-19 09:22:03 +02001342 WARN_ON(btrfs_header_nritems(eb_rewin) >
Jeff Mahoneyda170662016-06-15 09:22:56 -04001343 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001344
1345 return eb_rewin;
1346}
1347
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001348/*
1349 * get_old_root() rewinds the state of @root's root node to the given @time_seq
1350 * value. If there are no changes, the current root->root_node is returned. If
1351 * anything changed in between, there's a fresh buffer allocated on which the
1352 * rewind operations are done. In any case, the returned buffer is read locked.
1353 * Returns NULL on error (with no locks held).
1354 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001355static inline struct extent_buffer *
1356get_old_root(struct btrfs_root *root, u64 time_seq)
1357{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001358 struct btrfs_fs_info *fs_info = root->fs_info;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001359 struct tree_mod_elem *tm;
Jan Schmidt30b04632013-04-13 13:19:54 +00001360 struct extent_buffer *eb = NULL;
1361 struct extent_buffer *eb_root;
Liu Bo7bfdcf72012-10-25 07:30:19 -06001362 struct extent_buffer *old;
Jan Schmidta95236d2012-06-05 16:41:24 +02001363 struct tree_mod_root *old_root = NULL;
Chris Mason4325edd2012-06-15 20:02:02 -04001364 u64 old_generation = 0;
Jan Schmidta95236d2012-06-05 16:41:24 +02001365 u64 logical;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001366
Jan Schmidt30b04632013-04-13 13:19:54 +00001367 eb_root = btrfs_read_lock_root_node(root);
David Sterbabcd24da2018-03-05 15:33:18 +01001368 tm = __tree_mod_log_oldest_root(eb_root, time_seq);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001369 if (!tm)
Jan Schmidt30b04632013-04-13 13:19:54 +00001370 return eb_root;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001371
Jan Schmidta95236d2012-06-05 16:41:24 +02001372 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1373 old_root = &tm->old_root;
1374 old_generation = tm->generation;
1375 logical = old_root->logical;
1376 } else {
Jan Schmidt30b04632013-04-13 13:19:54 +00001377 logical = eb_root->start;
Jan Schmidta95236d2012-06-05 16:41:24 +02001378 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001379
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001380 tm = tree_mod_log_search(fs_info, logical, time_seq);
Jan Schmidt834328a2012-10-23 11:27:33 +02001381 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
Jan Schmidt30b04632013-04-13 13:19:54 +00001382 btrfs_tree_read_unlock(eb_root);
1383 free_extent_buffer(eb_root);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001384 old = read_tree_block(fs_info, logical, 0);
Liu Bo64c043d2015-05-25 17:30:15 +08001385 if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
1386 if (!IS_ERR(old))
1387 free_extent_buffer(old);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001388 btrfs_warn(fs_info,
1389 "failed to read tree block %llu from get_old_root",
1390 logical);
Jan Schmidt834328a2012-10-23 11:27:33 +02001391 } else {
Liu Bo7bfdcf72012-10-25 07:30:19 -06001392 eb = btrfs_clone_extent_buffer(old);
1393 free_extent_buffer(old);
Jan Schmidt834328a2012-10-23 11:27:33 +02001394 }
1395 } else if (old_root) {
Jan Schmidt30b04632013-04-13 13:19:54 +00001396 btrfs_tree_read_unlock(eb_root);
1397 free_extent_buffer(eb_root);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001398 eb = alloc_dummy_extent_buffer(fs_info, logical);
Jan Schmidt834328a2012-10-23 11:27:33 +02001399 } else {
Josef Bacik9ec72672013-08-07 16:57:23 -04001400 btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
Jan Schmidt30b04632013-04-13 13:19:54 +00001401 eb = btrfs_clone_extent_buffer(eb_root);
Josef Bacik9ec72672013-08-07 16:57:23 -04001402 btrfs_tree_read_unlock_blocking(eb_root);
Jan Schmidt30b04632013-04-13 13:19:54 +00001403 free_extent_buffer(eb_root);
Jan Schmidt834328a2012-10-23 11:27:33 +02001404 }
1405
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001406 if (!eb)
1407 return NULL;
Jan Schmidtd6381082012-10-23 14:21:05 +02001408 extent_buffer_get(eb);
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001409 btrfs_tree_read_lock(eb);
Jan Schmidta95236d2012-06-05 16:41:24 +02001410 if (old_root) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001411 btrfs_set_header_bytenr(eb, eb->start);
1412 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
Jan Schmidt30b04632013-04-13 13:19:54 +00001413 btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
Jan Schmidta95236d2012-06-05 16:41:24 +02001414 btrfs_set_header_level(eb, old_root->level);
1415 btrfs_set_header_generation(eb, old_generation);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001416 }
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001417 if (tm)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001418 __tree_mod_log_rewind(fs_info, eb, time_seq, tm);
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001419 else
1420 WARN_ON(btrfs_header_level(eb) != 0);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001421 WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(fs_info));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001422
1423 return eb;
1424}
1425
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001426int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
1427{
1428 struct tree_mod_elem *tm;
1429 int level;
Jan Schmidt30b04632013-04-13 13:19:54 +00001430 struct extent_buffer *eb_root = btrfs_root_node(root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001431
David Sterbabcd24da2018-03-05 15:33:18 +01001432 tm = __tree_mod_log_oldest_root(eb_root, time_seq);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001433 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
1434 level = tm->old_root.level;
1435 } else {
Jan Schmidt30b04632013-04-13 13:19:54 +00001436 level = btrfs_header_level(eb_root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001437 }
Jan Schmidt30b04632013-04-13 13:19:54 +00001438 free_extent_buffer(eb_root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001439
1440 return level;
1441}
1442
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001443static inline int should_cow_block(struct btrfs_trans_handle *trans,
1444 struct btrfs_root *root,
1445 struct extent_buffer *buf)
1446{
Jeff Mahoneyf5ee5c92016-06-21 09:52:41 -04001447 if (btrfs_is_testing(root->fs_info))
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04001448 return 0;
David Sterbafccb84c2014-09-29 23:53:21 +02001449
Liu Bof1ebcc72011-11-14 20:48:06 -05001450 /* ensure we can see the force_cow */
1451 smp_rmb();
1452
1453 /*
1454 * We do not need to cow a block if
1455 * 1) this block is not created or changed in this transaction;
1456 * 2) this block does not belong to TREE_RELOC tree;
1457 * 3) the root is not forced COW.
1458 *
1459 * What is forced COW:
Nicholas D Steeves01327612016-05-19 21:18:45 -04001460 * when we create snapshot during committing the transaction,
Liu Bof1ebcc72011-11-14 20:48:06 -05001461 * after we've finished coping src root, we must COW the shared
1462 * block to ensure the metadata consistency.
1463 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001464 if (btrfs_header_generation(buf) == trans->transid &&
1465 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
1466 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
Liu Bof1ebcc72011-11-14 20:48:06 -05001467 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
Miao Xie27cdeb72014-04-02 19:51:05 +08001468 !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001469 return 0;
1470 return 1;
1471}
1472
Chris Masond352ac62008-09-29 15:18:18 -04001473/*
1474 * cows a single block, see __btrfs_cow_block for the real work.
Nicholas D Steeves01327612016-05-19 21:18:45 -04001475 * This version of it has extra checks so that a block isn't COWed more than
Chris Masond352ac62008-09-29 15:18:18 -04001476 * once per transaction, as long as it hasn't been written yet
1477 */
Chris Masond3977122009-01-05 21:25:51 -05001478noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001479 struct btrfs_root *root, struct extent_buffer *buf,
1480 struct extent_buffer *parent, int parent_slot,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001481 struct extent_buffer **cow_ret)
Chris Mason02217ed2007-03-02 16:08:05 -05001482{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001483 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason6702ed42007-08-07 16:15:09 -04001484 u64 search_start;
Chris Masonf510cfe2007-10-15 16:14:48 -04001485 int ret;
Chris Masondc17ff82008-01-08 15:46:30 -05001486
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001487 if (trans->transaction != fs_info->running_transaction)
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001488 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02001489 trans->transid,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001490 fs_info->running_transaction->transid);
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001491
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001492 if (trans->transid != fs_info->generation)
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001493 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001494 trans->transid, fs_info->generation);
Chris Masondc17ff82008-01-08 15:46:30 -05001495
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001496 if (!should_cow_block(trans, root, buf)) {
Jeff Mahoney64c12922016-06-08 00:36:38 -04001497 trans->dirty = true;
Chris Mason02217ed2007-03-02 16:08:05 -05001498 *cow_ret = buf;
1499 return 0;
1500 }
Chris Masonc4876852009-02-04 09:24:25 -05001501
Byongho Leeee221842015-12-15 01:42:10 +09001502 search_start = buf->start & ~((u64)SZ_1G - 1);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001503
1504 if (parent)
1505 btrfs_set_lock_blocking(parent);
1506 btrfs_set_lock_blocking(buf);
1507
Chris Masonf510cfe2007-10-15 16:14:48 -04001508 ret = __btrfs_cow_block(trans, root, buf, parent,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001509 parent_slot, cow_ret, search_start, 0);
liubo1abe9b82011-03-24 11:18:59 +00001510
1511 trace_btrfs_cow_block(root, buf, *cow_ret);
1512
Chris Masonf510cfe2007-10-15 16:14:48 -04001513 return ret;
Chris Mason6702ed42007-08-07 16:15:09 -04001514}
1515
Chris Masond352ac62008-09-29 15:18:18 -04001516/*
1517 * helper function for defrag to decide if two blocks pointed to by a
1518 * node are actually close by
1519 */
Chris Mason6b800532007-10-15 16:17:34 -04001520static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
Chris Mason6702ed42007-08-07 16:15:09 -04001521{
Chris Mason6b800532007-10-15 16:17:34 -04001522 if (blocknr < other && other - (blocknr + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -04001523 return 1;
Chris Mason6b800532007-10-15 16:17:34 -04001524 if (blocknr > other && blocknr - (other + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -04001525 return 1;
Chris Mason02217ed2007-03-02 16:08:05 -05001526 return 0;
1527}
1528
Chris Mason081e9572007-11-06 10:26:24 -05001529/*
1530 * compare two keys in a memcmp fashion
1531 */
Omar Sandoval310712b2017-01-17 23:24:37 -08001532static int comp_keys(const struct btrfs_disk_key *disk,
1533 const struct btrfs_key *k2)
Chris Mason081e9572007-11-06 10:26:24 -05001534{
1535 struct btrfs_key k1;
1536
1537 btrfs_disk_key_to_cpu(&k1, disk);
1538
Diego Calleja20736ab2009-07-24 11:06:52 -04001539 return btrfs_comp_cpu_keys(&k1, k2);
Chris Mason081e9572007-11-06 10:26:24 -05001540}
1541
Josef Bacikf3465ca2008-11-12 14:19:50 -05001542/*
1543 * same as comp_keys only with two btrfs_key's
1544 */
Omar Sandoval310712b2017-01-17 23:24:37 -08001545int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
Josef Bacikf3465ca2008-11-12 14:19:50 -05001546{
1547 if (k1->objectid > k2->objectid)
1548 return 1;
1549 if (k1->objectid < k2->objectid)
1550 return -1;
1551 if (k1->type > k2->type)
1552 return 1;
1553 if (k1->type < k2->type)
1554 return -1;
1555 if (k1->offset > k2->offset)
1556 return 1;
1557 if (k1->offset < k2->offset)
1558 return -1;
1559 return 0;
1560}
Chris Mason081e9572007-11-06 10:26:24 -05001561
Chris Masond352ac62008-09-29 15:18:18 -04001562/*
1563 * this is used by the defrag code to go through all the
1564 * leaves pointed to by a node and reallocate them so that
1565 * disk order is close to key order
1566 */
Chris Mason6702ed42007-08-07 16:15:09 -04001567int btrfs_realloc_node(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001568 struct btrfs_root *root, struct extent_buffer *parent,
Eric Sandeende78b512013-01-31 18:21:12 +00001569 int start_slot, u64 *last_ret,
Chris Masona6b6e752007-10-15 16:22:39 -04001570 struct btrfs_key *progress)
Chris Mason6702ed42007-08-07 16:15:09 -04001571{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001572 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason6b800532007-10-15 16:17:34 -04001573 struct extent_buffer *cur;
Chris Mason6702ed42007-08-07 16:15:09 -04001574 u64 blocknr;
Chris Masonca7a79a2008-05-12 12:59:19 -04001575 u64 gen;
Chris Masone9d0b132007-08-10 14:06:19 -04001576 u64 search_start = *last_ret;
1577 u64 last_block = 0;
Chris Mason6702ed42007-08-07 16:15:09 -04001578 u64 other;
1579 u32 parent_nritems;
Chris Mason6702ed42007-08-07 16:15:09 -04001580 int end_slot;
1581 int i;
1582 int err = 0;
Chris Masonf2183bd2007-08-10 14:42:37 -04001583 int parent_level;
Chris Mason6b800532007-10-15 16:17:34 -04001584 int uptodate;
1585 u32 blocksize;
Chris Mason081e9572007-11-06 10:26:24 -05001586 int progress_passed = 0;
1587 struct btrfs_disk_key disk_key;
Chris Mason6702ed42007-08-07 16:15:09 -04001588
Chris Mason5708b952007-10-25 15:43:18 -04001589 parent_level = btrfs_header_level(parent);
Chris Mason5708b952007-10-25 15:43:18 -04001590
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001591 WARN_ON(trans->transaction != fs_info->running_transaction);
1592 WARN_ON(trans->transid != fs_info->generation);
Chris Mason86479a02007-09-10 19:58:16 -04001593
Chris Mason6b800532007-10-15 16:17:34 -04001594 parent_nritems = btrfs_header_nritems(parent);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001595 blocksize = fs_info->nodesize;
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001596 end_slot = parent_nritems - 1;
Chris Mason6702ed42007-08-07 16:15:09 -04001597
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001598 if (parent_nritems <= 1)
Chris Mason6702ed42007-08-07 16:15:09 -04001599 return 0;
1600
Chris Masonb4ce94d2009-02-04 09:25:08 -05001601 btrfs_set_lock_blocking(parent);
1602
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001603 for (i = start_slot; i <= end_slot; i++) {
Chris Mason6702ed42007-08-07 16:15:09 -04001604 int close = 1;
Chris Masona6b6e752007-10-15 16:22:39 -04001605
Chris Mason081e9572007-11-06 10:26:24 -05001606 btrfs_node_key(parent, &disk_key, i);
1607 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
1608 continue;
1609
1610 progress_passed = 1;
Chris Mason6b800532007-10-15 16:17:34 -04001611 blocknr = btrfs_node_blockptr(parent, i);
Chris Masonca7a79a2008-05-12 12:59:19 -04001612 gen = btrfs_node_ptr_generation(parent, i);
Chris Masone9d0b132007-08-10 14:06:19 -04001613 if (last_block == 0)
1614 last_block = blocknr;
Chris Mason5708b952007-10-25 15:43:18 -04001615
Chris Mason6702ed42007-08-07 16:15:09 -04001616 if (i > 0) {
Chris Mason6b800532007-10-15 16:17:34 -04001617 other = btrfs_node_blockptr(parent, i - 1);
1618 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -04001619 }
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001620 if (!close && i < end_slot) {
Chris Mason6b800532007-10-15 16:17:34 -04001621 other = btrfs_node_blockptr(parent, i + 1);
1622 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -04001623 }
Chris Masone9d0b132007-08-10 14:06:19 -04001624 if (close) {
1625 last_block = blocknr;
Chris Mason6702ed42007-08-07 16:15:09 -04001626 continue;
Chris Masone9d0b132007-08-10 14:06:19 -04001627 }
Chris Mason6702ed42007-08-07 16:15:09 -04001628
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001629 cur = find_extent_buffer(fs_info, blocknr);
Chris Mason6b800532007-10-15 16:17:34 -04001630 if (cur)
Chris Masonb9fab912012-05-06 07:23:47 -04001631 uptodate = btrfs_buffer_uptodate(cur, gen, 0);
Chris Mason6b800532007-10-15 16:17:34 -04001632 else
1633 uptodate = 0;
Chris Mason5708b952007-10-25 15:43:18 -04001634 if (!cur || !uptodate) {
Chris Mason6b800532007-10-15 16:17:34 -04001635 if (!cur) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001636 cur = read_tree_block(fs_info, blocknr, gen);
Liu Bo64c043d2015-05-25 17:30:15 +08001637 if (IS_ERR(cur)) {
1638 return PTR_ERR(cur);
1639 } else if (!extent_buffer_uptodate(cur)) {
Josef Bacik416bc652013-04-23 14:17:42 -04001640 free_extent_buffer(cur);
Tsutomu Itoh97d9a8a2011-03-24 06:33:21 +00001641 return -EIO;
Josef Bacik416bc652013-04-23 14:17:42 -04001642 }
Chris Mason6b800532007-10-15 16:17:34 -04001643 } else if (!uptodate) {
Tsutomu Itoh018642a2012-05-29 18:10:13 +09001644 err = btrfs_read_buffer(cur, gen);
1645 if (err) {
1646 free_extent_buffer(cur);
1647 return err;
1648 }
Chris Masonf2183bd2007-08-10 14:42:37 -04001649 }
Chris Mason6702ed42007-08-07 16:15:09 -04001650 }
Chris Masone9d0b132007-08-10 14:06:19 -04001651 if (search_start == 0)
Chris Mason6b800532007-10-15 16:17:34 -04001652 search_start = last_block;
Chris Masone9d0b132007-08-10 14:06:19 -04001653
Chris Masone7a84562008-06-25 16:01:31 -04001654 btrfs_tree_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001655 btrfs_set_lock_blocking(cur);
Chris Mason6b800532007-10-15 16:17:34 -04001656 err = __btrfs_cow_block(trans, root, cur, parent, i,
Chris Masone7a84562008-06-25 16:01:31 -04001657 &cur, search_start,
Chris Mason6b800532007-10-15 16:17:34 -04001658 min(16 * blocksize,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001659 (end_slot - i) * blocksize));
Yan252c38f2007-08-29 09:11:44 -04001660 if (err) {
Chris Masone7a84562008-06-25 16:01:31 -04001661 btrfs_tree_unlock(cur);
Chris Mason6b800532007-10-15 16:17:34 -04001662 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -04001663 break;
Yan252c38f2007-08-29 09:11:44 -04001664 }
Chris Masone7a84562008-06-25 16:01:31 -04001665 search_start = cur->start;
1666 last_block = cur->start;
Chris Masonf2183bd2007-08-10 14:42:37 -04001667 *last_ret = search_start;
Chris Masone7a84562008-06-25 16:01:31 -04001668 btrfs_tree_unlock(cur);
1669 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -04001670 }
1671 return err;
1672}
1673
Chris Mason74123bd2007-02-02 11:05:29 -05001674/*
Chris Mason5f39d392007-10-15 16:14:19 -04001675 * search for key in the extent_buffer. The items start at offset p,
1676 * and they are item_size apart. There are 'max' items in p.
1677 *
Chris Mason74123bd2007-02-02 11:05:29 -05001678 * the slot in the array is returned via slot, and it points to
1679 * the place where you would insert key if it is not found in
1680 * the array.
1681 *
1682 * slot may point to max if the key is bigger than all of the keys
1683 */
Chris Masone02119d2008-09-05 16:13:11 -04001684static noinline int generic_bin_search(struct extent_buffer *eb,
Omar Sandoval310712b2017-01-17 23:24:37 -08001685 unsigned long p, int item_size,
1686 const struct btrfs_key *key,
Chris Masone02119d2008-09-05 16:13:11 -04001687 int max, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001688{
1689 int low = 0;
1690 int high = max;
1691 int mid;
1692 int ret;
Chris Mason479965d2007-10-15 16:14:27 -04001693 struct btrfs_disk_key *tmp = NULL;
Chris Mason5f39d392007-10-15 16:14:19 -04001694 struct btrfs_disk_key unaligned;
1695 unsigned long offset;
Chris Mason5f39d392007-10-15 16:14:19 -04001696 char *kaddr = NULL;
1697 unsigned long map_start = 0;
1698 unsigned long map_len = 0;
Chris Mason479965d2007-10-15 16:14:27 -04001699 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001700
Liu Bo5e24e9a2016-06-23 16:32:45 -07001701 if (low > high) {
1702 btrfs_err(eb->fs_info,
1703 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
1704 __func__, low, high, eb->start,
1705 btrfs_header_owner(eb), btrfs_header_level(eb));
1706 return -EINVAL;
1707 }
1708
Chris Masond3977122009-01-05 21:25:51 -05001709 while (low < high) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05001710 mid = (low + high) / 2;
Chris Mason5f39d392007-10-15 16:14:19 -04001711 offset = p + mid * item_size;
1712
Chris Masona6591712011-07-19 12:04:14 -04001713 if (!kaddr || offset < map_start ||
Chris Mason5f39d392007-10-15 16:14:19 -04001714 (offset + sizeof(struct btrfs_disk_key)) >
1715 map_start + map_len) {
Chris Mason934d3752008-12-08 16:43:10 -05001716
1717 err = map_private_extent_buffer(eb, offset,
Chris Mason479965d2007-10-15 16:14:27 -04001718 sizeof(struct btrfs_disk_key),
Chris Masona6591712011-07-19 12:04:14 -04001719 &kaddr, &map_start, &map_len);
Chris Mason5f39d392007-10-15 16:14:19 -04001720
Chris Mason479965d2007-10-15 16:14:27 -04001721 if (!err) {
1722 tmp = (struct btrfs_disk_key *)(kaddr + offset -
1723 map_start);
Liu Bo415b35a2016-06-17 19:16:21 -07001724 } else if (err == 1) {
Chris Mason479965d2007-10-15 16:14:27 -04001725 read_extent_buffer(eb, &unaligned,
1726 offset, sizeof(unaligned));
1727 tmp = &unaligned;
Liu Bo415b35a2016-06-17 19:16:21 -07001728 } else {
1729 return err;
Chris Mason479965d2007-10-15 16:14:27 -04001730 }
1731
Chris Mason5f39d392007-10-15 16:14:19 -04001732 } else {
1733 tmp = (struct btrfs_disk_key *)(kaddr + offset -
1734 map_start);
1735 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05001736 ret = comp_keys(tmp, key);
1737
1738 if (ret < 0)
1739 low = mid + 1;
1740 else if (ret > 0)
1741 high = mid;
1742 else {
1743 *slot = mid;
1744 return 0;
1745 }
1746 }
1747 *slot = low;
1748 return 1;
1749}
1750
Chris Mason97571fd2007-02-24 13:39:08 -05001751/*
1752 * simple bin_search frontend that does the right thing for
1753 * leaves vs nodes
1754 */
Nikolay Borisova74b35e2017-12-08 16:27:43 +02001755int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
1756 int level, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001757{
Wang Sheng-Huif7757382012-03-30 15:14:27 +08001758 if (level == 0)
Chris Mason5f39d392007-10-15 16:14:19 -04001759 return generic_bin_search(eb,
1760 offsetof(struct btrfs_leaf, items),
Chris Mason0783fcf2007-03-12 20:12:07 -04001761 sizeof(struct btrfs_item),
Chris Mason5f39d392007-10-15 16:14:19 -04001762 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -04001763 slot);
Wang Sheng-Huif7757382012-03-30 15:14:27 +08001764 else
Chris Mason5f39d392007-10-15 16:14:19 -04001765 return generic_bin_search(eb,
1766 offsetof(struct btrfs_node, ptrs),
Chris Mason123abc82007-03-14 14:14:43 -04001767 sizeof(struct btrfs_key_ptr),
Chris Mason5f39d392007-10-15 16:14:19 -04001768 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -04001769 slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05001770}
1771
Yan, Zhengf0486c62010-05-16 10:46:25 -04001772static void root_add_used(struct btrfs_root *root, u32 size)
1773{
1774 spin_lock(&root->accounting_lock);
1775 btrfs_set_root_used(&root->root_item,
1776 btrfs_root_used(&root->root_item) + size);
1777 spin_unlock(&root->accounting_lock);
1778}
1779
1780static void root_sub_used(struct btrfs_root *root, u32 size)
1781{
1782 spin_lock(&root->accounting_lock);
1783 btrfs_set_root_used(&root->root_item,
1784 btrfs_root_used(&root->root_item) - size);
1785 spin_unlock(&root->accounting_lock);
1786}
1787
Chris Masond352ac62008-09-29 15:18:18 -04001788/* given a node and slot number, this reads the blocks it points to. The
1789 * extent buffer is returned with a reference taken (but unlocked).
Chris Masond352ac62008-09-29 15:18:18 -04001790 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001791static noinline struct extent_buffer *
1792read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
1793 int slot)
Chris Masonbb803952007-03-01 12:04:21 -05001794{
Chris Masonca7a79a2008-05-12 12:59:19 -04001795 int level = btrfs_header_level(parent);
Josef Bacik416bc652013-04-23 14:17:42 -04001796 struct extent_buffer *eb;
1797
Liu Bofb770ae2016-07-05 12:10:14 -07001798 if (slot < 0 || slot >= btrfs_header_nritems(parent))
1799 return ERR_PTR(-ENOENT);
Chris Masonca7a79a2008-05-12 12:59:19 -04001800
1801 BUG_ON(level == 0);
1802
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001803 eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
Josef Bacik416bc652013-04-23 14:17:42 -04001804 btrfs_node_ptr_generation(parent, slot));
Liu Bofb770ae2016-07-05 12:10:14 -07001805 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
1806 free_extent_buffer(eb);
1807 eb = ERR_PTR(-EIO);
Josef Bacik416bc652013-04-23 14:17:42 -04001808 }
1809
1810 return eb;
Chris Masonbb803952007-03-01 12:04:21 -05001811}
1812
Chris Masond352ac62008-09-29 15:18:18 -04001813/*
1814 * node level balancing, used to make sure nodes are in proper order for
1815 * item deletion. We balance from the top down, so we have to make sure
1816 * that a deletion won't leave a node completely empty later on.
1817 */
Chris Masone02119d2008-09-05 16:13:11 -04001818static noinline int balance_level(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05001819 struct btrfs_root *root,
1820 struct btrfs_path *path, int level)
Chris Masonbb803952007-03-01 12:04:21 -05001821{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001822 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04001823 struct extent_buffer *right = NULL;
1824 struct extent_buffer *mid;
1825 struct extent_buffer *left = NULL;
1826 struct extent_buffer *parent = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05001827 int ret = 0;
1828 int wret;
1829 int pslot;
Chris Masonbb803952007-03-01 12:04:21 -05001830 int orig_slot = path->slots[level];
Chris Mason79f95c82007-03-01 15:16:26 -05001831 u64 orig_ptr;
Chris Masonbb803952007-03-01 12:04:21 -05001832
1833 if (level == 0)
1834 return 0;
1835
Chris Mason5f39d392007-10-15 16:14:19 -04001836 mid = path->nodes[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001837
Chris Masonbd681512011-07-16 15:23:14 -04001838 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
1839 path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
Chris Mason7bb86312007-12-11 09:25:06 -05001840 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1841
Chris Mason1d4f8a02007-03-13 09:28:32 -04001842 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
Chris Mason79f95c82007-03-01 15:16:26 -05001843
Li Zefana05a9bb2011-09-06 16:55:34 +08001844 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04001845 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08001846 pslot = path->slots[level + 1];
1847 }
Chris Masonbb803952007-03-01 12:04:21 -05001848
Chris Mason40689472007-03-17 14:29:23 -04001849 /*
1850 * deal with the case where there is only one pointer in the root
1851 * by promoting the node below to a root
1852 */
Chris Mason5f39d392007-10-15 16:14:19 -04001853 if (!parent) {
1854 struct extent_buffer *child;
Chris Masonbb803952007-03-01 12:04:21 -05001855
Chris Mason5f39d392007-10-15 16:14:19 -04001856 if (btrfs_header_nritems(mid) != 1)
Chris Masonbb803952007-03-01 12:04:21 -05001857 return 0;
1858
1859 /* promote the child to a root */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001860 child = read_node_slot(fs_info, mid, 0);
Liu Bofb770ae2016-07-05 12:10:14 -07001861 if (IS_ERR(child)) {
1862 ret = PTR_ERR(child);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001863 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -07001864 goto enospc;
1865 }
1866
Chris Mason925baed2008-06-25 16:01:30 -04001867 btrfs_tree_lock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001868 btrfs_set_lock_blocking(child);
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001869 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001870 if (ret) {
1871 btrfs_tree_unlock(child);
1872 free_extent_buffer(child);
1873 goto enospc;
1874 }
Yan2f375ab2008-02-01 14:58:07 -05001875
Jan Schmidt90f8d622013-04-13 13:19:53 +00001876 tree_mod_log_set_root_pointer(root, child, 1);
Chris Mason240f62c2011-03-23 14:54:42 -04001877 rcu_assign_pointer(root->node, child);
Chris Mason925baed2008-06-25 16:01:30 -04001878
Chris Mason0b86a832008-03-24 15:01:56 -04001879 add_root_to_dirty_list(root);
Chris Mason925baed2008-06-25 16:01:30 -04001880 btrfs_tree_unlock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001881
Chris Mason925baed2008-06-25 16:01:30 -04001882 path->locks[level] = 0;
Chris Masonbb803952007-03-01 12:04:21 -05001883 path->nodes[level] = NULL;
David Sterba7c302b42017-02-10 18:47:57 +01001884 clean_tree_block(fs_info, mid);
Chris Mason925baed2008-06-25 16:01:30 -04001885 btrfs_tree_unlock(mid);
Chris Masonbb803952007-03-01 12:04:21 -05001886 /* once for the path */
Chris Mason5f39d392007-10-15 16:14:19 -04001887 free_extent_buffer(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001888
1889 root_sub_used(root, mid->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001890 btrfs_free_tree_block(trans, root, mid, 0, 1);
Chris Masonbb803952007-03-01 12:04:21 -05001891 /* once for the root ptr */
Josef Bacik3083ee22012-03-09 16:01:49 -05001892 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001893 return 0;
Chris Masonbb803952007-03-01 12:04:21 -05001894 }
Chris Mason5f39d392007-10-15 16:14:19 -04001895 if (btrfs_header_nritems(mid) >
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001896 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
Chris Masonbb803952007-03-01 12:04:21 -05001897 return 0;
1898
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001899 left = read_node_slot(fs_info, parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001900 if (IS_ERR(left))
1901 left = NULL;
1902
Chris Mason5f39d392007-10-15 16:14:19 -04001903 if (left) {
Chris Mason925baed2008-06-25 16:01:30 -04001904 btrfs_tree_lock(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001905 btrfs_set_lock_blocking(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001906 wret = btrfs_cow_block(trans, root, left,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001907 parent, pslot - 1, &left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001908 if (wret) {
1909 ret = wret;
1910 goto enospc;
1911 }
Chris Mason2cc58cf2007-08-27 16:49:44 -04001912 }
Liu Bofb770ae2016-07-05 12:10:14 -07001913
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001914 right = read_node_slot(fs_info, parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001915 if (IS_ERR(right))
1916 right = NULL;
1917
Chris Mason5f39d392007-10-15 16:14:19 -04001918 if (right) {
Chris Mason925baed2008-06-25 16:01:30 -04001919 btrfs_tree_lock(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001920 btrfs_set_lock_blocking(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001921 wret = btrfs_cow_block(trans, root, right,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001922 parent, pslot + 1, &right);
Chris Mason2cc58cf2007-08-27 16:49:44 -04001923 if (wret) {
1924 ret = wret;
1925 goto enospc;
1926 }
1927 }
1928
1929 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04001930 if (left) {
1931 orig_slot += btrfs_header_nritems(left);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001932 wret = push_node_left(trans, fs_info, left, mid, 1);
Chris Mason79f95c82007-03-01 15:16:26 -05001933 if (wret < 0)
1934 ret = wret;
Chris Masonbb803952007-03-01 12:04:21 -05001935 }
Chris Mason79f95c82007-03-01 15:16:26 -05001936
1937 /*
1938 * then try to empty the right most buffer into the middle
1939 */
Chris Mason5f39d392007-10-15 16:14:19 -04001940 if (right) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001941 wret = push_node_left(trans, fs_info, mid, right, 1);
Chris Mason54aa1f42007-06-22 14:16:25 -04001942 if (wret < 0 && wret != -ENOSPC)
Chris Mason79f95c82007-03-01 15:16:26 -05001943 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04001944 if (btrfs_header_nritems(right) == 0) {
David Sterba7c302b42017-02-10 18:47:57 +01001945 clean_tree_block(fs_info, right);
Chris Mason925baed2008-06-25 16:01:30 -04001946 btrfs_tree_unlock(right);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001947 del_ptr(root, path, level + 1, pslot + 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001948 root_sub_used(root, right->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001949 btrfs_free_tree_block(trans, root, right, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05001950 free_extent_buffer_stale(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001951 right = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05001952 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001953 struct btrfs_disk_key right_key;
1954 btrfs_node_key(right, &right_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01001955 ret = tree_mod_log_insert_key(parent, pslot + 1,
1956 MOD_LOG_KEY_REPLACE, GFP_NOFS);
1957 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04001958 btrfs_set_node_key(parent, &right_key, pslot + 1);
1959 btrfs_mark_buffer_dirty(parent);
Chris Masonbb803952007-03-01 12:04:21 -05001960 }
1961 }
Chris Mason5f39d392007-10-15 16:14:19 -04001962 if (btrfs_header_nritems(mid) == 1) {
Chris Mason79f95c82007-03-01 15:16:26 -05001963 /*
1964 * we're not allowed to leave a node with one item in the
1965 * tree during a delete. A deletion from lower in the tree
1966 * could try to delete the only pointer in this node.
1967 * So, pull some keys from the left.
1968 * There has to be a left pointer at this point because
1969 * otherwise we would have pulled some pointers from the
1970 * right
1971 */
Mark Fasheh305a26a2011-09-01 11:27:57 -07001972 if (!left) {
1973 ret = -EROFS;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001974 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -07001975 goto enospc;
1976 }
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001977 wret = balance_node_right(trans, fs_info, mid, left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001978 if (wret < 0) {
Chris Mason79f95c82007-03-01 15:16:26 -05001979 ret = wret;
Chris Mason54aa1f42007-06-22 14:16:25 -04001980 goto enospc;
1981 }
Chris Masonbce4eae2008-04-24 14:42:46 -04001982 if (wret == 1) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001983 wret = push_node_left(trans, fs_info, left, mid, 1);
Chris Masonbce4eae2008-04-24 14:42:46 -04001984 if (wret < 0)
1985 ret = wret;
1986 }
Chris Mason79f95c82007-03-01 15:16:26 -05001987 BUG_ON(wret == 1);
1988 }
Chris Mason5f39d392007-10-15 16:14:19 -04001989 if (btrfs_header_nritems(mid) == 0) {
David Sterba7c302b42017-02-10 18:47:57 +01001990 clean_tree_block(fs_info, mid);
Chris Mason925baed2008-06-25 16:01:30 -04001991 btrfs_tree_unlock(mid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001992 del_ptr(root, path, level + 1, pslot);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001993 root_sub_used(root, mid->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001994 btrfs_free_tree_block(trans, root, mid, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05001995 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001996 mid = NULL;
Chris Mason79f95c82007-03-01 15:16:26 -05001997 } else {
1998 /* update the parent key to reflect our changes */
Chris Mason5f39d392007-10-15 16:14:19 -04001999 struct btrfs_disk_key mid_key;
2000 btrfs_node_key(mid, &mid_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01002001 ret = tree_mod_log_insert_key(parent, pslot,
2002 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2003 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002004 btrfs_set_node_key(parent, &mid_key, pslot);
2005 btrfs_mark_buffer_dirty(parent);
Chris Mason79f95c82007-03-01 15:16:26 -05002006 }
Chris Masonbb803952007-03-01 12:04:21 -05002007
Chris Mason79f95c82007-03-01 15:16:26 -05002008 /* update the path */
Chris Mason5f39d392007-10-15 16:14:19 -04002009 if (left) {
2010 if (btrfs_header_nritems(left) > orig_slot) {
2011 extent_buffer_get(left);
Chris Mason925baed2008-06-25 16:01:30 -04002012 /* left was locked after cow */
Chris Mason5f39d392007-10-15 16:14:19 -04002013 path->nodes[level] = left;
Chris Masonbb803952007-03-01 12:04:21 -05002014 path->slots[level + 1] -= 1;
2015 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002016 if (mid) {
2017 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002018 free_extent_buffer(mid);
Chris Mason925baed2008-06-25 16:01:30 -04002019 }
Chris Masonbb803952007-03-01 12:04:21 -05002020 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002021 orig_slot -= btrfs_header_nritems(left);
Chris Masonbb803952007-03-01 12:04:21 -05002022 path->slots[level] = orig_slot;
2023 }
2024 }
Chris Mason79f95c82007-03-01 15:16:26 -05002025 /* double check we haven't messed things up */
Chris Masone20d96d2007-03-22 12:13:20 -04002026 if (orig_ptr !=
Chris Mason5f39d392007-10-15 16:14:19 -04002027 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
Chris Mason79f95c82007-03-01 15:16:26 -05002028 BUG();
Chris Mason54aa1f42007-06-22 14:16:25 -04002029enospc:
Chris Mason925baed2008-06-25 16:01:30 -04002030 if (right) {
2031 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002032 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04002033 }
2034 if (left) {
2035 if (path->nodes[level] != left)
2036 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002037 free_extent_buffer(left);
Chris Mason925baed2008-06-25 16:01:30 -04002038 }
Chris Masonbb803952007-03-01 12:04:21 -05002039 return ret;
2040}
2041
Chris Masond352ac62008-09-29 15:18:18 -04002042/* Node balancing for insertion. Here we only split or push nodes around
2043 * when they are completely full. This is also done top down, so we
2044 * have to be pessimistic.
2045 */
Chris Masond3977122009-01-05 21:25:51 -05002046static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05002047 struct btrfs_root *root,
2048 struct btrfs_path *path, int level)
Chris Masone66f7092007-04-20 13:16:02 -04002049{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002050 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04002051 struct extent_buffer *right = NULL;
2052 struct extent_buffer *mid;
2053 struct extent_buffer *left = NULL;
2054 struct extent_buffer *parent = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002055 int ret = 0;
2056 int wret;
2057 int pslot;
2058 int orig_slot = path->slots[level];
Chris Masone66f7092007-04-20 13:16:02 -04002059
2060 if (level == 0)
2061 return 1;
2062
Chris Mason5f39d392007-10-15 16:14:19 -04002063 mid = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05002064 WARN_ON(btrfs_header_generation(mid) != trans->transid);
Chris Masone66f7092007-04-20 13:16:02 -04002065
Li Zefana05a9bb2011-09-06 16:55:34 +08002066 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04002067 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08002068 pslot = path->slots[level + 1];
2069 }
Chris Masone66f7092007-04-20 13:16:02 -04002070
Chris Mason5f39d392007-10-15 16:14:19 -04002071 if (!parent)
Chris Masone66f7092007-04-20 13:16:02 -04002072 return 1;
Chris Masone66f7092007-04-20 13:16:02 -04002073
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002074 left = read_node_slot(fs_info, parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07002075 if (IS_ERR(left))
2076 left = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002077
2078 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04002079 if (left) {
Chris Masone66f7092007-04-20 13:16:02 -04002080 u32 left_nr;
Chris Mason925baed2008-06-25 16:01:30 -04002081
2082 btrfs_tree_lock(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002083 btrfs_set_lock_blocking(left);
2084
Chris Mason5f39d392007-10-15 16:14:19 -04002085 left_nr = btrfs_header_nritems(left);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002086 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002087 wret = 1;
2088 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002089 ret = btrfs_cow_block(trans, root, left, parent,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04002090 pslot - 1, &left);
Chris Mason54aa1f42007-06-22 14:16:25 -04002091 if (ret)
2092 wret = 1;
2093 else {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002094 wret = push_node_left(trans, fs_info,
Chris Mason971a1f62008-04-24 10:54:32 -04002095 left, mid, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04002096 }
Chris Mason33ade1f2007-04-20 13:48:57 -04002097 }
Chris Masone66f7092007-04-20 13:16:02 -04002098 if (wret < 0)
2099 ret = wret;
2100 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04002101 struct btrfs_disk_key disk_key;
Chris Masone66f7092007-04-20 13:16:02 -04002102 orig_slot += left_nr;
Chris Mason5f39d392007-10-15 16:14:19 -04002103 btrfs_node_key(mid, &disk_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01002104 ret = tree_mod_log_insert_key(parent, pslot,
2105 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2106 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002107 btrfs_set_node_key(parent, &disk_key, pslot);
2108 btrfs_mark_buffer_dirty(parent);
2109 if (btrfs_header_nritems(left) > orig_slot) {
2110 path->nodes[level] = left;
Chris Masone66f7092007-04-20 13:16:02 -04002111 path->slots[level + 1] -= 1;
2112 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002113 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002114 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04002115 } else {
2116 orig_slot -=
Chris Mason5f39d392007-10-15 16:14:19 -04002117 btrfs_header_nritems(left);
Chris Masone66f7092007-04-20 13:16:02 -04002118 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002119 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002120 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04002121 }
Chris Masone66f7092007-04-20 13:16:02 -04002122 return 0;
2123 }
Chris Mason925baed2008-06-25 16:01:30 -04002124 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002125 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04002126 }
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002127 right = read_node_slot(fs_info, parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07002128 if (IS_ERR(right))
2129 right = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002130
2131 /*
2132 * then try to empty the right most buffer into the middle
2133 */
Chris Mason5f39d392007-10-15 16:14:19 -04002134 if (right) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002135 u32 right_nr;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002136
Chris Mason925baed2008-06-25 16:01:30 -04002137 btrfs_tree_lock(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002138 btrfs_set_lock_blocking(right);
2139
Chris Mason5f39d392007-10-15 16:14:19 -04002140 right_nr = btrfs_header_nritems(right);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002141 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002142 wret = 1;
2143 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002144 ret = btrfs_cow_block(trans, root, right,
2145 parent, pslot + 1,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04002146 &right);
Chris Mason54aa1f42007-06-22 14:16:25 -04002147 if (ret)
2148 wret = 1;
2149 else {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002150 wret = balance_node_right(trans, fs_info,
Chris Mason5f39d392007-10-15 16:14:19 -04002151 right, mid);
Chris Mason54aa1f42007-06-22 14:16:25 -04002152 }
Chris Mason33ade1f2007-04-20 13:48:57 -04002153 }
Chris Masone66f7092007-04-20 13:16:02 -04002154 if (wret < 0)
2155 ret = wret;
2156 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04002157 struct btrfs_disk_key disk_key;
2158
2159 btrfs_node_key(right, &disk_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01002160 ret = tree_mod_log_insert_key(parent, pslot + 1,
2161 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2162 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002163 btrfs_set_node_key(parent, &disk_key, pslot + 1);
2164 btrfs_mark_buffer_dirty(parent);
2165
2166 if (btrfs_header_nritems(mid) <= orig_slot) {
2167 path->nodes[level] = right;
Chris Masone66f7092007-04-20 13:16:02 -04002168 path->slots[level + 1] += 1;
2169 path->slots[level] = orig_slot -
Chris Mason5f39d392007-10-15 16:14:19 -04002170 btrfs_header_nritems(mid);
Chris Mason925baed2008-06-25 16:01:30 -04002171 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002172 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04002173 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002174 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002175 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04002176 }
Chris Masone66f7092007-04-20 13:16:02 -04002177 return 0;
2178 }
Chris Mason925baed2008-06-25 16:01:30 -04002179 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002180 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04002181 }
Chris Masone66f7092007-04-20 13:16:02 -04002182 return 1;
2183}
2184
Chris Mason74123bd2007-02-02 11:05:29 -05002185/*
Chris Masond352ac62008-09-29 15:18:18 -04002186 * readahead one full node of leaves, finding things that are close
2187 * to the block in 'slot', and triggering ra on them.
Chris Mason3c69fae2007-08-07 15:52:22 -04002188 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002189static void reada_for_search(struct btrfs_fs_info *fs_info,
Chris Masonc8c42862009-04-03 10:14:18 -04002190 struct btrfs_path *path,
2191 int level, int slot, u64 objectid)
Chris Mason3c69fae2007-08-07 15:52:22 -04002192{
Chris Mason5f39d392007-10-15 16:14:19 -04002193 struct extent_buffer *node;
Chris Mason01f46652007-12-21 16:24:26 -05002194 struct btrfs_disk_key disk_key;
Chris Mason3c69fae2007-08-07 15:52:22 -04002195 u32 nritems;
Chris Mason3c69fae2007-08-07 15:52:22 -04002196 u64 search;
Chris Masona7175312009-01-22 09:23:10 -05002197 u64 target;
Chris Mason6b800532007-10-15 16:17:34 -04002198 u64 nread = 0;
Chris Mason5f39d392007-10-15 16:14:19 -04002199 struct extent_buffer *eb;
Chris Mason6b800532007-10-15 16:17:34 -04002200 u32 nr;
2201 u32 blocksize;
2202 u32 nscan = 0;
Chris Masondb945352007-10-15 16:15:53 -04002203
Chris Masona6b6e752007-10-15 16:22:39 -04002204 if (level != 1)
Chris Mason3c69fae2007-08-07 15:52:22 -04002205 return;
2206
Chris Mason6702ed42007-08-07 16:15:09 -04002207 if (!path->nodes[level])
2208 return;
2209
Chris Mason5f39d392007-10-15 16:14:19 -04002210 node = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04002211
Chris Mason3c69fae2007-08-07 15:52:22 -04002212 search = btrfs_node_blockptr(node, slot);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002213 blocksize = fs_info->nodesize;
2214 eb = find_extent_buffer(fs_info, search);
Chris Mason5f39d392007-10-15 16:14:19 -04002215 if (eb) {
2216 free_extent_buffer(eb);
Chris Mason3c69fae2007-08-07 15:52:22 -04002217 return;
2218 }
2219
Chris Masona7175312009-01-22 09:23:10 -05002220 target = search;
Chris Mason6b800532007-10-15 16:17:34 -04002221
Chris Mason5f39d392007-10-15 16:14:19 -04002222 nritems = btrfs_header_nritems(node);
Chris Mason6b800532007-10-15 16:17:34 -04002223 nr = slot;
Josef Bacik25b8b932011-06-08 14:36:54 -04002224
Chris Masond3977122009-01-05 21:25:51 -05002225 while (1) {
David Sterbae4058b52015-11-27 16:31:35 +01002226 if (path->reada == READA_BACK) {
Chris Mason6b800532007-10-15 16:17:34 -04002227 if (nr == 0)
2228 break;
2229 nr--;
David Sterbae4058b52015-11-27 16:31:35 +01002230 } else if (path->reada == READA_FORWARD) {
Chris Mason6b800532007-10-15 16:17:34 -04002231 nr++;
2232 if (nr >= nritems)
2233 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04002234 }
David Sterbae4058b52015-11-27 16:31:35 +01002235 if (path->reada == READA_BACK && objectid) {
Chris Mason01f46652007-12-21 16:24:26 -05002236 btrfs_node_key(node, &disk_key, nr);
2237 if (btrfs_disk_key_objectid(&disk_key) != objectid)
2238 break;
2239 }
Chris Mason6b800532007-10-15 16:17:34 -04002240 search = btrfs_node_blockptr(node, nr);
Chris Masona7175312009-01-22 09:23:10 -05002241 if ((search <= target && target - search <= 65536) ||
2242 (search > target && search - target <= 65536)) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002243 readahead_tree_block(fs_info, search);
Chris Mason6b800532007-10-15 16:17:34 -04002244 nread += blocksize;
2245 }
2246 nscan++;
Chris Masona7175312009-01-22 09:23:10 -05002247 if ((nread > 65536 || nscan > 32))
Chris Mason6b800532007-10-15 16:17:34 -04002248 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04002249 }
2250}
Chris Mason925baed2008-06-25 16:01:30 -04002251
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002252static noinline void reada_for_balance(struct btrfs_fs_info *fs_info,
Josef Bacik0b088512013-06-17 14:23:02 -04002253 struct btrfs_path *path, int level)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002254{
2255 int slot;
2256 int nritems;
2257 struct extent_buffer *parent;
2258 struct extent_buffer *eb;
2259 u64 gen;
2260 u64 block1 = 0;
2261 u64 block2 = 0;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002262
Chris Mason8c594ea2009-04-20 15:50:10 -04002263 parent = path->nodes[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002264 if (!parent)
Josef Bacik0b088512013-06-17 14:23:02 -04002265 return;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002266
2267 nritems = btrfs_header_nritems(parent);
Chris Mason8c594ea2009-04-20 15:50:10 -04002268 slot = path->slots[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002269
2270 if (slot > 0) {
2271 block1 = btrfs_node_blockptr(parent, slot - 1);
2272 gen = btrfs_node_ptr_generation(parent, slot - 1);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002273 eb = find_extent_buffer(fs_info, block1);
Chris Masonb9fab912012-05-06 07:23:47 -04002274 /*
2275 * if we get -eagain from btrfs_buffer_uptodate, we
2276 * don't want to return eagain here. That will loop
2277 * forever
2278 */
2279 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002280 block1 = 0;
2281 free_extent_buffer(eb);
2282 }
Chris Mason8c594ea2009-04-20 15:50:10 -04002283 if (slot + 1 < nritems) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05002284 block2 = btrfs_node_blockptr(parent, slot + 1);
2285 gen = btrfs_node_ptr_generation(parent, slot + 1);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002286 eb = find_extent_buffer(fs_info, block2);
Chris Masonb9fab912012-05-06 07:23:47 -04002287 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002288 block2 = 0;
2289 free_extent_buffer(eb);
2290 }
Chris Mason8c594ea2009-04-20 15:50:10 -04002291
Josef Bacik0b088512013-06-17 14:23:02 -04002292 if (block1)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002293 readahead_tree_block(fs_info, block1);
Josef Bacik0b088512013-06-17 14:23:02 -04002294 if (block2)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002295 readahead_tree_block(fs_info, block2);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002296}
2297
2298
2299/*
Chris Masond3977122009-01-05 21:25:51 -05002300 * when we walk down the tree, it is usually safe to unlock the higher layers
2301 * in the tree. The exceptions are when our path goes through slot 0, because
2302 * operations on the tree might require changing key pointers higher up in the
2303 * tree.
Chris Masond352ac62008-09-29 15:18:18 -04002304 *
Chris Masond3977122009-01-05 21:25:51 -05002305 * callers might also have set path->keep_locks, which tells this code to keep
2306 * the lock if the path points to the last slot in the block. This is part of
2307 * walking through the tree, and selecting the next slot in the higher block.
Chris Masond352ac62008-09-29 15:18:18 -04002308 *
Chris Masond3977122009-01-05 21:25:51 -05002309 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
2310 * if lowest_unlock is 1, level 0 won't be unlocked
Chris Masond352ac62008-09-29 15:18:18 -04002311 */
Chris Masone02119d2008-09-05 16:13:11 -04002312static noinline void unlock_up(struct btrfs_path *path, int level,
Chris Masonf7c79f32012-03-19 15:54:38 -04002313 int lowest_unlock, int min_write_lock_level,
2314 int *write_lock_level)
Chris Mason925baed2008-06-25 16:01:30 -04002315{
2316 int i;
2317 int skip_level = level;
Chris Mason051e1b92008-06-25 16:01:30 -04002318 int no_skips = 0;
Chris Mason925baed2008-06-25 16:01:30 -04002319 struct extent_buffer *t;
2320
2321 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2322 if (!path->nodes[i])
2323 break;
2324 if (!path->locks[i])
2325 break;
Chris Mason051e1b92008-06-25 16:01:30 -04002326 if (!no_skips && path->slots[i] == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002327 skip_level = i + 1;
2328 continue;
2329 }
Chris Mason051e1b92008-06-25 16:01:30 -04002330 if (!no_skips && path->keep_locks) {
Chris Mason925baed2008-06-25 16:01:30 -04002331 u32 nritems;
2332 t = path->nodes[i];
2333 nritems = btrfs_header_nritems(t);
Chris Mason051e1b92008-06-25 16:01:30 -04002334 if (nritems < 1 || path->slots[i] >= nritems - 1) {
Chris Mason925baed2008-06-25 16:01:30 -04002335 skip_level = i + 1;
2336 continue;
2337 }
2338 }
Chris Mason051e1b92008-06-25 16:01:30 -04002339 if (skip_level < i && i >= lowest_unlock)
2340 no_skips = 1;
2341
Chris Mason925baed2008-06-25 16:01:30 -04002342 t = path->nodes[i];
2343 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -04002344 btrfs_tree_unlock_rw(t, path->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -04002345 path->locks[i] = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04002346 if (write_lock_level &&
2347 i > min_write_lock_level &&
2348 i <= *write_lock_level) {
2349 *write_lock_level = i - 1;
2350 }
Chris Mason925baed2008-06-25 16:01:30 -04002351 }
2352 }
2353}
2354
Chris Mason3c69fae2007-08-07 15:52:22 -04002355/*
Chris Masonb4ce94d2009-02-04 09:25:08 -05002356 * This releases any locks held in the path starting at level and
2357 * going all the way up to the root.
2358 *
2359 * btrfs_search_slot will keep the lock held on higher nodes in a few
2360 * corner cases, such as COW of the block at slot zero in the node. This
2361 * ignores those rules, and it should only be called when there are no
2362 * more updates to be done higher up in the tree.
2363 */
2364noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
2365{
2366 int i;
2367
Josef Bacik09a2a8f92013-04-05 16:51:15 -04002368 if (path->keep_locks)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002369 return;
2370
2371 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2372 if (!path->nodes[i])
Chris Mason12f4dac2009-02-04 09:31:42 -05002373 continue;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002374 if (!path->locks[i])
Chris Mason12f4dac2009-02-04 09:31:42 -05002375 continue;
Chris Masonbd681512011-07-16 15:23:14 -04002376 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002377 path->locks[i] = 0;
2378 }
2379}
2380
2381/*
Chris Masonc8c42862009-04-03 10:14:18 -04002382 * helper function for btrfs_search_slot. The goal is to find a block
2383 * in cache without setting the path to blocking. If we find the block
2384 * we return zero and the path is unchanged.
2385 *
2386 * If we can't find the block, we set the path blocking and do some
2387 * reada. -EAGAIN is returned and the search must be repeated.
2388 */
2389static int
Liu Bod07b8522017-01-30 12:23:42 -08002390read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
2391 struct extent_buffer **eb_ret, int level, int slot,
David Sterbacda79c52017-02-10 18:44:32 +01002392 const struct btrfs_key *key)
Chris Masonc8c42862009-04-03 10:14:18 -04002393{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002394 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonc8c42862009-04-03 10:14:18 -04002395 u64 blocknr;
2396 u64 gen;
Chris Masonc8c42862009-04-03 10:14:18 -04002397 struct extent_buffer *b = *eb_ret;
2398 struct extent_buffer *tmp;
Chris Mason76a05b32009-05-14 13:24:30 -04002399 int ret;
Chris Masonc8c42862009-04-03 10:14:18 -04002400
2401 blocknr = btrfs_node_blockptr(b, slot);
2402 gen = btrfs_node_ptr_generation(b, slot);
Chris Masonc8c42862009-04-03 10:14:18 -04002403
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002404 tmp = find_extent_buffer(fs_info, blocknr);
Chris Masoncb449212010-10-24 11:01:27 -04002405 if (tmp) {
Chris Masonb9fab912012-05-06 07:23:47 -04002406 /* first we do an atomic uptodate check */
Josef Bacikbdf7c002013-06-17 13:44:48 -04002407 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
2408 *eb_ret = tmp;
2409 return 0;
Chris Masoncb449212010-10-24 11:01:27 -04002410 }
Josef Bacikbdf7c002013-06-17 13:44:48 -04002411
2412 /* the pages were up to date, but we failed
2413 * the generation number check. Do a full
2414 * read for the generation number that is correct.
2415 * We must do this without dropping locks so
2416 * we can trust our generation number
2417 */
2418 btrfs_set_path_blocking(p);
2419
2420 /* now we're allowed to do a blocking uptodate check */
2421 ret = btrfs_read_buffer(tmp, gen);
2422 if (!ret) {
2423 *eb_ret = tmp;
2424 return 0;
2425 }
2426 free_extent_buffer(tmp);
2427 btrfs_release_path(p);
2428 return -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04002429 }
2430
2431 /*
2432 * reduce lock contention at high levels
2433 * of the btree by dropping locks before
Chris Mason76a05b32009-05-14 13:24:30 -04002434 * we read. Don't release the lock on the current
2435 * level because we need to walk this node to figure
2436 * out which blocks to read.
Chris Masonc8c42862009-04-03 10:14:18 -04002437 */
Chris Mason8c594ea2009-04-20 15:50:10 -04002438 btrfs_unlock_up_safe(p, level + 1);
2439 btrfs_set_path_blocking(p);
2440
Chris Masoncb449212010-10-24 11:01:27 -04002441 free_extent_buffer(tmp);
David Sterbae4058b52015-11-27 16:31:35 +01002442 if (p->reada != READA_NONE)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002443 reada_for_search(fs_info, p, level, slot, key->objectid);
Chris Masonc8c42862009-04-03 10:14:18 -04002444
David Sterbab3b4aa72011-04-21 01:20:15 +02002445 btrfs_release_path(p);
Chris Mason76a05b32009-05-14 13:24:30 -04002446
2447 ret = -EAGAIN;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002448 tmp = read_tree_block(fs_info, blocknr, 0);
Liu Bo64c043d2015-05-25 17:30:15 +08002449 if (!IS_ERR(tmp)) {
Chris Mason76a05b32009-05-14 13:24:30 -04002450 /*
2451 * If the read above didn't mark this buffer up to date,
2452 * it will never end up being up to date. Set ret to EIO now
2453 * and give up so that our caller doesn't loop forever
2454 * on our EAGAINs.
2455 */
Chris Masonb9fab912012-05-06 07:23:47 -04002456 if (!btrfs_buffer_uptodate(tmp, 0, 0))
Chris Mason76a05b32009-05-14 13:24:30 -04002457 ret = -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04002458 free_extent_buffer(tmp);
Liu Boc871b0f2016-06-06 12:01:23 -07002459 } else {
2460 ret = PTR_ERR(tmp);
Chris Mason76a05b32009-05-14 13:24:30 -04002461 }
2462 return ret;
Chris Masonc8c42862009-04-03 10:14:18 -04002463}
2464
2465/*
2466 * helper function for btrfs_search_slot. This does all of the checks
2467 * for node-level blocks and does any balancing required based on
2468 * the ins_len.
2469 *
2470 * If no extra work was required, zero is returned. If we had to
2471 * drop the path, -EAGAIN is returned and btrfs_search_slot must
2472 * start over
2473 */
2474static int
2475setup_nodes_for_search(struct btrfs_trans_handle *trans,
2476 struct btrfs_root *root, struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -04002477 struct extent_buffer *b, int level, int ins_len,
2478 int *write_lock_level)
Chris Masonc8c42862009-04-03 10:14:18 -04002479{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002480 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonc8c42862009-04-03 10:14:18 -04002481 int ret;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002482
Chris Masonc8c42862009-04-03 10:14:18 -04002483 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002484 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
Chris Masonc8c42862009-04-03 10:14:18 -04002485 int sret;
2486
Chris Masonbd681512011-07-16 15:23:14 -04002487 if (*write_lock_level < level + 1) {
2488 *write_lock_level = level + 1;
2489 btrfs_release_path(p);
2490 goto again;
2491 }
2492
Chris Masonc8c42862009-04-03 10:14:18 -04002493 btrfs_set_path_blocking(p);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002494 reada_for_balance(fs_info, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002495 sret = split_node(trans, root, p, level);
Chris Masonbd681512011-07-16 15:23:14 -04002496 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonc8c42862009-04-03 10:14:18 -04002497
2498 BUG_ON(sret > 0);
2499 if (sret) {
2500 ret = sret;
2501 goto done;
2502 }
2503 b = p->nodes[level];
2504 } else if (ins_len < 0 && btrfs_header_nritems(b) <
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002505 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
Chris Masonc8c42862009-04-03 10:14:18 -04002506 int sret;
2507
Chris Masonbd681512011-07-16 15:23:14 -04002508 if (*write_lock_level < level + 1) {
2509 *write_lock_level = level + 1;
2510 btrfs_release_path(p);
2511 goto again;
2512 }
2513
Chris Masonc8c42862009-04-03 10:14:18 -04002514 btrfs_set_path_blocking(p);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002515 reada_for_balance(fs_info, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002516 sret = balance_level(trans, root, p, level);
Chris Masonbd681512011-07-16 15:23:14 -04002517 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonc8c42862009-04-03 10:14:18 -04002518
2519 if (sret) {
2520 ret = sret;
2521 goto done;
2522 }
2523 b = p->nodes[level];
2524 if (!b) {
David Sterbab3b4aa72011-04-21 01:20:15 +02002525 btrfs_release_path(p);
Chris Masonc8c42862009-04-03 10:14:18 -04002526 goto again;
2527 }
2528 BUG_ON(btrfs_header_nritems(b) == 1);
2529 }
2530 return 0;
2531
2532again:
2533 ret = -EAGAIN;
2534done:
2535 return ret;
2536}
2537
/*
 * Debug check used by key_search() when the binary search is skipped:
 * assert that the first key stored in 'b' is byte-for-byte equal to the
 * on-disk form of 'key'.  The first key is taken from items[0] for a leaf
 * (level 0) and from ptrs[0] for a node.
 *
 * Compiles to an empty function unless CONFIG_BTRFS_ASSERT is defined.
 */
static void key_search_validate(struct extent_buffer *b,
				const struct btrfs_key *key,
				int level)
{
#ifdef CONFIG_BTRFS_ASSERT
	struct btrfs_disk_key disk_key;

	btrfs_cpu_key_to_disk(&disk_key, key);

	if (level != 0) {
		ASSERT(!memcmp_extent_buffer(b, &disk_key,
		    offsetof(struct btrfs_node, ptrs[0].key),
		    sizeof(disk_key)));
		return;
	}

	ASSERT(!memcmp_extent_buffer(b, &disk_key,
	    offsetof(struct btrfs_leaf, items[0].key),
	    sizeof(disk_key)));
#endif
}
2557
/*
 * Find the slot for 'key' in 'b'.
 *
 * *prev_cmp carries the comparison result from the previous call.  Once it
 * is 0 the binary search is skipped: slot 0 is used directly and, with
 * assertions enabled, key_search_validate() checks that the first key in
 * 'b' really matches.  Otherwise btrfs_bin_search() runs and its result is
 * both stored in *prev_cmp and returned.
 */
static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
		      int level, int *prev_cmp, int *slot)
{
	int cmp;

	if (*prev_cmp == 0) {
		key_search_validate(b, key, level);
		*slot = 0;
		return 0;
	}

	cmp = btrfs_bin_search(b, key, level, slot);
	*prev_cmp = cmp;

	return cmp;
}
2571
David Sterba381cf652015-01-02 18:45:16 +01002572int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002573 u64 iobjectid, u64 ioff, u8 key_type,
2574 struct btrfs_key *found_key)
2575{
2576 int ret;
2577 struct btrfs_key key;
2578 struct extent_buffer *eb;
David Sterba381cf652015-01-02 18:45:16 +01002579
2580 ASSERT(path);
David Sterba1d4c08e2015-01-02 19:36:14 +01002581 ASSERT(found_key);
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002582
2583 key.type = key_type;
2584 key.objectid = iobjectid;
2585 key.offset = ioff;
2586
2587 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
David Sterba1d4c08e2015-01-02 19:36:14 +01002588 if (ret < 0)
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002589 return ret;
2590
2591 eb = path->nodes[0];
2592 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
2593 ret = btrfs_next_leaf(fs_root, path);
2594 if (ret)
2595 return ret;
2596 eb = path->nodes[0];
2597 }
2598
2599 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
2600 if (found_key->type != key.type ||
2601 found_key->objectid != key.objectid)
2602 return 1;
2603
2604 return 0;
2605}
2606
Chris Masonc8c42862009-04-03 10:14:18 -04002607/*
 * btrfs_search_slot - look for a key in a tree and perform necessary
 * modifications to preserve tree invariants.
 *
 * @trans:	Handle of transaction, used when modifying the tree
 * @root:	The root of the tree to search
 * @key:	The key we are looking for
 * @p:		Holds all btree nodes along the search path
 * @ins_len:	Indicates purpose of search: > 0 for inserts (the value is
 *		compared against the free space of the leaf and handed to
 *		split_leaf() when the leaf is too full), < 0 for deletions,
 *		0 for plain searches
 * @cow:	boolean, should CoW operations be performed.  Must be non-zero
 *		whenever @ins_len is non-zero (enforced with BUG_ON).
 *
 * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
 * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
 *
 * The walk stops at level p->lowest_level (normally 0, the leaf level).
 * When p->search_commit_root is set the walk starts from root->commit_root
 * instead of the current root node.
 *
 * If @key is found, 0 is returned and you can find the item in the leaf level
 * of the path (level 0)
 *
 * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
 * points to the slot where it should be inserted
 *
 * If an error is encountered while searching the tree a negative error number
 * is returned.  In that case the path is released before returning unless
 * p->skip_release_on_error is set.
 *
 * Unless p->leave_spinning is set, the path is switched to blocking locks
 * before returning.
 */
Omar Sandoval310712b2017-01-17 23:24:37 -08002632int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2633 const struct btrfs_key *key, struct btrfs_path *p,
2634 int ins_len, int cow)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002635{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002636 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04002637 struct extent_buffer *b;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002638 int slot;
2639 int ret;
Yan Zheng33c66f42009-07-22 09:59:00 -04002640 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002641 int level;
Chris Mason925baed2008-06-25 16:01:30 -04002642 int lowest_unlock = 1;
Chris Masonbd681512011-07-16 15:23:14 -04002643 int root_lock;
2644 /* everything at write_lock_level or lower must be write locked */
2645 int write_lock_level = 0;
Chris Mason9f3a7422007-08-07 15:52:19 -04002646 u8 lowest_level = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04002647 int min_write_lock_level;
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002648 int prev_cmp;
Chris Mason9f3a7422007-08-07 15:52:19 -04002649
Chris Mason6702ed42007-08-07 16:15:09 -04002650 lowest_level = p->lowest_level;
/*
 * sanity checks: stopping above the leaf level makes no sense for an
 * insert, the caller must hand in an unused path, and any modification
 * (ins_len != 0) requires cow
 */
Chris Mason323ac952008-10-01 19:05:46 -04002651 WARN_ON(lowest_level && ins_len > 0);
Chris Mason22b0ebd2007-03-30 08:47:31 -04002652 WARN_ON(p->nodes[0] != NULL);
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002653 BUG_ON(!cow && ins_len);
Josef Bacik25179202008-10-29 14:49:05 -04002654
Chris Masonbd681512011-07-16 15:23:14 -04002655 if (ins_len < 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002656 lowest_unlock = 2;
Chris Mason65b51a02008-08-01 15:11:20 -04002657
Chris Masonbd681512011-07-16 15:23:14 -04002658 /* when we are removing items, we might have to go up to level
 * two as we update tree pointers.  Make sure we keep write locks
 * for those levels as well
 */
2662 write_lock_level = 2;
2663 } else if (ins_len > 0) {
2664 /*
 * for inserting items, make sure we have a write lock on
 * level 1 so we can update keys
 */
2668 write_lock_level = 1;
2669 }
2670
/* read-only search: "level <= -1" is never true, so nothing is write locked */
2671 if (!cow)
2672 write_lock_level = -1;
2673
/* the caller wants locks kept or stops above the leaves: write lock every level */
Josef Bacik09a2a8f92013-04-05 16:51:15 -04002674 if (cow && (p->keep_locks || p->lowest_level))
Chris Masonbd681512011-07-16 15:23:14 -04002675 write_lock_level = BTRFS_MAX_LEVEL;
2676
/* remember the starting value; write_lock_level may be raised on restarts below */
Chris Masonf7c79f32012-03-19 15:54:38 -04002677 min_write_lock_level = write_lock_level;
2678
/*
 * restart point: we come back here whenever the walk has to be redone from
 * the root, either because a higher write_lock_level turned out to be
 * needed or because a helper returned -EAGAIN
 */
Chris Masonbb803952007-03-01 12:04:21 -05002679again:
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002680 prev_cmp = -1;
Chris Masonbd681512011-07-16 15:23:14 -04002681 /*
 * we try very hard to do read locks on the root
 */
2684 root_lock = BTRFS_READ_LOCK;
2685 level = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002686 if (p->search_commit_root) {
Chris Masonbd681512011-07-16 15:23:14 -04002687 /*
 * the commit roots are read only
 * so we always do read locks
 */
Josef Bacik3f8a18c2014-03-28 17:16:01 -04002691 if (p->need_commit_sem)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002692 down_read(&fs_info->commit_root_sem);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002693 b = root->commit_root;
2694 extent_buffer_get(b);
Chris Masonbd681512011-07-16 15:23:14 -04002695 level = btrfs_header_level(b);
Josef Bacik3f8a18c2014-03-28 17:16:01 -04002696 if (p->need_commit_sem)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002697 up_read(&fs_info->commit_root_sem);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002698 if (!p->skip_locking)
Chris Masonbd681512011-07-16 15:23:14 -04002699 btrfs_tree_read_lock(b);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002700 } else {
Chris Masonbd681512011-07-16 15:23:14 -04002701 if (p->skip_locking) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002702 b = btrfs_root_node(root);
Chris Masonbd681512011-07-16 15:23:14 -04002703 level = btrfs_header_level(b);
2704 } else {
2705 /* we don't know the level of the root node
 * until we actually have it read locked
 */
2708 b = btrfs_read_lock_root_node(root);
2709 level = btrfs_header_level(b);
2710 if (level <= write_lock_level) {
2711 /* whoops, must trade for write lock */
2712 btrfs_tree_read_unlock(b);
2713 free_extent_buffer(b);
2714 b = btrfs_lock_root_node(root);
2715 root_lock = BTRFS_WRITE_LOCK;
2716
2717 /* the level might have changed, check again */
2718 level = btrfs_header_level(b);
2719 }
2720 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002721 }
Chris Masonbd681512011-07-16 15:23:14 -04002722 p->nodes[level] = b;
2723 if (!p->skip_locking)
2724 p->locks[level] = root_lock;
Chris Mason925baed2008-06-25 16:01:30 -04002725
/* walk down one level per iteration, starting at the root obtained above */
Chris Masoneb60cea2007-02-02 09:18:22 -05002726 while (b) {
Chris Mason5f39d392007-10-15 16:14:19 -04002727 level = btrfs_header_level(b);
Chris Mason65b51a02008-08-01 15:11:20 -04002728
2729 /*
 * setup the path here so we can release it under lock
 * contention with the cow code
 */
Chris Mason02217ed2007-03-02 16:08:05 -05002733 if (cow) {
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02002734 bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
2735
Chris Masonc8c42862009-04-03 10:14:18 -04002736 /*
 * if we don't really need to cow this block
 * then we don't want to set the path blocking,
 * so we test it here
 */
Jeff Mahoney64c12922016-06-08 00:36:38 -04002741 if (!should_cow_block(trans, root, b)) {
2742 trans->dirty = true;
Chris Mason65b51a02008-08-01 15:11:20 -04002743 goto cow_done;
Jeff Mahoney64c12922016-06-08 00:36:38 -04002744 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002745
Chris Masonbd681512011-07-16 15:23:14 -04002746 /*
 * must have write locks on this node and the
 * parent
 */
Josef Bacik5124e002012-11-07 13:44:13 -05002750 if (level > write_lock_level ||
2751 (level + 1 > write_lock_level &&
2752 level + 1 < BTRFS_MAX_LEVEL &&
2753 p->nodes[level + 1])) {
Chris Masonbd681512011-07-16 15:23:14 -04002754 write_lock_level = level + 1;
2755 btrfs_release_path(p);
2756 goto again;
2757 }
2758
Filipe Manana160f4082014-07-28 19:37:17 +01002759 btrfs_set_path_blocking(p);
/* a block at the topmost possible level has no parent slot to pass along */
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02002760 if (last_level)
2761 err = btrfs_cow_block(trans, root, b, NULL, 0,
2762 &b);
2763 else
2764 err = btrfs_cow_block(trans, root, b,
2765 p->nodes[level + 1],
2766 p->slots[level + 1], &b);
Yan Zheng33c66f42009-07-22 09:59:00 -04002767 if (err) {
Yan Zheng33c66f42009-07-22 09:59:00 -04002768 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04002769 goto done;
Chris Mason54aa1f42007-06-22 14:16:25 -04002770 }
Chris Mason02217ed2007-03-02 16:08:05 -05002771 }
Chris Mason65b51a02008-08-01 15:11:20 -04002772cow_done:
Chris Masoneb60cea2007-02-02 09:18:22 -05002773 p->nodes[level] = b;
Chris Masonbd681512011-07-16 15:23:14 -04002774 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002775
2776 /*
 * we have a lock on b and as long as we aren't changing
 * the tree, there is no way for the items in b to change.
 * It is safe to drop the lock on our parent before we
 * go through the expensive btree search on b.
 *
 * If we're inserting or deleting (ins_len != 0), then we might
 * be changing slot zero, which may require changing the parent.
 * So, we can't drop the lock until after we know which slot
 * we're operating on.
 */
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002787 if (!ins_len && !p->keep_locks) {
2788 int u = level + 1;
2789
2790 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
2791 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
2792 p->locks[u] = 0;
2793 }
2794 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05002795
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002796 ret = key_search(b, key, level, &prev_cmp, &slot);
Liu Bo415b35a2016-06-17 19:16:21 -07002797 if (ret < 0)
2798 goto done;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002799
Chris Mason5f39d392007-10-15 16:14:19 -04002800 if (level != 0) {
/*
 * interior node.  Without an exact match we step back one slot
 * (presumably to the child whose key range covers @key); dec
 * records that so the slot can be restored if we stop at this level
 */
Yan Zheng33c66f42009-07-22 09:59:00 -04002801 int dec = 0;
2802 if (ret && slot > 0) {
2803 dec = 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002804 slot -= 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04002805 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05002806 p->slots[level] = slot;
Yan Zheng33c66f42009-07-22 09:59:00 -04002807 err = setup_nodes_for_search(trans, root, p, b, level,
Chris Masonbd681512011-07-16 15:23:14 -04002808 ins_len, &write_lock_level);
Yan Zheng33c66f42009-07-22 09:59:00 -04002809 if (err == -EAGAIN)
Chris Masonc8c42862009-04-03 10:14:18 -04002810 goto again;
Yan Zheng33c66f42009-07-22 09:59:00 -04002811 if (err) {
2812 ret = err;
Chris Masonc8c42862009-04-03 10:14:18 -04002813 goto done;
Yan Zheng33c66f42009-07-22 09:59:00 -04002814 }
/* reload: the helper was given the path and may have changed node and slot */
Chris Masonc8c42862009-04-03 10:14:18 -04002815 b = p->nodes[level];
2816 slot = p->slots[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002817
Chris Masonbd681512011-07-16 15:23:14 -04002818 /*
 * slot 0 is special, if we change the key
 * we have to update the parent pointer
 * which means we must have a write lock
 * on the parent
 */
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002824 if (slot == 0 && ins_len &&
Chris Masonbd681512011-07-16 15:23:14 -04002825 write_lock_level < level + 1) {
2826 write_lock_level = level + 1;
2827 btrfs_release_path(p);
2828 goto again;
2829 }
2830
Chris Masonf7c79f32012-03-19 15:54:38 -04002831 unlock_up(p, level, lowest_unlock,
2832 min_write_lock_level, &write_lock_level);
Chris Masonf9efa9c2008-06-25 16:14:04 -04002833
/* the caller asked us to stop here: undo the step back and return */
Chris Mason925baed2008-06-25 16:01:30 -04002834 if (level == lowest_level) {
Yan Zheng33c66f42009-07-22 09:59:00 -04002835 if (dec)
2836 p->slots[level]++;
Zheng Yan5b21f2e2008-09-26 10:05:38 -04002837 goto done;
Chris Mason925baed2008-06-25 16:01:30 -04002838 }
Chris Masonca7a79a2008-05-12 12:59:19 -04002839
/* fetch the child block at 'slot'; on success b refers to that child */
Liu Bod07b8522017-01-30 12:23:42 -08002840 err = read_block_for_search(root, p, &b, level,
David Sterbacda79c52017-02-10 18:44:32 +01002841 slot, key);
Yan Zheng33c66f42009-07-22 09:59:00 -04002842 if (err == -EAGAIN)
Chris Masonc8c42862009-04-03 10:14:18 -04002843 goto again;
Yan Zheng33c66f42009-07-22 09:59:00 -04002844 if (err) {
2845 ret = err;
Chris Mason76a05b32009-05-14 13:24:30 -04002846 goto done;
Yan Zheng33c66f42009-07-22 09:59:00 -04002847 }
Chris Mason76a05b32009-05-14 13:24:30 -04002848
/*
 * lock the child: a write lock if its level is at or below
 * write_lock_level, a read lock otherwise.  The non-sleeping attempt
 * is tried first; only if it fails is the path set blocking around
 * the sleeping lock call
 */
Chris Masonb4ce94d2009-02-04 09:25:08 -05002849 if (!p->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04002850 level = btrfs_header_level(b);
2851 if (level <= write_lock_level) {
2852 err = btrfs_try_tree_write_lock(b);
2853 if (!err) {
2854 btrfs_set_path_blocking(p);
2855 btrfs_tree_lock(b);
2856 btrfs_clear_path_blocking(p, b,
2857 BTRFS_WRITE_LOCK);
2858 }
2859 p->locks[level] = BTRFS_WRITE_LOCK;
2860 } else {
Chris Masonf82c4582014-11-19 10:25:09 -08002861 err = btrfs_tree_read_lock_atomic(b);
Chris Masonbd681512011-07-16 15:23:14 -04002862 if (!err) {
2863 btrfs_set_path_blocking(p);
2864 btrfs_tree_read_lock(b);
2865 btrfs_clear_path_blocking(p, b,
2866 BTRFS_READ_LOCK);
2867 }
2868 p->locks[level] = BTRFS_READ_LOCK;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002869 }
Chris Masonbd681512011-07-16 15:23:14 -04002870 p->nodes[level] = b;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002871 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05002872 } else {
/* leaf level: record the slot and, for inserts, make room if needed */
2873 p->slots[level] = slot;
Yan Zheng87b29b22008-12-17 10:21:48 -05002874 if (ins_len > 0 &&
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002875 btrfs_leaf_free_space(fs_info, b) < ins_len) {
/* splitting the leaf needs a write lock on level 1; restart to get it */
Chris Masonbd681512011-07-16 15:23:14 -04002876 if (write_lock_level < 1) {
2877 write_lock_level = 1;
2878 btrfs_release_path(p);
2879 goto again;
2880 }
2881
Chris Masonb4ce94d2009-02-04 09:25:08 -05002882 btrfs_set_path_blocking(p);
/* last argument (extend) is set when the key already exists (ret == 0) */
Yan Zheng33c66f42009-07-22 09:59:00 -04002883 err = split_leaf(trans, root, key,
2884 p, ins_len, ret == 0);
Chris Masonbd681512011-07-16 15:23:14 -04002885 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002886
Yan Zheng33c66f42009-07-22 09:59:00 -04002887 BUG_ON(err > 0);
2888 if (err) {
2889 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04002890 goto done;
2891 }
Chris Mason5c680ed2007-02-22 11:39:13 -05002892 }
Chris Mason459931e2008-12-10 09:10:46 -05002893 if (!p->search_for_split)
Chris Masonf7c79f32012-03-19 15:54:38 -04002894 unlock_up(p, level, lowest_unlock,
2895 min_write_lock_level, &write_lock_level);
Chris Mason65b51a02008-08-01 15:11:20 -04002896 goto done;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002897 }
2898 }
/* only reached if the loop ended because b became NULL */
Chris Mason65b51a02008-08-01 15:11:20 -04002899 ret = 1;
2900done:
Chris Masonb4ce94d2009-02-04 09:25:08 -05002901 /*
 * we don't really know what they plan on doing with the path
 * from here on, so for now just mark it as blocking
 */
Chris Masonb9473432009-03-13 11:00:37 -04002905 if (!p->leave_spinning)
2906 btrfs_set_path_blocking(p);
Filipe Manana5f5bc6b2014-11-09 08:38:39 +00002907 if (ret < 0 && !p->skip_release_on_error)
David Sterbab3b4aa72011-04-21 01:20:15 +02002908 btrfs_release_path(p);
Chris Mason65b51a02008-08-01 15:11:20 -04002909 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002910}
2911
Chris Mason74123bd2007-02-02 11:05:29 -05002912/*
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002913 * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
2914 * current state of the tree together with the operations recorded in the tree
2915 * modification log to search for the key in a previous version of this tree, as
2916 * denoted by the time_seq parameter.
2917 *
2918 * Naturally, there is no support for insert, delete or cow operations.
2919 *
2920 * The resulting path and return value will be set up as if we called
2921 * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
2922 */
Omar Sandoval310712b2017-01-17 23:24:37 -08002923int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002924 struct btrfs_path *p, u64 time_seq)
2925{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002926 struct btrfs_fs_info *fs_info = root->fs_info;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002927 struct extent_buffer *b;
2928 int slot;
2929 int ret;
2930 int err;
2931 int level;
2932 int lowest_unlock = 1;
2933 u8 lowest_level = 0;
Josef Bacikd4b40872013-09-24 14:09:34 -04002934 int prev_cmp = -1;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002935
2936 lowest_level = p->lowest_level;
2937 WARN_ON(p->nodes[0] != NULL);
2938
2939 if (p->search_commit_root) {
2940 BUG_ON(time_seq);
2941 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2942 }
2943
2944again:
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002945 b = get_old_root(root, time_seq);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002946 level = btrfs_header_level(b);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002947 p->locks[level] = BTRFS_READ_LOCK;
2948
2949 while (b) {
2950 level = btrfs_header_level(b);
2951 p->nodes[level] = b;
2952 btrfs_clear_path_blocking(p, NULL, 0);
2953
2954 /*
2955 * we have a lock on b and as long as we aren't changing
2956 * the tree, there is no way to for the items in b to change.
2957 * It is safe to drop the lock on our parent before we
2958 * go through the expensive btree search on b.
2959 */
2960 btrfs_unlock_up_safe(p, level + 1);
2961
Josef Bacikd4b40872013-09-24 14:09:34 -04002962 /*
Nicholas D Steeves01327612016-05-19 21:18:45 -04002963 * Since we can unwind ebs we want to do a real search every
Josef Bacikd4b40872013-09-24 14:09:34 -04002964 * time.
2965 */
2966 prev_cmp = -1;
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002967 ret = key_search(b, key, level, &prev_cmp, &slot);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002968
2969 if (level != 0) {
2970 int dec = 0;
2971 if (ret && slot > 0) {
2972 dec = 1;
2973 slot -= 1;
2974 }
2975 p->slots[level] = slot;
2976 unlock_up(p, level, lowest_unlock, 0, NULL);
2977
2978 if (level == lowest_level) {
2979 if (dec)
2980 p->slots[level]++;
2981 goto done;
2982 }
2983
Liu Bod07b8522017-01-30 12:23:42 -08002984 err = read_block_for_search(root, p, &b, level,
David Sterbacda79c52017-02-10 18:44:32 +01002985 slot, key);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002986 if (err == -EAGAIN)
2987 goto again;
2988 if (err) {
2989 ret = err;
2990 goto done;
2991 }
2992
2993 level = btrfs_header_level(b);
Chris Masonf82c4582014-11-19 10:25:09 -08002994 err = btrfs_tree_read_lock_atomic(b);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002995 if (!err) {
2996 btrfs_set_path_blocking(p);
2997 btrfs_tree_read_lock(b);
2998 btrfs_clear_path_blocking(p, b,
2999 BTRFS_READ_LOCK);
3000 }
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003001 b = tree_mod_log_rewind(fs_info, p, b, time_seq);
Josef Bacikdb7f3432013-08-07 14:54:37 -04003002 if (!b) {
3003 ret = -ENOMEM;
3004 goto done;
3005 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02003006 p->locks[level] = BTRFS_READ_LOCK;
3007 p->nodes[level] = b;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02003008 } else {
3009 p->slots[level] = slot;
3010 unlock_up(p, level, lowest_unlock, 0, NULL);
3011 goto done;
3012 }
3013 }
3014 ret = 1;
3015done:
3016 if (!p->leave_spinning)
3017 btrfs_set_path_blocking(p);
3018 if (ret < 0)
3019 btrfs_release_path(p);
3020
3021 return ret;
3022}
3023
3024/*
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003025 * helper to use instead of search slot if no exact match is needed but
3026 * instead the next or previous item should be returned.
3027 * When find_higher is true, the next higher item is returned, the next lower
3028 * otherwise.
3029 * When return_any and find_higher are both true, and no higher item is found,
3030 * return the next lower instead.
3031 * When return_any is true and find_higher is false, and no lower item is found,
3032 * return the next higher instead.
3033 * It returns 0 if any item is found, 1 if none is found (tree empty), and
3034 * < 0 on error
3035 */
3036int btrfs_search_slot_for_read(struct btrfs_root *root,
Omar Sandoval310712b2017-01-17 23:24:37 -08003037 const struct btrfs_key *key,
3038 struct btrfs_path *p, int find_higher,
3039 int return_any)
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003040{
3041 int ret;
3042 struct extent_buffer *leaf;
3043
3044again:
3045 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
3046 if (ret <= 0)
3047 return ret;
3048 /*
3049 * a return value of 1 means the path is at the position where the
3050 * item should be inserted. Normally this is the next bigger item,
3051 * but in case the previous item is the last in a leaf, path points
3052 * to the first free slot in the previous leaf, i.e. at an invalid
3053 * item.
3054 */
3055 leaf = p->nodes[0];
3056
3057 if (find_higher) {
3058 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
3059 ret = btrfs_next_leaf(root, p);
3060 if (ret <= 0)
3061 return ret;
3062 if (!return_any)
3063 return 1;
3064 /*
3065 * no higher item found, return the next
3066 * lower instead
3067 */
3068 return_any = 0;
3069 find_higher = 0;
3070 btrfs_release_path(p);
3071 goto again;
3072 }
3073 } else {
Arne Jansene6793762011-09-13 11:18:10 +02003074 if (p->slots[0] == 0) {
3075 ret = btrfs_prev_leaf(root, p);
3076 if (ret < 0)
3077 return ret;
3078 if (!ret) {
Filipe David Borba Manana23c6bf62014-01-11 21:28:54 +00003079 leaf = p->nodes[0];
3080 if (p->slots[0] == btrfs_header_nritems(leaf))
3081 p->slots[0]--;
Arne Jansene6793762011-09-13 11:18:10 +02003082 return 0;
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003083 }
Arne Jansene6793762011-09-13 11:18:10 +02003084 if (!return_any)
3085 return 1;
3086 /*
3087 * no lower item found, return the next
3088 * higher instead
3089 */
3090 return_any = 0;
3091 find_higher = 1;
3092 btrfs_release_path(p);
3093 goto again;
3094 } else {
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003095 --p->slots[0];
3096 }
3097 }
3098 return 0;
3099}
3100
3101/*
Chris Mason74123bd2007-02-02 11:05:29 -05003102 * adjust the pointers going up the tree, starting at level
3103 * making sure the right key of each node is points to 'key'.
3104 * This is used after shifting pointers to the left, so it stops
3105 * fixing up pointers when a given leaf/node is not in slot 0 of the
3106 * higher levels
Chris Masonaa5d6be2007-02-28 16:35:06 -05003107 *
Chris Mason74123bd2007-02-02 11:05:29 -05003108 */
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003109static void fixup_low_keys(struct btrfs_fs_info *fs_info,
3110 struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01003111 struct btrfs_disk_key *key, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003112{
3113 int i;
Chris Mason5f39d392007-10-15 16:14:19 -04003114 struct extent_buffer *t;
David Sterba0e82bcf2018-03-05 16:16:54 +01003115 int ret;
Chris Mason5f39d392007-10-15 16:14:19 -04003116
Chris Mason234b63a2007-03-13 10:46:10 -04003117 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05003118 int tslot = path->slots[i];
David Sterba0e82bcf2018-03-05 16:16:54 +01003119
Chris Masoneb60cea2007-02-02 09:18:22 -05003120 if (!path->nodes[i])
Chris Masonbe0e5c02007-01-26 15:51:26 -05003121 break;
Chris Mason5f39d392007-10-15 16:14:19 -04003122 t = path->nodes[i];
David Sterba0e82bcf2018-03-05 16:16:54 +01003123 ret = tree_mod_log_insert_key(t, tslot, MOD_LOG_KEY_REPLACE,
3124 GFP_ATOMIC);
3125 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003126 btrfs_set_node_key(t, key, tslot);
Chris Masond6025572007-03-30 14:27:56 -04003127 btrfs_mark_buffer_dirty(path->nodes[i]);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003128 if (tslot != 0)
3129 break;
3130 }
3131}
3132
Chris Mason74123bd2007-02-02 11:05:29 -05003133/*
Zheng Yan31840ae2008-09-23 13:14:14 -04003134 * update item key.
3135 *
3136 * This function isn't completely safe. It's the caller's responsibility
3137 * that the new key won't break the order
3138 */
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003139void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3140 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08003141 const struct btrfs_key *new_key)
Zheng Yan31840ae2008-09-23 13:14:14 -04003142{
3143 struct btrfs_disk_key disk_key;
3144 struct extent_buffer *eb;
3145 int slot;
3146
3147 eb = path->nodes[0];
3148 slot = path->slots[0];
3149 if (slot > 0) {
3150 btrfs_item_key(eb, &disk_key, slot - 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003151 BUG_ON(comp_keys(&disk_key, new_key) >= 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04003152 }
3153 if (slot < btrfs_header_nritems(eb) - 1) {
3154 btrfs_item_key(eb, &disk_key, slot + 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003155 BUG_ON(comp_keys(&disk_key, new_key) <= 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04003156 }
3157
3158 btrfs_cpu_key_to_disk(&disk_key, new_key);
3159 btrfs_set_item_key(eb, &disk_key, slot);
3160 btrfs_mark_buffer_dirty(eb);
3161 if (slot == 0)
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003162 fixup_low_keys(fs_info, path, &disk_key, 1);
Zheng Yan31840ae2008-09-23 13:14:14 -04003163}
3164
3165/*
Chris Mason74123bd2007-02-02 11:05:29 -05003166 * try to push data from one node into the next node left in the
Chris Mason79f95c82007-03-01 15:16:26 -05003167 * tree.
Chris Masonaa5d6be2007-02-28 16:35:06 -05003168 *
3169 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
3170 * error, and > 0 if there was no room in the left hand block.
Chris Mason74123bd2007-02-02 11:05:29 -05003171 */
Chris Mason98ed5172008-01-03 10:01:48 -05003172static int push_node_left(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003173 struct btrfs_fs_info *fs_info,
3174 struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -04003175 struct extent_buffer *src, int empty)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003176{
Chris Masonbe0e5c02007-01-26 15:51:26 -05003177 int push_items = 0;
Chris Masonbb803952007-03-01 12:04:21 -05003178 int src_nritems;
3179 int dst_nritems;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003180 int ret = 0;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003181
Chris Mason5f39d392007-10-15 16:14:19 -04003182 src_nritems = btrfs_header_nritems(src);
3183 dst_nritems = btrfs_header_nritems(dst);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003184 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
Chris Mason7bb86312007-12-11 09:25:06 -05003185 WARN_ON(btrfs_header_generation(src) != trans->transid);
3186 WARN_ON(btrfs_header_generation(dst) != trans->transid);
Chris Mason54aa1f42007-06-22 14:16:25 -04003187
Chris Masonbce4eae2008-04-24 14:42:46 -04003188 if (!empty && src_nritems <= 8)
Chris Mason971a1f62008-04-24 10:54:32 -04003189 return 1;
3190
Chris Masond3977122009-01-05 21:25:51 -05003191 if (push_items <= 0)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003192 return 1;
3193
Chris Masonbce4eae2008-04-24 14:42:46 -04003194 if (empty) {
Chris Mason971a1f62008-04-24 10:54:32 -04003195 push_items = min(src_nritems, push_items);
Chris Masonbce4eae2008-04-24 14:42:46 -04003196 if (push_items < src_nritems) {
3197 /* leave at least 8 pointers in the node if
3198 * we aren't going to empty it
3199 */
3200 if (src_nritems - push_items < 8) {
3201 if (push_items <= 8)
3202 return 1;
3203 push_items -= 8;
3204 }
3205 }
3206 } else
3207 push_items = min(src_nritems - 8, push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05003208
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003209 ret = tree_mod_log_eb_copy(fs_info, dst, src, dst_nritems, 0,
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003210 push_items);
3211 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003212 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003213 return ret;
3214 }
Chris Mason5f39d392007-10-15 16:14:19 -04003215 copy_extent_buffer(dst, src,
3216 btrfs_node_key_ptr_offset(dst_nritems),
3217 btrfs_node_key_ptr_offset(0),
Chris Masond3977122009-01-05 21:25:51 -05003218 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason5f39d392007-10-15 16:14:19 -04003219
Chris Masonbb803952007-03-01 12:04:21 -05003220 if (push_items < src_nritems) {
Jan Schmidt57911b82012-10-19 09:22:03 +02003221 /*
David Sterbabf1d3422018-03-05 15:47:39 +01003222 * Don't call tree_mod_log_insert_move here, key removal was
3223 * already fully logged by tree_mod_log_eb_copy above.
Jan Schmidt57911b82012-10-19 09:22:03 +02003224 */
Chris Mason5f39d392007-10-15 16:14:19 -04003225 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
3226 btrfs_node_key_ptr_offset(push_items),
3227 (src_nritems - push_items) *
3228 sizeof(struct btrfs_key_ptr));
Chris Masonbb803952007-03-01 12:04:21 -05003229 }
Chris Mason5f39d392007-10-15 16:14:19 -04003230 btrfs_set_header_nritems(src, src_nritems - push_items);
3231 btrfs_set_header_nritems(dst, dst_nritems + push_items);
3232 btrfs_mark_buffer_dirty(src);
3233 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04003234
Chris Masonbb803952007-03-01 12:04:21 -05003235 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003236}
3237
Chris Mason97571fd2007-02-24 13:39:08 -05003238/*
Chris Mason79f95c82007-03-01 15:16:26 -05003239 * try to push data from one node into the next node right in the
3240 * tree.
3241 *
3242 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
3243 * error, and > 0 if there was no room in the right hand block.
3244 *
3245 * this will only push up to 1/2 the contents of the left node over
3246 */
Chris Mason5f39d392007-10-15 16:14:19 -04003247static int balance_node_right(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003248 struct btrfs_fs_info *fs_info,
Chris Mason5f39d392007-10-15 16:14:19 -04003249 struct extent_buffer *dst,
3250 struct extent_buffer *src)
Chris Mason79f95c82007-03-01 15:16:26 -05003251{
Chris Mason79f95c82007-03-01 15:16:26 -05003252 int push_items = 0;
3253 int max_push;
3254 int src_nritems;
3255 int dst_nritems;
3256 int ret = 0;
Chris Mason79f95c82007-03-01 15:16:26 -05003257
Chris Mason7bb86312007-12-11 09:25:06 -05003258 WARN_ON(btrfs_header_generation(src) != trans->transid);
3259 WARN_ON(btrfs_header_generation(dst) != trans->transid);
3260
Chris Mason5f39d392007-10-15 16:14:19 -04003261 src_nritems = btrfs_header_nritems(src);
3262 dst_nritems = btrfs_header_nritems(dst);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003263 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
Chris Masond3977122009-01-05 21:25:51 -05003264 if (push_items <= 0)
Chris Mason79f95c82007-03-01 15:16:26 -05003265 return 1;
Chris Masonbce4eae2008-04-24 14:42:46 -04003266
Chris Masond3977122009-01-05 21:25:51 -05003267 if (src_nritems < 4)
Chris Masonbce4eae2008-04-24 14:42:46 -04003268 return 1;
Chris Mason79f95c82007-03-01 15:16:26 -05003269
3270 max_push = src_nritems / 2 + 1;
3271 /* don't try to empty the node */
Chris Masond3977122009-01-05 21:25:51 -05003272 if (max_push >= src_nritems)
Chris Mason79f95c82007-03-01 15:16:26 -05003273 return 1;
Yan252c38f2007-08-29 09:11:44 -04003274
Chris Mason79f95c82007-03-01 15:16:26 -05003275 if (max_push < push_items)
3276 push_items = max_push;
3277
David Sterbabf1d3422018-03-05 15:47:39 +01003278 ret = tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
3279 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003280 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
3281 btrfs_node_key_ptr_offset(0),
3282 (dst_nritems) *
3283 sizeof(struct btrfs_key_ptr));
Chris Masond6025572007-03-30 14:27:56 -04003284
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003285 ret = tree_mod_log_eb_copy(fs_info, dst, src, 0,
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003286 src_nritems - push_items, push_items);
3287 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003288 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003289 return ret;
3290 }
Chris Mason5f39d392007-10-15 16:14:19 -04003291 copy_extent_buffer(dst, src,
3292 btrfs_node_key_ptr_offset(0),
3293 btrfs_node_key_ptr_offset(src_nritems - push_items),
Chris Masond3977122009-01-05 21:25:51 -05003294 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason79f95c82007-03-01 15:16:26 -05003295
Chris Mason5f39d392007-10-15 16:14:19 -04003296 btrfs_set_header_nritems(src, src_nritems - push_items);
3297 btrfs_set_header_nritems(dst, dst_nritems + push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05003298
Chris Mason5f39d392007-10-15 16:14:19 -04003299 btrfs_mark_buffer_dirty(src);
3300 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04003301
Chris Mason79f95c82007-03-01 15:16:26 -05003302 return ret;
3303}
3304
3305/*
Chris Mason97571fd2007-02-24 13:39:08 -05003306 * helper function to insert a new root level in the tree.
3307 * A new node is allocated, and a single item is inserted to
3308 * point to the existing root
Chris Masonaa5d6be2007-02-28 16:35:06 -05003309 *
3310 * returns zero on success or < 0 on failure.
Chris Mason97571fd2007-02-24 13:39:08 -05003311 */
Chris Masond3977122009-01-05 21:25:51 -05003312static noinline int insert_new_root(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04003313 struct btrfs_root *root,
Liu Bofdd99c72013-05-22 12:06:51 +00003314 struct btrfs_path *path, int level)
Chris Mason5c680ed2007-02-22 11:39:13 -05003315{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003316 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason7bb86312007-12-11 09:25:06 -05003317 u64 lower_gen;
Chris Mason5f39d392007-10-15 16:14:19 -04003318 struct extent_buffer *lower;
3319 struct extent_buffer *c;
Chris Mason925baed2008-06-25 16:01:30 -04003320 struct extent_buffer *old;
Chris Mason5f39d392007-10-15 16:14:19 -04003321 struct btrfs_disk_key lower_key;
Chris Mason5c680ed2007-02-22 11:39:13 -05003322
3323 BUG_ON(path->nodes[level]);
3324 BUG_ON(path->nodes[level-1] != root->node);
3325
Chris Mason7bb86312007-12-11 09:25:06 -05003326 lower = path->nodes[level-1];
3327 if (level == 1)
3328 btrfs_item_key(lower, &lower_key, 0);
3329 else
3330 btrfs_node_key(lower, &lower_key, 0);
3331
David Sterba4d75f8a2014-06-15 01:54:12 +02003332 c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3333 &lower_key, level, root->node->start, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003334 if (IS_ERR(c))
3335 return PTR_ERR(c);
Chris Mason925baed2008-06-25 16:01:30 -04003336
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003337 root_add_used(root, fs_info->nodesize);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003338
David Sterbab159fa22016-11-08 18:09:03 +01003339 memzero_extent_buffer(c, 0, sizeof(struct btrfs_header));
Chris Mason5f39d392007-10-15 16:14:19 -04003340 btrfs_set_header_nritems(c, 1);
3341 btrfs_set_header_level(c, level);
Chris Masondb945352007-10-15 16:15:53 -04003342 btrfs_set_header_bytenr(c, c->start);
Chris Mason5f39d392007-10-15 16:14:19 -04003343 btrfs_set_header_generation(c, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003344 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
Chris Mason5f39d392007-10-15 16:14:19 -04003345 btrfs_set_header_owner(c, root->root_key.objectid);
Chris Masond5719762007-03-23 10:01:08 -04003346
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003347 write_extent_buffer_fsid(c, fs_info->fsid);
3348 write_extent_buffer_chunk_tree_uuid(c, fs_info->chunk_tree_uuid);
Chris Masone17cade2008-04-15 15:41:47 -04003349
Chris Mason5f39d392007-10-15 16:14:19 -04003350 btrfs_set_node_key(c, &lower_key, 0);
Chris Masondb945352007-10-15 16:15:53 -04003351 btrfs_set_node_blockptr(c, 0, lower->start);
Chris Mason7bb86312007-12-11 09:25:06 -05003352 lower_gen = btrfs_header_generation(lower);
Zheng Yan31840ae2008-09-23 13:14:14 -04003353 WARN_ON(lower_gen != trans->transid);
Chris Mason7bb86312007-12-11 09:25:06 -05003354
3355 btrfs_set_node_ptr_generation(c, 0, lower_gen);
Chris Mason5f39d392007-10-15 16:14:19 -04003356
3357 btrfs_mark_buffer_dirty(c);
Chris Masond5719762007-03-23 10:01:08 -04003358
Chris Mason925baed2008-06-25 16:01:30 -04003359 old = root->node;
Liu Bofdd99c72013-05-22 12:06:51 +00003360 tree_mod_log_set_root_pointer(root, c, 0);
Chris Mason240f62c2011-03-23 14:54:42 -04003361 rcu_assign_pointer(root->node, c);
Chris Mason925baed2008-06-25 16:01:30 -04003362
3363 /* the super has an extra ref to root->node */
3364 free_extent_buffer(old);
3365
Chris Mason0b86a832008-03-24 15:01:56 -04003366 add_root_to_dirty_list(root);
Chris Mason5f39d392007-10-15 16:14:19 -04003367 extent_buffer_get(c);
3368 path->nodes[level] = c;
chandan95449a12015-01-15 12:22:03 +05303369 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Chris Mason5c680ed2007-02-22 11:39:13 -05003370 path->slots[level] = 0;
3371 return 0;
3372}
3373
Chris Mason74123bd2007-02-02 11:05:29 -05003374/*
3375 * worker function to insert a single pointer in a node.
3376 * the node should have enough room for the pointer already
Chris Mason97571fd2007-02-24 13:39:08 -05003377 *
Chris Mason74123bd2007-02-02 11:05:29 -05003378 * slot and level indicate where you want the key to go, and
3379 * blocknr is the block the key points to.
3380 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003381static void insert_ptr(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003382 struct btrfs_fs_info *fs_info, struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01003383 struct btrfs_disk_key *key, u64 bytenr,
Jan Schmidtc3e06962012-06-21 11:01:06 +02003384 int slot, int level)
Chris Mason74123bd2007-02-02 11:05:29 -05003385{
Chris Mason5f39d392007-10-15 16:14:19 -04003386 struct extent_buffer *lower;
Chris Mason74123bd2007-02-02 11:05:29 -05003387 int nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003388 int ret;
Chris Mason5c680ed2007-02-22 11:39:13 -05003389
3390 BUG_ON(!path->nodes[level]);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003391 btrfs_assert_tree_locked(path->nodes[level]);
Chris Mason5f39d392007-10-15 16:14:19 -04003392 lower = path->nodes[level];
3393 nritems = btrfs_header_nritems(lower);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04003394 BUG_ON(slot > nritems);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003395 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(fs_info));
Chris Mason74123bd2007-02-02 11:05:29 -05003396 if (slot != nritems) {
David Sterbabf1d3422018-03-05 15:47:39 +01003397 if (level) {
3398 ret = tree_mod_log_insert_move(lower, slot + 1, slot,
David Sterbaa446a972018-03-05 15:26:29 +01003399 nritems - slot);
David Sterbabf1d3422018-03-05 15:47:39 +01003400 BUG_ON(ret < 0);
3401 }
Chris Mason5f39d392007-10-15 16:14:19 -04003402 memmove_extent_buffer(lower,
3403 btrfs_node_key_ptr_offset(slot + 1),
3404 btrfs_node_key_ptr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04003405 (nritems - slot) * sizeof(struct btrfs_key_ptr));
Chris Mason74123bd2007-02-02 11:05:29 -05003406 }
Jan Schmidtc3e06962012-06-21 11:01:06 +02003407 if (level) {
David Sterbae09c2ef2018-03-05 15:09:03 +01003408 ret = tree_mod_log_insert_key(lower, slot, MOD_LOG_KEY_ADD,
3409 GFP_NOFS);
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003410 BUG_ON(ret < 0);
3411 }
Chris Mason5f39d392007-10-15 16:14:19 -04003412 btrfs_set_node_key(lower, key, slot);
Chris Masondb945352007-10-15 16:15:53 -04003413 btrfs_set_node_blockptr(lower, slot, bytenr);
Chris Mason74493f72007-12-11 09:25:06 -05003414 WARN_ON(trans->transid == 0);
3415 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04003416 btrfs_set_header_nritems(lower, nritems + 1);
3417 btrfs_mark_buffer_dirty(lower);
Chris Mason74123bd2007-02-02 11:05:29 -05003418}
3419
Chris Mason97571fd2007-02-24 13:39:08 -05003420/*
3421 * split the node at the specified level in path in two.
3422 * The path is corrected to point to the appropriate node after the split
3423 *
3424 * Before splitting this tries to make some room in the node by pushing
3425 * left and right, if either one works, it returns right away.
Chris Masonaa5d6be2007-02-28 16:35:06 -05003426 *
3427 * returns 0 on success and < 0 on failure
Chris Mason97571fd2007-02-24 13:39:08 -05003428 */
Chris Masone02119d2008-09-05 16:13:11 -04003429static noinline int split_node(struct btrfs_trans_handle *trans,
3430 struct btrfs_root *root,
3431 struct btrfs_path *path, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003432{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003433 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04003434 struct extent_buffer *c;
3435 struct extent_buffer *split;
3436 struct btrfs_disk_key disk_key;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003437 int mid;
Chris Mason5c680ed2007-02-22 11:39:13 -05003438 int ret;
Chris Mason7518a232007-03-12 12:01:18 -04003439 u32 c_nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003440
Chris Mason5f39d392007-10-15 16:14:19 -04003441 c = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05003442 WARN_ON(btrfs_header_generation(c) != trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04003443 if (c == root->node) {
Jan Schmidtd9abbf12013-03-20 13:49:48 +00003444 /*
Jan Schmidt90f8d622013-04-13 13:19:53 +00003445 * trying to split the root, lets make a new one
3446 *
Liu Bofdd99c72013-05-22 12:06:51 +00003447 * tree mod log: We don't log_removal old root in
Jan Schmidt90f8d622013-04-13 13:19:53 +00003448 * insert_new_root, because that root buffer will be kept as a
3449 * normal node. We are going to log removal of half of the
3450 * elements below with tree_mod_log_eb_copy. We're holding a
3451 * tree lock on the buffer, which is why we cannot race with
3452 * other tree_mod_log users.
Jan Schmidtd9abbf12013-03-20 13:49:48 +00003453 */
Liu Bofdd99c72013-05-22 12:06:51 +00003454 ret = insert_new_root(trans, root, path, level + 1);
Chris Mason5c680ed2007-02-22 11:39:13 -05003455 if (ret)
3456 return ret;
Chris Masonb3612422009-05-13 19:12:15 -04003457 } else {
Chris Masone66f7092007-04-20 13:16:02 -04003458 ret = push_nodes_for_insert(trans, root, path, level);
Chris Mason5f39d392007-10-15 16:14:19 -04003459 c = path->nodes[level];
3460 if (!ret && btrfs_header_nritems(c) <
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003461 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
Chris Masone66f7092007-04-20 13:16:02 -04003462 return 0;
Chris Mason54aa1f42007-06-22 14:16:25 -04003463 if (ret < 0)
3464 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003465 }
Chris Masone66f7092007-04-20 13:16:02 -04003466
Chris Mason5f39d392007-10-15 16:14:19 -04003467 c_nritems = btrfs_header_nritems(c);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003468 mid = (c_nritems + 1) / 2;
3469 btrfs_node_key(c, &disk_key, mid);
Chris Mason7bb86312007-12-11 09:25:06 -05003470
David Sterba4d75f8a2014-06-15 01:54:12 +02003471 split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3472 &disk_key, level, c->start, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003473 if (IS_ERR(split))
3474 return PTR_ERR(split);
Chris Mason54aa1f42007-06-22 14:16:25 -04003475
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003476 root_add_used(root, fs_info->nodesize);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003477
David Sterbab159fa22016-11-08 18:09:03 +01003478 memzero_extent_buffer(split, 0, sizeof(struct btrfs_header));
Chris Mason5f39d392007-10-15 16:14:19 -04003479 btrfs_set_header_level(split, btrfs_header_level(c));
Chris Masondb945352007-10-15 16:15:53 -04003480 btrfs_set_header_bytenr(split, split->start);
Chris Mason5f39d392007-10-15 16:14:19 -04003481 btrfs_set_header_generation(split, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003482 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
Chris Mason5f39d392007-10-15 16:14:19 -04003483 btrfs_set_header_owner(split, root->root_key.objectid);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003484 write_extent_buffer_fsid(split, fs_info->fsid);
3485 write_extent_buffer_chunk_tree_uuid(split, fs_info->chunk_tree_uuid);
Chris Mason5f39d392007-10-15 16:14:19 -04003486
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003487 ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003488 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003489 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003490 return ret;
3491 }
Chris Mason5f39d392007-10-15 16:14:19 -04003492 copy_extent_buffer(split, c,
3493 btrfs_node_key_ptr_offset(0),
3494 btrfs_node_key_ptr_offset(mid),
3495 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
3496 btrfs_set_header_nritems(split, c_nritems - mid);
3497 btrfs_set_header_nritems(c, mid);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003498 ret = 0;
3499
Chris Mason5f39d392007-10-15 16:14:19 -04003500 btrfs_mark_buffer_dirty(c);
3501 btrfs_mark_buffer_dirty(split);
3502
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003503 insert_ptr(trans, fs_info, path, &disk_key, split->start,
Jan Schmidtc3e06962012-06-21 11:01:06 +02003504 path->slots[level + 1] + 1, level + 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003505
Chris Mason5de08d72007-02-24 06:24:44 -05003506 if (path->slots[level] >= mid) {
Chris Mason5c680ed2007-02-22 11:39:13 -05003507 path->slots[level] -= mid;
Chris Mason925baed2008-06-25 16:01:30 -04003508 btrfs_tree_unlock(c);
Chris Mason5f39d392007-10-15 16:14:19 -04003509 free_extent_buffer(c);
3510 path->nodes[level] = split;
Chris Mason5c680ed2007-02-22 11:39:13 -05003511 path->slots[level + 1] += 1;
3512 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003513 btrfs_tree_unlock(split);
Chris Mason5f39d392007-10-15 16:14:19 -04003514 free_extent_buffer(split);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003515 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05003516 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003517}
3518
Chris Mason74123bd2007-02-02 11:05:29 -05003519/*
3520 * how many bytes are required to store the items in a leaf. start
3521 * and nr indicate which items in the leaf to check. This totals up the
3522 * space used both by the item structs and the item data
3523 */
Chris Mason5f39d392007-10-15 16:14:19 -04003524static int leaf_space_used(struct extent_buffer *l, int start, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003525{
Josef Bacik41be1f32012-10-15 13:43:18 -04003526 struct btrfs_item *start_item;
3527 struct btrfs_item *end_item;
3528 struct btrfs_map_token token;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003529 int data_len;
Chris Mason5f39d392007-10-15 16:14:19 -04003530 int nritems = btrfs_header_nritems(l);
Chris Masond4dbff92007-04-04 14:08:15 -04003531 int end = min(nritems, start + nr) - 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003532
3533 if (!nr)
3534 return 0;
Josef Bacik41be1f32012-10-15 13:43:18 -04003535 btrfs_init_map_token(&token);
Ross Kirkdd3cc162013-09-16 15:58:09 +01003536 start_item = btrfs_item_nr(start);
3537 end_item = btrfs_item_nr(end);
Josef Bacik41be1f32012-10-15 13:43:18 -04003538 data_len = btrfs_token_item_offset(l, start_item, &token) +
3539 btrfs_token_item_size(l, start_item, &token);
3540 data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04003541 data_len += sizeof(struct btrfs_item) * nr;
Chris Masond4dbff92007-04-04 14:08:15 -04003542 WARN_ON(data_len < 0);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003543 return data_len;
3544}
3545
Chris Mason74123bd2007-02-02 11:05:29 -05003546/*
Chris Masond4dbff92007-04-04 14:08:15 -04003547 * The space between the end of the leaf items and
3548 * the start of the leaf data. IOW, how much room
3549 * the leaf has left for both items and data
3550 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003551noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
Chris Masone02119d2008-09-05 16:13:11 -04003552 struct extent_buffer *leaf)
Chris Masond4dbff92007-04-04 14:08:15 -04003553{
Chris Mason5f39d392007-10-15 16:14:19 -04003554 int nritems = btrfs_header_nritems(leaf);
3555 int ret;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003556
3557 ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003558 if (ret < 0) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003559 btrfs_crit(fs_info,
3560 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
3561 ret,
3562 (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
3563 leaf_space_used(leaf, 0, nritems), nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003564 }
3565 return ret;
Chris Masond4dbff92007-04-04 14:08:15 -04003566}
3567
Chris Mason99d8f832010-07-07 10:51:48 -04003568/*
3569 * min slot controls the lowest index we're willing to push to the
3570 * right. We'll push up to and including min_slot, but no lower
3571 */
David Sterba1e47eef2017-02-10 19:13:06 +01003572static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
Chris Mason44871b12009-03-13 10:04:31 -04003573 struct btrfs_path *path,
3574 int data_size, int empty,
3575 struct extent_buffer *right,
Chris Mason99d8f832010-07-07 10:51:48 -04003576 int free_space, u32 left_nritems,
3577 u32 min_slot)
Chris Mason00ec4c52007-02-24 12:47:20 -05003578{
Chris Mason5f39d392007-10-15 16:14:19 -04003579 struct extent_buffer *left = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04003580 struct extent_buffer *upper = path->nodes[1];
Chris Masoncfed81a2012-03-03 07:40:03 -05003581 struct btrfs_map_token token;
Chris Mason5f39d392007-10-15 16:14:19 -04003582 struct btrfs_disk_key disk_key;
Chris Mason00ec4c52007-02-24 12:47:20 -05003583 int slot;
Chris Mason34a38212007-11-07 13:31:03 -05003584 u32 i;
Chris Mason00ec4c52007-02-24 12:47:20 -05003585 int push_space = 0;
3586 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04003587 struct btrfs_item *item;
Chris Mason34a38212007-11-07 13:31:03 -05003588 u32 nr;
Chris Mason7518a232007-03-12 12:01:18 -04003589 u32 right_nritems;
Chris Mason5f39d392007-10-15 16:14:19 -04003590 u32 data_end;
Chris Masondb945352007-10-15 16:15:53 -04003591 u32 this_item_size;
Chris Mason00ec4c52007-02-24 12:47:20 -05003592
Chris Masoncfed81a2012-03-03 07:40:03 -05003593 btrfs_init_map_token(&token);
3594
Chris Mason34a38212007-11-07 13:31:03 -05003595 if (empty)
3596 nr = 0;
3597 else
Chris Mason99d8f832010-07-07 10:51:48 -04003598 nr = max_t(u32, 1, min_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003599
Zheng Yan31840ae2008-09-23 13:14:14 -04003600 if (path->slots[0] >= left_nritems)
Yan Zheng87b29b22008-12-17 10:21:48 -05003601 push_space += data_size;
Zheng Yan31840ae2008-09-23 13:14:14 -04003602
Chris Mason44871b12009-03-13 10:04:31 -04003603 slot = path->slots[1];
Chris Mason34a38212007-11-07 13:31:03 -05003604 i = left_nritems - 1;
3605 while (i >= nr) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003606 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003607
Zheng Yan31840ae2008-09-23 13:14:14 -04003608 if (!empty && push_items > 0) {
3609 if (path->slots[0] > i)
3610 break;
3611 if (path->slots[0] == i) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003612 int space = btrfs_leaf_free_space(fs_info, left);
Zheng Yan31840ae2008-09-23 13:14:14 -04003613 if (space + push_space * 2 > free_space)
3614 break;
3615 }
3616 }
3617
Chris Mason00ec4c52007-02-24 12:47:20 -05003618 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05003619 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04003620
Chris Masondb945352007-10-15 16:15:53 -04003621 this_item_size = btrfs_item_size(left, item);
3622 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Mason00ec4c52007-02-24 12:47:20 -05003623 break;
Zheng Yan31840ae2008-09-23 13:14:14 -04003624
Chris Mason00ec4c52007-02-24 12:47:20 -05003625 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04003626 push_space += this_item_size + sizeof(*item);
Chris Mason34a38212007-11-07 13:31:03 -05003627 if (i == 0)
3628 break;
3629 i--;
Chris Masondb945352007-10-15 16:15:53 -04003630 }
Chris Mason5f39d392007-10-15 16:14:19 -04003631
Chris Mason925baed2008-06-25 16:01:30 -04003632 if (push_items == 0)
3633 goto out_unlock;
Chris Mason5f39d392007-10-15 16:14:19 -04003634
Julia Lawall6c1500f2012-11-03 20:30:18 +00003635 WARN_ON(!empty && push_items == left_nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003636
Chris Mason00ec4c52007-02-24 12:47:20 -05003637 /* push left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04003638 right_nritems = btrfs_header_nritems(right);
Chris Mason34a38212007-11-07 13:31:03 -05003639
Chris Mason5f39d392007-10-15 16:14:19 -04003640 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003641 push_space -= leaf_data_end(fs_info, left);
Chris Mason5f39d392007-10-15 16:14:19 -04003642
Chris Mason00ec4c52007-02-24 12:47:20 -05003643 /* make room in the right data area */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003644 data_end = leaf_data_end(fs_info, right);
Chris Mason5f39d392007-10-15 16:14:19 -04003645 memmove_extent_buffer(right,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003646 BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
3647 BTRFS_LEAF_DATA_OFFSET + data_end,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003648 BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04003649
Chris Mason00ec4c52007-02-24 12:47:20 -05003650 /* copy from the left data area */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003651 copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003652 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003653 BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, left),
Chris Masond6025572007-03-30 14:27:56 -04003654 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04003655
3656 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
3657 btrfs_item_nr_offset(0),
3658 right_nritems * sizeof(struct btrfs_item));
3659
Chris Mason00ec4c52007-02-24 12:47:20 -05003660 /* copy the items from left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04003661 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
3662 btrfs_item_nr_offset(left_nritems - push_items),
3663 push_items * sizeof(struct btrfs_item));
Chris Mason00ec4c52007-02-24 12:47:20 -05003664
3665 /* update the item pointers */
Chris Mason7518a232007-03-12 12:01:18 -04003666 right_nritems += push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04003667 btrfs_set_header_nritems(right, right_nritems);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003668 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
Chris Mason7518a232007-03-12 12:01:18 -04003669 for (i = 0; i < right_nritems; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003670 item = btrfs_item_nr(i);
Chris Masoncfed81a2012-03-03 07:40:03 -05003671 push_space -= btrfs_token_item_size(right, item, &token);
3672 btrfs_set_token_item_offset(right, item, push_space, &token);
Chris Masondb945352007-10-15 16:15:53 -04003673 }
3674
Chris Mason7518a232007-03-12 12:01:18 -04003675 left_nritems -= push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04003676 btrfs_set_header_nritems(left, left_nritems);
Chris Mason00ec4c52007-02-24 12:47:20 -05003677
Chris Mason34a38212007-11-07 13:31:03 -05003678 if (left_nritems)
3679 btrfs_mark_buffer_dirty(left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003680 else
David Sterba7c302b42017-02-10 18:47:57 +01003681 clean_tree_block(fs_info, left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003682
Chris Mason5f39d392007-10-15 16:14:19 -04003683 btrfs_mark_buffer_dirty(right);
Chris Masona429e512007-04-18 16:15:28 -04003684
Chris Mason5f39d392007-10-15 16:14:19 -04003685 btrfs_item_key(right, &disk_key, 0);
3686 btrfs_set_node_key(upper, &disk_key, slot + 1);
Chris Masond6025572007-03-30 14:27:56 -04003687 btrfs_mark_buffer_dirty(upper);
Chris Mason02217ed2007-03-02 16:08:05 -05003688
Chris Mason00ec4c52007-02-24 12:47:20 -05003689 /* then fixup the leaf pointer in the path */
Chris Mason7518a232007-03-12 12:01:18 -04003690 if (path->slots[0] >= left_nritems) {
3691 path->slots[0] -= left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04003692 if (btrfs_header_nritems(path->nodes[0]) == 0)
David Sterba7c302b42017-02-10 18:47:57 +01003693 clean_tree_block(fs_info, path->nodes[0]);
Chris Mason925baed2008-06-25 16:01:30 -04003694 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04003695 free_extent_buffer(path->nodes[0]);
3696 path->nodes[0] = right;
Chris Mason00ec4c52007-02-24 12:47:20 -05003697 path->slots[1] += 1;
3698 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003699 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04003700 free_extent_buffer(right);
Chris Mason00ec4c52007-02-24 12:47:20 -05003701 }
3702 return 0;
Chris Mason925baed2008-06-25 16:01:30 -04003703
3704out_unlock:
3705 btrfs_tree_unlock(right);
3706 free_extent_buffer(right);
3707 return 1;
Chris Mason00ec4c52007-02-24 12:47:20 -05003708}
Chris Mason925baed2008-06-25 16:01:30 -04003709
Chris Mason00ec4c52007-02-24 12:47:20 -05003710/*
Chris Mason44871b12009-03-13 10:04:31 -04003711 * push some data in the path leaf to the right, trying to free up at
3712 * least data_size bytes. returns zero if the push worked, nonzero otherwise
3713 *
3714 * returns 1 if the push failed because the other node didn't have enough
3715 * room, 0 if everything worked out and < 0 if there were major errors.
Chris Mason99d8f832010-07-07 10:51:48 -04003716 *
3717 * this will push starting from min_slot to the end of the leaf. It won't
3718 * push any slot lower than min_slot
Chris Mason44871b12009-03-13 10:04:31 -04003719 */
3720static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04003721 *root, struct btrfs_path *path,
3722 int min_data_size, int data_size,
3723 int empty, u32 min_slot)
Chris Mason44871b12009-03-13 10:04:31 -04003724{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003725 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason44871b12009-03-13 10:04:31 -04003726 struct extent_buffer *left = path->nodes[0];
3727 struct extent_buffer *right;
3728 struct extent_buffer *upper;
3729 int slot;
3730 int free_space;
3731 u32 left_nritems;
3732 int ret;
3733
3734 if (!path->nodes[1])
3735 return 1;
3736
3737 slot = path->slots[1];
3738 upper = path->nodes[1];
3739 if (slot >= btrfs_header_nritems(upper) - 1)
3740 return 1;
3741
3742 btrfs_assert_tree_locked(path->nodes[1]);
3743
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003744 right = read_node_slot(fs_info, upper, slot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07003745 /*
3746 * slot + 1 is not valid or we fail to read the right node,
3747 * no big deal, just return.
3748 */
3749 if (IS_ERR(right))
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00003750 return 1;
3751
Chris Mason44871b12009-03-13 10:04:31 -04003752 btrfs_tree_lock(right);
3753 btrfs_set_lock_blocking(right);
3754
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003755 free_space = btrfs_leaf_free_space(fs_info, right);
Chris Mason44871b12009-03-13 10:04:31 -04003756 if (free_space < data_size)
3757 goto out_unlock;
3758
3759 /* cow and double check */
3760 ret = btrfs_cow_block(trans, root, right, upper,
3761 slot + 1, &right);
3762 if (ret)
3763 goto out_unlock;
3764
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003765 free_space = btrfs_leaf_free_space(fs_info, right);
Chris Mason44871b12009-03-13 10:04:31 -04003766 if (free_space < data_size)
3767 goto out_unlock;
3768
3769 left_nritems = btrfs_header_nritems(left);
3770 if (left_nritems == 0)
3771 goto out_unlock;
3772
Filipe David Borba Manana2ef1fed2013-12-04 22:17:39 +00003773 if (path->slots[0] == left_nritems && !empty) {
3774 /* Key greater than all keys in the leaf, right neighbor has
3775 * enough room for it and we're not emptying our leaf to delete
3776 * it, therefore use right neighbor to insert the new item and
3777 * no need to touch/dirty our left leaft. */
3778 btrfs_tree_unlock(left);
3779 free_extent_buffer(left);
3780 path->nodes[0] = right;
3781 path->slots[0] = 0;
3782 path->slots[1]++;
3783 return 0;
3784 }
3785
David Sterba1e47eef2017-02-10 19:13:06 +01003786 return __push_leaf_right(fs_info, path, min_data_size, empty,
Chris Mason99d8f832010-07-07 10:51:48 -04003787 right, free_space, left_nritems, min_slot);
Chris Mason44871b12009-03-13 10:04:31 -04003788out_unlock:
3789 btrfs_tree_unlock(right);
3790 free_extent_buffer(right);
3791 return 1;
3792}
3793
3794/*
Chris Mason74123bd2007-02-02 11:05:29 -05003795 * push some data in the path leaf to the left, trying to free up at
3796 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04003797 *
3798 * max_slot can put a limit on how far into the leaf we'll push items. The
3799 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
3800 * items
Chris Mason74123bd2007-02-02 11:05:29 -05003801 */
David Sterba66cb7dd2017-02-10 19:14:36 +01003802static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
Chris Mason44871b12009-03-13 10:04:31 -04003803 struct btrfs_path *path, int data_size,
3804 int empty, struct extent_buffer *left,
Chris Mason99d8f832010-07-07 10:51:48 -04003805 int free_space, u32 right_nritems,
3806 u32 max_slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003807{
Chris Mason5f39d392007-10-15 16:14:19 -04003808 struct btrfs_disk_key disk_key;
3809 struct extent_buffer *right = path->nodes[0];
Chris Masonbe0e5c02007-01-26 15:51:26 -05003810 int i;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003811 int push_space = 0;
3812 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04003813 struct btrfs_item *item;
Chris Mason7518a232007-03-12 12:01:18 -04003814 u32 old_left_nritems;
Chris Mason34a38212007-11-07 13:31:03 -05003815 u32 nr;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003816 int ret = 0;
Chris Masondb945352007-10-15 16:15:53 -04003817 u32 this_item_size;
3818 u32 old_left_item_size;
Chris Masoncfed81a2012-03-03 07:40:03 -05003819 struct btrfs_map_token token;
3820
3821 btrfs_init_map_token(&token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003822
Chris Mason34a38212007-11-07 13:31:03 -05003823 if (empty)
Chris Mason99d8f832010-07-07 10:51:48 -04003824 nr = min(right_nritems, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003825 else
Chris Mason99d8f832010-07-07 10:51:48 -04003826 nr = min(right_nritems - 1, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003827
3828 for (i = 0; i < nr; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003829 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003830
Zheng Yan31840ae2008-09-23 13:14:14 -04003831 if (!empty && push_items > 0) {
3832 if (path->slots[0] < i)
3833 break;
3834 if (path->slots[0] == i) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003835 int space = btrfs_leaf_free_space(fs_info, right);
Zheng Yan31840ae2008-09-23 13:14:14 -04003836 if (space + push_space * 2 > free_space)
3837 break;
3838 }
3839 }
3840
Chris Masonbe0e5c02007-01-26 15:51:26 -05003841 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05003842 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04003843
3844 this_item_size = btrfs_item_size(right, item);
3845 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003846 break;
Chris Masondb945352007-10-15 16:15:53 -04003847
Chris Masonbe0e5c02007-01-26 15:51:26 -05003848 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04003849 push_space += this_item_size + sizeof(*item);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003850 }
Chris Masondb945352007-10-15 16:15:53 -04003851
Chris Masonbe0e5c02007-01-26 15:51:26 -05003852 if (push_items == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04003853 ret = 1;
3854 goto out;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003855 }
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05303856 WARN_ON(!empty && push_items == btrfs_header_nritems(right));
Chris Mason5f39d392007-10-15 16:14:19 -04003857
Chris Masonbe0e5c02007-01-26 15:51:26 -05003858 /* push data from right to left */
Chris Mason5f39d392007-10-15 16:14:19 -04003859 copy_extent_buffer(left, right,
3860 btrfs_item_nr_offset(btrfs_header_nritems(left)),
3861 btrfs_item_nr_offset(0),
3862 push_items * sizeof(struct btrfs_item));
3863
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003864 push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
Chris Masond3977122009-01-05 21:25:51 -05003865 btrfs_item_offset_nr(right, push_items - 1);
Chris Mason5f39d392007-10-15 16:14:19 -04003866
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003867 copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003868 leaf_data_end(fs_info, left) - push_space,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003869 BTRFS_LEAF_DATA_OFFSET +
Chris Mason5f39d392007-10-15 16:14:19 -04003870 btrfs_item_offset_nr(right, push_items - 1),
Chris Masond6025572007-03-30 14:27:56 -04003871 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04003872 old_left_nritems = btrfs_header_nritems(left);
Yan Zheng87b29b22008-12-17 10:21:48 -05003873 BUG_ON(old_left_nritems <= 0);
Chris Masoneb60cea2007-02-02 09:18:22 -05003874
Chris Masondb945352007-10-15 16:15:53 -04003875 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
Chris Mason0783fcf2007-03-12 20:12:07 -04003876 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003877 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003878
Ross Kirkdd3cc162013-09-16 15:58:09 +01003879 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003880
Chris Masoncfed81a2012-03-03 07:40:03 -05003881 ioff = btrfs_token_item_offset(left, item, &token);
3882 btrfs_set_token_item_offset(left, item,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003883 ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size),
Chris Masoncfed81a2012-03-03 07:40:03 -05003884 &token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003885 }
Chris Mason5f39d392007-10-15 16:14:19 -04003886 btrfs_set_header_nritems(left, old_left_nritems + push_items);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003887
3888 /* fixup right node */
Julia Lawall31b1a2b2012-11-03 10:58:34 +00003889 if (push_items > right_nritems)
3890 WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
Chris Masond3977122009-01-05 21:25:51 -05003891 right_nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003892
Chris Mason34a38212007-11-07 13:31:03 -05003893 if (push_items < right_nritems) {
3894 push_space = btrfs_item_offset_nr(right, push_items - 1) -
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003895 leaf_data_end(fs_info, right);
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003896 memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003897 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003898 BTRFS_LEAF_DATA_OFFSET +
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003899 leaf_data_end(fs_info, right), push_space);
Chris Mason34a38212007-11-07 13:31:03 -05003900
3901 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
Chris Mason5f39d392007-10-15 16:14:19 -04003902 btrfs_item_nr_offset(push_items),
3903 (btrfs_header_nritems(right) - push_items) *
3904 sizeof(struct btrfs_item));
Chris Mason34a38212007-11-07 13:31:03 -05003905 }
Yaneef1c492007-11-26 10:58:13 -05003906 right_nritems -= push_items;
3907 btrfs_set_header_nritems(right, right_nritems);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003908 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
Chris Mason5f39d392007-10-15 16:14:19 -04003909 for (i = 0; i < right_nritems; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003910 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003911
Chris Masoncfed81a2012-03-03 07:40:03 -05003912 push_space = push_space - btrfs_token_item_size(right,
3913 item, &token);
3914 btrfs_set_token_item_offset(right, item, push_space, &token);
Chris Masondb945352007-10-15 16:15:53 -04003915 }
Chris Masoneb60cea2007-02-02 09:18:22 -05003916
Chris Mason5f39d392007-10-15 16:14:19 -04003917 btrfs_mark_buffer_dirty(left);
Chris Mason34a38212007-11-07 13:31:03 -05003918 if (right_nritems)
3919 btrfs_mark_buffer_dirty(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003920 else
David Sterba7c302b42017-02-10 18:47:57 +01003921 clean_tree_block(fs_info, right);
Chris Mason098f59c2007-05-11 11:33:21 -04003922
Chris Mason5f39d392007-10-15 16:14:19 -04003923 btrfs_item_key(right, &disk_key, 0);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003924 fixup_low_keys(fs_info, path, &disk_key, 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003925
3926 /* then fixup the leaf pointer in the path */
3927 if (path->slots[0] < push_items) {
3928 path->slots[0] += old_left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04003929 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04003930 free_extent_buffer(path->nodes[0]);
3931 path->nodes[0] = left;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003932 path->slots[1] -= 1;
3933 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003934 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04003935 free_extent_buffer(left);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003936 path->slots[0] -= push_items;
3937 }
Chris Masoneb60cea2007-02-02 09:18:22 -05003938 BUG_ON(path->slots[0] < 0);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003939 return ret;
Chris Mason925baed2008-06-25 16:01:30 -04003940out:
3941 btrfs_tree_unlock(left);
3942 free_extent_buffer(left);
3943 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003944}
3945
Chris Mason74123bd2007-02-02 11:05:29 -05003946/*
Chris Mason44871b12009-03-13 10:04:31 -04003947 * push some data in the path leaf to the left, trying to free up at
3948 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04003949 *
3950 * max_slot can put a limit on how far into the leaf we'll push items. The
3951 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
3952 * items
Chris Mason44871b12009-03-13 10:04:31 -04003953 */
3954static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04003955 *root, struct btrfs_path *path, int min_data_size,
3956 int data_size, int empty, u32 max_slot)
Chris Mason44871b12009-03-13 10:04:31 -04003957{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003958 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason44871b12009-03-13 10:04:31 -04003959 struct extent_buffer *right = path->nodes[0];
3960 struct extent_buffer *left;
3961 int slot;
3962 int free_space;
3963 u32 right_nritems;
3964 int ret = 0;
3965
3966 slot = path->slots[1];
3967 if (slot == 0)
3968 return 1;
3969 if (!path->nodes[1])
3970 return 1;
3971
3972 right_nritems = btrfs_header_nritems(right);
3973 if (right_nritems == 0)
3974 return 1;
3975
3976 btrfs_assert_tree_locked(path->nodes[1]);
3977
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003978 left = read_node_slot(fs_info, path->nodes[1], slot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07003979 /*
3980 * slot - 1 is not valid or we fail to read the left node,
3981 * no big deal, just return.
3982 */
3983 if (IS_ERR(left))
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00003984 return 1;
3985
Chris Mason44871b12009-03-13 10:04:31 -04003986 btrfs_tree_lock(left);
3987 btrfs_set_lock_blocking(left);
3988
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003989 free_space = btrfs_leaf_free_space(fs_info, left);
Chris Mason44871b12009-03-13 10:04:31 -04003990 if (free_space < data_size) {
3991 ret = 1;
3992 goto out;
3993 }
3994
3995 /* cow and double check */
3996 ret = btrfs_cow_block(trans, root, left,
3997 path->nodes[1], slot - 1, &left);
3998 if (ret) {
3999 /* we hit -ENOSPC, but it isn't fatal here */
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004000 if (ret == -ENOSPC)
4001 ret = 1;
Chris Mason44871b12009-03-13 10:04:31 -04004002 goto out;
4003 }
4004
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004005 free_space = btrfs_leaf_free_space(fs_info, left);
Chris Mason44871b12009-03-13 10:04:31 -04004006 if (free_space < data_size) {
4007 ret = 1;
4008 goto out;
4009 }
4010
David Sterba66cb7dd2017-02-10 19:14:36 +01004011 return __push_leaf_left(fs_info, path, min_data_size,
Chris Mason99d8f832010-07-07 10:51:48 -04004012 empty, left, free_space, right_nritems,
4013 max_slot);
Chris Mason44871b12009-03-13 10:04:31 -04004014out:
4015 btrfs_tree_unlock(left);
4016 free_extent_buffer(left);
4017 return ret;
4018}
4019
4020/*
Chris Mason74123bd2007-02-02 11:05:29 -05004021 * split the path's leaf in two, making sure there is at least data_size
4022 * available for the resulting leaf level of the path.
4023 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01004024static noinline void copy_for_split(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004025 struct btrfs_fs_info *fs_info,
Jeff Mahoney143bede2012-03-01 14:56:26 +01004026 struct btrfs_path *path,
4027 struct extent_buffer *l,
4028 struct extent_buffer *right,
4029 int slot, int mid, int nritems)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004030{
Chris Masonbe0e5c02007-01-26 15:51:26 -05004031 int data_copy_size;
4032 int rt_data_off;
4033 int i;
Chris Masond4dbff92007-04-04 14:08:15 -04004034 struct btrfs_disk_key disk_key;
Chris Masoncfed81a2012-03-03 07:40:03 -05004035 struct btrfs_map_token token;
4036
4037 btrfs_init_map_token(&token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004038
Chris Mason5f39d392007-10-15 16:14:19 -04004039 nritems = nritems - mid;
4040 btrfs_set_header_nritems(right, nritems);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004041 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(fs_info, l);
Chris Mason5f39d392007-10-15 16:14:19 -04004042
4043 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
4044 btrfs_item_nr_offset(mid),
4045 nritems * sizeof(struct btrfs_item));
4046
4047 copy_extent_buffer(right, l,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004048 BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
4049 data_copy_size, BTRFS_LEAF_DATA_OFFSET +
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004050 leaf_data_end(fs_info, l), data_copy_size);
Chris Mason74123bd2007-02-02 11:05:29 -05004051
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004052 rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
Chris Mason5f39d392007-10-15 16:14:19 -04004053
4054 for (i = 0; i < nritems; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01004055 struct btrfs_item *item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004056 u32 ioff;
4057
Chris Masoncfed81a2012-03-03 07:40:03 -05004058 ioff = btrfs_token_item_offset(right, item, &token);
4059 btrfs_set_token_item_offset(right, item,
4060 ioff + rt_data_off, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04004061 }
Chris Mason74123bd2007-02-02 11:05:29 -05004062
Chris Mason5f39d392007-10-15 16:14:19 -04004063 btrfs_set_header_nritems(l, mid);
Chris Mason5f39d392007-10-15 16:14:19 -04004064 btrfs_item_key(right, &disk_key, 0);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004065 insert_ptr(trans, fs_info, path, &disk_key, right->start,
Jan Schmidtc3e06962012-06-21 11:01:06 +02004066 path->slots[1] + 1, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04004067
4068 btrfs_mark_buffer_dirty(right);
4069 btrfs_mark_buffer_dirty(l);
Chris Masoneb60cea2007-02-02 09:18:22 -05004070 BUG_ON(path->slots[0] != slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004071
Chris Masonbe0e5c02007-01-26 15:51:26 -05004072 if (mid <= slot) {
Chris Mason925baed2008-06-25 16:01:30 -04004073 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04004074 free_extent_buffer(path->nodes[0]);
4075 path->nodes[0] = right;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004076 path->slots[0] -= mid;
4077 path->slots[1] += 1;
Chris Mason925baed2008-06-25 16:01:30 -04004078 } else {
4079 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04004080 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04004081 }
Chris Mason5f39d392007-10-15 16:14:19 -04004082
Chris Masoneb60cea2007-02-02 09:18:22 -05004083 BUG_ON(path->slots[0] < 0);
Chris Mason44871b12009-03-13 10:04:31 -04004084}
4085
4086/*
Chris Mason99d8f832010-07-07 10:51:48 -04004087 * double splits happen when we need to insert a big item in the middle
4088 * of a leaf. A double split can leave us with 3 mostly empty leaves:
4089 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
4090 * A B C
4091 *
4092 * We avoid this by trying to push the items on either side of our target
4093 * into the adjacent leaves. If all goes well we can avoid the double split
4094 * completely.
4095 */
4096static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
4097 struct btrfs_root *root,
4098 struct btrfs_path *path,
4099 int data_size)
4100{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004101 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason99d8f832010-07-07 10:51:48 -04004102 int ret;
4103 int progress = 0;
4104 int slot;
4105 u32 nritems;
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004106 int space_needed = data_size;
Chris Mason99d8f832010-07-07 10:51:48 -04004107
4108 slot = path->slots[0];
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004109 if (slot < btrfs_header_nritems(path->nodes[0]))
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004110 space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]);
Chris Mason99d8f832010-07-07 10:51:48 -04004111
4112 /*
4113 * try to push all the items after our slot into the
4114 * right leaf
4115 */
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004116 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
Chris Mason99d8f832010-07-07 10:51:48 -04004117 if (ret < 0)
4118 return ret;
4119
4120 if (ret == 0)
4121 progress++;
4122
4123 nritems = btrfs_header_nritems(path->nodes[0]);
4124 /*
4125 * our goal is to get our slot at the start or end of a leaf. If
4126 * we've done so we're done
4127 */
4128 if (path->slots[0] == 0 || path->slots[0] == nritems)
4129 return 0;
4130
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004131 if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size)
Chris Mason99d8f832010-07-07 10:51:48 -04004132 return 0;
4133
4134 /* try to push all the items before our slot into the next leaf */
4135 slot = path->slots[0];
Filipe Manana263d3992017-02-17 18:43:57 +00004136 space_needed = data_size;
4137 if (slot > 0)
4138 space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]);
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004139 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
Chris Mason99d8f832010-07-07 10:51:48 -04004140 if (ret < 0)
4141 return ret;
4142
4143 if (ret == 0)
4144 progress++;
4145
4146 if (progress)
4147 return 0;
4148 return 1;
4149}
4150
4151/*
Chris Mason44871b12009-03-13 10:04:31 -04004152 * split the path's leaf in two, making sure there is at least data_size
4153 * available for the resulting leaf level of the path.
4154 *
4155 * returns 0 if all went well and < 0 on failure.
4156 */
4157static noinline int split_leaf(struct btrfs_trans_handle *trans,
4158 struct btrfs_root *root,
Omar Sandoval310712b2017-01-17 23:24:37 -08004159 const struct btrfs_key *ins_key,
Chris Mason44871b12009-03-13 10:04:31 -04004160 struct btrfs_path *path, int data_size,
4161 int extend)
4162{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004163 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04004164 struct extent_buffer *l;
4165 u32 nritems;
4166 int mid;
4167 int slot;
4168 struct extent_buffer *right;
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004169 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason44871b12009-03-13 10:04:31 -04004170 int ret = 0;
4171 int wret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004172 int split;
Chris Mason44871b12009-03-13 10:04:31 -04004173 int num_doubles = 0;
Chris Mason99d8f832010-07-07 10:51:48 -04004174 int tried_avoid_double = 0;
Chris Mason44871b12009-03-13 10:04:31 -04004175
Yan, Zhenga5719522009-09-24 09:17:31 -04004176 l = path->nodes[0];
4177 slot = path->slots[0];
4178 if (extend && data_size + btrfs_item_size_nr(l, slot) +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004179 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
Yan, Zhenga5719522009-09-24 09:17:31 -04004180 return -EOVERFLOW;
4181
Chris Mason44871b12009-03-13 10:04:31 -04004182 /* first try to make some room by pushing left and right */
Liu Bo33157e02013-05-22 12:07:06 +00004183 if (data_size && path->nodes[1]) {
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004184 int space_needed = data_size;
4185
4186 if (slot < btrfs_header_nritems(l))
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004187 space_needed -= btrfs_leaf_free_space(fs_info, l);
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004188
4189 wret = push_leaf_right(trans, root, path, space_needed,
4190 space_needed, 0, 0);
Chris Mason44871b12009-03-13 10:04:31 -04004191 if (wret < 0)
4192 return wret;
4193 if (wret) {
Filipe Manana263d3992017-02-17 18:43:57 +00004194 space_needed = data_size;
4195 if (slot > 0)
4196 space_needed -= btrfs_leaf_free_space(fs_info,
4197 l);
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004198 wret = push_leaf_left(trans, root, path, space_needed,
4199 space_needed, 0, (u32)-1);
Chris Mason44871b12009-03-13 10:04:31 -04004200 if (wret < 0)
4201 return wret;
4202 }
4203 l = path->nodes[0];
4204
4205 /* did the pushes work? */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004206 if (btrfs_leaf_free_space(fs_info, l) >= data_size)
Chris Mason44871b12009-03-13 10:04:31 -04004207 return 0;
4208 }
4209
4210 if (!path->nodes[1]) {
Liu Bofdd99c72013-05-22 12:06:51 +00004211 ret = insert_new_root(trans, root, path, 1);
Chris Mason44871b12009-03-13 10:04:31 -04004212 if (ret)
4213 return ret;
4214 }
4215again:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004216 split = 1;
Chris Mason44871b12009-03-13 10:04:31 -04004217 l = path->nodes[0];
4218 slot = path->slots[0];
4219 nritems = btrfs_header_nritems(l);
4220 mid = (nritems + 1) / 2;
4221
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004222 if (mid <= slot) {
4223 if (nritems == 1 ||
4224 leaf_space_used(l, mid, nritems - mid) + data_size >
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004225 BTRFS_LEAF_DATA_SIZE(fs_info)) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004226 if (slot >= nritems) {
4227 split = 0;
4228 } else {
4229 mid = slot;
4230 if (mid != nritems &&
4231 leaf_space_used(l, mid, nritems - mid) +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004232 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
Chris Mason99d8f832010-07-07 10:51:48 -04004233 if (data_size && !tried_avoid_double)
4234 goto push_for_double;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004235 split = 2;
4236 }
4237 }
4238 }
4239 } else {
4240 if (leaf_space_used(l, 0, mid) + data_size >
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004241 BTRFS_LEAF_DATA_SIZE(fs_info)) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004242 if (!extend && data_size && slot == 0) {
4243 split = 0;
4244 } else if ((extend || !data_size) && slot == 0) {
4245 mid = 1;
4246 } else {
4247 mid = slot;
4248 if (mid != nritems &&
4249 leaf_space_used(l, mid, nritems - mid) +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004250 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
Chris Mason99d8f832010-07-07 10:51:48 -04004251 if (data_size && !tried_avoid_double)
4252 goto push_for_double;
Dulshani Gunawardhana67871252013-10-31 10:33:04 +05304253 split = 2;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004254 }
4255 }
4256 }
4257 }
4258
4259 if (split == 0)
4260 btrfs_cpu_key_to_disk(&disk_key, ins_key);
4261 else
4262 btrfs_item_key(l, &disk_key, mid);
4263
David Sterba4d75f8a2014-06-15 01:54:12 +02004264 right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
4265 &disk_key, 0, l->start, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004266 if (IS_ERR(right))
Chris Mason44871b12009-03-13 10:04:31 -04004267 return PTR_ERR(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004268
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004269 root_add_used(root, fs_info->nodesize);
Chris Mason44871b12009-03-13 10:04:31 -04004270
David Sterbab159fa22016-11-08 18:09:03 +01004271 memzero_extent_buffer(right, 0, sizeof(struct btrfs_header));
Chris Mason44871b12009-03-13 10:04:31 -04004272 btrfs_set_header_bytenr(right, right->start);
4273 btrfs_set_header_generation(right, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004274 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
Chris Mason44871b12009-03-13 10:04:31 -04004275 btrfs_set_header_owner(right, root->root_key.objectid);
4276 btrfs_set_header_level(right, 0);
David Sterbad24ee972016-11-09 17:44:25 +01004277 write_extent_buffer_fsid(right, fs_info->fsid);
4278 write_extent_buffer_chunk_tree_uuid(right, fs_info->chunk_tree_uuid);
Chris Mason44871b12009-03-13 10:04:31 -04004279
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004280 if (split == 0) {
4281 if (mid <= slot) {
4282 btrfs_set_header_nritems(right, 0);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004283 insert_ptr(trans, fs_info, path, &disk_key,
4284 right->start, path->slots[1] + 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004285 btrfs_tree_unlock(path->nodes[0]);
4286 free_extent_buffer(path->nodes[0]);
4287 path->nodes[0] = right;
4288 path->slots[0] = 0;
4289 path->slots[1] += 1;
4290 } else {
4291 btrfs_set_header_nritems(right, 0);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004292 insert_ptr(trans, fs_info, path, &disk_key,
4293 right->start, path->slots[1], 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004294 btrfs_tree_unlock(path->nodes[0]);
4295 free_extent_buffer(path->nodes[0]);
4296 path->nodes[0] = right;
4297 path->slots[0] = 0;
Jeff Mahoney143bede2012-03-01 14:56:26 +01004298 if (path->slots[1] == 0)
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004299 fixup_low_keys(fs_info, path, &disk_key, 1);
Chris Mason44871b12009-03-13 10:04:31 -04004300 }
Liu Bo196e0242016-09-07 14:48:28 -07004301 /*
4302 * We create a new leaf 'right' for the required ins_len and
4303 * we'll do btrfs_mark_buffer_dirty() on this leaf after copying
4304 * the content of ins_len to 'right'.
4305 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004306 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04004307 }
4308
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004309 copy_for_split(trans, fs_info, path, l, right, slot, mid, nritems);
Chris Mason44871b12009-03-13 10:04:31 -04004310
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004311 if (split == 2) {
Chris Masoncc0c5532007-10-25 15:42:57 -04004312 BUG_ON(num_doubles != 0);
4313 num_doubles++;
4314 goto again;
Chris Mason3326d1b2007-10-15 16:18:25 -04004315 }
Chris Mason44871b12009-03-13 10:04:31 -04004316
Jeff Mahoney143bede2012-03-01 14:56:26 +01004317 return 0;
Chris Mason99d8f832010-07-07 10:51:48 -04004318
4319push_for_double:
4320 push_for_double_split(trans, root, path, data_size);
4321 tried_avoid_double = 1;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004322 if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size)
Chris Mason99d8f832010-07-07 10:51:48 -04004323 return 0;
4324 goto again;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004325}
4326
Yan, Zhengad48fd752009-11-12 09:33:58 +00004327static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
4328 struct btrfs_root *root,
4329 struct btrfs_path *path, int ins_len)
Chris Mason459931e2008-12-10 09:10:46 -05004330{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004331 struct btrfs_fs_info *fs_info = root->fs_info;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004332 struct btrfs_key key;
Chris Mason459931e2008-12-10 09:10:46 -05004333 struct extent_buffer *leaf;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004334 struct btrfs_file_extent_item *fi;
4335 u64 extent_len = 0;
4336 u32 item_size;
4337 int ret;
Chris Mason459931e2008-12-10 09:10:46 -05004338
4339 leaf = path->nodes[0];
Yan, Zhengad48fd752009-11-12 09:33:58 +00004340 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4341
4342 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
4343 key.type != BTRFS_EXTENT_CSUM_KEY);
4344
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004345 if (btrfs_leaf_free_space(fs_info, leaf) >= ins_len)
Yan, Zhengad48fd752009-11-12 09:33:58 +00004346 return 0;
Chris Mason459931e2008-12-10 09:10:46 -05004347
4348 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004349 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4350 fi = btrfs_item_ptr(leaf, path->slots[0],
4351 struct btrfs_file_extent_item);
4352 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
4353 }
David Sterbab3b4aa72011-04-21 01:20:15 +02004354 btrfs_release_path(path);
Chris Mason459931e2008-12-10 09:10:46 -05004355
Chris Mason459931e2008-12-10 09:10:46 -05004356 path->keep_locks = 1;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004357 path->search_for_split = 1;
4358 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Chris Mason459931e2008-12-10 09:10:46 -05004359 path->search_for_split = 0;
Filipe Mananaa8df6fe2015-01-20 12:40:53 +00004360 if (ret > 0)
4361 ret = -EAGAIN;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004362 if (ret < 0)
4363 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05004364
Yan, Zhengad48fd752009-11-12 09:33:58 +00004365 ret = -EAGAIN;
4366 leaf = path->nodes[0];
Filipe Mananaa8df6fe2015-01-20 12:40:53 +00004367 /* if our item isn't there, return now */
4368 if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
Yan, Zhengad48fd752009-11-12 09:33:58 +00004369 goto err;
4370
Chris Mason109f6ae2010-04-02 09:20:18 -04004371 /* the leaf has changed, it now has room. return now */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004372 if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= ins_len)
Chris Mason109f6ae2010-04-02 09:20:18 -04004373 goto err;
4374
Yan, Zhengad48fd752009-11-12 09:33:58 +00004375 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4376 fi = btrfs_item_ptr(leaf, path->slots[0],
4377 struct btrfs_file_extent_item);
4378 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
4379 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05004380 }
4381
Chris Masonb9473432009-03-13 11:00:37 -04004382 btrfs_set_path_blocking(path);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004383 ret = split_leaf(trans, root, &key, path, ins_len, 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004384 if (ret)
4385 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05004386
Yan, Zhengad48fd752009-11-12 09:33:58 +00004387 path->keep_locks = 0;
Chris Masonb9473432009-03-13 11:00:37 -04004388 btrfs_unlock_up_safe(path, 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004389 return 0;
4390err:
4391 path->keep_locks = 0;
4392 return ret;
4393}
4394
David Sterba4961e292017-02-10 18:49:53 +01004395static noinline int split_item(struct btrfs_fs_info *fs_info,
Yan, Zhengad48fd752009-11-12 09:33:58 +00004396 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004397 const struct btrfs_key *new_key,
Yan, Zhengad48fd752009-11-12 09:33:58 +00004398 unsigned long split_offset)
4399{
4400 struct extent_buffer *leaf;
4401 struct btrfs_item *item;
4402 struct btrfs_item *new_item;
4403 int slot;
4404 char *buf;
4405 u32 nritems;
4406 u32 item_size;
4407 u32 orig_offset;
4408 struct btrfs_disk_key disk_key;
4409
Chris Masonb9473432009-03-13 11:00:37 -04004410 leaf = path->nodes[0];
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004411 BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < sizeof(struct btrfs_item));
Chris Masonb9473432009-03-13 11:00:37 -04004412
Chris Masonb4ce94d2009-02-04 09:25:08 -05004413 btrfs_set_path_blocking(path);
4414
Ross Kirkdd3cc162013-09-16 15:58:09 +01004415 item = btrfs_item_nr(path->slots[0]);
Chris Mason459931e2008-12-10 09:10:46 -05004416 orig_offset = btrfs_item_offset(leaf, item);
4417 item_size = btrfs_item_size(leaf, item);
4418
Chris Mason459931e2008-12-10 09:10:46 -05004419 buf = kmalloc(item_size, GFP_NOFS);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004420 if (!buf)
4421 return -ENOMEM;
4422
Chris Mason459931e2008-12-10 09:10:46 -05004423 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
4424 path->slots[0]), item_size);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004425
Chris Mason459931e2008-12-10 09:10:46 -05004426 slot = path->slots[0] + 1;
Chris Mason459931e2008-12-10 09:10:46 -05004427 nritems = btrfs_header_nritems(leaf);
Chris Mason459931e2008-12-10 09:10:46 -05004428 if (slot != nritems) {
4429 /* shift the items */
4430 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
Yan, Zhengad48fd752009-11-12 09:33:58 +00004431 btrfs_item_nr_offset(slot),
4432 (nritems - slot) * sizeof(struct btrfs_item));
Chris Mason459931e2008-12-10 09:10:46 -05004433 }
4434
4435 btrfs_cpu_key_to_disk(&disk_key, new_key);
4436 btrfs_set_item_key(leaf, &disk_key, slot);
4437
Ross Kirkdd3cc162013-09-16 15:58:09 +01004438 new_item = btrfs_item_nr(slot);
Chris Mason459931e2008-12-10 09:10:46 -05004439
4440 btrfs_set_item_offset(leaf, new_item, orig_offset);
4441 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
4442
4443 btrfs_set_item_offset(leaf, item,
4444 orig_offset + item_size - split_offset);
4445 btrfs_set_item_size(leaf, item, split_offset);
4446
4447 btrfs_set_header_nritems(leaf, nritems + 1);
4448
4449 /* write the data for the start of the original item */
4450 write_extent_buffer(leaf, buf,
4451 btrfs_item_ptr_offset(leaf, path->slots[0]),
4452 split_offset);
4453
4454 /* write the data for the new item */
4455 write_extent_buffer(leaf, buf + split_offset,
4456 btrfs_item_ptr_offset(leaf, slot),
4457 item_size - split_offset);
4458 btrfs_mark_buffer_dirty(leaf);
4459
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004460 BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < 0);
Chris Mason459931e2008-12-10 09:10:46 -05004461 kfree(buf);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004462 return 0;
4463}
4464
4465/*
4466 * This function splits a single item into two items,
4467 * giving 'new_key' to the new item and splitting the
4468 * old one at split_offset (from the start of the item).
4469 *
4470 * The path may be released by this operation. After
4471 * the split, the path is pointing to the old item. The
4472 * new item is going to be in the same node as the old one.
4473 *
4474 * Note, the item being split must be smaller enough to live alone on
4475 * a tree block with room for one extra struct btrfs_item
4476 *
4477 * This allows us to split the item in place, keeping a lock on the
4478 * leaf the entire time.
4479 */
4480int btrfs_split_item(struct btrfs_trans_handle *trans,
4481 struct btrfs_root *root,
4482 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004483 const struct btrfs_key *new_key,
Yan, Zhengad48fd752009-11-12 09:33:58 +00004484 unsigned long split_offset)
4485{
4486 int ret;
4487 ret = setup_leaf_for_split(trans, root, path,
4488 sizeof(struct btrfs_item));
4489 if (ret)
4490 return ret;
4491
David Sterba4961e292017-02-10 18:49:53 +01004492 ret = split_item(root->fs_info, path, new_key, split_offset);
Chris Mason459931e2008-12-10 09:10:46 -05004493 return ret;
4494}
4495
4496/*
Yan, Zhengad48fd752009-11-12 09:33:58 +00004497 * This function duplicate a item, giving 'new_key' to the new item.
4498 * It guarantees both items live in the same tree leaf and the new item
4499 * is contiguous with the original item.
4500 *
4501 * This allows us to split file extent in place, keeping a lock on the
4502 * leaf the entire time.
4503 */
4504int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4505 struct btrfs_root *root,
4506 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004507 const struct btrfs_key *new_key)
Yan, Zhengad48fd752009-11-12 09:33:58 +00004508{
4509 struct extent_buffer *leaf;
4510 int ret;
4511 u32 item_size;
4512
4513 leaf = path->nodes[0];
4514 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4515 ret = setup_leaf_for_split(trans, root, path,
4516 item_size + sizeof(struct btrfs_item));
4517 if (ret)
4518 return ret;
4519
4520 path->slots[0]++;
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004521 setup_items_for_insert(root, path, new_key, &item_size,
Jeff Mahoney143bede2012-03-01 14:56:26 +01004522 item_size, item_size +
4523 sizeof(struct btrfs_item), 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004524 leaf = path->nodes[0];
4525 memcpy_extent_buffer(leaf,
4526 btrfs_item_ptr_offset(leaf, path->slots[0]),
4527 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
4528 item_size);
4529 return 0;
4530}
4531
4532/*
Chris Masond352ac62008-09-29 15:18:18 -04004533 * make the item pointed to by the path smaller. new_size indicates
4534 * how small to make it, and from_end tells us if we just chop bytes
4535 * off the end of the item or if we shift the item to chop bytes off
4536 * the front.
4537 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004538void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
4539 struct btrfs_path *path, u32 new_size, int from_end)
Chris Masonb18c6682007-04-17 13:26:50 -04004540{
Chris Masonb18c6682007-04-17 13:26:50 -04004541 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04004542 struct extent_buffer *leaf;
4543 struct btrfs_item *item;
Chris Masonb18c6682007-04-17 13:26:50 -04004544 u32 nritems;
4545 unsigned int data_end;
4546 unsigned int old_data_start;
4547 unsigned int old_size;
4548 unsigned int size_diff;
4549 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05004550 struct btrfs_map_token token;
4551
4552 btrfs_init_map_token(&token);
Chris Masonb18c6682007-04-17 13:26:50 -04004553
Chris Mason5f39d392007-10-15 16:14:19 -04004554 leaf = path->nodes[0];
Chris Mason179e29e2007-11-01 11:28:41 -04004555 slot = path->slots[0];
4556
4557 old_size = btrfs_item_size_nr(leaf, slot);
4558 if (old_size == new_size)
Jeff Mahoney143bede2012-03-01 14:56:26 +01004559 return;
Chris Masonb18c6682007-04-17 13:26:50 -04004560
Chris Mason5f39d392007-10-15 16:14:19 -04004561 nritems = btrfs_header_nritems(leaf);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004562 data_end = leaf_data_end(fs_info, leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004563
Chris Mason5f39d392007-10-15 16:14:19 -04004564 old_data_start = btrfs_item_offset_nr(leaf, slot);
Chris Mason179e29e2007-11-01 11:28:41 -04004565
Chris Masonb18c6682007-04-17 13:26:50 -04004566 size_diff = old_size - new_size;
4567
4568 BUG_ON(slot < 0);
4569 BUG_ON(slot >= nritems);
4570
4571 /*
4572 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4573 */
4574 /* first correct the data pointers */
4575 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004576 u32 ioff;
Ross Kirkdd3cc162013-09-16 15:58:09 +01004577 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004578
Chris Masoncfed81a2012-03-03 07:40:03 -05004579 ioff = btrfs_token_item_offset(leaf, item, &token);
4580 btrfs_set_token_item_offset(leaf, item,
4581 ioff + size_diff, &token);
Chris Masonb18c6682007-04-17 13:26:50 -04004582 }
Chris Masondb945352007-10-15 16:15:53 -04004583
Chris Masonb18c6682007-04-17 13:26:50 -04004584 /* shift the data */
Chris Mason179e29e2007-11-01 11:28:41 -04004585 if (from_end) {
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004586 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4587 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
Chris Mason179e29e2007-11-01 11:28:41 -04004588 data_end, old_data_start + new_size - data_end);
4589 } else {
4590 struct btrfs_disk_key disk_key;
4591 u64 offset;
4592
4593 btrfs_item_key(leaf, &disk_key, slot);
4594
4595 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
4596 unsigned long ptr;
4597 struct btrfs_file_extent_item *fi;
4598
4599 fi = btrfs_item_ptr(leaf, slot,
4600 struct btrfs_file_extent_item);
4601 fi = (struct btrfs_file_extent_item *)(
4602 (unsigned long)fi - size_diff);
4603
4604 if (btrfs_file_extent_type(leaf, fi) ==
4605 BTRFS_FILE_EXTENT_INLINE) {
4606 ptr = btrfs_item_ptr_offset(leaf, slot);
4607 memmove_extent_buffer(leaf, ptr,
Chris Masond3977122009-01-05 21:25:51 -05004608 (unsigned long)fi,
David Sterba7ec20af2014-07-24 17:34:58 +02004609 BTRFS_FILE_EXTENT_INLINE_DATA_START);
Chris Mason179e29e2007-11-01 11:28:41 -04004610 }
4611 }
4612
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004613 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4614 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
Chris Mason179e29e2007-11-01 11:28:41 -04004615 data_end, old_data_start - data_end);
4616
4617 offset = btrfs_disk_key_offset(&disk_key);
4618 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4619 btrfs_set_item_key(leaf, &disk_key, slot);
4620 if (slot == 0)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004621 fixup_low_keys(fs_info, path, &disk_key, 1);
Chris Mason179e29e2007-11-01 11:28:41 -04004622 }
Chris Mason5f39d392007-10-15 16:14:19 -04004623
Ross Kirkdd3cc162013-09-16 15:58:09 +01004624 item = btrfs_item_nr(slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004625 btrfs_set_item_size(leaf, item, new_size);
4626 btrfs_mark_buffer_dirty(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004627
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004628 if (btrfs_leaf_free_space(fs_info, leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02004629 btrfs_print_leaf(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004630 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004631 }
Chris Masonb18c6682007-04-17 13:26:50 -04004632}
4633
Chris Masond352ac62008-09-29 15:18:18 -04004634/*
Stefan Behrens8f69dbd2013-05-07 10:23:30 +00004635 * make the item pointed to by the path bigger, data_size is the added size.
Chris Masond352ac62008-09-29 15:18:18 -04004636 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004637void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01004638 u32 data_size)
Chris Mason6567e832007-04-16 09:22:45 -04004639{
Chris Mason6567e832007-04-16 09:22:45 -04004640 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04004641 struct extent_buffer *leaf;
4642 struct btrfs_item *item;
Chris Mason6567e832007-04-16 09:22:45 -04004643 u32 nritems;
4644 unsigned int data_end;
4645 unsigned int old_data;
4646 unsigned int old_size;
4647 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05004648 struct btrfs_map_token token;
4649
4650 btrfs_init_map_token(&token);
Chris Mason6567e832007-04-16 09:22:45 -04004651
Chris Mason5f39d392007-10-15 16:14:19 -04004652 leaf = path->nodes[0];
Chris Mason6567e832007-04-16 09:22:45 -04004653
Chris Mason5f39d392007-10-15 16:14:19 -04004654 nritems = btrfs_header_nritems(leaf);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004655 data_end = leaf_data_end(fs_info, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004656
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004657 if (btrfs_leaf_free_space(fs_info, leaf) < data_size) {
David Sterbaa4f78752017-06-29 18:37:49 +02004658 btrfs_print_leaf(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004659 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004660 }
Chris Mason6567e832007-04-16 09:22:45 -04004661 slot = path->slots[0];
Chris Mason5f39d392007-10-15 16:14:19 -04004662 old_data = btrfs_item_end_nr(leaf, slot);
Chris Mason6567e832007-04-16 09:22:45 -04004663
4664 BUG_ON(slot < 0);
Chris Mason3326d1b2007-10-15 16:18:25 -04004665 if (slot >= nritems) {
David Sterbaa4f78752017-06-29 18:37:49 +02004666 btrfs_print_leaf(leaf);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004667 btrfs_crit(fs_info, "slot %d too large, nritems %d",
4668 slot, nritems);
Chris Mason3326d1b2007-10-15 16:18:25 -04004669 BUG_ON(1);
4670 }
Chris Mason6567e832007-04-16 09:22:45 -04004671
4672 /*
4673 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4674 */
4675 /* first correct the data pointers */
4676 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004677 u32 ioff;
Ross Kirkdd3cc162013-09-16 15:58:09 +01004678 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004679
Chris Masoncfed81a2012-03-03 07:40:03 -05004680 ioff = btrfs_token_item_offset(leaf, item, &token);
4681 btrfs_set_token_item_offset(leaf, item,
4682 ioff - data_size, &token);
Chris Mason6567e832007-04-16 09:22:45 -04004683 }
Chris Mason5f39d392007-10-15 16:14:19 -04004684
Chris Mason6567e832007-04-16 09:22:45 -04004685 /* shift the data */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004686 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4687 data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
Chris Mason6567e832007-04-16 09:22:45 -04004688 data_end, old_data - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04004689
Chris Mason6567e832007-04-16 09:22:45 -04004690 data_end = old_data;
Chris Mason5f39d392007-10-15 16:14:19 -04004691 old_size = btrfs_item_size_nr(leaf, slot);
Ross Kirkdd3cc162013-09-16 15:58:09 +01004692 item = btrfs_item_nr(slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004693 btrfs_set_item_size(leaf, item, old_size + data_size);
4694 btrfs_mark_buffer_dirty(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004695
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004696 if (btrfs_leaf_free_space(fs_info, leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02004697 btrfs_print_leaf(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004698 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004699 }
Chris Mason6567e832007-04-16 09:22:45 -04004700}
4701
Chris Mason74123bd2007-02-02 11:05:29 -05004702/*
Chris Mason44871b12009-03-13 10:04:31 -04004703 * this is a helper for btrfs_insert_empty_items, the main goal here is
4704 * to save stack depth by doing the bulk of the work in a function
4705 * that doesn't call btrfs_search_slot
Chris Mason74123bd2007-02-02 11:05:29 -05004706 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004707void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004708 const struct btrfs_key *cpu_key, u32 *data_size,
Jeff Mahoney143bede2012-03-01 14:56:26 +01004709 u32 total_data, u32 total_size, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004710{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004711 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04004712 struct btrfs_item *item;
Chris Mason9c583092008-01-29 15:15:18 -05004713 int i;
Chris Mason7518a232007-03-12 12:01:18 -04004714 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004715 unsigned int data_end;
Chris Masone2fa7222007-03-12 16:22:34 -04004716 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04004717 struct extent_buffer *leaf;
4718 int slot;
Chris Masoncfed81a2012-03-03 07:40:03 -05004719 struct btrfs_map_token token;
4720
Filipe Manana24cdc842014-07-28 19:34:35 +01004721 if (path->slots[0] == 0) {
4722 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004723 fixup_low_keys(fs_info, path, &disk_key, 1);
Filipe Manana24cdc842014-07-28 19:34:35 +01004724 }
4725 btrfs_unlock_up_safe(path, 1);
4726
Chris Masoncfed81a2012-03-03 07:40:03 -05004727 btrfs_init_map_token(&token);
Chris Masone2fa7222007-03-12 16:22:34 -04004728
Chris Mason5f39d392007-10-15 16:14:19 -04004729 leaf = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04004730 slot = path->slots[0];
Chris Mason74123bd2007-02-02 11:05:29 -05004731
Chris Mason5f39d392007-10-15 16:14:19 -04004732 nritems = btrfs_header_nritems(leaf);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004733 data_end = leaf_data_end(fs_info, leaf);
Chris Masoneb60cea2007-02-02 09:18:22 -05004734
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004735 if (btrfs_leaf_free_space(fs_info, leaf) < total_size) {
David Sterbaa4f78752017-06-29 18:37:49 +02004736 btrfs_print_leaf(leaf);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004737 btrfs_crit(fs_info, "not enough freespace need %u have %d",
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004738 total_size, btrfs_leaf_free_space(fs_info, leaf));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004739 BUG();
Chris Masond4dbff92007-04-04 14:08:15 -04004740 }
Chris Mason5f39d392007-10-15 16:14:19 -04004741
Chris Masonbe0e5c02007-01-26 15:51:26 -05004742 if (slot != nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04004743 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004744
Chris Mason5f39d392007-10-15 16:14:19 -04004745 if (old_data < data_end) {
David Sterbaa4f78752017-06-29 18:37:49 +02004746 btrfs_print_leaf(leaf);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004747 btrfs_crit(fs_info, "slot %d old_data %d data_end %d",
Jeff Mahoney5d163e02016-09-20 10:05:00 -04004748 slot, old_data, data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04004749 BUG_ON(1);
4750 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004751 /*
4752 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4753 */
4754 /* first correct the data pointers */
Chris Mason0783fcf2007-03-12 20:12:07 -04004755 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004756 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04004757
Jeff Mahoney62e85572016-09-20 10:05:01 -04004758 item = btrfs_item_nr(i);
Chris Masoncfed81a2012-03-03 07:40:03 -05004759 ioff = btrfs_token_item_offset(leaf, item, &token);
4760 btrfs_set_token_item_offset(leaf, item,
4761 ioff - total_data, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04004762 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004763 /* shift the items */
Chris Mason9c583092008-01-29 15:15:18 -05004764 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
Chris Mason5f39d392007-10-15 16:14:19 -04004765 btrfs_item_nr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04004766 (nritems - slot) * sizeof(struct btrfs_item));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004767
4768 /* shift the data */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004769 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4770 data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
Chris Masond6025572007-03-30 14:27:56 -04004771 data_end, old_data - data_end);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004772 data_end = old_data;
4773 }
Chris Mason5f39d392007-10-15 16:14:19 -04004774
Chris Mason62e27492007-03-15 12:56:47 -04004775 /* setup the item for the new data */
Chris Mason9c583092008-01-29 15:15:18 -05004776 for (i = 0; i < nr; i++) {
4777 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
4778 btrfs_set_item_key(leaf, &disk_key, slot + i);
Ross Kirkdd3cc162013-09-16 15:58:09 +01004779 item = btrfs_item_nr(slot + i);
Chris Masoncfed81a2012-03-03 07:40:03 -05004780 btrfs_set_token_item_offset(leaf, item,
4781 data_end - data_size[i], &token);
Chris Mason9c583092008-01-29 15:15:18 -05004782 data_end -= data_size[i];
Chris Masoncfed81a2012-03-03 07:40:03 -05004783 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
Chris Mason9c583092008-01-29 15:15:18 -05004784 }
Chris Mason44871b12009-03-13 10:04:31 -04004785
Chris Mason9c583092008-01-29 15:15:18 -05004786 btrfs_set_header_nritems(leaf, nritems + nr);
Chris Masonb9473432009-03-13 11:00:37 -04004787 btrfs_mark_buffer_dirty(leaf);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004788
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004789 if (btrfs_leaf_free_space(fs_info, leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02004790 btrfs_print_leaf(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004791 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004792 }
Chris Mason44871b12009-03-13 10:04:31 -04004793}
4794
4795/*
4796 * Given a key and some data, insert items into the tree.
4797 * This does all the path init required, making room in the tree if needed.
4798 */
4799int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4800 struct btrfs_root *root,
4801 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004802 const struct btrfs_key *cpu_key, u32 *data_size,
Chris Mason44871b12009-03-13 10:04:31 -04004803 int nr)
4804{
Chris Mason44871b12009-03-13 10:04:31 -04004805 int ret = 0;
4806 int slot;
4807 int i;
4808 u32 total_size = 0;
4809 u32 total_data = 0;
4810
4811 for (i = 0; i < nr; i++)
4812 total_data += data_size[i];
4813
4814 total_size = total_data + (nr * sizeof(struct btrfs_item));
4815 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
4816 if (ret == 0)
4817 return -EEXIST;
4818 if (ret < 0)
Jeff Mahoney143bede2012-03-01 14:56:26 +01004819 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04004820
Chris Mason44871b12009-03-13 10:04:31 -04004821 slot = path->slots[0];
4822 BUG_ON(slot < 0);
4823
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004824 setup_items_for_insert(root, path, cpu_key, data_size,
Chris Mason44871b12009-03-13 10:04:31 -04004825 total_data, total_size, nr);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004826 return 0;
Chris Mason62e27492007-03-15 12:56:47 -04004827}
4828
4829/*
4830 * Given a key and some data, insert an item into the tree.
4831 * This does all the path init required, making room in the tree if needed.
4832 */
Omar Sandoval310712b2017-01-17 23:24:37 -08004833int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4834 const struct btrfs_key *cpu_key, void *data,
4835 u32 data_size)
Chris Mason62e27492007-03-15 12:56:47 -04004836{
4837 int ret = 0;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004838 struct btrfs_path *path;
Chris Mason5f39d392007-10-15 16:14:19 -04004839 struct extent_buffer *leaf;
4840 unsigned long ptr;
Chris Mason62e27492007-03-15 12:56:47 -04004841
Chris Mason2c90e5d2007-04-02 10:50:19 -04004842 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00004843 if (!path)
4844 return -ENOMEM;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004845 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
Chris Mason62e27492007-03-15 12:56:47 -04004846 if (!ret) {
Chris Mason5f39d392007-10-15 16:14:19 -04004847 leaf = path->nodes[0];
4848 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4849 write_extent_buffer(leaf, data, ptr, data_size);
4850 btrfs_mark_buffer_dirty(leaf);
Chris Mason62e27492007-03-15 12:56:47 -04004851 }
Chris Mason2c90e5d2007-04-02 10:50:19 -04004852 btrfs_free_path(path);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004853 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004854}
4855
Chris Mason74123bd2007-02-02 11:05:29 -05004856/*
Chris Mason5de08d72007-02-24 06:24:44 -05004857 * delete the pointer from a given node.
Chris Mason74123bd2007-02-02 11:05:29 -05004858 *
Chris Masond352ac62008-09-29 15:18:18 -04004859 * the tree should have been previously balanced so the deletion does not
4860 * empty a node.
Chris Mason74123bd2007-02-02 11:05:29 -05004861 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004862static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4863 int level, int slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004864{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004865 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04004866 struct extent_buffer *parent = path->nodes[level];
Chris Mason7518a232007-03-12 12:01:18 -04004867 u32 nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004868 int ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004869
Chris Mason5f39d392007-10-15 16:14:19 -04004870 nritems = btrfs_header_nritems(parent);
Chris Masond3977122009-01-05 21:25:51 -05004871 if (slot != nritems - 1) {
David Sterbabf1d3422018-03-05 15:47:39 +01004872 if (level) {
4873 ret = tree_mod_log_insert_move(parent, slot, slot + 1,
David Sterbaa446a972018-03-05 15:26:29 +01004874 nritems - slot - 1);
David Sterbabf1d3422018-03-05 15:47:39 +01004875 BUG_ON(ret < 0);
4876 }
Chris Mason5f39d392007-10-15 16:14:19 -04004877 memmove_extent_buffer(parent,
4878 btrfs_node_key_ptr_offset(slot),
4879 btrfs_node_key_ptr_offset(slot + 1),
Chris Masond6025572007-03-30 14:27:56 -04004880 sizeof(struct btrfs_key_ptr) *
4881 (nritems - slot - 1));
Chris Mason57ba86c2012-12-18 19:35:32 -05004882 } else if (level) {
David Sterbae09c2ef2018-03-05 15:09:03 +01004883 ret = tree_mod_log_insert_key(parent, slot, MOD_LOG_KEY_REMOVE,
4884 GFP_NOFS);
Chris Mason57ba86c2012-12-18 19:35:32 -05004885 BUG_ON(ret < 0);
Chris Masonbb803952007-03-01 12:04:21 -05004886 }
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004887
Chris Mason7518a232007-03-12 12:01:18 -04004888 nritems--;
Chris Mason5f39d392007-10-15 16:14:19 -04004889 btrfs_set_header_nritems(parent, nritems);
Chris Mason7518a232007-03-12 12:01:18 -04004890 if (nritems == 0 && parent == root->node) {
Chris Mason5f39d392007-10-15 16:14:19 -04004891 BUG_ON(btrfs_header_level(root->node) != 1);
Chris Masonbb803952007-03-01 12:04:21 -05004892 /* just turn the root into a leaf and break */
Chris Mason5f39d392007-10-15 16:14:19 -04004893 btrfs_set_header_level(root->node, 0);
Chris Masonbb803952007-03-01 12:04:21 -05004894 } else if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004895 struct btrfs_disk_key disk_key;
4896
4897 btrfs_node_key(parent, &disk_key, 0);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004898 fixup_low_keys(fs_info, path, &disk_key, level + 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004899 }
Chris Masond6025572007-03-30 14:27:56 -04004900 btrfs_mark_buffer_dirty(parent);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004901}
4902
Chris Mason74123bd2007-02-02 11:05:29 -05004903/*
Chris Mason323ac952008-10-01 19:05:46 -04004904 * a helper function to delete the leaf pointed to by path->slots[1] and
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004905 * path->nodes[1].
Chris Mason323ac952008-10-01 19:05:46 -04004906 *
4907 * This deletes the pointer in path->nodes[1] and frees the leaf
4908 * block extent. zero is returned if it all worked out, < 0 otherwise.
4909 *
4910 * The path must have already been setup for deleting the leaf, including
4911 * all the proper balancing. path->nodes[1] must be locked.
4912 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01004913static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4914 struct btrfs_root *root,
4915 struct btrfs_path *path,
4916 struct extent_buffer *leaf)
Chris Mason323ac952008-10-01 19:05:46 -04004917{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004918 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004919 del_ptr(root, path, 1, path->slots[1]);
Chris Mason323ac952008-10-01 19:05:46 -04004920
Chris Mason4d081c42009-02-04 09:31:28 -05004921 /*
4922 * btrfs_free_extent is expensive, we want to make sure we
4923 * aren't holding any locks when we call it
4924 */
4925 btrfs_unlock_up_safe(path, 0);
4926
Yan, Zhengf0486c62010-05-16 10:46:25 -04004927 root_sub_used(root, leaf->len);
4928
Josef Bacik3083ee22012-03-09 16:01:49 -05004929 extent_buffer_get(leaf);
Jan Schmidt5581a512012-05-16 17:04:52 +02004930 btrfs_free_tree_block(trans, root, leaf, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05004931 free_extent_buffer_stale(leaf);
Chris Mason323ac952008-10-01 19:05:46 -04004932}
4933/*
Chris Mason74123bd2007-02-02 11:05:29 -05004934 * delete the item at the leaf level in path. If that empties
4935 * the leaf, remove it from the tree
4936 */
Chris Mason85e21ba2008-01-29 15:11:36 -05004937int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4938 struct btrfs_path *path, int slot, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004939{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004940 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04004941 struct extent_buffer *leaf;
4942 struct btrfs_item *item;
Alexandru Moisece0eac22015-08-23 16:01:42 +00004943 u32 last_off;
4944 u32 dsize = 0;
Chris Masonaa5d6be2007-02-28 16:35:06 -05004945 int ret = 0;
4946 int wret;
Chris Mason85e21ba2008-01-29 15:11:36 -05004947 int i;
Chris Mason7518a232007-03-12 12:01:18 -04004948 u32 nritems;
Chris Masoncfed81a2012-03-03 07:40:03 -05004949 struct btrfs_map_token token;
4950
4951 btrfs_init_map_token(&token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004952
Chris Mason5f39d392007-10-15 16:14:19 -04004953 leaf = path->nodes[0];
Chris Mason85e21ba2008-01-29 15:11:36 -05004954 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
4955
4956 for (i = 0; i < nr; i++)
4957 dsize += btrfs_item_size_nr(leaf, slot + i);
4958
Chris Mason5f39d392007-10-15 16:14:19 -04004959 nritems = btrfs_header_nritems(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004960
Chris Mason85e21ba2008-01-29 15:11:36 -05004961 if (slot + nr != nritems) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004962 int data_end = leaf_data_end(fs_info, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04004963
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004964 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
Chris Masond6025572007-03-30 14:27:56 -04004965 data_end + dsize,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004966 BTRFS_LEAF_DATA_OFFSET + data_end,
Chris Mason85e21ba2008-01-29 15:11:36 -05004967 last_off - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04004968
Chris Mason85e21ba2008-01-29 15:11:36 -05004969 for (i = slot + nr; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004970 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04004971
Ross Kirkdd3cc162013-09-16 15:58:09 +01004972 item = btrfs_item_nr(i);
Chris Masoncfed81a2012-03-03 07:40:03 -05004973 ioff = btrfs_token_item_offset(leaf, item, &token);
4974 btrfs_set_token_item_offset(leaf, item,
4975 ioff + dsize, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04004976 }
Chris Masondb945352007-10-15 16:15:53 -04004977
Chris Mason5f39d392007-10-15 16:14:19 -04004978 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
Chris Mason85e21ba2008-01-29 15:11:36 -05004979 btrfs_item_nr_offset(slot + nr),
Chris Masond6025572007-03-30 14:27:56 -04004980 sizeof(struct btrfs_item) *
Chris Mason85e21ba2008-01-29 15:11:36 -05004981 (nritems - slot - nr));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004982 }
Chris Mason85e21ba2008-01-29 15:11:36 -05004983 btrfs_set_header_nritems(leaf, nritems - nr);
4984 nritems -= nr;
Chris Mason5f39d392007-10-15 16:14:19 -04004985
Chris Mason74123bd2007-02-02 11:05:29 -05004986 /* delete the leaf if we've emptied it */
Chris Mason7518a232007-03-12 12:01:18 -04004987 if (nritems == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004988 if (leaf == root->node) {
4989 btrfs_set_header_level(leaf, 0);
Chris Mason9a8dd152007-02-23 08:38:36 -05004990 } else {
Yan, Zhengf0486c62010-05-16 10:46:25 -04004991 btrfs_set_path_blocking(path);
David Sterba7c302b42017-02-10 18:47:57 +01004992 clean_tree_block(fs_info, leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004993 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason9a8dd152007-02-23 08:38:36 -05004994 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004995 } else {
Chris Mason7518a232007-03-12 12:01:18 -04004996 int used = leaf_space_used(leaf, 0, nritems);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004997 if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004998 struct btrfs_disk_key disk_key;
4999
5000 btrfs_item_key(leaf, &disk_key, 0);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005001 fixup_low_keys(fs_info, path, &disk_key, 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05005002 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05005003
Chris Mason74123bd2007-02-02 11:05:29 -05005004 /* delete the leaf if it is mostly empty */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005005 if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05005006 /* push_leaf_left fixes the path.
5007 * make sure the path still points to our leaf
5008 * for possible call to del_ptr below
5009 */
Chris Mason4920c9a2007-01-26 16:38:42 -05005010 slot = path->slots[1];
Chris Mason5f39d392007-10-15 16:14:19 -04005011 extent_buffer_get(leaf);
5012
Chris Masonb9473432009-03-13 11:00:37 -04005013 btrfs_set_path_blocking(path);
Chris Mason99d8f832010-07-07 10:51:48 -04005014 wret = push_leaf_left(trans, root, path, 1, 1,
5015 1, (u32)-1);
Chris Mason54aa1f42007-06-22 14:16:25 -04005016 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05005017 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04005018
5019 if (path->nodes[0] == leaf &&
5020 btrfs_header_nritems(leaf)) {
Chris Mason99d8f832010-07-07 10:51:48 -04005021 wret = push_leaf_right(trans, root, path, 1,
5022 1, 1, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04005023 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05005024 ret = wret;
5025 }
Chris Mason5f39d392007-10-15 16:14:19 -04005026
5027 if (btrfs_header_nritems(leaf) == 0) {
Chris Mason323ac952008-10-01 19:05:46 -04005028 path->slots[1] = slot;
Jeff Mahoney143bede2012-03-01 14:56:26 +01005029 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04005030 free_extent_buffer(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01005031 ret = 0;
Chris Mason5de08d72007-02-24 06:24:44 -05005032 } else {
Chris Mason925baed2008-06-25 16:01:30 -04005033 /* if we're still in the path, make sure
5034 * we're dirty. Otherwise, one of the
5035 * push_leaf functions must have already
5036 * dirtied this buffer
5037 */
5038 if (path->nodes[0] == leaf)
5039 btrfs_mark_buffer_dirty(leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04005040 free_extent_buffer(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005041 }
Chris Masond5719762007-03-23 10:01:08 -04005042 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04005043 btrfs_mark_buffer_dirty(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005044 }
5045 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05005046 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05005047}
5048
Chris Mason97571fd2007-02-24 13:39:08 -05005049/*
Chris Mason925baed2008-06-25 16:01:30 -04005050 * search the tree again to find a leaf with lesser keys
Chris Mason7bb86312007-12-11 09:25:06 -05005051 * returns 0 if it found something or 1 if there are no lesser leaves.
5052 * returns < 0 on io errors.
Chris Masond352ac62008-09-29 15:18:18 -04005053 *
5054 * This may release the path, and so you may lose any locks held at the
5055 * time you call it.
Chris Mason7bb86312007-12-11 09:25:06 -05005056 */
Josef Bacik16e75492013-10-22 12:18:51 -04005057int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Mason7bb86312007-12-11 09:25:06 -05005058{
Chris Mason925baed2008-06-25 16:01:30 -04005059 struct btrfs_key key;
5060 struct btrfs_disk_key found_key;
5061 int ret;
Chris Mason7bb86312007-12-11 09:25:06 -05005062
Chris Mason925baed2008-06-25 16:01:30 -04005063 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
Chris Mason7bb86312007-12-11 09:25:06 -05005064
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005065 if (key.offset > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005066 key.offset--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005067 } else if (key.type > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005068 key.type--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005069 key.offset = (u64)-1;
5070 } else if (key.objectid > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005071 key.objectid--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005072 key.type = (u8)-1;
5073 key.offset = (u64)-1;
5074 } else {
Chris Mason925baed2008-06-25 16:01:30 -04005075 return 1;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005076 }
Chris Mason7bb86312007-12-11 09:25:06 -05005077
David Sterbab3b4aa72011-04-21 01:20:15 +02005078 btrfs_release_path(path);
Chris Mason925baed2008-06-25 16:01:30 -04005079 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5080 if (ret < 0)
5081 return ret;
5082 btrfs_item_key(path->nodes[0], &found_key, 0);
5083 ret = comp_keys(&found_key, &key);
Filipe Manana337c6f62014-06-09 13:22:13 +01005084 /*
5085 * We might have had an item with the previous key in the tree right
5086 * before we released our path. And after we released our path, that
5087 * item might have been pushed to the first slot (0) of the leaf we
5088 * were holding due to a tree balance. Alternatively, an item with the
5089 * previous key can exist as the only element of a leaf (big fat item).
5090 * Therefore account for these 2 cases, so that our callers (like
5091 * btrfs_previous_item) don't miss an existing item with a key matching
5092 * the previous key we computed above.
5093 */
5094 if (ret <= 0)
Chris Mason925baed2008-06-25 16:01:30 -04005095 return 0;
5096 return 1;
Chris Mason7bb86312007-12-11 09:25:06 -05005097}
5098
Chris Mason3f157a22008-06-25 16:01:31 -04005099/*
5100 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that have a minimum transaction id.
5102 * This is used by the btree defrag code, and tree logging
Chris Mason3f157a22008-06-25 16:01:31 -04005103 *
5104 * This does not cow, but it does stuff the starting key it finds back
5105 * into min_key, so you can call btrfs_search_slot with cow=1 on the
5106 * key and get a writable path.
5107 *
Chris Mason3f157a22008-06-25 16:01:31 -04005108 * This honors path->lowest_level to prevent descent past a given level
5109 * of the tree.
5110 *
Chris Masond352ac62008-09-29 15:18:18 -04005111 * min_trans indicates the oldest transaction that you are interested
5112 * in walking through. Any nodes or leaves older than min_trans are
5113 * skipped over (without reading them).
5114 *
Chris Mason3f157a22008-06-25 16:01:31 -04005115 * returns zero if something useful was found, < 0 on error and 1 if there
5116 * was nothing in the tree that matched the search criteria.
5117 */
5118int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
Eric Sandeende78b512013-01-31 18:21:12 +00005119 struct btrfs_path *path,
Chris Mason3f157a22008-06-25 16:01:31 -04005120 u64 min_trans)
5121{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005122 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason3f157a22008-06-25 16:01:31 -04005123 struct extent_buffer *cur;
5124 struct btrfs_key found_key;
5125 int slot;
Yan96524802008-07-24 12:19:49 -04005126 int sret;
Chris Mason3f157a22008-06-25 16:01:31 -04005127 u32 nritems;
5128 int level;
5129 int ret = 1;
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005130 int keep_locks = path->keep_locks;
Chris Mason3f157a22008-06-25 16:01:31 -04005131
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005132 path->keep_locks = 1;
Chris Mason3f157a22008-06-25 16:01:31 -04005133again:
Chris Masonbd681512011-07-16 15:23:14 -04005134 cur = btrfs_read_lock_root_node(root);
Chris Mason3f157a22008-06-25 16:01:31 -04005135 level = btrfs_header_level(cur);
Chris Masone02119d2008-09-05 16:13:11 -04005136 WARN_ON(path->nodes[level]);
Chris Mason3f157a22008-06-25 16:01:31 -04005137 path->nodes[level] = cur;
Chris Masonbd681512011-07-16 15:23:14 -04005138 path->locks[level] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04005139
5140 if (btrfs_header_generation(cur) < min_trans) {
5141 ret = 1;
5142 goto out;
5143 }
Chris Masond3977122009-01-05 21:25:51 -05005144 while (1) {
Chris Mason3f157a22008-06-25 16:01:31 -04005145 nritems = btrfs_header_nritems(cur);
5146 level = btrfs_header_level(cur);
Nikolay Borisova74b35e2017-12-08 16:27:43 +02005147 sret = btrfs_bin_search(cur, min_key, level, &slot);
Chris Mason3f157a22008-06-25 16:01:31 -04005148
Chris Mason323ac952008-10-01 19:05:46 -04005149 /* at the lowest level, we're done, setup the path and exit */
5150 if (level == path->lowest_level) {
Chris Masone02119d2008-09-05 16:13:11 -04005151 if (slot >= nritems)
5152 goto find_next_key;
Chris Mason3f157a22008-06-25 16:01:31 -04005153 ret = 0;
5154 path->slots[level] = slot;
5155 btrfs_item_key_to_cpu(cur, &found_key, slot);
5156 goto out;
5157 }
Yan96524802008-07-24 12:19:49 -04005158 if (sret && slot > 0)
5159 slot--;
Chris Mason3f157a22008-06-25 16:01:31 -04005160 /*
Eric Sandeende78b512013-01-31 18:21:12 +00005161 * check this node pointer against the min_trans parameters.
5162 * If it is too old, old, skip to the next one.
Chris Mason3f157a22008-06-25 16:01:31 -04005163 */
Chris Masond3977122009-01-05 21:25:51 -05005164 while (slot < nritems) {
Chris Mason3f157a22008-06-25 16:01:31 -04005165 u64 gen;
Chris Masone02119d2008-09-05 16:13:11 -04005166
Chris Mason3f157a22008-06-25 16:01:31 -04005167 gen = btrfs_node_ptr_generation(cur, slot);
5168 if (gen < min_trans) {
5169 slot++;
5170 continue;
5171 }
Eric Sandeende78b512013-01-31 18:21:12 +00005172 break;
Chris Mason3f157a22008-06-25 16:01:31 -04005173 }
Chris Masone02119d2008-09-05 16:13:11 -04005174find_next_key:
Chris Mason3f157a22008-06-25 16:01:31 -04005175 /*
5176 * we didn't find a candidate key in this node, walk forward
5177 * and find another one
5178 */
5179 if (slot >= nritems) {
Chris Masone02119d2008-09-05 16:13:11 -04005180 path->slots[level] = slot;
Chris Masonb4ce94d2009-02-04 09:25:08 -05005181 btrfs_set_path_blocking(path);
Chris Masone02119d2008-09-05 16:13:11 -04005182 sret = btrfs_find_next_key(root, path, min_key, level,
Eric Sandeende78b512013-01-31 18:21:12 +00005183 min_trans);
Chris Masone02119d2008-09-05 16:13:11 -04005184 if (sret == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02005185 btrfs_release_path(path);
Chris Mason3f157a22008-06-25 16:01:31 -04005186 goto again;
5187 } else {
5188 goto out;
5189 }
5190 }
5191 /* save our key for returning back */
5192 btrfs_node_key_to_cpu(cur, &found_key, slot);
5193 path->slots[level] = slot;
5194 if (level == path->lowest_level) {
5195 ret = 0;
Chris Mason3f157a22008-06-25 16:01:31 -04005196 goto out;
5197 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05005198 btrfs_set_path_blocking(path);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005199 cur = read_node_slot(fs_info, cur, slot);
Liu Bofb770ae2016-07-05 12:10:14 -07005200 if (IS_ERR(cur)) {
5201 ret = PTR_ERR(cur);
5202 goto out;
5203 }
Chris Mason3f157a22008-06-25 16:01:31 -04005204
Chris Masonbd681512011-07-16 15:23:14 -04005205 btrfs_tree_read_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -05005206
Chris Masonbd681512011-07-16 15:23:14 -04005207 path->locks[level - 1] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04005208 path->nodes[level - 1] = cur;
Chris Masonf7c79f32012-03-19 15:54:38 -04005209 unlock_up(path, level, 1, 0, NULL);
Chris Masonbd681512011-07-16 15:23:14 -04005210 btrfs_clear_path_blocking(path, NULL, 0);
Chris Mason3f157a22008-06-25 16:01:31 -04005211 }
5212out:
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005213 path->keep_locks = keep_locks;
5214 if (ret == 0) {
5215 btrfs_unlock_up_safe(path, path->lowest_level + 1);
5216 btrfs_set_path_blocking(path);
Chris Mason3f157a22008-06-25 16:01:31 -04005217 memcpy(min_key, &found_key, sizeof(found_key));
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005218 }
Chris Mason3f157a22008-06-25 16:01:31 -04005219 return ret;
5220}
5221
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005222static int tree_move_down(struct btrfs_fs_info *fs_info,
Alexander Block70698302012-06-05 21:07:48 +02005223 struct btrfs_path *path,
David Sterbaab6a43e2017-02-10 19:24:53 +01005224 int *level)
Alexander Block70698302012-06-05 21:07:48 +02005225{
Liu Bofb770ae2016-07-05 12:10:14 -07005226 struct extent_buffer *eb;
5227
Chris Mason74dd17f2012-08-07 16:25:13 -04005228 BUG_ON(*level == 0);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005229 eb = read_node_slot(fs_info, path->nodes[*level], path->slots[*level]);
Liu Bofb770ae2016-07-05 12:10:14 -07005230 if (IS_ERR(eb))
5231 return PTR_ERR(eb);
5232
5233 path->nodes[*level - 1] = eb;
Alexander Block70698302012-06-05 21:07:48 +02005234 path->slots[*level - 1] = 0;
5235 (*level)--;
Liu Bofb770ae2016-07-05 12:10:14 -07005236 return 0;
Alexander Block70698302012-06-05 21:07:48 +02005237}
5238
David Sterbaf1e30262017-02-10 19:25:51 +01005239static int tree_move_next_or_upnext(struct btrfs_path *path,
Alexander Block70698302012-06-05 21:07:48 +02005240 int *level, int root_level)
5241{
5242 int ret = 0;
5243 int nritems;
5244 nritems = btrfs_header_nritems(path->nodes[*level]);
5245
5246 path->slots[*level]++;
5247
Chris Mason74dd17f2012-08-07 16:25:13 -04005248 while (path->slots[*level] >= nritems) {
Alexander Block70698302012-06-05 21:07:48 +02005249 if (*level == root_level)
5250 return -1;
5251
5252 /* move upnext */
5253 path->slots[*level] = 0;
5254 free_extent_buffer(path->nodes[*level]);
5255 path->nodes[*level] = NULL;
5256 (*level)++;
5257 path->slots[*level]++;
5258
5259 nritems = btrfs_header_nritems(path->nodes[*level]);
5260 ret = 1;
5261 }
5262 return ret;
5263}
5264
5265/*
5266 * Returns 1 if it had to move up and next. 0 is returned if it moved only next
5267 * or down.
5268 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005269static int tree_advance(struct btrfs_fs_info *fs_info,
Alexander Block70698302012-06-05 21:07:48 +02005270 struct btrfs_path *path,
5271 int *level, int root_level,
5272 int allow_down,
5273 struct btrfs_key *key)
5274{
5275 int ret;
5276
5277 if (*level == 0 || !allow_down) {
David Sterbaf1e30262017-02-10 19:25:51 +01005278 ret = tree_move_next_or_upnext(path, level, root_level);
Alexander Block70698302012-06-05 21:07:48 +02005279 } else {
David Sterbaab6a43e2017-02-10 19:24:53 +01005280 ret = tree_move_down(fs_info, path, level);
Alexander Block70698302012-06-05 21:07:48 +02005281 }
5282 if (ret >= 0) {
5283 if (*level == 0)
5284 btrfs_item_key_to_cpu(path->nodes[*level], key,
5285 path->slots[*level]);
5286 else
5287 btrfs_node_key_to_cpu(path->nodes[*level], key,
5288 path->slots[*level]);
5289 }
5290 return ret;
5291}
5292
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005293static int tree_compare_item(struct btrfs_path *left_path,
Alexander Block70698302012-06-05 21:07:48 +02005294 struct btrfs_path *right_path,
5295 char *tmp_buf)
5296{
5297 int cmp;
5298 int len1, len2;
5299 unsigned long off1, off2;
5300
5301 len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]);
5302 len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]);
5303 if (len1 != len2)
5304 return 1;
5305
5306 off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
5307 off2 = btrfs_item_ptr_offset(right_path->nodes[0],
5308 right_path->slots[0]);
5309
5310 read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
5311
5312 cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
5313 if (cmp)
5314 return 1;
5315 return 0;
5316}
5317
5318#define ADVANCE 1
5319#define ADVANCE_ONLY_NEXT -1
5320
5321/*
5322 * This function compares two trees and calls the provided callback for
5323 * every changed/new/deleted item it finds.
5324 * If shared tree blocks are encountered, whole subtrees are skipped, making
5325 * the compare pretty fast on snapshotted subvolumes.
5326 *
5327 * This currently works on commit roots only. As commit roots are read only,
5328 * we don't do any locking. The commit roots are protected with transactions.
5329 * Transactions are ended and rejoined when a commit is tried in between.
5330 *
5331 * This function checks for modifications done to the trees while comparing.
5332 * If it detects a change, it aborts immediately.
5333 */
5334int btrfs_compare_trees(struct btrfs_root *left_root,
5335 struct btrfs_root *right_root,
5336 btrfs_changed_cb_t changed_cb, void *ctx)
5337{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005338 struct btrfs_fs_info *fs_info = left_root->fs_info;
Alexander Block70698302012-06-05 21:07:48 +02005339 int ret;
5340 int cmp;
Alexander Block70698302012-06-05 21:07:48 +02005341 struct btrfs_path *left_path = NULL;
5342 struct btrfs_path *right_path = NULL;
5343 struct btrfs_key left_key;
5344 struct btrfs_key right_key;
5345 char *tmp_buf = NULL;
5346 int left_root_level;
5347 int right_root_level;
5348 int left_level;
5349 int right_level;
5350 int left_end_reached;
5351 int right_end_reached;
5352 int advance_left;
5353 int advance_right;
5354 u64 left_blockptr;
5355 u64 right_blockptr;
Filipe Manana6baa4292014-02-20 21:15:25 +00005356 u64 left_gen;
5357 u64 right_gen;
Alexander Block70698302012-06-05 21:07:48 +02005358
5359 left_path = btrfs_alloc_path();
5360 if (!left_path) {
5361 ret = -ENOMEM;
5362 goto out;
5363 }
5364 right_path = btrfs_alloc_path();
5365 if (!right_path) {
5366 ret = -ENOMEM;
5367 goto out;
5368 }
5369
Michal Hocko752ade62017-05-08 15:57:27 -07005370 tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
Alexander Block70698302012-06-05 21:07:48 +02005371 if (!tmp_buf) {
Michal Hocko752ade62017-05-08 15:57:27 -07005372 ret = -ENOMEM;
5373 goto out;
Alexander Block70698302012-06-05 21:07:48 +02005374 }
5375
5376 left_path->search_commit_root = 1;
5377 left_path->skip_locking = 1;
5378 right_path->search_commit_root = 1;
5379 right_path->skip_locking = 1;
5380
Alexander Block70698302012-06-05 21:07:48 +02005381 /*
5382 * Strategy: Go to the first items of both trees. Then do
5383 *
5384 * If both trees are at level 0
5385 * Compare keys of current items
5386 * If left < right treat left item as new, advance left tree
5387 * and repeat
5388 * If left > right treat right item as deleted, advance right tree
5389 * and repeat
5390 * If left == right do deep compare of items, treat as changed if
5391 * needed, advance both trees and repeat
5392 * If both trees are at the same level but not at level 0
5393 * Compare keys of current nodes/leafs
5394 * If left < right advance left tree and repeat
5395 * If left > right advance right tree and repeat
5396 * If left == right compare blockptrs of the next nodes/leafs
5397 * If they match advance both trees but stay at the same level
5398 * and repeat
5399 * If they don't match advance both trees while allowing to go
5400 * deeper and repeat
5401 * If tree levels are different
5402 * Advance the tree that needs it and repeat
5403 *
5404 * Advancing a tree means:
5405 * If we are at level 0, try to go to the next slot. If that's not
5406 * possible, go one level up and repeat. Stop when we found a level
5407 * where we could go to the next slot. We may at this point be on a
5408 * node or a leaf.
5409 *
5410 * If we are not at level 0 and not on shared tree blocks, go one
5411 * level deeper.
5412 *
5413 * If we are not at level 0 and on shared tree blocks, go one slot to
5414 * the right if possible or go up and right.
5415 */
5416
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005417 down_read(&fs_info->commit_root_sem);
Alexander Block70698302012-06-05 21:07:48 +02005418 left_level = btrfs_header_level(left_root->commit_root);
5419 left_root_level = left_level;
5420 left_path->nodes[left_level] = left_root->commit_root;
5421 extent_buffer_get(left_path->nodes[left_level]);
5422
5423 right_level = btrfs_header_level(right_root->commit_root);
5424 right_root_level = right_level;
5425 right_path->nodes[right_level] = right_root->commit_root;
5426 extent_buffer_get(right_path->nodes[right_level]);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005427 up_read(&fs_info->commit_root_sem);
Alexander Block70698302012-06-05 21:07:48 +02005428
5429 if (left_level == 0)
5430 btrfs_item_key_to_cpu(left_path->nodes[left_level],
5431 &left_key, left_path->slots[left_level]);
5432 else
5433 btrfs_node_key_to_cpu(left_path->nodes[left_level],
5434 &left_key, left_path->slots[left_level]);
5435 if (right_level == 0)
5436 btrfs_item_key_to_cpu(right_path->nodes[right_level],
5437 &right_key, right_path->slots[right_level]);
5438 else
5439 btrfs_node_key_to_cpu(right_path->nodes[right_level],
5440 &right_key, right_path->slots[right_level]);
5441
5442 left_end_reached = right_end_reached = 0;
5443 advance_left = advance_right = 0;
5444
5445 while (1) {
Alexander Block70698302012-06-05 21:07:48 +02005446 if (advance_left && !left_end_reached) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005447 ret = tree_advance(fs_info, left_path, &left_level,
Alexander Block70698302012-06-05 21:07:48 +02005448 left_root_level,
5449 advance_left != ADVANCE_ONLY_NEXT,
5450 &left_key);
Liu Bofb770ae2016-07-05 12:10:14 -07005451 if (ret == -1)
Alexander Block70698302012-06-05 21:07:48 +02005452 left_end_reached = ADVANCE;
Liu Bofb770ae2016-07-05 12:10:14 -07005453 else if (ret < 0)
5454 goto out;
Alexander Block70698302012-06-05 21:07:48 +02005455 advance_left = 0;
5456 }
5457 if (advance_right && !right_end_reached) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005458 ret = tree_advance(fs_info, right_path, &right_level,
Alexander Block70698302012-06-05 21:07:48 +02005459 right_root_level,
5460 advance_right != ADVANCE_ONLY_NEXT,
5461 &right_key);
Liu Bofb770ae2016-07-05 12:10:14 -07005462 if (ret == -1)
Alexander Block70698302012-06-05 21:07:48 +02005463 right_end_reached = ADVANCE;
Liu Bofb770ae2016-07-05 12:10:14 -07005464 else if (ret < 0)
5465 goto out;
Alexander Block70698302012-06-05 21:07:48 +02005466 advance_right = 0;
5467 }
5468
5469 if (left_end_reached && right_end_reached) {
5470 ret = 0;
5471 goto out;
5472 } else if (left_end_reached) {
5473 if (right_level == 0) {
Nikolay Borisovee8c4942017-08-21 12:43:45 +03005474 ret = changed_cb(left_path, right_path,
Alexander Block70698302012-06-05 21:07:48 +02005475 &right_key,
5476 BTRFS_COMPARE_TREE_DELETED,
5477 ctx);
5478 if (ret < 0)
5479 goto out;
5480 }
5481 advance_right = ADVANCE;
5482 continue;
5483 } else if (right_end_reached) {
5484 if (left_level == 0) {
Nikolay Borisovee8c4942017-08-21 12:43:45 +03005485 ret = changed_cb(left_path, right_path,
Alexander Block70698302012-06-05 21:07:48 +02005486 &left_key,
5487 BTRFS_COMPARE_TREE_NEW,
5488 ctx);
5489 if (ret < 0)
5490 goto out;
5491 }
5492 advance_left = ADVANCE;
5493 continue;
5494 }
5495
5496 if (left_level == 0 && right_level == 0) {
5497 cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
5498 if (cmp < 0) {
Nikolay Borisovee8c4942017-08-21 12:43:45 +03005499 ret = changed_cb(left_path, right_path,
Alexander Block70698302012-06-05 21:07:48 +02005500 &left_key,
5501 BTRFS_COMPARE_TREE_NEW,
5502 ctx);
5503 if (ret < 0)
5504 goto out;
5505 advance_left = ADVANCE;
5506 } else if (cmp > 0) {
Nikolay Borisovee8c4942017-08-21 12:43:45 +03005507 ret = changed_cb(left_path, right_path,
Alexander Block70698302012-06-05 21:07:48 +02005508 &right_key,
5509 BTRFS_COMPARE_TREE_DELETED,
5510 ctx);
5511 if (ret < 0)
5512 goto out;
5513 advance_right = ADVANCE;
5514 } else {
Fabian Frederickb99d9a62014-09-25 19:35:02 +02005515 enum btrfs_compare_tree_result result;
Josef Bacikba5e8f22013-08-16 16:52:55 -04005516
Chris Mason74dd17f2012-08-07 16:25:13 -04005517 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005518 ret = tree_compare_item(left_path, right_path,
5519 tmp_buf);
Josef Bacikba5e8f22013-08-16 16:52:55 -04005520 if (ret)
Fabian Frederickb99d9a62014-09-25 19:35:02 +02005521 result = BTRFS_COMPARE_TREE_CHANGED;
Josef Bacikba5e8f22013-08-16 16:52:55 -04005522 else
Fabian Frederickb99d9a62014-09-25 19:35:02 +02005523 result = BTRFS_COMPARE_TREE_SAME;
Nikolay Borisovee8c4942017-08-21 12:43:45 +03005524 ret = changed_cb(left_path, right_path,
Fabian Frederickb99d9a62014-09-25 19:35:02 +02005525 &left_key, result, ctx);
Josef Bacikba5e8f22013-08-16 16:52:55 -04005526 if (ret < 0)
5527 goto out;
Alexander Block70698302012-06-05 21:07:48 +02005528 advance_left = ADVANCE;
5529 advance_right = ADVANCE;
5530 }
5531 } else if (left_level == right_level) {
5532 cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
5533 if (cmp < 0) {
5534 advance_left = ADVANCE;
5535 } else if (cmp > 0) {
5536 advance_right = ADVANCE;
5537 } else {
5538 left_blockptr = btrfs_node_blockptr(
5539 left_path->nodes[left_level],
5540 left_path->slots[left_level]);
5541 right_blockptr = btrfs_node_blockptr(
5542 right_path->nodes[right_level],
5543 right_path->slots[right_level]);
Filipe Manana6baa4292014-02-20 21:15:25 +00005544 left_gen = btrfs_node_ptr_generation(
5545 left_path->nodes[left_level],
5546 left_path->slots[left_level]);
5547 right_gen = btrfs_node_ptr_generation(
5548 right_path->nodes[right_level],
5549 right_path->slots[right_level]);
5550 if (left_blockptr == right_blockptr &&
5551 left_gen == right_gen) {
Alexander Block70698302012-06-05 21:07:48 +02005552 /*
5553 * As we're on a shared block, don't
5554 * allow to go deeper.
5555 */
5556 advance_left = ADVANCE_ONLY_NEXT;
5557 advance_right = ADVANCE_ONLY_NEXT;
5558 } else {
5559 advance_left = ADVANCE;
5560 advance_right = ADVANCE;
5561 }
5562 }
5563 } else if (left_level < right_level) {
5564 advance_right = ADVANCE;
5565 } else {
5566 advance_left = ADVANCE;
5567 }
5568 }
5569
5570out:
5571 btrfs_free_path(left_path);
5572 btrfs_free_path(right_path);
David Sterba8f282f72016-03-30 16:01:12 +02005573 kvfree(tmp_buf);
Alexander Block70698302012-06-05 21:07:48 +02005574 return ret;
5575}
5576
Chris Mason3f157a22008-06-25 16:01:31 -04005577/*
5578 * this is similar to btrfs_next_leaf, but does not try to preserve
5579 * and fixup the path. It looks for and returns the next key in the
Eric Sandeende78b512013-01-31 18:21:12 +00005580 * tree based on the current path and the min_trans parameters.
Chris Mason3f157a22008-06-25 16:01:31 -04005581 *
5582 * 0 is returned if another key is found, < 0 if there are any errors
5583 * and 1 is returned if there are no higher keys in the tree
5584 *
5585 * path->keep_locks should be set to 1 on the search made before
5586 * calling this function.
5587 */
Chris Masone7a84562008-06-25 16:01:31 -04005588int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
Eric Sandeende78b512013-01-31 18:21:12 +00005589 struct btrfs_key *key, int level, u64 min_trans)
Chris Masone7a84562008-06-25 16:01:31 -04005590{
Chris Masone7a84562008-06-25 16:01:31 -04005591 int slot;
5592 struct extent_buffer *c;
5593
Chris Mason934d3752008-12-08 16:43:10 -05005594 WARN_ON(!path->keep_locks);
Chris Masond3977122009-01-05 21:25:51 -05005595 while (level < BTRFS_MAX_LEVEL) {
Chris Masone7a84562008-06-25 16:01:31 -04005596 if (!path->nodes[level])
5597 return 1;
5598
5599 slot = path->slots[level] + 1;
5600 c = path->nodes[level];
Chris Mason3f157a22008-06-25 16:01:31 -04005601next:
Chris Masone7a84562008-06-25 16:01:31 -04005602 if (slot >= btrfs_header_nritems(c)) {
Yan Zheng33c66f42009-07-22 09:59:00 -04005603 int ret;
5604 int orig_lowest;
5605 struct btrfs_key cur_key;
5606 if (level + 1 >= BTRFS_MAX_LEVEL ||
5607 !path->nodes[level + 1])
Chris Masone7a84562008-06-25 16:01:31 -04005608 return 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04005609
5610 if (path->locks[level + 1]) {
5611 level++;
5612 continue;
5613 }
5614
5615 slot = btrfs_header_nritems(c) - 1;
5616 if (level == 0)
5617 btrfs_item_key_to_cpu(c, &cur_key, slot);
5618 else
5619 btrfs_node_key_to_cpu(c, &cur_key, slot);
5620
5621 orig_lowest = path->lowest_level;
David Sterbab3b4aa72011-04-21 01:20:15 +02005622 btrfs_release_path(path);
Yan Zheng33c66f42009-07-22 09:59:00 -04005623 path->lowest_level = level;
5624 ret = btrfs_search_slot(NULL, root, &cur_key, path,
5625 0, 0);
5626 path->lowest_level = orig_lowest;
5627 if (ret < 0)
5628 return ret;
5629
5630 c = path->nodes[level];
5631 slot = path->slots[level];
5632 if (ret == 0)
5633 slot++;
5634 goto next;
Chris Masone7a84562008-06-25 16:01:31 -04005635 }
Yan Zheng33c66f42009-07-22 09:59:00 -04005636
Chris Masone7a84562008-06-25 16:01:31 -04005637 if (level == 0)
5638 btrfs_item_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04005639 else {
Chris Mason3f157a22008-06-25 16:01:31 -04005640 u64 gen = btrfs_node_ptr_generation(c, slot);
5641
Chris Mason3f157a22008-06-25 16:01:31 -04005642 if (gen < min_trans) {
5643 slot++;
5644 goto next;
5645 }
Chris Masone7a84562008-06-25 16:01:31 -04005646 btrfs_node_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04005647 }
Chris Masone7a84562008-06-25 16:01:31 -04005648 return 0;
5649 }
5650 return 1;
5651}
5652
Chris Mason7bb86312007-12-11 09:25:06 -05005653/*
Chris Mason925baed2008-06-25 16:01:30 -04005654 * search the tree again to find a leaf with greater keys
Chris Mason0f70abe2007-02-28 16:46:22 -05005655 * returns 0 if it found something or 1 if there are no greater leaves.
5656 * returns < 0 on io errors.
Chris Mason97571fd2007-02-24 13:39:08 -05005657 */
Chris Mason234b63a2007-03-13 10:46:10 -04005658int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Masond97e63b2007-02-20 16:40:44 -05005659{
Jan Schmidt3d7806e2012-06-11 08:29:29 +02005660 return btrfs_next_old_leaf(root, path, 0);
5661}
5662
5663int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
5664 u64 time_seq)
5665{
Chris Masond97e63b2007-02-20 16:40:44 -05005666 int slot;
Chris Mason8e73f272009-04-03 10:14:18 -04005667 int level;
Chris Mason5f39d392007-10-15 16:14:19 -04005668 struct extent_buffer *c;
Chris Mason8e73f272009-04-03 10:14:18 -04005669 struct extent_buffer *next;
Chris Mason925baed2008-06-25 16:01:30 -04005670 struct btrfs_key key;
5671 u32 nritems;
5672 int ret;
Chris Mason8e73f272009-04-03 10:14:18 -04005673 int old_spinning = path->leave_spinning;
Chris Masonbd681512011-07-16 15:23:14 -04005674 int next_rw_lock = 0;
Chris Mason925baed2008-06-25 16:01:30 -04005675
5676 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Masond3977122009-01-05 21:25:51 -05005677 if (nritems == 0)
Chris Mason925baed2008-06-25 16:01:30 -04005678 return 1;
Chris Mason925baed2008-06-25 16:01:30 -04005679
Chris Mason8e73f272009-04-03 10:14:18 -04005680 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
5681again:
5682 level = 1;
5683 next = NULL;
Chris Masonbd681512011-07-16 15:23:14 -04005684 next_rw_lock = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02005685 btrfs_release_path(path);
Chris Mason8e73f272009-04-03 10:14:18 -04005686
Chris Masona2135012008-06-25 16:01:30 -04005687 path->keep_locks = 1;
Chris Mason31533fb2011-07-26 16:01:59 -04005688 path->leave_spinning = 1;
Chris Mason8e73f272009-04-03 10:14:18 -04005689
Jan Schmidt3d7806e2012-06-11 08:29:29 +02005690 if (time_seq)
5691 ret = btrfs_search_old_slot(root, &key, path, time_seq);
5692 else
5693 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
Chris Mason925baed2008-06-25 16:01:30 -04005694 path->keep_locks = 0;
5695
5696 if (ret < 0)
5697 return ret;
5698
Chris Masona2135012008-06-25 16:01:30 -04005699 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Mason168fd7d2008-06-25 16:01:30 -04005700 /*
5701 * by releasing the path above we dropped all our locks. A balance
5702 * could have added more items next to the key that used to be
5703 * at the very end of the block. So, check again here and
5704 * advance the path if there are now more items available.
5705 */
Chris Masona2135012008-06-25 16:01:30 -04005706 if (nritems > 0 && path->slots[0] < nritems - 1) {
Yan Zhenge457afe2009-07-22 09:59:00 -04005707 if (ret == 0)
5708 path->slots[0]++;
Chris Mason8e73f272009-04-03 10:14:18 -04005709 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04005710 goto done;
5711 }
Liu Bo0b43e042014-06-09 11:04:49 +08005712 /*
5713 * So the above check misses one case:
5714 * - after releasing the path above, someone has removed the item that
5715 * used to be at the very end of the block, and balance between leafs
5716 * gets another one with bigger key.offset to replace it.
5717 *
5718 * This one should be returned as well, or we can get leaf corruption
5719 * later(esp. in __btrfs_drop_extents()).
5720 *
5721 * And a bit more explanation about this check,
5722 * with ret > 0, the key isn't found, the path points to the slot
5723 * where it should be inserted, so the path->slots[0] item must be the
5724 * bigger one.
5725 */
5726 if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
5727 ret = 0;
5728 goto done;
5729 }
Chris Masond97e63b2007-02-20 16:40:44 -05005730
Chris Masond3977122009-01-05 21:25:51 -05005731 while (level < BTRFS_MAX_LEVEL) {
Chris Mason8e73f272009-04-03 10:14:18 -04005732 if (!path->nodes[level]) {
5733 ret = 1;
5734 goto done;
5735 }
Chris Mason5f39d392007-10-15 16:14:19 -04005736
Chris Masond97e63b2007-02-20 16:40:44 -05005737 slot = path->slots[level] + 1;
5738 c = path->nodes[level];
Chris Mason5f39d392007-10-15 16:14:19 -04005739 if (slot >= btrfs_header_nritems(c)) {
Chris Masond97e63b2007-02-20 16:40:44 -05005740 level++;
Chris Mason8e73f272009-04-03 10:14:18 -04005741 if (level == BTRFS_MAX_LEVEL) {
5742 ret = 1;
5743 goto done;
5744 }
Chris Masond97e63b2007-02-20 16:40:44 -05005745 continue;
5746 }
Chris Mason5f39d392007-10-15 16:14:19 -04005747
Chris Mason925baed2008-06-25 16:01:30 -04005748 if (next) {
Chris Masonbd681512011-07-16 15:23:14 -04005749 btrfs_tree_unlock_rw(next, next_rw_lock);
Chris Mason5f39d392007-10-15 16:14:19 -04005750 free_extent_buffer(next);
Chris Mason925baed2008-06-25 16:01:30 -04005751 }
Chris Mason5f39d392007-10-15 16:14:19 -04005752
Chris Mason8e73f272009-04-03 10:14:18 -04005753 next = c;
Chris Masonbd681512011-07-16 15:23:14 -04005754 next_rw_lock = path->locks[level];
Liu Bod07b8522017-01-30 12:23:42 -08005755 ret = read_block_for_search(root, path, &next, level,
David Sterbacda79c52017-02-10 18:44:32 +01005756 slot, &key);
Chris Mason8e73f272009-04-03 10:14:18 -04005757 if (ret == -EAGAIN)
5758 goto again;
Chris Mason5f39d392007-10-15 16:14:19 -04005759
Chris Mason76a05b32009-05-14 13:24:30 -04005760 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02005761 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04005762 goto done;
5763 }
5764
Chris Mason5cd57b22008-06-25 16:01:30 -04005765 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04005766 ret = btrfs_try_tree_read_lock(next);
Jan Schmidtd42244a2012-06-22 14:51:15 +02005767 if (!ret && time_seq) {
5768 /*
5769 * If we don't get the lock, we may be racing
5770 * with push_leaf_left, holding that lock while
5771 * itself waiting for the leaf we've currently
5772 * locked. To solve this situation, we give up
5773 * on our lock and cycle.
5774 */
Jan Schmidtcf538832012-07-04 15:42:48 +02005775 free_extent_buffer(next);
Jan Schmidtd42244a2012-06-22 14:51:15 +02005776 btrfs_release_path(path);
5777 cond_resched();
5778 goto again;
5779 }
Chris Mason8e73f272009-04-03 10:14:18 -04005780 if (!ret) {
5781 btrfs_set_path_blocking(path);
Chris Masonbd681512011-07-16 15:23:14 -04005782 btrfs_tree_read_lock(next);
Chris Mason31533fb2011-07-26 16:01:59 -04005783 btrfs_clear_path_blocking(path, next,
Chris Masonbd681512011-07-16 15:23:14 -04005784 BTRFS_READ_LOCK);
Chris Mason8e73f272009-04-03 10:14:18 -04005785 }
Chris Mason31533fb2011-07-26 16:01:59 -04005786 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04005787 }
Chris Masond97e63b2007-02-20 16:40:44 -05005788 break;
5789 }
5790 path->slots[level] = slot;
Chris Masond3977122009-01-05 21:25:51 -05005791 while (1) {
Chris Masond97e63b2007-02-20 16:40:44 -05005792 level--;
5793 c = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04005794 if (path->locks[level])
Chris Masonbd681512011-07-16 15:23:14 -04005795 btrfs_tree_unlock_rw(c, path->locks[level]);
Chris Mason8e73f272009-04-03 10:14:18 -04005796
Chris Mason5f39d392007-10-15 16:14:19 -04005797 free_extent_buffer(c);
Chris Masond97e63b2007-02-20 16:40:44 -05005798 path->nodes[level] = next;
5799 path->slots[level] = 0;
Chris Masona74a4b92008-06-25 16:01:31 -04005800 if (!path->skip_locking)
Chris Masonbd681512011-07-16 15:23:14 -04005801 path->locks[level] = next_rw_lock;
Chris Masond97e63b2007-02-20 16:40:44 -05005802 if (!level)
5803 break;
Chris Masonb4ce94d2009-02-04 09:25:08 -05005804
Liu Bod07b8522017-01-30 12:23:42 -08005805 ret = read_block_for_search(root, path, &next, level,
David Sterbacda79c52017-02-10 18:44:32 +01005806 0, &key);
Chris Mason8e73f272009-04-03 10:14:18 -04005807 if (ret == -EAGAIN)
5808 goto again;
5809
Chris Mason76a05b32009-05-14 13:24:30 -04005810 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02005811 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04005812 goto done;
5813 }
5814
Chris Mason5cd57b22008-06-25 16:01:30 -04005815 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04005816 ret = btrfs_try_tree_read_lock(next);
Chris Mason8e73f272009-04-03 10:14:18 -04005817 if (!ret) {
5818 btrfs_set_path_blocking(path);
Chris Masonbd681512011-07-16 15:23:14 -04005819 btrfs_tree_read_lock(next);
Chris Mason31533fb2011-07-26 16:01:59 -04005820 btrfs_clear_path_blocking(path, next,
Chris Masonbd681512011-07-16 15:23:14 -04005821 BTRFS_READ_LOCK);
Chris Mason8e73f272009-04-03 10:14:18 -04005822 }
Chris Mason31533fb2011-07-26 16:01:59 -04005823 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04005824 }
Chris Masond97e63b2007-02-20 16:40:44 -05005825 }
Chris Mason8e73f272009-04-03 10:14:18 -04005826 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04005827done:
Chris Masonf7c79f32012-03-19 15:54:38 -04005828 unlock_up(path, 0, 1, 0, NULL);
Chris Mason8e73f272009-04-03 10:14:18 -04005829 path->leave_spinning = old_spinning;
5830 if (!old_spinning)
5831 btrfs_set_path_blocking(path);
5832
5833 return ret;
Chris Masond97e63b2007-02-20 16:40:44 -05005834}
Chris Mason0b86a832008-03-24 15:01:56 -04005835
Chris Mason3f157a22008-06-25 16:01:31 -04005836/*
5837 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
5838 * searching until it gets past min_objectid or finds an item of 'type'
5839 *
5840 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5841 */
Chris Mason0b86a832008-03-24 15:01:56 -04005842int btrfs_previous_item(struct btrfs_root *root,
5843 struct btrfs_path *path, u64 min_objectid,
5844 int type)
5845{
5846 struct btrfs_key found_key;
5847 struct extent_buffer *leaf;
Chris Masone02119d2008-09-05 16:13:11 -04005848 u32 nritems;
Chris Mason0b86a832008-03-24 15:01:56 -04005849 int ret;
5850
Chris Masond3977122009-01-05 21:25:51 -05005851 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04005852 if (path->slots[0] == 0) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05005853 btrfs_set_path_blocking(path);
Chris Mason0b86a832008-03-24 15:01:56 -04005854 ret = btrfs_prev_leaf(root, path);
5855 if (ret != 0)
5856 return ret;
5857 } else {
5858 path->slots[0]--;
5859 }
5860 leaf = path->nodes[0];
Chris Masone02119d2008-09-05 16:13:11 -04005861 nritems = btrfs_header_nritems(leaf);
5862 if (nritems == 0)
5863 return 1;
5864 if (path->slots[0] == nritems)
5865 path->slots[0]--;
5866
Chris Mason0b86a832008-03-24 15:01:56 -04005867 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Chris Masone02119d2008-09-05 16:13:11 -04005868 if (found_key.objectid < min_objectid)
5869 break;
Yan Zheng0a4eefb2009-07-24 11:06:53 -04005870 if (found_key.type == type)
5871 return 0;
Chris Masone02119d2008-09-05 16:13:11 -04005872 if (found_key.objectid == min_objectid &&
5873 found_key.type < type)
5874 break;
Chris Mason0b86a832008-03-24 15:01:56 -04005875 }
5876 return 1;
5877}
Wang Shilongade2e0b2014-01-12 21:38:33 +08005878
5879/*
5880 * search in extent tree to find a previous Metadata/Data extent item with
5881 * min objecitd.
5882 *
5883 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5884 */
5885int btrfs_previous_extent_item(struct btrfs_root *root,
5886 struct btrfs_path *path, u64 min_objectid)
5887{
5888 struct btrfs_key found_key;
5889 struct extent_buffer *leaf;
5890 u32 nritems;
5891 int ret;
5892
5893 while (1) {
5894 if (path->slots[0] == 0) {
5895 btrfs_set_path_blocking(path);
5896 ret = btrfs_prev_leaf(root, path);
5897 if (ret != 0)
5898 return ret;
5899 } else {
5900 path->slots[0]--;
5901 }
5902 leaf = path->nodes[0];
5903 nritems = btrfs_header_nritems(leaf);
5904 if (nritems == 0)
5905 return 1;
5906 if (path->slots[0] == nritems)
5907 path->slots[0]--;
5908
5909 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5910 if (found_key.objectid < min_objectid)
5911 break;
5912 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
5913 found_key.type == BTRFS_METADATA_ITEM_KEY)
5914 return 0;
5915 if (found_key.objectid == min_objectid &&
5916 found_key.type < BTRFS_EXTENT_ITEM_KEY)
5917 break;
5918 }
5919 return 1;
5920}