blob: f6ba165d3f811a76c6fdf900fac4ad37228361b0 [file] [log] [blame]
Chris Mason6cbd5572007-06-12 09:07:21 -04001/*
Chris Masond352ac62008-09-29 15:18:18 -04002 * Copyright (C) 2007,2008 Oracle. All rights reserved.
Chris Mason6cbd5572007-06-12 09:07:21 -04003 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
Chris Masona6b6e752007-10-15 16:22:39 -040019#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090020#include <linux/slab.h>
Jan Schmidtbd989ba2012-05-16 17:18:50 +020021#include <linux/rbtree.h>
David Sterba8f282f72016-03-30 16:01:12 +020022#include <linux/vmalloc.h>
Chris Masoneb60cea2007-02-02 09:18:22 -050023#include "ctree.h"
24#include "disk-io.h"
Chris Mason7f5c1512007-03-23 15:56:19 -040025#include "transaction.h"
Chris Mason5f39d392007-10-15 16:14:19 -040026#include "print-tree.h"
Chris Mason925baed2008-06-25 16:01:30 -040027#include "locking.h"
Chris Mason9a8dd152007-02-23 08:38:36 -050028
Chris Masone089f052007-03-16 16:20:31 -040029static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
30 *root, struct btrfs_path *path, int level);
31static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Masond4dbff92007-04-04 14:08:15 -040032 *root, struct btrfs_key *ins_key,
Chris Masoncc0c5532007-10-25 15:42:57 -040033 struct btrfs_path *path, int data_size, int extend);
Chris Mason5f39d392007-10-15 16:14:19 -040034static int push_node_left(struct btrfs_trans_handle *trans,
35 struct btrfs_root *root, struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -040036 struct extent_buffer *src, int empty);
Chris Mason5f39d392007-10-15 16:14:19 -040037static int balance_node_right(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root,
39 struct extent_buffer *dst_buf,
40 struct extent_buffer *src_buf);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +000041static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
42 int level, int slot);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +000043static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
Jan Schmidtf2304752012-05-26 11:43:17 +020044 struct extent_buffer *eb);
Chris Masond97e63b2007-02-20 16:40:44 -050045
Chris Mason2c90e5d2007-04-02 10:50:19 -040046struct btrfs_path *btrfs_alloc_path(void)
47{
Masahiro Yamadae2c89902016-09-13 04:35:52 +090048 return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
Chris Mason2c90e5d2007-04-02 10:50:19 -040049}
50
Chris Masonb4ce94d2009-02-04 09:25:08 -050051/*
52 * set all locked nodes in the path to blocking locks. This should
53 * be done before scheduling
54 */
55noinline void btrfs_set_path_blocking(struct btrfs_path *p)
56{
57 int i;
58 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbd681512011-07-16 15:23:14 -040059 if (!p->nodes[i] || !p->locks[i])
60 continue;
61 btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
62 if (p->locks[i] == BTRFS_READ_LOCK)
63 p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
64 else if (p->locks[i] == BTRFS_WRITE_LOCK)
65 p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
Chris Masonb4ce94d2009-02-04 09:25:08 -050066 }
67}
68
69/*
70 * reset all the locked nodes in the patch to spinning locks.
Chris Mason4008c042009-02-12 14:09:45 -050071 *
72 * held is used to keep lockdep happy, when lockdep is enabled
73 * we set held to a blocking lock before we go around and
74 * retake all the spinlocks in the path. You can safely use NULL
75 * for held
Chris Masonb4ce94d2009-02-04 09:25:08 -050076 */
Chris Mason4008c042009-02-12 14:09:45 -050077noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -040078 struct extent_buffer *held, int held_rw)
Chris Masonb4ce94d2009-02-04 09:25:08 -050079{
80 int i;
Chris Mason4008c042009-02-12 14:09:45 -050081
Chris Masonbd681512011-07-16 15:23:14 -040082 if (held) {
83 btrfs_set_lock_blocking_rw(held, held_rw);
84 if (held_rw == BTRFS_WRITE_LOCK)
85 held_rw = BTRFS_WRITE_LOCK_BLOCKING;
86 else if (held_rw == BTRFS_READ_LOCK)
87 held_rw = BTRFS_READ_LOCK_BLOCKING;
88 }
Chris Mason4008c042009-02-12 14:09:45 -050089 btrfs_set_path_blocking(p);
Chris Mason4008c042009-02-12 14:09:45 -050090
91 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
Chris Masonbd681512011-07-16 15:23:14 -040092 if (p->nodes[i] && p->locks[i]) {
93 btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
94 if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
95 p->locks[i] = BTRFS_WRITE_LOCK;
96 else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
97 p->locks[i] = BTRFS_READ_LOCK;
98 }
Chris Masonb4ce94d2009-02-04 09:25:08 -050099 }
Chris Mason4008c042009-02-12 14:09:45 -0500100
Chris Mason4008c042009-02-12 14:09:45 -0500101 if (held)
Chris Masonbd681512011-07-16 15:23:14 -0400102 btrfs_clear_lock_blocking_rw(held, held_rw);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500103}
104
Chris Masond352ac62008-09-29 15:18:18 -0400105/* this also releases the path */
Chris Mason2c90e5d2007-04-02 10:50:19 -0400106void btrfs_free_path(struct btrfs_path *p)
107{
Jesper Juhlff175d52010-12-25 21:22:30 +0000108 if (!p)
109 return;
David Sterbab3b4aa72011-04-21 01:20:15 +0200110 btrfs_release_path(p);
Chris Mason2c90e5d2007-04-02 10:50:19 -0400111 kmem_cache_free(btrfs_path_cachep, p);
112}
113
Chris Masond352ac62008-09-29 15:18:18 -0400114/*
115 * path release drops references on the extent buffers in the path
116 * and it drops any locks held by this path
117 *
118 * It is safe to call this on paths that no locks or extent buffers held.
119 */
David Sterbab3b4aa72011-04-21 01:20:15 +0200120noinline void btrfs_release_path(struct btrfs_path *p)
Chris Masoneb60cea2007-02-02 09:18:22 -0500121{
122 int i;
Chris Masona2135012008-06-25 16:01:30 -0400123
Chris Mason234b63a2007-03-13 10:46:10 -0400124 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Mason3f157a22008-06-25 16:01:31 -0400125 p->slots[i] = 0;
Chris Masoneb60cea2007-02-02 09:18:22 -0500126 if (!p->nodes[i])
Chris Mason925baed2008-06-25 16:01:30 -0400127 continue;
128 if (p->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -0400129 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -0400130 p->locks[i] = 0;
131 }
Chris Mason5f39d392007-10-15 16:14:19 -0400132 free_extent_buffer(p->nodes[i]);
Chris Mason3f157a22008-06-25 16:01:31 -0400133 p->nodes[i] = NULL;
Chris Masoneb60cea2007-02-02 09:18:22 -0500134 }
135}
136
Chris Masond352ac62008-09-29 15:18:18 -0400137/*
138 * safely gets a reference on the root node of a tree. A lock
139 * is not taken, so a concurrent writer may put a different node
140 * at the root of the tree. See btrfs_lock_root_node for the
141 * looping required.
142 *
143 * The extent buffer returned by this has a reference taken, so
144 * it won't disappear. It may stop being the root of the tree
145 * at any time because there are no locks held.
146 */
Chris Mason925baed2008-06-25 16:01:30 -0400147struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
148{
149 struct extent_buffer *eb;
Chris Mason240f62c2011-03-23 14:54:42 -0400150
Josef Bacik3083ee22012-03-09 16:01:49 -0500151 while (1) {
152 rcu_read_lock();
153 eb = rcu_dereference(root->node);
154
155 /*
156 * RCU really hurts here, we could free up the root node because
Nicholas D Steeves01327612016-05-19 21:18:45 -0400157 * it was COWed but we may not get the new root node yet so do
Josef Bacik3083ee22012-03-09 16:01:49 -0500158 * the inc_not_zero dance and if it doesn't work then
159 * synchronize_rcu and try again.
160 */
161 if (atomic_inc_not_zero(&eb->refs)) {
162 rcu_read_unlock();
163 break;
164 }
165 rcu_read_unlock();
166 synchronize_rcu();
167 }
Chris Mason925baed2008-06-25 16:01:30 -0400168 return eb;
169}
170
Chris Masond352ac62008-09-29 15:18:18 -0400171/* loop around taking references on and locking the root node of the
172 * tree until you end up with a lock on the root. A locked buffer
173 * is returned, with a reference held.
174 */
Chris Mason925baed2008-06-25 16:01:30 -0400175struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
176{
177 struct extent_buffer *eb;
178
Chris Masond3977122009-01-05 21:25:51 -0500179 while (1) {
Chris Mason925baed2008-06-25 16:01:30 -0400180 eb = btrfs_root_node(root);
181 btrfs_tree_lock(eb);
Chris Mason240f62c2011-03-23 14:54:42 -0400182 if (eb == root->node)
Chris Mason925baed2008-06-25 16:01:30 -0400183 break;
Chris Mason925baed2008-06-25 16:01:30 -0400184 btrfs_tree_unlock(eb);
185 free_extent_buffer(eb);
186 }
187 return eb;
188}
189
Chris Masonbd681512011-07-16 15:23:14 -0400190/* loop around taking references on and locking the root node of the
191 * tree until you end up with a lock on the root. A locked buffer
192 * is returned, with a reference held.
193 */
Eric Sandeen48a3b632013-04-25 20:41:01 +0000194static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
Chris Masonbd681512011-07-16 15:23:14 -0400195{
196 struct extent_buffer *eb;
197
198 while (1) {
199 eb = btrfs_root_node(root);
200 btrfs_tree_read_lock(eb);
201 if (eb == root->node)
202 break;
203 btrfs_tree_read_unlock(eb);
204 free_extent_buffer(eb);
205 }
206 return eb;
207}
208
Chris Masond352ac62008-09-29 15:18:18 -0400209/* cowonly root (everything not a reference counted cow subvolume), just get
210 * put onto a simple dirty list. transaction.c walks this to make sure they
211 * get properly updated on disk.
212 */
Chris Mason0b86a832008-03-24 15:01:56 -0400213static void add_root_to_dirty_list(struct btrfs_root *root)
214{
Josef Bacike7070be2014-12-16 08:54:43 -0800215 if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
216 !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
217 return;
218
Chris Masone5846fc2012-05-03 12:08:48 -0400219 spin_lock(&root->fs_info->trans_lock);
Josef Bacike7070be2014-12-16 08:54:43 -0800220 if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
221 /* Want the extent tree to be the last on the list */
222 if (root->objectid == BTRFS_EXTENT_TREE_OBJECTID)
223 list_move_tail(&root->dirty_list,
224 &root->fs_info->dirty_cowonly_roots);
225 else
226 list_move(&root->dirty_list,
227 &root->fs_info->dirty_cowonly_roots);
Chris Mason0b86a832008-03-24 15:01:56 -0400228 }
Chris Masone5846fc2012-05-03 12:08:48 -0400229 spin_unlock(&root->fs_info->trans_lock);
Chris Mason0b86a832008-03-24 15:01:56 -0400230}
231
Chris Masond352ac62008-09-29 15:18:18 -0400232/*
233 * used by snapshot creation to make a copy of a root for a tree with
234 * a given objectid. The buffer with the new root node is returned in
235 * cow_ret, and this func returns zero on success or a negative error code.
236 */
Chris Masonbe20aa92007-12-17 20:14:01 -0500237int btrfs_copy_root(struct btrfs_trans_handle *trans,
238 struct btrfs_root *root,
239 struct extent_buffer *buf,
240 struct extent_buffer **cow_ret, u64 new_root_objectid)
241{
242 struct extent_buffer *cow;
Chris Masonbe20aa92007-12-17 20:14:01 -0500243 int ret = 0;
244 int level;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400245 struct btrfs_disk_key disk_key;
Chris Masonbe20aa92007-12-17 20:14:01 -0500246
Miao Xie27cdeb72014-04-02 19:51:05 +0800247 WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
248 trans->transid != root->fs_info->running_transaction->transid);
249 WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
250 trans->transid != root->last_trans);
Chris Masonbe20aa92007-12-17 20:14:01 -0500251
252 level = btrfs_header_level(buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400253 if (level == 0)
254 btrfs_item_key(buf, &disk_key, 0);
255 else
256 btrfs_node_key(buf, &disk_key, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400257
David Sterba4d75f8a2014-06-15 01:54:12 +0200258 cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
259 &disk_key, level, buf->start, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400260 if (IS_ERR(cow))
Chris Masonbe20aa92007-12-17 20:14:01 -0500261 return PTR_ERR(cow);
262
263 copy_extent_buffer(cow, buf, 0, 0, cow->len);
264 btrfs_set_header_bytenr(cow, cow->start);
265 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400266 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
267 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
268 BTRFS_HEADER_FLAG_RELOC);
269 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
270 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
271 else
272 btrfs_set_header_owner(cow, new_root_objectid);
Chris Masonbe20aa92007-12-17 20:14:01 -0500273
Ross Kirk0a4e5582013-09-24 10:12:38 +0100274 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
Yan Zheng2b820322008-11-17 21:11:30 -0500275 BTRFS_FSID_SIZE);
276
Chris Masonbe20aa92007-12-17 20:14:01 -0500277 WARN_ON(btrfs_header_generation(buf) > trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400278 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -0700279 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400280 else
Josef Bacike339a6b2014-07-02 10:54:25 -0700281 ret = btrfs_inc_ref(trans, root, cow, 0);
Chris Mason4aec2b52007-12-18 16:25:45 -0500282
Chris Masonbe20aa92007-12-17 20:14:01 -0500283 if (ret)
284 return ret;
285
286 btrfs_mark_buffer_dirty(cow);
287 *cow_ret = cow;
288 return 0;
289}
290
/*
 * Kinds of modifications recorded in the tree mod log.  The enumerators
 * rely on the implicit numbering starting at zero, so their order must
 * not change.
 */
enum mod_log_op {
	MOD_LOG_KEY_REPLACE,
	MOD_LOG_KEY_ADD,
	MOD_LOG_KEY_REMOVE,
	/* pointer of an old root node logged when the root is replaced */
	MOD_LOG_KEY_REMOVE_WHILE_FREEING,
	/* pointer overwritten by a move towards the start of the buffer */
	MOD_LOG_KEY_REMOVE_WHILE_MOVING,
	/* a run of pointers moved inside one buffer */
	MOD_LOG_MOVE_KEYS,
	/* the root node of a tree was replaced */
	MOD_LOG_ROOT_REPLACE,
};
300
/* Payload of a MOD_LOG_MOVE_KEYS record. */
struct tree_mod_move {
	int dst_slot;	/* slot the moved run starts at afterwards */
	int nr_items;	/* number of slots moved */
};
305
306struct tree_mod_root {
307 u64 logical;
308 u8 level;
309};
310
311struct tree_mod_elem {
312 struct rb_node node;
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530313 u64 logical;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200314 u64 seq;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200315 enum mod_log_op op;
316
317 /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
318 int slot;
319
320 /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
321 u64 generation;
322
323 /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
324 struct btrfs_disk_key key;
325 u64 blockptr;
326
327 /* this is used for op == MOD_LOG_MOVE_KEYS */
328 struct tree_mod_move move;
329
330 /* this is used for op == MOD_LOG_ROOT_REPLACE */
331 struct tree_mod_root old_root;
332};
333
Jan Schmidt097b8a72012-06-21 11:08:04 +0200334static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200335{
Jan Schmidt097b8a72012-06-21 11:08:04 +0200336 read_lock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200337}
338
Jan Schmidt097b8a72012-06-21 11:08:04 +0200339static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200340{
Jan Schmidt097b8a72012-06-21 11:08:04 +0200341 read_unlock(&fs_info->tree_mod_log_lock);
342}
343
344static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
345{
346 write_lock(&fs_info->tree_mod_log_lock);
347}
348
349static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
350{
351 write_unlock(&fs_info->tree_mod_log_lock);
352}
353
354/*
Josef Bacikfcebe452014-05-13 17:30:47 -0700355 * Pull a new tree mod seq number for our operation.
Jan Schmidtfc36ed7e2013-04-24 16:57:33 +0000356 */
Josef Bacikfcebe452014-05-13 17:30:47 -0700357static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
Jan Schmidtfc36ed7e2013-04-24 16:57:33 +0000358{
359 return atomic64_inc_return(&fs_info->tree_mod_seq);
360}
361
362/*
Jan Schmidt097b8a72012-06-21 11:08:04 +0200363 * This adds a new blocker to the tree mod log's blocker list if the @elem
364 * passed does not already have a sequence number set. So when a caller expects
365 * to record tree modifications, it should ensure to set elem->seq to zero
366 * before calling btrfs_get_tree_mod_seq.
367 * Returns a fresh, unused tree log modification sequence number, even if no new
368 * blocker was added.
369 */
370u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
371 struct seq_list *elem)
372{
Jan Schmidt097b8a72012-06-21 11:08:04 +0200373 tree_mod_log_write_lock(fs_info);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200374 spin_lock(&fs_info->tree_mod_seq_lock);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200375 if (!elem->seq) {
Josef Bacikfcebe452014-05-13 17:30:47 -0700376 elem->seq = btrfs_inc_tree_mod_seq(fs_info);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200377 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
378 }
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200379 spin_unlock(&fs_info->tree_mod_seq_lock);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200380 tree_mod_log_write_unlock(fs_info);
381
Josef Bacikfcebe452014-05-13 17:30:47 -0700382 return elem->seq;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200383}
384
385void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
386 struct seq_list *elem)
387{
388 struct rb_root *tm_root;
389 struct rb_node *node;
390 struct rb_node *next;
391 struct seq_list *cur_elem;
392 struct tree_mod_elem *tm;
393 u64 min_seq = (u64)-1;
394 u64 seq_putting = elem->seq;
395
396 if (!seq_putting)
397 return;
398
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200399 spin_lock(&fs_info->tree_mod_seq_lock);
400 list_del(&elem->list);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200401 elem->seq = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200402
403 list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
Jan Schmidt097b8a72012-06-21 11:08:04 +0200404 if (cur_elem->seq < min_seq) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200405 if (seq_putting > cur_elem->seq) {
406 /*
407 * blocker with lower sequence number exists, we
408 * cannot remove anything from the log
409 */
Jan Schmidt097b8a72012-06-21 11:08:04 +0200410 spin_unlock(&fs_info->tree_mod_seq_lock);
411 return;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200412 }
413 min_seq = cur_elem->seq;
414 }
415 }
Jan Schmidt097b8a72012-06-21 11:08:04 +0200416 spin_unlock(&fs_info->tree_mod_seq_lock);
417
418 /*
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200419 * anything that's lower than the lowest existing (read: blocked)
420 * sequence number can be removed from the tree.
421 */
Jan Schmidt097b8a72012-06-21 11:08:04 +0200422 tree_mod_log_write_lock(fs_info);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200423 tm_root = &fs_info->tree_mod_log;
424 for (node = rb_first(tm_root); node; node = next) {
425 next = rb_next(node);
426 tm = container_of(node, struct tree_mod_elem, node);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200427 if (tm->seq > min_seq)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200428 continue;
429 rb_erase(node, tm_root);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200430 kfree(tm);
431 }
Jan Schmidt097b8a72012-06-21 11:08:04 +0200432 tree_mod_log_write_unlock(fs_info);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200433}
434
435/*
436 * key order of the log:
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530437 * node/leaf start address -> sequence
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200438 *
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530439 * The 'start address' is the logical address of the *new* root node
440 * for root replace operations, or the logical address of the affected
441 * block for all other operations.
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000442 *
443 * Note: must be called with write lock (tree_mod_log_write_lock).
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200444 */
445static noinline int
446__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
447{
448 struct rb_root *tm_root;
449 struct rb_node **new;
450 struct rb_node *parent = NULL;
451 struct tree_mod_elem *cur;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200452
Josef Bacikc8cc6342013-07-01 16:18:19 -0400453 BUG_ON(!tm);
454
Josef Bacikfcebe452014-05-13 17:30:47 -0700455 tm->seq = btrfs_inc_tree_mod_seq(fs_info);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200456
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200457 tm_root = &fs_info->tree_mod_log;
458 new = &tm_root->rb_node;
459 while (*new) {
460 cur = container_of(*new, struct tree_mod_elem, node);
461 parent = *new;
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530462 if (cur->logical < tm->logical)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200463 new = &((*new)->rb_left);
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530464 else if (cur->logical > tm->logical)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200465 new = &((*new)->rb_right);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200466 else if (cur->seq < tm->seq)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200467 new = &((*new)->rb_left);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200468 else if (cur->seq > tm->seq)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200469 new = &((*new)->rb_right);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000470 else
471 return -EEXIST;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200472 }
473
474 rb_link_node(&tm->node, parent, new);
475 rb_insert_color(&tm->node, tm_root);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000476 return 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200477}
478
Jan Schmidt097b8a72012-06-21 11:08:04 +0200479/*
480 * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
481 * returns zero with the tree_mod_log_lock acquired. The caller must hold
482 * this until all tree mod log insertions are recorded in the rb tree and then
483 * call tree_mod_log_write_unlock() to release.
484 */
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200485static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
486 struct extent_buffer *eb) {
487 smp_mb();
488 if (list_empty(&(fs_info)->tree_mod_seq_list))
489 return 1;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200490 if (eb && btrfs_header_level(eb) == 0)
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200491 return 1;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000492
493 tree_mod_log_write_lock(fs_info);
494 if (list_empty(&(fs_info)->tree_mod_seq_list)) {
495 tree_mod_log_write_unlock(fs_info);
496 return 1;
497 }
498
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200499 return 0;
500}
501
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000502/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
503static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
504 struct extent_buffer *eb)
505{
506 smp_mb();
507 if (list_empty(&(fs_info)->tree_mod_seq_list))
508 return 0;
509 if (eb && btrfs_header_level(eb) == 0)
510 return 0;
511
512 return 1;
513}
514
515static struct tree_mod_elem *
516alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
517 enum mod_log_op op, gfp_t flags)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200518{
Jan Schmidt097b8a72012-06-21 11:08:04 +0200519 struct tree_mod_elem *tm;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200520
Josef Bacikc8cc6342013-07-01 16:18:19 -0400521 tm = kzalloc(sizeof(*tm), flags);
522 if (!tm)
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000523 return NULL;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200524
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530525 tm->logical = eb->start;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200526 if (op != MOD_LOG_KEY_ADD) {
527 btrfs_node_key(eb, &tm->key, slot);
528 tm->blockptr = btrfs_node_blockptr(eb, slot);
529 }
530 tm->op = op;
531 tm->slot = slot;
532 tm->generation = btrfs_node_ptr_generation(eb, slot);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000533 RB_CLEAR_NODE(&tm->node);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200534
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000535 return tm;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200536}
537
538static noinline int
Josef Bacikc8cc6342013-07-01 16:18:19 -0400539tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
540 struct extent_buffer *eb, int slot,
541 enum mod_log_op op, gfp_t flags)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200542{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000543 struct tree_mod_elem *tm;
544 int ret;
545
546 if (!tree_mod_need_log(fs_info, eb))
Jan Schmidt097b8a72012-06-21 11:08:04 +0200547 return 0;
548
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000549 tm = alloc_tree_mod_elem(eb, slot, op, flags);
550 if (!tm)
551 return -ENOMEM;
552
553 if (tree_mod_dont_log(fs_info, eb)) {
554 kfree(tm);
555 return 0;
556 }
557
558 ret = __tree_mod_log_insert(fs_info, tm);
559 tree_mod_log_write_unlock(fs_info);
560 if (ret)
561 kfree(tm);
562
563 return ret;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200564}
565
566static noinline int
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200567tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
568 struct extent_buffer *eb, int dst_slot, int src_slot,
569 int nr_items, gfp_t flags)
570{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000571 struct tree_mod_elem *tm = NULL;
572 struct tree_mod_elem **tm_list = NULL;
573 int ret = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200574 int i;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000575 int locked = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200576
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000577 if (!tree_mod_need_log(fs_info, eb))
Jan Schmidtf3956942012-05-31 15:02:32 +0200578 return 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200579
David Sterba31e818f2015-02-20 18:00:26 +0100580 tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000581 if (!tm_list)
582 return -ENOMEM;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200583
Josef Bacikc8cc6342013-07-01 16:18:19 -0400584 tm = kzalloc(sizeof(*tm), flags);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000585 if (!tm) {
586 ret = -ENOMEM;
587 goto free_tms;
588 }
Jan Schmidtf3956942012-05-31 15:02:32 +0200589
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530590 tm->logical = eb->start;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200591 tm->slot = src_slot;
592 tm->move.dst_slot = dst_slot;
593 tm->move.nr_items = nr_items;
594 tm->op = MOD_LOG_MOVE_KEYS;
595
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000596 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
597 tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
598 MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
599 if (!tm_list[i]) {
600 ret = -ENOMEM;
601 goto free_tms;
602 }
603 }
604
605 if (tree_mod_dont_log(fs_info, eb))
606 goto free_tms;
607 locked = 1;
608
609 /*
610 * When we override something during the move, we log these removals.
611 * This can only happen when we move towards the beginning of the
612 * buffer, i.e. dst_slot < src_slot.
613 */
614 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
615 ret = __tree_mod_log_insert(fs_info, tm_list[i]);
616 if (ret)
617 goto free_tms;
618 }
619
620 ret = __tree_mod_log_insert(fs_info, tm);
621 if (ret)
622 goto free_tms;
623 tree_mod_log_write_unlock(fs_info);
624 kfree(tm_list);
625
626 return 0;
627free_tms:
628 for (i = 0; i < nr_items; i++) {
629 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
630 rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
631 kfree(tm_list[i]);
632 }
633 if (locked)
634 tree_mod_log_write_unlock(fs_info);
635 kfree(tm_list);
636 kfree(tm);
637
638 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200639}
640
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000641static inline int
642__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
643 struct tree_mod_elem **tm_list,
644 int nritems)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200645{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000646 int i, j;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200647 int ret;
648
Jan Schmidt097b8a72012-06-21 11:08:04 +0200649 for (i = nritems - 1; i >= 0; i--) {
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000650 ret = __tree_mod_log_insert(fs_info, tm_list[i]);
651 if (ret) {
652 for (j = nritems - 1; j > i; j--)
653 rb_erase(&tm_list[j]->node,
654 &fs_info->tree_mod_log);
655 return ret;
656 }
Jan Schmidt097b8a72012-06-21 11:08:04 +0200657 }
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000658
659 return 0;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200660}
661
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200662static noinline int
663tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
664 struct extent_buffer *old_root,
Jan Schmidt90f8d622013-04-13 13:19:53 +0000665 struct extent_buffer *new_root, gfp_t flags,
666 int log_removal)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200667{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000668 struct tree_mod_elem *tm = NULL;
669 struct tree_mod_elem **tm_list = NULL;
670 int nritems = 0;
671 int ret = 0;
672 int i;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200673
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000674 if (!tree_mod_need_log(fs_info, NULL))
Jan Schmidt097b8a72012-06-21 11:08:04 +0200675 return 0;
676
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000677 if (log_removal && btrfs_header_level(old_root) > 0) {
678 nritems = btrfs_header_nritems(old_root);
David Sterba31e818f2015-02-20 18:00:26 +0100679 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000680 flags);
681 if (!tm_list) {
682 ret = -ENOMEM;
683 goto free_tms;
684 }
685 for (i = 0; i < nritems; i++) {
686 tm_list[i] = alloc_tree_mod_elem(old_root, i,
687 MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
688 if (!tm_list[i]) {
689 ret = -ENOMEM;
690 goto free_tms;
691 }
692 }
693 }
Jan Schmidtd9abbf12013-03-20 13:49:48 +0000694
Josef Bacikc8cc6342013-07-01 16:18:19 -0400695 tm = kzalloc(sizeof(*tm), flags);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000696 if (!tm) {
697 ret = -ENOMEM;
698 goto free_tms;
699 }
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200700
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530701 tm->logical = new_root->start;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200702 tm->old_root.logical = old_root->start;
703 tm->old_root.level = btrfs_header_level(old_root);
704 tm->generation = btrfs_header_generation(old_root);
705 tm->op = MOD_LOG_ROOT_REPLACE;
706
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000707 if (tree_mod_dont_log(fs_info, NULL))
708 goto free_tms;
709
710 if (tm_list)
711 ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
712 if (!ret)
713 ret = __tree_mod_log_insert(fs_info, tm);
714
715 tree_mod_log_write_unlock(fs_info);
716 if (ret)
717 goto free_tms;
718 kfree(tm_list);
719
720 return ret;
721
722free_tms:
723 if (tm_list) {
724 for (i = 0; i < nritems; i++)
725 kfree(tm_list[i]);
726 kfree(tm_list);
727 }
728 kfree(tm);
729
730 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200731}
732
733static struct tree_mod_elem *
734__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
735 int smallest)
736{
737 struct rb_root *tm_root;
738 struct rb_node *node;
739 struct tree_mod_elem *cur = NULL;
740 struct tree_mod_elem *found = NULL;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200741
Jan Schmidt097b8a72012-06-21 11:08:04 +0200742 tree_mod_log_read_lock(fs_info);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200743 tm_root = &fs_info->tree_mod_log;
744 node = tm_root->rb_node;
745 while (node) {
746 cur = container_of(node, struct tree_mod_elem, node);
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530747 if (cur->logical < start) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200748 node = node->rb_left;
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530749 } else if (cur->logical > start) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200750 node = node->rb_right;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200751 } else if (cur->seq < min_seq) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200752 node = node->rb_left;
753 } else if (!smallest) {
754 /* we want the node with the highest seq */
755 if (found)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200756 BUG_ON(found->seq > cur->seq);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200757 found = cur;
758 node = node->rb_left;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200759 } else if (cur->seq > min_seq) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200760 /* we want the node with the smallest seq */
761 if (found)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200762 BUG_ON(found->seq < cur->seq);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200763 found = cur;
764 node = node->rb_right;
765 } else {
766 found = cur;
767 break;
768 }
769 }
Jan Schmidt097b8a72012-06-21 11:08:04 +0200770 tree_mod_log_read_unlock(fs_info);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200771
772 return found;
773}
774
775/*
776 * this returns the element from the log with the smallest time sequence
777 * value that's in the log (the oldest log item). any element with a time
778 * sequence lower than min_seq will be ignored.
779 */
780static struct tree_mod_elem *
781tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
782 u64 min_seq)
783{
784 return __tree_mod_log_search(fs_info, start, min_seq, 1);
785}
786
787/*
788 * this returns the element from the log with the largest time sequence
789 * value that's in the log (the most recent log item). any element with
790 * a time sequence lower than min_seq will be ignored.
791 */
792static struct tree_mod_elem *
793tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
794{
795 return __tree_mod_log_search(fs_info, start, min_seq, 0);
796}
797
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000798static noinline int
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200799tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
800 struct extent_buffer *src, unsigned long dst_offset,
Jan Schmidt90f8d622013-04-13 13:19:53 +0000801 unsigned long src_offset, int nr_items)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200802{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000803 int ret = 0;
804 struct tree_mod_elem **tm_list = NULL;
805 struct tree_mod_elem **tm_list_add, **tm_list_rem;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200806 int i;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000807 int locked = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200808
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000809 if (!tree_mod_need_log(fs_info, NULL))
810 return 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200811
Josef Bacikc8cc6342013-07-01 16:18:19 -0400812 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000813 return 0;
814
David Sterba31e818f2015-02-20 18:00:26 +0100815 tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000816 GFP_NOFS);
817 if (!tm_list)
818 return -ENOMEM;
819
820 tm_list_add = tm_list;
821 tm_list_rem = tm_list + nr_items;
822 for (i = 0; i < nr_items; i++) {
823 tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
824 MOD_LOG_KEY_REMOVE, GFP_NOFS);
825 if (!tm_list_rem[i]) {
826 ret = -ENOMEM;
827 goto free_tms;
828 }
829
830 tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
831 MOD_LOG_KEY_ADD, GFP_NOFS);
832 if (!tm_list_add[i]) {
833 ret = -ENOMEM;
834 goto free_tms;
835 }
836 }
837
838 if (tree_mod_dont_log(fs_info, NULL))
839 goto free_tms;
840 locked = 1;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200841
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200842 for (i = 0; i < nr_items; i++) {
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000843 ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
844 if (ret)
845 goto free_tms;
846 ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
847 if (ret)
848 goto free_tms;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200849 }
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000850
851 tree_mod_log_write_unlock(fs_info);
852 kfree(tm_list);
853
854 return 0;
855
856free_tms:
857 for (i = 0; i < nr_items * 2; i++) {
858 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
859 rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
860 kfree(tm_list[i]);
861 }
862 if (locked)
863 tree_mod_log_write_unlock(fs_info);
864 kfree(tm_list);
865
866 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200867}
868
869static inline void
870tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
871 int dst_offset, int src_offset, int nr_items)
872{
873 int ret;
874 ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
875 nr_items, GFP_NOFS);
876 BUG_ON(ret < 0);
877}
878
Jan Schmidt097b8a72012-06-21 11:08:04 +0200879static noinline void
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200880tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
Liu Bo32adf092012-10-19 12:52:15 +0000881 struct extent_buffer *eb, int slot, int atomic)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200882{
883 int ret;
884
Filipe David Borba Manana78357762013-12-12 19:19:52 +0000885 ret = tree_mod_log_insert_key(fs_info, eb, slot,
Josef Bacikc8cc6342013-07-01 16:18:19 -0400886 MOD_LOG_KEY_REPLACE,
887 atomic ? GFP_ATOMIC : GFP_NOFS);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200888 BUG_ON(ret < 0);
889}
890
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000891static noinline int
Jan Schmidt097b8a72012-06-21 11:08:04 +0200892tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200893{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000894 struct tree_mod_elem **tm_list = NULL;
895 int nritems = 0;
896 int i;
897 int ret = 0;
898
899 if (btrfs_header_level(eb) == 0)
900 return 0;
901
902 if (!tree_mod_need_log(fs_info, NULL))
903 return 0;
904
905 nritems = btrfs_header_nritems(eb);
David Sterba31e818f2015-02-20 18:00:26 +0100906 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000907 if (!tm_list)
908 return -ENOMEM;
909
910 for (i = 0; i < nritems; i++) {
911 tm_list[i] = alloc_tree_mod_elem(eb, i,
912 MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
913 if (!tm_list[i]) {
914 ret = -ENOMEM;
915 goto free_tms;
916 }
917 }
918
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200919 if (tree_mod_dont_log(fs_info, eb))
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000920 goto free_tms;
921
922 ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
923 tree_mod_log_write_unlock(fs_info);
924 if (ret)
925 goto free_tms;
926 kfree(tm_list);
927
928 return 0;
929
930free_tms:
931 for (i = 0; i < nritems; i++)
932 kfree(tm_list[i]);
933 kfree(tm_list);
934
935 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200936}
937
Jan Schmidt097b8a72012-06-21 11:08:04 +0200938static noinline void
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200939tree_mod_log_set_root_pointer(struct btrfs_root *root,
Jan Schmidt90f8d622013-04-13 13:19:53 +0000940 struct extent_buffer *new_root_node,
941 int log_removal)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200942{
943 int ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200944 ret = tree_mod_log_insert_root(root->fs_info, root->node,
Jan Schmidt90f8d622013-04-13 13:19:53 +0000945 new_root_node, GFP_NOFS, log_removal);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200946 BUG_ON(ret < 0);
947}
948
Chris Masond352ac62008-09-29 15:18:18 -0400949/*
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400950 * check if the tree block can be shared by multiple trees
951 */
952int btrfs_block_can_be_shared(struct btrfs_root *root,
953 struct extent_buffer *buf)
954{
955 /*
Nicholas D Steeves01327612016-05-19 21:18:45 -0400956 * Tree blocks not in reference counted trees and tree roots
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400957 * are never shared. If a block was allocated after the last
958 * snapshot and the block was not allocated by tree relocation,
959 * we know the block is not shared.
960 */
Miao Xie27cdeb72014-04-02 19:51:05 +0800961 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400962 buf != root->node && buf != root->commit_root &&
963 (btrfs_header_generation(buf) <=
964 btrfs_root_last_snapshot(&root->root_item) ||
965 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
966 return 1;
967#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
Miao Xie27cdeb72014-04-02 19:51:05 +0800968 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400969 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
970 return 1;
971#endif
972 return 0;
973}
974
975static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
976 struct btrfs_root *root,
977 struct extent_buffer *buf,
Yan, Zhengf0486c62010-05-16 10:46:25 -0400978 struct extent_buffer *cow,
979 int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400980{
981 u64 refs;
982 u64 owner;
983 u64 flags;
984 u64 new_flags = 0;
985 int ret;
986
987 /*
988 * Backrefs update rules:
989 *
990 * Always use full backrefs for extent pointers in tree block
991 * allocated by tree relocation.
992 *
993 * If a shared tree block is no longer referenced by its owner
994 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
995 * use full backrefs for extent pointers in tree block.
996 *
997 * If a tree block is been relocating
998 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
999 * use full backrefs for extent pointers in tree block.
1000 * The reason for this is some operations (such as drop tree)
1001 * are only allowed for blocks use full backrefs.
1002 */
1003
1004 if (btrfs_block_can_be_shared(root, buf)) {
1005 ret = btrfs_lookup_extent_info(trans, root, buf->start,
Josef Bacik3173a182013-03-07 14:22:04 -05001006 btrfs_header_level(buf), 1,
1007 &refs, &flags);
Mark Fashehbe1a5562011-08-08 13:20:18 -07001008 if (ret)
1009 return ret;
Mark Fashehe5df9572011-08-29 14:17:04 -07001010 if (refs == 0) {
1011 ret = -EROFS;
Anand Jain34d97002016-03-16 16:43:06 +08001012 btrfs_handle_fs_error(root->fs_info, ret, NULL);
Mark Fashehe5df9572011-08-29 14:17:04 -07001013 return ret;
1014 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001015 } else {
1016 refs = 1;
1017 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
1018 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
1019 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
1020 else
1021 flags = 0;
1022 }
1023
1024 owner = btrfs_header_owner(buf);
1025 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
1026 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
1027
1028 if (refs > 1) {
1029 if ((owner == root->root_key.objectid ||
1030 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
1031 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
Josef Bacike339a6b2014-07-02 10:54:25 -07001032 ret = btrfs_inc_ref(trans, root, buf, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001033 BUG_ON(ret); /* -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001034
1035 if (root->root_key.objectid ==
1036 BTRFS_TREE_RELOC_OBJECTID) {
Josef Bacike339a6b2014-07-02 10:54:25 -07001037 ret = btrfs_dec_ref(trans, root, buf, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001038 BUG_ON(ret); /* -ENOMEM */
Josef Bacike339a6b2014-07-02 10:54:25 -07001039 ret = btrfs_inc_ref(trans, root, cow, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001040 BUG_ON(ret); /* -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001041 }
1042 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1043 } else {
1044
1045 if (root->root_key.objectid ==
1046 BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -07001047 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001048 else
Josef Bacike339a6b2014-07-02 10:54:25 -07001049 ret = btrfs_inc_ref(trans, root, cow, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001050 BUG_ON(ret); /* -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001051 }
1052 if (new_flags != 0) {
Josef Bacikb1c79e02013-05-09 13:49:30 -04001053 int level = btrfs_header_level(buf);
1054
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001055 ret = btrfs_set_disk_extent_flags(trans, root,
1056 buf->start,
1057 buf->len,
Josef Bacikb1c79e02013-05-09 13:49:30 -04001058 new_flags, level, 0);
Mark Fashehbe1a5562011-08-08 13:20:18 -07001059 if (ret)
1060 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001061 }
1062 } else {
1063 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
1064 if (root->root_key.objectid ==
1065 BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -07001066 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001067 else
Josef Bacike339a6b2014-07-02 10:54:25 -07001068 ret = btrfs_inc_ref(trans, root, cow, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001069 BUG_ON(ret); /* -ENOMEM */
Josef Bacike339a6b2014-07-02 10:54:25 -07001070 ret = btrfs_dec_ref(trans, root, buf, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001071 BUG_ON(ret); /* -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001072 }
Daniel Dressler01d58472014-11-21 17:15:07 +09001073 clean_tree_block(trans, root->fs_info, buf);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001074 *last_ref = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001075 }
1076 return 0;
1077}
1078
1079/*
Chris Masond3977122009-01-05 21:25:51 -05001080 * does the dirty work in cow of a single block. The parent block (if
1081 * supplied) is updated to point to the new cow copy. The new buffer is marked
1082 * dirty and returned locked. If you modify the block it needs to be marked
1083 * dirty again.
Chris Masond352ac62008-09-29 15:18:18 -04001084 *
1085 * search_start -- an allocation hint for the new block
1086 *
Chris Masond3977122009-01-05 21:25:51 -05001087 * empty_size -- a hint that you plan on doing more cow. This is the size in
1088 * bytes the allocator should try to find free next to the block it returns.
1089 * This is just a hint and may be ignored by the allocator.
Chris Masond352ac62008-09-29 15:18:18 -04001090 */
Chris Masond3977122009-01-05 21:25:51 -05001091static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001092 struct btrfs_root *root,
1093 struct extent_buffer *buf,
1094 struct extent_buffer *parent, int parent_slot,
1095 struct extent_buffer **cow_ret,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001096 u64 search_start, u64 empty_size)
Chris Mason6702ed42007-08-07 16:15:09 -04001097{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001098 struct btrfs_disk_key disk_key;
Chris Mason5f39d392007-10-15 16:14:19 -04001099 struct extent_buffer *cow;
Mark Fashehbe1a5562011-08-08 13:20:18 -07001100 int level, ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -04001101 int last_ref = 0;
Chris Mason925baed2008-06-25 16:01:30 -04001102 int unlock_orig = 0;
Goldwyn Rodrigues0f5053e2016-09-22 14:11:34 -05001103 u64 parent_start = 0;
Chris Mason6702ed42007-08-07 16:15:09 -04001104
Chris Mason925baed2008-06-25 16:01:30 -04001105 if (*cow_ret == buf)
1106 unlock_orig = 1;
1107
Chris Masonb9447ef2009-03-09 11:45:38 -04001108 btrfs_assert_tree_locked(buf);
Chris Mason925baed2008-06-25 16:01:30 -04001109
Miao Xie27cdeb72014-04-02 19:51:05 +08001110 WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
1111 trans->transid != root->fs_info->running_transaction->transid);
1112 WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
1113 trans->transid != root->last_trans);
Chris Mason5f39d392007-10-15 16:14:19 -04001114
Chris Mason7bb86312007-12-11 09:25:06 -05001115 level = btrfs_header_level(buf);
Zheng Yan31840ae2008-09-23 13:14:14 -04001116
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001117 if (level == 0)
1118 btrfs_item_key(buf, &disk_key, 0);
1119 else
1120 btrfs_node_key(buf, &disk_key, 0);
1121
Goldwyn Rodrigues0f5053e2016-09-22 14:11:34 -05001122 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
1123 parent_start = parent->start;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001124
David Sterba4d75f8a2014-06-15 01:54:12 +02001125 cow = btrfs_alloc_tree_block(trans, root, parent_start,
1126 root->root_key.objectid, &disk_key, level,
1127 search_start, empty_size);
Chris Mason6702ed42007-08-07 16:15:09 -04001128 if (IS_ERR(cow))
1129 return PTR_ERR(cow);
1130
Chris Masonb4ce94d2009-02-04 09:25:08 -05001131 /* cow is set to blocking by btrfs_init_new_buffer */
1132
Chris Mason5f39d392007-10-15 16:14:19 -04001133 copy_extent_buffer(cow, buf, 0, 0, cow->len);
Chris Masondb945352007-10-15 16:15:53 -04001134 btrfs_set_header_bytenr(cow, cow->start);
Chris Mason5f39d392007-10-15 16:14:19 -04001135 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001136 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
1137 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
1138 BTRFS_HEADER_FLAG_RELOC);
1139 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1140 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
1141 else
1142 btrfs_set_header_owner(cow, root->root_key.objectid);
Chris Mason6702ed42007-08-07 16:15:09 -04001143
Ross Kirk0a4e5582013-09-24 10:12:38 +01001144 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
Yan Zheng2b820322008-11-17 21:11:30 -05001145 BTRFS_FSID_SIZE);
1146
Mark Fashehbe1a5562011-08-08 13:20:18 -07001147 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
Mark Fashehb68dc2a2011-08-29 14:30:39 -07001148 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04001149 btrfs_abort_transaction(trans, ret);
Mark Fashehb68dc2a2011-08-29 14:30:39 -07001150 return ret;
1151 }
Zheng Yan1a40e232008-09-26 10:09:34 -04001152
Miao Xie27cdeb72014-04-02 19:51:05 +08001153 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
Josef Bacik83d4cfd2013-08-30 15:09:51 -04001154 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
Zhaolei93314e32015-08-06 21:56:58 +08001155 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04001156 btrfs_abort_transaction(trans, ret);
Josef Bacik83d4cfd2013-08-30 15:09:51 -04001157 return ret;
Zhaolei93314e32015-08-06 21:56:58 +08001158 }
Josef Bacik83d4cfd2013-08-30 15:09:51 -04001159 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001160
Chris Mason6702ed42007-08-07 16:15:09 -04001161 if (buf == root->node) {
Chris Mason925baed2008-06-25 16:01:30 -04001162 WARN_ON(parent && parent != buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001163 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
1164 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
1165 parent_start = buf->start;
Chris Mason925baed2008-06-25 16:01:30 -04001166
Chris Mason5f39d392007-10-15 16:14:19 -04001167 extent_buffer_get(cow);
Jan Schmidt90f8d622013-04-13 13:19:53 +00001168 tree_mod_log_set_root_pointer(root, cow, 1);
Chris Mason240f62c2011-03-23 14:54:42 -04001169 rcu_assign_pointer(root->node, cow);
Chris Mason925baed2008-06-25 16:01:30 -04001170
Yan, Zhengf0486c62010-05-16 10:46:25 -04001171 btrfs_free_tree_block(trans, root, buf, parent_start,
Jan Schmidt5581a512012-05-16 17:04:52 +02001172 last_ref);
Chris Mason5f39d392007-10-15 16:14:19 -04001173 free_extent_buffer(buf);
Chris Mason0b86a832008-03-24 15:01:56 -04001174 add_root_to_dirty_list(root);
Chris Mason6702ed42007-08-07 16:15:09 -04001175 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001176 WARN_ON(trans->transid != btrfs_header_generation(parent));
Jan Schmidtf2304752012-05-26 11:43:17 +02001177 tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
Josef Bacikc8cc6342013-07-01 16:18:19 -04001178 MOD_LOG_KEY_REPLACE, GFP_NOFS);
Chris Mason5f39d392007-10-15 16:14:19 -04001179 btrfs_set_node_blockptr(parent, parent_slot,
Chris Masondb945352007-10-15 16:15:53 -04001180 cow->start);
Chris Mason74493f72007-12-11 09:25:06 -05001181 btrfs_set_node_ptr_generation(parent, parent_slot,
1182 trans->transid);
Chris Mason6702ed42007-08-07 16:15:09 -04001183 btrfs_mark_buffer_dirty(parent);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00001184 if (last_ref) {
1185 ret = tree_mod_log_free_eb(root->fs_info, buf);
1186 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04001187 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00001188 return ret;
1189 }
1190 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04001191 btrfs_free_tree_block(trans, root, buf, parent_start,
Jan Schmidt5581a512012-05-16 17:04:52 +02001192 last_ref);
Chris Mason6702ed42007-08-07 16:15:09 -04001193 }
Chris Mason925baed2008-06-25 16:01:30 -04001194 if (unlock_orig)
1195 btrfs_tree_unlock(buf);
Josef Bacik3083ee22012-03-09 16:01:49 -05001196 free_extent_buffer_stale(buf);
Chris Mason6702ed42007-08-07 16:15:09 -04001197 btrfs_mark_buffer_dirty(cow);
1198 *cow_ret = cow;
1199 return 0;
1200}
1201
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001202/*
1203 * returns the logical address of the oldest predecessor of the given root.
1204 * entries older than time_seq are ignored.
1205 */
1206static struct tree_mod_elem *
1207__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
Jan Schmidt30b04632013-04-13 13:19:54 +00001208 struct extent_buffer *eb_root, u64 time_seq)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001209{
1210 struct tree_mod_elem *tm;
1211 struct tree_mod_elem *found = NULL;
Jan Schmidt30b04632013-04-13 13:19:54 +00001212 u64 root_logical = eb_root->start;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001213 int looped = 0;
1214
1215 if (!time_seq)
Stefan Behrens35a36212013-08-14 18:12:25 +02001216 return NULL;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001217
1218 /*
Chandan Rajendra298cfd32016-01-21 15:55:59 +05301219 * the very last operation that's logged for a root is the
1220 * replacement operation (if it is replaced at all). this has
1221 * the logical address of the *new* root, making it the very
1222 * first operation that's logged for this root.
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001223 */
1224 while (1) {
1225 tm = tree_mod_log_search_oldest(fs_info, root_logical,
1226 time_seq);
1227 if (!looped && !tm)
Stefan Behrens35a36212013-08-14 18:12:25 +02001228 return NULL;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001229 /*
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001230 * if there are no tree operation for the oldest root, we simply
1231 * return it. this should only happen if that (old) root is at
1232 * level 0.
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001233 */
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001234 if (!tm)
1235 break;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001236
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001237 /*
1238 * if there's an operation that's not a root replacement, we
1239 * found the oldest version of our root. normally, we'll find a
1240 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
1241 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001242 if (tm->op != MOD_LOG_ROOT_REPLACE)
1243 break;
1244
1245 found = tm;
1246 root_logical = tm->old_root.logical;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001247 looped = 1;
1248 }
1249
Jan Schmidta95236d2012-06-05 16:41:24 +02001250 /* if there's no old root to return, return what we found instead */
1251 if (!found)
1252 found = tm;
1253
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001254 return found;
1255}
1256
1257/*
1258 * tm is a pointer to the first operation to rewind within eb. then, all
Nicholas D Steeves01327612016-05-19 21:18:45 -04001259 * previous operations will be rewound (until we reach something older than
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001260 * time_seq).
1261 */
1262static void
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001263__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1264 u64 time_seq, struct tree_mod_elem *first_tm)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001265{
1266 u32 n;
1267 struct rb_node *next;
1268 struct tree_mod_elem *tm = first_tm;
1269 unsigned long o_dst;
1270 unsigned long o_src;
1271 unsigned long p_size = sizeof(struct btrfs_key_ptr);
1272
1273 n = btrfs_header_nritems(eb);
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001274 tree_mod_log_read_lock(fs_info);
Jan Schmidt097b8a72012-06-21 11:08:04 +02001275 while (tm && tm->seq >= time_seq) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001276 /*
1277 * all the operations are recorded with the operator used for
1278 * the modification. as we're going backwards, we do the
1279 * opposite of each operation here.
1280 */
1281 switch (tm->op) {
1282 case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
1283 BUG_ON(tm->slot < n);
Eric Sandeen1c697d42013-01-31 00:54:56 +00001284 /* Fallthrough */
Liu Bo95c80bb2012-10-19 09:50:52 +00001285 case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
Chris Mason4c3e6962012-12-18 15:43:18 -05001286 case MOD_LOG_KEY_REMOVE:
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001287 btrfs_set_node_key(eb, &tm->key, tm->slot);
1288 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1289 btrfs_set_node_ptr_generation(eb, tm->slot,
1290 tm->generation);
Chris Mason4c3e6962012-12-18 15:43:18 -05001291 n++;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001292 break;
1293 case MOD_LOG_KEY_REPLACE:
1294 BUG_ON(tm->slot >= n);
1295 btrfs_set_node_key(eb, &tm->key, tm->slot);
1296 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1297 btrfs_set_node_ptr_generation(eb, tm->slot,
1298 tm->generation);
1299 break;
1300 case MOD_LOG_KEY_ADD:
Jan Schmidt19956c72012-06-22 14:52:13 +02001301 /* if a move operation is needed it's in the log */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001302 n--;
1303 break;
1304 case MOD_LOG_MOVE_KEYS:
Jan Schmidtc3193102012-05-31 19:24:36 +02001305 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1306 o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
1307 memmove_extent_buffer(eb, o_dst, o_src,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001308 tm->move.nr_items * p_size);
1309 break;
1310 case MOD_LOG_ROOT_REPLACE:
1311 /*
1312 * this operation is special. for roots, this must be
1313 * handled explicitly before rewinding.
1314 * for non-roots, this operation may exist if the node
1315 * was a root: root A -> child B; then A gets empty and
1316 * B is promoted to the new root. in the mod log, we'll
1317 * have a root-replace operation for B, a tree block
1318 * that is no root. we simply ignore that operation.
1319 */
1320 break;
1321 }
1322 next = rb_next(&tm->node);
1323 if (!next)
1324 break;
1325 tm = container_of(next, struct tree_mod_elem, node);
Chandan Rajendra298cfd32016-01-21 15:55:59 +05301326 if (tm->logical != first_tm->logical)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001327 break;
1328 }
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001329 tree_mod_log_read_unlock(fs_info);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001330 btrfs_set_header_nritems(eb, n);
1331}
1332
Jan Schmidt47fb0912013-04-13 13:19:55 +00001333/*
Nicholas D Steeves01327612016-05-19 21:18:45 -04001334 * Called with eb read locked. If the buffer cannot be rewound, the same buffer
Jan Schmidt47fb0912013-04-13 13:19:55 +00001335 * is returned. If rewind operations happen, a fresh buffer is returned. The
1336 * returned buffer is always read-locked. If the returned buffer is not the
1337 * input buffer, the lock on the input buffer is released and the input buffer
1338 * is freed (its refcount is decremented).
1339 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001340static struct extent_buffer *
Josef Bacik9ec72672013-08-07 16:57:23 -04001341tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1342 struct extent_buffer *eb, u64 time_seq)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001343{
1344 struct extent_buffer *eb_rewin;
1345 struct tree_mod_elem *tm;
1346
1347 if (!time_seq)
1348 return eb;
1349
1350 if (btrfs_header_level(eb) == 0)
1351 return eb;
1352
1353 tm = tree_mod_log_search(fs_info, eb->start, time_seq);
1354 if (!tm)
1355 return eb;
1356
Josef Bacik9ec72672013-08-07 16:57:23 -04001357 btrfs_set_path_blocking(path);
1358 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1359
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001360 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1361 BUG_ON(tm->slot != 0);
Feifei Xub9ef22d2016-06-01 19:18:25 +08001362 eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start,
1363 eb->len);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001364 if (!eb_rewin) {
Josef Bacik9ec72672013-08-07 16:57:23 -04001365 btrfs_tree_read_unlock_blocking(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001366 free_extent_buffer(eb);
1367 return NULL;
1368 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001369 btrfs_set_header_bytenr(eb_rewin, eb->start);
1370 btrfs_set_header_backref_rev(eb_rewin,
1371 btrfs_header_backref_rev(eb));
1372 btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
Jan Schmidtc3193102012-05-31 19:24:36 +02001373 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001374 } else {
1375 eb_rewin = btrfs_clone_extent_buffer(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001376 if (!eb_rewin) {
Josef Bacik9ec72672013-08-07 16:57:23 -04001377 btrfs_tree_read_unlock_blocking(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001378 free_extent_buffer(eb);
1379 return NULL;
1380 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001381 }
1382
Josef Bacik9ec72672013-08-07 16:57:23 -04001383 btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
1384 btrfs_tree_read_unlock_blocking(eb);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001385 free_extent_buffer(eb);
1386
Jan Schmidt47fb0912013-04-13 13:19:55 +00001387 extent_buffer_get(eb_rewin);
1388 btrfs_tree_read_lock(eb_rewin);
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001389 __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
Jan Schmidt57911b82012-10-19 09:22:03 +02001390 WARN_ON(btrfs_header_nritems(eb_rewin) >
Arne Jansen2a745b12013-02-13 04:20:01 -07001391 BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001392
1393 return eb_rewin;
1394}
1395
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001396/*
1397 * get_old_root() rewinds the state of @root's root node to the given @time_seq
1398 * value. If there are no changes, the current root->root_node is returned. If
1399 * anything changed in between, there's a fresh buffer allocated on which the
1400 * rewind operations are done. In any case, the returned buffer is read locked.
1401 * Returns NULL on error (with no locks held).
1402 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001403static inline struct extent_buffer *
1404get_old_root(struct btrfs_root *root, u64 time_seq)
1405{
1406 struct tree_mod_elem *tm;
Jan Schmidt30b04632013-04-13 13:19:54 +00001407 struct extent_buffer *eb = NULL;
1408 struct extent_buffer *eb_root;
Liu Bo7bfdcf72012-10-25 07:30:19 -06001409 struct extent_buffer *old;
Jan Schmidta95236d2012-06-05 16:41:24 +02001410 struct tree_mod_root *old_root = NULL;
Chris Mason4325edd2012-06-15 20:02:02 -04001411 u64 old_generation = 0;
Jan Schmidta95236d2012-06-05 16:41:24 +02001412 u64 logical;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001413
Jan Schmidt30b04632013-04-13 13:19:54 +00001414 eb_root = btrfs_read_lock_root_node(root);
1415 tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001416 if (!tm)
Jan Schmidt30b04632013-04-13 13:19:54 +00001417 return eb_root;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001418
Jan Schmidta95236d2012-06-05 16:41:24 +02001419 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1420 old_root = &tm->old_root;
1421 old_generation = tm->generation;
1422 logical = old_root->logical;
1423 } else {
Jan Schmidt30b04632013-04-13 13:19:54 +00001424 logical = eb_root->start;
Jan Schmidta95236d2012-06-05 16:41:24 +02001425 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001426
Jan Schmidta95236d2012-06-05 16:41:24 +02001427 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
Jan Schmidt834328a2012-10-23 11:27:33 +02001428 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
Jan Schmidt30b04632013-04-13 13:19:54 +00001429 btrfs_tree_read_unlock(eb_root);
1430 free_extent_buffer(eb_root);
David Sterbace86cd52014-06-15 01:07:32 +02001431 old = read_tree_block(root, logical, 0);
Liu Bo64c043d2015-05-25 17:30:15 +08001432 if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
1433 if (!IS_ERR(old))
1434 free_extent_buffer(old);
Frank Holtonefe120a2013-12-20 11:37:06 -05001435 btrfs_warn(root->fs_info,
1436 "failed to read tree block %llu from get_old_root", logical);
Jan Schmidt834328a2012-10-23 11:27:33 +02001437 } else {
Liu Bo7bfdcf72012-10-25 07:30:19 -06001438 eb = btrfs_clone_extent_buffer(old);
1439 free_extent_buffer(old);
Jan Schmidt834328a2012-10-23 11:27:33 +02001440 }
1441 } else if (old_root) {
Jan Schmidt30b04632013-04-13 13:19:54 +00001442 btrfs_tree_read_unlock(eb_root);
1443 free_extent_buffer(eb_root);
Feifei Xub9ef22d2016-06-01 19:18:25 +08001444 eb = alloc_dummy_extent_buffer(root->fs_info, logical,
1445 root->nodesize);
Jan Schmidt834328a2012-10-23 11:27:33 +02001446 } else {
Josef Bacik9ec72672013-08-07 16:57:23 -04001447 btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
Jan Schmidt30b04632013-04-13 13:19:54 +00001448 eb = btrfs_clone_extent_buffer(eb_root);
Josef Bacik9ec72672013-08-07 16:57:23 -04001449 btrfs_tree_read_unlock_blocking(eb_root);
Jan Schmidt30b04632013-04-13 13:19:54 +00001450 free_extent_buffer(eb_root);
Jan Schmidt834328a2012-10-23 11:27:33 +02001451 }
1452
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001453 if (!eb)
1454 return NULL;
Jan Schmidtd6381082012-10-23 14:21:05 +02001455 extent_buffer_get(eb);
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001456 btrfs_tree_read_lock(eb);
Jan Schmidta95236d2012-06-05 16:41:24 +02001457 if (old_root) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001458 btrfs_set_header_bytenr(eb, eb->start);
1459 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
Jan Schmidt30b04632013-04-13 13:19:54 +00001460 btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
Jan Schmidta95236d2012-06-05 16:41:24 +02001461 btrfs_set_header_level(eb, old_root->level);
1462 btrfs_set_header_generation(eb, old_generation);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001463 }
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001464 if (tm)
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001465 __tree_mod_log_rewind(root->fs_info, eb, time_seq, tm);
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001466 else
1467 WARN_ON(btrfs_header_level(eb) != 0);
Jan Schmidt57911b82012-10-19 09:22:03 +02001468 WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001469
1470 return eb;
1471}
1472
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001473int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
1474{
1475 struct tree_mod_elem *tm;
1476 int level;
Jan Schmidt30b04632013-04-13 13:19:54 +00001477 struct extent_buffer *eb_root = btrfs_root_node(root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001478
Jan Schmidt30b04632013-04-13 13:19:54 +00001479 tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001480 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
1481 level = tm->old_root.level;
1482 } else {
Jan Schmidt30b04632013-04-13 13:19:54 +00001483 level = btrfs_header_level(eb_root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001484 }
Jan Schmidt30b04632013-04-13 13:19:54 +00001485 free_extent_buffer(eb_root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001486
1487 return level;
1488}
1489
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001490static inline int should_cow_block(struct btrfs_trans_handle *trans,
1491 struct btrfs_root *root,
1492 struct extent_buffer *buf)
1493{
Jeff Mahoneyf5ee5c92016-06-21 09:52:41 -04001494 if (btrfs_is_testing(root->fs_info))
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04001495 return 0;
David Sterbafccb84c2014-09-29 23:53:21 +02001496
Liu Bof1ebcc72011-11-14 20:48:06 -05001497 /* ensure we can see the force_cow */
1498 smp_rmb();
1499
1500 /*
1501 * We do not need to cow a block if
1502 * 1) this block is not created or changed in this transaction;
1503 * 2) this block does not belong to TREE_RELOC tree;
1504 * 3) the root is not forced COW.
1505 *
1506 * What is forced COW:
Nicholas D Steeves01327612016-05-19 21:18:45 -04001507 * when we create snapshot during committing the transaction,
Liu Bof1ebcc72011-11-14 20:48:06 -05001508 * after we've finished coping src root, we must COW the shared
1509 * block to ensure the metadata consistency.
1510 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001511 if (btrfs_header_generation(buf) == trans->transid &&
1512 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
1513 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
Liu Bof1ebcc72011-11-14 20:48:06 -05001514 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
Miao Xie27cdeb72014-04-02 19:51:05 +08001515 !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001516 return 0;
1517 return 1;
1518}
1519
Chris Masond352ac62008-09-29 15:18:18 -04001520/*
1521 * cows a single block, see __btrfs_cow_block for the real work.
Nicholas D Steeves01327612016-05-19 21:18:45 -04001522 * This version of it has extra checks so that a block isn't COWed more than
Chris Masond352ac62008-09-29 15:18:18 -04001523 * once per transaction, as long as it hasn't been written yet
1524 */
Chris Masond3977122009-01-05 21:25:51 -05001525noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001526 struct btrfs_root *root, struct extent_buffer *buf,
1527 struct extent_buffer *parent, int parent_slot,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001528 struct extent_buffer **cow_ret)
Chris Mason02217ed2007-03-02 16:08:05 -05001529{
Chris Mason6702ed42007-08-07 16:15:09 -04001530 u64 search_start;
Chris Masonf510cfe2007-10-15 16:14:48 -04001531 int ret;
Chris Masondc17ff82008-01-08 15:46:30 -05001532
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001533 if (trans->transaction != root->fs_info->running_transaction)
1534 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02001535 trans->transid,
Chris Masonccd467d2007-06-28 15:57:36 -04001536 root->fs_info->running_transaction->transid);
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001537
1538 if (trans->transid != root->fs_info->generation)
1539 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02001540 trans->transid, root->fs_info->generation);
Chris Masondc17ff82008-01-08 15:46:30 -05001541
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001542 if (!should_cow_block(trans, root, buf)) {
Jeff Mahoney64c12922016-06-08 00:36:38 -04001543 trans->dirty = true;
Chris Mason02217ed2007-03-02 16:08:05 -05001544 *cow_ret = buf;
1545 return 0;
1546 }
Chris Masonc4876852009-02-04 09:24:25 -05001547
Byongho Leeee221842015-12-15 01:42:10 +09001548 search_start = buf->start & ~((u64)SZ_1G - 1);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001549
1550 if (parent)
1551 btrfs_set_lock_blocking(parent);
1552 btrfs_set_lock_blocking(buf);
1553
Chris Masonf510cfe2007-10-15 16:14:48 -04001554 ret = __btrfs_cow_block(trans, root, buf, parent,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001555 parent_slot, cow_ret, search_start, 0);
liubo1abe9b82011-03-24 11:18:59 +00001556
1557 trace_btrfs_cow_block(root, buf, *cow_ret);
1558
Chris Masonf510cfe2007-10-15 16:14:48 -04001559 return ret;
Chris Mason6702ed42007-08-07 16:15:09 -04001560}
1561
/*
 * Defrag helper: tell whether two blocks referenced from a node lie near
 * each other.
 *
 * The gap is measured from the end of the lower block (its start plus
 * @blocksize) to the start of the higher one.  Returns 1 when that gap is
 * below 32768 bytes, 0 otherwise.  Identical block numbers yield 0.
 */
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
	u64 gap;

	if (blocknr == other)
		return 0;

	if (blocknr < other)
		gap = other - (blocknr + blocksize);
	else
		gap = blocknr - (other + blocksize);

	return gap < 32768;
}
1574
Chris Mason081e9572007-11-06 10:26:24 -05001575/*
1576 * compare two keys in a memcmp fashion
1577 */
1578static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
1579{
1580 struct btrfs_key k1;
1581
1582 btrfs_disk_key_to_cpu(&k1, disk);
1583
Diego Calleja20736ab2009-07-24 11:06:52 -04001584 return btrfs_comp_cpu_keys(&k1, k2);
Chris Mason081e9572007-11-06 10:26:24 -05001585}
1586
Josef Bacikf3465ca2008-11-12 14:19:50 -05001587/*
1588 * same as comp_keys only with two btrfs_key's
1589 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001590int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
Josef Bacikf3465ca2008-11-12 14:19:50 -05001591{
1592 if (k1->objectid > k2->objectid)
1593 return 1;
1594 if (k1->objectid < k2->objectid)
1595 return -1;
1596 if (k1->type > k2->type)
1597 return 1;
1598 if (k1->type < k2->type)
1599 return -1;
1600 if (k1->offset > k2->offset)
1601 return 1;
1602 if (k1->offset < k2->offset)
1603 return -1;
1604 return 0;
1605}
Chris Mason081e9572007-11-06 10:26:24 -05001606
Chris Masond352ac62008-09-29 15:18:18 -04001607/*
1608 * this is used by the defrag code to go through all the
1609 * leaves pointed to by a node and reallocate them so that
1610 * disk order is close to key order
1611 */
Chris Mason6702ed42007-08-07 16:15:09 -04001612int btrfs_realloc_node(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001613 struct btrfs_root *root, struct extent_buffer *parent,
Eric Sandeende78b512013-01-31 18:21:12 +00001614 int start_slot, u64 *last_ret,
Chris Masona6b6e752007-10-15 16:22:39 -04001615 struct btrfs_key *progress)
Chris Mason6702ed42007-08-07 16:15:09 -04001616{
Chris Mason6b800532007-10-15 16:17:34 -04001617 struct extent_buffer *cur;
Chris Mason6702ed42007-08-07 16:15:09 -04001618 u64 blocknr;
Chris Masonca7a79a2008-05-12 12:59:19 -04001619 u64 gen;
Chris Masone9d0b132007-08-10 14:06:19 -04001620 u64 search_start = *last_ret;
1621 u64 last_block = 0;
Chris Mason6702ed42007-08-07 16:15:09 -04001622 u64 other;
1623 u32 parent_nritems;
Chris Mason6702ed42007-08-07 16:15:09 -04001624 int end_slot;
1625 int i;
1626 int err = 0;
Chris Masonf2183bd2007-08-10 14:42:37 -04001627 int parent_level;
Chris Mason6b800532007-10-15 16:17:34 -04001628 int uptodate;
1629 u32 blocksize;
Chris Mason081e9572007-11-06 10:26:24 -05001630 int progress_passed = 0;
1631 struct btrfs_disk_key disk_key;
Chris Mason6702ed42007-08-07 16:15:09 -04001632
Chris Mason5708b952007-10-25 15:43:18 -04001633 parent_level = btrfs_header_level(parent);
Chris Mason5708b952007-10-25 15:43:18 -04001634
Julia Lawall6c1500f2012-11-03 20:30:18 +00001635 WARN_ON(trans->transaction != root->fs_info->running_transaction);
1636 WARN_ON(trans->transid != root->fs_info->generation);
Chris Mason86479a02007-09-10 19:58:16 -04001637
Chris Mason6b800532007-10-15 16:17:34 -04001638 parent_nritems = btrfs_header_nritems(parent);
David Sterba707e8a02014-06-04 19:22:26 +02001639 blocksize = root->nodesize;
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001640 end_slot = parent_nritems - 1;
Chris Mason6702ed42007-08-07 16:15:09 -04001641
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001642 if (parent_nritems <= 1)
Chris Mason6702ed42007-08-07 16:15:09 -04001643 return 0;
1644
Chris Masonb4ce94d2009-02-04 09:25:08 -05001645 btrfs_set_lock_blocking(parent);
1646
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001647 for (i = start_slot; i <= end_slot; i++) {
Chris Mason6702ed42007-08-07 16:15:09 -04001648 int close = 1;
Chris Masona6b6e752007-10-15 16:22:39 -04001649
Chris Mason081e9572007-11-06 10:26:24 -05001650 btrfs_node_key(parent, &disk_key, i);
1651 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
1652 continue;
1653
1654 progress_passed = 1;
Chris Mason6b800532007-10-15 16:17:34 -04001655 blocknr = btrfs_node_blockptr(parent, i);
Chris Masonca7a79a2008-05-12 12:59:19 -04001656 gen = btrfs_node_ptr_generation(parent, i);
Chris Masone9d0b132007-08-10 14:06:19 -04001657 if (last_block == 0)
1658 last_block = blocknr;
Chris Mason5708b952007-10-25 15:43:18 -04001659
Chris Mason6702ed42007-08-07 16:15:09 -04001660 if (i > 0) {
Chris Mason6b800532007-10-15 16:17:34 -04001661 other = btrfs_node_blockptr(parent, i - 1);
1662 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -04001663 }
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001664 if (!close && i < end_slot) {
Chris Mason6b800532007-10-15 16:17:34 -04001665 other = btrfs_node_blockptr(parent, i + 1);
1666 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -04001667 }
Chris Masone9d0b132007-08-10 14:06:19 -04001668 if (close) {
1669 last_block = blocknr;
Chris Mason6702ed42007-08-07 16:15:09 -04001670 continue;
Chris Masone9d0b132007-08-10 14:06:19 -04001671 }
Chris Mason6702ed42007-08-07 16:15:09 -04001672
Daniel Dressler01d58472014-11-21 17:15:07 +09001673 cur = btrfs_find_tree_block(root->fs_info, blocknr);
Chris Mason6b800532007-10-15 16:17:34 -04001674 if (cur)
Chris Masonb9fab912012-05-06 07:23:47 -04001675 uptodate = btrfs_buffer_uptodate(cur, gen, 0);
Chris Mason6b800532007-10-15 16:17:34 -04001676 else
1677 uptodate = 0;
Chris Mason5708b952007-10-25 15:43:18 -04001678 if (!cur || !uptodate) {
Chris Mason6b800532007-10-15 16:17:34 -04001679 if (!cur) {
David Sterbace86cd52014-06-15 01:07:32 +02001680 cur = read_tree_block(root, blocknr, gen);
Liu Bo64c043d2015-05-25 17:30:15 +08001681 if (IS_ERR(cur)) {
1682 return PTR_ERR(cur);
1683 } else if (!extent_buffer_uptodate(cur)) {
Josef Bacik416bc652013-04-23 14:17:42 -04001684 free_extent_buffer(cur);
Tsutomu Itoh97d9a8a2011-03-24 06:33:21 +00001685 return -EIO;
Josef Bacik416bc652013-04-23 14:17:42 -04001686 }
Chris Mason6b800532007-10-15 16:17:34 -04001687 } else if (!uptodate) {
Tsutomu Itoh018642a2012-05-29 18:10:13 +09001688 err = btrfs_read_buffer(cur, gen);
1689 if (err) {
1690 free_extent_buffer(cur);
1691 return err;
1692 }
Chris Masonf2183bd2007-08-10 14:42:37 -04001693 }
Chris Mason6702ed42007-08-07 16:15:09 -04001694 }
Chris Masone9d0b132007-08-10 14:06:19 -04001695 if (search_start == 0)
Chris Mason6b800532007-10-15 16:17:34 -04001696 search_start = last_block;
Chris Masone9d0b132007-08-10 14:06:19 -04001697
Chris Masone7a84562008-06-25 16:01:31 -04001698 btrfs_tree_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001699 btrfs_set_lock_blocking(cur);
Chris Mason6b800532007-10-15 16:17:34 -04001700 err = __btrfs_cow_block(trans, root, cur, parent, i,
Chris Masone7a84562008-06-25 16:01:31 -04001701 &cur, search_start,
Chris Mason6b800532007-10-15 16:17:34 -04001702 min(16 * blocksize,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001703 (end_slot - i) * blocksize));
Yan252c38f2007-08-29 09:11:44 -04001704 if (err) {
Chris Masone7a84562008-06-25 16:01:31 -04001705 btrfs_tree_unlock(cur);
Chris Mason6b800532007-10-15 16:17:34 -04001706 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -04001707 break;
Yan252c38f2007-08-29 09:11:44 -04001708 }
Chris Masone7a84562008-06-25 16:01:31 -04001709 search_start = cur->start;
1710 last_block = cur->start;
Chris Masonf2183bd2007-08-10 14:42:37 -04001711 *last_ret = search_start;
Chris Masone7a84562008-06-25 16:01:31 -04001712 btrfs_tree_unlock(cur);
1713 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -04001714 }
1715 return err;
1716}
1717
Chris Masonaa5d6be2007-02-28 16:35:06 -05001718
Chris Mason74123bd2007-02-02 11:05:29 -05001719/*
Chris Mason5f39d392007-10-15 16:14:19 -04001720 * search for key in the extent_buffer. The items start at offset p,
1721 * and they are item_size apart. There are 'max' items in p.
1722 *
Chris Mason74123bd2007-02-02 11:05:29 -05001723 * the slot in the array is returned via slot, and it points to
1724 * the place where you would insert key if it is not found in
1725 * the array.
1726 *
1727 * slot may point to max if the key is bigger than all of the keys
1728 */
Chris Masone02119d2008-09-05 16:13:11 -04001729static noinline int generic_bin_search(struct extent_buffer *eb,
1730 unsigned long p,
1731 int item_size, struct btrfs_key *key,
1732 int max, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001733{
1734 int low = 0;
1735 int high = max;
1736 int mid;
1737 int ret;
Chris Mason479965d2007-10-15 16:14:27 -04001738 struct btrfs_disk_key *tmp = NULL;
Chris Mason5f39d392007-10-15 16:14:19 -04001739 struct btrfs_disk_key unaligned;
1740 unsigned long offset;
Chris Mason5f39d392007-10-15 16:14:19 -04001741 char *kaddr = NULL;
1742 unsigned long map_start = 0;
1743 unsigned long map_len = 0;
Chris Mason479965d2007-10-15 16:14:27 -04001744 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001745
Liu Bo5e24e9a2016-06-23 16:32:45 -07001746 if (low > high) {
1747 btrfs_err(eb->fs_info,
1748 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
1749 __func__, low, high, eb->start,
1750 btrfs_header_owner(eb), btrfs_header_level(eb));
1751 return -EINVAL;
1752 }
1753
Chris Masond3977122009-01-05 21:25:51 -05001754 while (low < high) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05001755 mid = (low + high) / 2;
Chris Mason5f39d392007-10-15 16:14:19 -04001756 offset = p + mid * item_size;
1757
Chris Masona6591712011-07-19 12:04:14 -04001758 if (!kaddr || offset < map_start ||
Chris Mason5f39d392007-10-15 16:14:19 -04001759 (offset + sizeof(struct btrfs_disk_key)) >
1760 map_start + map_len) {
Chris Mason934d3752008-12-08 16:43:10 -05001761
1762 err = map_private_extent_buffer(eb, offset,
Chris Mason479965d2007-10-15 16:14:27 -04001763 sizeof(struct btrfs_disk_key),
Chris Masona6591712011-07-19 12:04:14 -04001764 &kaddr, &map_start, &map_len);
Chris Mason5f39d392007-10-15 16:14:19 -04001765
Chris Mason479965d2007-10-15 16:14:27 -04001766 if (!err) {
1767 tmp = (struct btrfs_disk_key *)(kaddr + offset -
1768 map_start);
Liu Bo415b35a2016-06-17 19:16:21 -07001769 } else if (err == 1) {
Chris Mason479965d2007-10-15 16:14:27 -04001770 read_extent_buffer(eb, &unaligned,
1771 offset, sizeof(unaligned));
1772 tmp = &unaligned;
Liu Bo415b35a2016-06-17 19:16:21 -07001773 } else {
1774 return err;
Chris Mason479965d2007-10-15 16:14:27 -04001775 }
1776
Chris Mason5f39d392007-10-15 16:14:19 -04001777 } else {
1778 tmp = (struct btrfs_disk_key *)(kaddr + offset -
1779 map_start);
1780 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05001781 ret = comp_keys(tmp, key);
1782
1783 if (ret < 0)
1784 low = mid + 1;
1785 else if (ret > 0)
1786 high = mid;
1787 else {
1788 *slot = mid;
1789 return 0;
1790 }
1791 }
1792 *slot = low;
1793 return 1;
1794}
1795
Chris Mason97571fd2007-02-24 13:39:08 -05001796/*
1797 * simple bin_search frontend that does the right thing for
1798 * leaves vs nodes
1799 */
Chris Mason5f39d392007-10-15 16:14:19 -04001800static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
1801 int level, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001802{
Wang Sheng-Huif7757382012-03-30 15:14:27 +08001803 if (level == 0)
Chris Mason5f39d392007-10-15 16:14:19 -04001804 return generic_bin_search(eb,
1805 offsetof(struct btrfs_leaf, items),
Chris Mason0783fcf2007-03-12 20:12:07 -04001806 sizeof(struct btrfs_item),
Chris Mason5f39d392007-10-15 16:14:19 -04001807 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -04001808 slot);
Wang Sheng-Huif7757382012-03-30 15:14:27 +08001809 else
Chris Mason5f39d392007-10-15 16:14:19 -04001810 return generic_bin_search(eb,
1811 offsetof(struct btrfs_node, ptrs),
Chris Mason123abc82007-03-14 14:14:43 -04001812 sizeof(struct btrfs_key_ptr),
Chris Mason5f39d392007-10-15 16:14:19 -04001813 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -04001814 slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05001815}
1816
/*
 * Non-static entry point for bin_search(); arguments and return value are
 * passed through unchanged.
 */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		     int level, int *slot)
{
	int ret;

	ret = bin_search(eb, key, level, slot);
	return ret;
}
1822
Yan, Zhengf0486c62010-05-16 10:46:25 -04001823static void root_add_used(struct btrfs_root *root, u32 size)
1824{
1825 spin_lock(&root->accounting_lock);
1826 btrfs_set_root_used(&root->root_item,
1827 btrfs_root_used(&root->root_item) + size);
1828 spin_unlock(&root->accounting_lock);
1829}
1830
1831static void root_sub_used(struct btrfs_root *root, u32 size)
1832{
1833 spin_lock(&root->accounting_lock);
1834 btrfs_set_root_used(&root->root_item,
1835 btrfs_root_used(&root->root_item) - size);
1836 spin_unlock(&root->accounting_lock);
1837}
1838
Chris Masond352ac62008-09-29 15:18:18 -04001839/* given a node and slot number, this reads the blocks it points to. The
1840 * extent buffer is returned with a reference taken (but unlocked).
Chris Masond352ac62008-09-29 15:18:18 -04001841 */
Chris Masone02119d2008-09-05 16:13:11 -04001842static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
Chris Mason5f39d392007-10-15 16:14:19 -04001843 struct extent_buffer *parent, int slot)
Chris Masonbb803952007-03-01 12:04:21 -05001844{
Chris Masonca7a79a2008-05-12 12:59:19 -04001845 int level = btrfs_header_level(parent);
Josef Bacik416bc652013-04-23 14:17:42 -04001846 struct extent_buffer *eb;
1847
Liu Bofb770ae2016-07-05 12:10:14 -07001848 if (slot < 0 || slot >= btrfs_header_nritems(parent))
1849 return ERR_PTR(-ENOENT);
Chris Masonca7a79a2008-05-12 12:59:19 -04001850
1851 BUG_ON(level == 0);
1852
Josef Bacik416bc652013-04-23 14:17:42 -04001853 eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
Josef Bacik416bc652013-04-23 14:17:42 -04001854 btrfs_node_ptr_generation(parent, slot));
Liu Bofb770ae2016-07-05 12:10:14 -07001855 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
1856 free_extent_buffer(eb);
1857 eb = ERR_PTR(-EIO);
Josef Bacik416bc652013-04-23 14:17:42 -04001858 }
1859
1860 return eb;
Chris Masonbb803952007-03-01 12:04:21 -05001861}
1862
Chris Masond352ac62008-09-29 15:18:18 -04001863/*
1864 * node level balancing, used to make sure nodes are in proper order for
1865 * item deletion. We balance from the top down, so we have to make sure
1866 * that a deletion won't leave an node completely empty later on.
1867 */
Chris Masone02119d2008-09-05 16:13:11 -04001868static noinline int balance_level(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05001869 struct btrfs_root *root,
1870 struct btrfs_path *path, int level)
Chris Masonbb803952007-03-01 12:04:21 -05001871{
Chris Mason5f39d392007-10-15 16:14:19 -04001872 struct extent_buffer *right = NULL;
1873 struct extent_buffer *mid;
1874 struct extent_buffer *left = NULL;
1875 struct extent_buffer *parent = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05001876 int ret = 0;
1877 int wret;
1878 int pslot;
Chris Masonbb803952007-03-01 12:04:21 -05001879 int orig_slot = path->slots[level];
Chris Mason79f95c82007-03-01 15:16:26 -05001880 u64 orig_ptr;
Chris Masonbb803952007-03-01 12:04:21 -05001881
1882 if (level == 0)
1883 return 0;
1884
Chris Mason5f39d392007-10-15 16:14:19 -04001885 mid = path->nodes[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001886
Chris Masonbd681512011-07-16 15:23:14 -04001887 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
1888 path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
Chris Mason7bb86312007-12-11 09:25:06 -05001889 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1890
Chris Mason1d4f8a02007-03-13 09:28:32 -04001891 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
Chris Mason79f95c82007-03-01 15:16:26 -05001892
Li Zefana05a9bb2011-09-06 16:55:34 +08001893 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04001894 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08001895 pslot = path->slots[level + 1];
1896 }
Chris Masonbb803952007-03-01 12:04:21 -05001897
Chris Mason40689472007-03-17 14:29:23 -04001898 /*
1899 * deal with the case where there is only one pointer in the root
1900 * by promoting the node below to a root
1901 */
Chris Mason5f39d392007-10-15 16:14:19 -04001902 if (!parent) {
1903 struct extent_buffer *child;
Chris Masonbb803952007-03-01 12:04:21 -05001904
Chris Mason5f39d392007-10-15 16:14:19 -04001905 if (btrfs_header_nritems(mid) != 1)
Chris Masonbb803952007-03-01 12:04:21 -05001906 return 0;
1907
1908 /* promote the child to a root */
Chris Mason5f39d392007-10-15 16:14:19 -04001909 child = read_node_slot(root, mid, 0);
Liu Bofb770ae2016-07-05 12:10:14 -07001910 if (IS_ERR(child)) {
1911 ret = PTR_ERR(child);
Anand Jain34d97002016-03-16 16:43:06 +08001912 btrfs_handle_fs_error(root->fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -07001913 goto enospc;
1914 }
1915
Chris Mason925baed2008-06-25 16:01:30 -04001916 btrfs_tree_lock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001917 btrfs_set_lock_blocking(child);
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001918 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001919 if (ret) {
1920 btrfs_tree_unlock(child);
1921 free_extent_buffer(child);
1922 goto enospc;
1923 }
Yan2f375ab2008-02-01 14:58:07 -05001924
Jan Schmidt90f8d622013-04-13 13:19:53 +00001925 tree_mod_log_set_root_pointer(root, child, 1);
Chris Mason240f62c2011-03-23 14:54:42 -04001926 rcu_assign_pointer(root->node, child);
Chris Mason925baed2008-06-25 16:01:30 -04001927
Chris Mason0b86a832008-03-24 15:01:56 -04001928 add_root_to_dirty_list(root);
Chris Mason925baed2008-06-25 16:01:30 -04001929 btrfs_tree_unlock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001930
Chris Mason925baed2008-06-25 16:01:30 -04001931 path->locks[level] = 0;
Chris Masonbb803952007-03-01 12:04:21 -05001932 path->nodes[level] = NULL;
Daniel Dressler01d58472014-11-21 17:15:07 +09001933 clean_tree_block(trans, root->fs_info, mid);
Chris Mason925baed2008-06-25 16:01:30 -04001934 btrfs_tree_unlock(mid);
Chris Masonbb803952007-03-01 12:04:21 -05001935 /* once for the path */
Chris Mason5f39d392007-10-15 16:14:19 -04001936 free_extent_buffer(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001937
1938 root_sub_used(root, mid->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001939 btrfs_free_tree_block(trans, root, mid, 0, 1);
Chris Masonbb803952007-03-01 12:04:21 -05001940 /* once for the root ptr */
Josef Bacik3083ee22012-03-09 16:01:49 -05001941 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001942 return 0;
Chris Masonbb803952007-03-01 12:04:21 -05001943 }
Chris Mason5f39d392007-10-15 16:14:19 -04001944 if (btrfs_header_nritems(mid) >
Chris Mason123abc82007-03-14 14:14:43 -04001945 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
Chris Masonbb803952007-03-01 12:04:21 -05001946 return 0;
1947
Chris Mason5f39d392007-10-15 16:14:19 -04001948 left = read_node_slot(root, parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001949 if (IS_ERR(left))
1950 left = NULL;
1951
Chris Mason5f39d392007-10-15 16:14:19 -04001952 if (left) {
Chris Mason925baed2008-06-25 16:01:30 -04001953 btrfs_tree_lock(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001954 btrfs_set_lock_blocking(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001955 wret = btrfs_cow_block(trans, root, left,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001956 parent, pslot - 1, &left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001957 if (wret) {
1958 ret = wret;
1959 goto enospc;
1960 }
Chris Mason2cc58cf2007-08-27 16:49:44 -04001961 }
Liu Bofb770ae2016-07-05 12:10:14 -07001962
Chris Mason5f39d392007-10-15 16:14:19 -04001963 right = read_node_slot(root, parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001964 if (IS_ERR(right))
1965 right = NULL;
1966
Chris Mason5f39d392007-10-15 16:14:19 -04001967 if (right) {
Chris Mason925baed2008-06-25 16:01:30 -04001968 btrfs_tree_lock(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001969 btrfs_set_lock_blocking(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001970 wret = btrfs_cow_block(trans, root, right,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001971 parent, pslot + 1, &right);
Chris Mason2cc58cf2007-08-27 16:49:44 -04001972 if (wret) {
1973 ret = wret;
1974 goto enospc;
1975 }
1976 }
1977
1978 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04001979 if (left) {
1980 orig_slot += btrfs_header_nritems(left);
Chris Masonbce4eae2008-04-24 14:42:46 -04001981 wret = push_node_left(trans, root, left, mid, 1);
Chris Mason79f95c82007-03-01 15:16:26 -05001982 if (wret < 0)
1983 ret = wret;
Chris Masonbb803952007-03-01 12:04:21 -05001984 }
Chris Mason79f95c82007-03-01 15:16:26 -05001985
1986 /*
1987 * then try to empty the right most buffer into the middle
1988 */
Chris Mason5f39d392007-10-15 16:14:19 -04001989 if (right) {
Chris Mason971a1f62008-04-24 10:54:32 -04001990 wret = push_node_left(trans, root, mid, right, 1);
Chris Mason54aa1f42007-06-22 14:16:25 -04001991 if (wret < 0 && wret != -ENOSPC)
Chris Mason79f95c82007-03-01 15:16:26 -05001992 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04001993 if (btrfs_header_nritems(right) == 0) {
Daniel Dressler01d58472014-11-21 17:15:07 +09001994 clean_tree_block(trans, root->fs_info, right);
Chris Mason925baed2008-06-25 16:01:30 -04001995 btrfs_tree_unlock(right);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001996 del_ptr(root, path, level + 1, pslot + 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001997 root_sub_used(root, right->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001998 btrfs_free_tree_block(trans, root, right, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05001999 free_extent_buffer_stale(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002000 right = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05002001 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002002 struct btrfs_disk_key right_key;
2003 btrfs_node_key(right, &right_key, 0);
Jan Schmidtf2304752012-05-26 11:43:17 +02002004 tree_mod_log_set_node_key(root->fs_info, parent,
Liu Bo32adf092012-10-19 12:52:15 +00002005 pslot + 1, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002006 btrfs_set_node_key(parent, &right_key, pslot + 1);
2007 btrfs_mark_buffer_dirty(parent);
Chris Masonbb803952007-03-01 12:04:21 -05002008 }
2009 }
Chris Mason5f39d392007-10-15 16:14:19 -04002010 if (btrfs_header_nritems(mid) == 1) {
Chris Mason79f95c82007-03-01 15:16:26 -05002011 /*
2012 * we're not allowed to leave a node with one item in the
2013 * tree during a delete. A deletion from lower in the tree
2014 * could try to delete the only pointer in this node.
2015 * So, pull some keys from the left.
2016 * There has to be a left pointer at this point because
2017 * otherwise we would have pulled some pointers from the
2018 * right
2019 */
Mark Fasheh305a26a2011-09-01 11:27:57 -07002020 if (!left) {
2021 ret = -EROFS;
Anand Jain34d97002016-03-16 16:43:06 +08002022 btrfs_handle_fs_error(root->fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -07002023 goto enospc;
2024 }
Chris Mason5f39d392007-10-15 16:14:19 -04002025 wret = balance_node_right(trans, root, mid, left);
Chris Mason54aa1f42007-06-22 14:16:25 -04002026 if (wret < 0) {
Chris Mason79f95c82007-03-01 15:16:26 -05002027 ret = wret;
Chris Mason54aa1f42007-06-22 14:16:25 -04002028 goto enospc;
2029 }
Chris Masonbce4eae2008-04-24 14:42:46 -04002030 if (wret == 1) {
2031 wret = push_node_left(trans, root, left, mid, 1);
2032 if (wret < 0)
2033 ret = wret;
2034 }
Chris Mason79f95c82007-03-01 15:16:26 -05002035 BUG_ON(wret == 1);
2036 }
Chris Mason5f39d392007-10-15 16:14:19 -04002037 if (btrfs_header_nritems(mid) == 0) {
Daniel Dressler01d58472014-11-21 17:15:07 +09002038 clean_tree_block(trans, root->fs_info, mid);
Chris Mason925baed2008-06-25 16:01:30 -04002039 btrfs_tree_unlock(mid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00002040 del_ptr(root, path, level + 1, pslot);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002041 root_sub_used(root, mid->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02002042 btrfs_free_tree_block(trans, root, mid, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05002043 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002044 mid = NULL;
Chris Mason79f95c82007-03-01 15:16:26 -05002045 } else {
2046 /* update the parent key to reflect our changes */
Chris Mason5f39d392007-10-15 16:14:19 -04002047 struct btrfs_disk_key mid_key;
2048 btrfs_node_key(mid, &mid_key, 0);
Liu Bo32adf092012-10-19 12:52:15 +00002049 tree_mod_log_set_node_key(root->fs_info, parent,
Jan Schmidtf2304752012-05-26 11:43:17 +02002050 pslot, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002051 btrfs_set_node_key(parent, &mid_key, pslot);
2052 btrfs_mark_buffer_dirty(parent);
Chris Mason79f95c82007-03-01 15:16:26 -05002053 }
Chris Masonbb803952007-03-01 12:04:21 -05002054
Chris Mason79f95c82007-03-01 15:16:26 -05002055 /* update the path */
Chris Mason5f39d392007-10-15 16:14:19 -04002056 if (left) {
2057 if (btrfs_header_nritems(left) > orig_slot) {
2058 extent_buffer_get(left);
Chris Mason925baed2008-06-25 16:01:30 -04002059 /* left was locked after cow */
Chris Mason5f39d392007-10-15 16:14:19 -04002060 path->nodes[level] = left;
Chris Masonbb803952007-03-01 12:04:21 -05002061 path->slots[level + 1] -= 1;
2062 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002063 if (mid) {
2064 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002065 free_extent_buffer(mid);
Chris Mason925baed2008-06-25 16:01:30 -04002066 }
Chris Masonbb803952007-03-01 12:04:21 -05002067 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002068 orig_slot -= btrfs_header_nritems(left);
Chris Masonbb803952007-03-01 12:04:21 -05002069 path->slots[level] = orig_slot;
2070 }
2071 }
Chris Mason79f95c82007-03-01 15:16:26 -05002072 /* double check we haven't messed things up */
Chris Masone20d96d2007-03-22 12:13:20 -04002073 if (orig_ptr !=
Chris Mason5f39d392007-10-15 16:14:19 -04002074 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
Chris Mason79f95c82007-03-01 15:16:26 -05002075 BUG();
Chris Mason54aa1f42007-06-22 14:16:25 -04002076enospc:
Chris Mason925baed2008-06-25 16:01:30 -04002077 if (right) {
2078 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002079 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04002080 }
2081 if (left) {
2082 if (path->nodes[level] != left)
2083 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002084 free_extent_buffer(left);
Chris Mason925baed2008-06-25 16:01:30 -04002085 }
Chris Masonbb803952007-03-01 12:04:21 -05002086 return ret;
2087}
2088
Chris Masond352ac62008-09-29 15:18:18 -04002089/* Node balancing for insertion. Here we only split or push nodes around
2090 * when they are completely full. This is also done top down, so we
2091 * have to be pessimistic.
2092 */
Chris Masond3977122009-01-05 21:25:51 -05002093static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05002094 struct btrfs_root *root,
2095 struct btrfs_path *path, int level)
Chris Masone66f7092007-04-20 13:16:02 -04002096{
Chris Mason5f39d392007-10-15 16:14:19 -04002097 struct extent_buffer *right = NULL;
2098 struct extent_buffer *mid;
2099 struct extent_buffer *left = NULL;
2100 struct extent_buffer *parent = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002101 int ret = 0;
2102 int wret;
2103 int pslot;
2104 int orig_slot = path->slots[level];
Chris Masone66f7092007-04-20 13:16:02 -04002105
2106 if (level == 0)
2107 return 1;
2108
Chris Mason5f39d392007-10-15 16:14:19 -04002109 mid = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05002110 WARN_ON(btrfs_header_generation(mid) != trans->transid);
Chris Masone66f7092007-04-20 13:16:02 -04002111
Li Zefana05a9bb2011-09-06 16:55:34 +08002112 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04002113 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08002114 pslot = path->slots[level + 1];
2115 }
Chris Masone66f7092007-04-20 13:16:02 -04002116
Chris Mason5f39d392007-10-15 16:14:19 -04002117 if (!parent)
Chris Masone66f7092007-04-20 13:16:02 -04002118 return 1;
Chris Masone66f7092007-04-20 13:16:02 -04002119
Chris Mason5f39d392007-10-15 16:14:19 -04002120 left = read_node_slot(root, parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07002121 if (IS_ERR(left))
2122 left = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002123
2124 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04002125 if (left) {
Chris Masone66f7092007-04-20 13:16:02 -04002126 u32 left_nr;
Chris Mason925baed2008-06-25 16:01:30 -04002127
2128 btrfs_tree_lock(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002129 btrfs_set_lock_blocking(left);
2130
Chris Mason5f39d392007-10-15 16:14:19 -04002131 left_nr = btrfs_header_nritems(left);
Chris Mason33ade1f2007-04-20 13:48:57 -04002132 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
2133 wret = 1;
2134 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002135 ret = btrfs_cow_block(trans, root, left, parent,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04002136 pslot - 1, &left);
Chris Mason54aa1f42007-06-22 14:16:25 -04002137 if (ret)
2138 wret = 1;
2139 else {
Chris Mason54aa1f42007-06-22 14:16:25 -04002140 wret = push_node_left(trans, root,
Chris Mason971a1f62008-04-24 10:54:32 -04002141 left, mid, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04002142 }
Chris Mason33ade1f2007-04-20 13:48:57 -04002143 }
Chris Masone66f7092007-04-20 13:16:02 -04002144 if (wret < 0)
2145 ret = wret;
2146 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04002147 struct btrfs_disk_key disk_key;
Chris Masone66f7092007-04-20 13:16:02 -04002148 orig_slot += left_nr;
Chris Mason5f39d392007-10-15 16:14:19 -04002149 btrfs_node_key(mid, &disk_key, 0);
Jan Schmidtf2304752012-05-26 11:43:17 +02002150 tree_mod_log_set_node_key(root->fs_info, parent,
Liu Bo32adf092012-10-19 12:52:15 +00002151 pslot, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002152 btrfs_set_node_key(parent, &disk_key, pslot);
2153 btrfs_mark_buffer_dirty(parent);
2154 if (btrfs_header_nritems(left) > orig_slot) {
2155 path->nodes[level] = left;
Chris Masone66f7092007-04-20 13:16:02 -04002156 path->slots[level + 1] -= 1;
2157 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002158 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002159 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04002160 } else {
2161 orig_slot -=
Chris Mason5f39d392007-10-15 16:14:19 -04002162 btrfs_header_nritems(left);
Chris Masone66f7092007-04-20 13:16:02 -04002163 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002164 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002165 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04002166 }
Chris Masone66f7092007-04-20 13:16:02 -04002167 return 0;
2168 }
Chris Mason925baed2008-06-25 16:01:30 -04002169 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002170 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04002171 }
Chris Mason925baed2008-06-25 16:01:30 -04002172 right = read_node_slot(root, parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07002173 if (IS_ERR(right))
2174 right = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002175
2176 /*
2177 * then try to empty the right most buffer into the middle
2178 */
Chris Mason5f39d392007-10-15 16:14:19 -04002179 if (right) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002180 u32 right_nr;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002181
Chris Mason925baed2008-06-25 16:01:30 -04002182 btrfs_tree_lock(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002183 btrfs_set_lock_blocking(right);
2184
Chris Mason5f39d392007-10-15 16:14:19 -04002185 right_nr = btrfs_header_nritems(right);
Chris Mason33ade1f2007-04-20 13:48:57 -04002186 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
2187 wret = 1;
2188 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002189 ret = btrfs_cow_block(trans, root, right,
2190 parent, pslot + 1,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04002191 &right);
Chris Mason54aa1f42007-06-22 14:16:25 -04002192 if (ret)
2193 wret = 1;
2194 else {
Chris Mason54aa1f42007-06-22 14:16:25 -04002195 wret = balance_node_right(trans, root,
Chris Mason5f39d392007-10-15 16:14:19 -04002196 right, mid);
Chris Mason54aa1f42007-06-22 14:16:25 -04002197 }
Chris Mason33ade1f2007-04-20 13:48:57 -04002198 }
Chris Masone66f7092007-04-20 13:16:02 -04002199 if (wret < 0)
2200 ret = wret;
2201 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04002202 struct btrfs_disk_key disk_key;
2203
2204 btrfs_node_key(right, &disk_key, 0);
Jan Schmidtf2304752012-05-26 11:43:17 +02002205 tree_mod_log_set_node_key(root->fs_info, parent,
Liu Bo32adf092012-10-19 12:52:15 +00002206 pslot + 1, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002207 btrfs_set_node_key(parent, &disk_key, pslot + 1);
2208 btrfs_mark_buffer_dirty(parent);
2209
2210 if (btrfs_header_nritems(mid) <= orig_slot) {
2211 path->nodes[level] = right;
Chris Masone66f7092007-04-20 13:16:02 -04002212 path->slots[level + 1] += 1;
2213 path->slots[level] = orig_slot -
Chris Mason5f39d392007-10-15 16:14:19 -04002214 btrfs_header_nritems(mid);
Chris Mason925baed2008-06-25 16:01:30 -04002215 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002216 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04002217 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002218 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002219 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04002220 }
Chris Masone66f7092007-04-20 13:16:02 -04002221 return 0;
2222 }
Chris Mason925baed2008-06-25 16:01:30 -04002223 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002224 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04002225 }
Chris Masone66f7092007-04-20 13:16:02 -04002226 return 1;
2227}
2228
Chris Mason74123bd2007-02-02 11:05:29 -05002229/*
Chris Masond352ac62008-09-29 15:18:18 -04002230 * readahead one full node of leaves, finding things that are close
2231 * to the block in 'slot', and triggering ra on them.
Chris Mason3c69fae2007-08-07 15:52:22 -04002232 */
Chris Masonc8c42862009-04-03 10:14:18 -04002233static void reada_for_search(struct btrfs_root *root,
2234 struct btrfs_path *path,
2235 int level, int slot, u64 objectid)
Chris Mason3c69fae2007-08-07 15:52:22 -04002236{
Chris Mason5f39d392007-10-15 16:14:19 -04002237 struct extent_buffer *node;
Chris Mason01f46652007-12-21 16:24:26 -05002238 struct btrfs_disk_key disk_key;
Chris Mason3c69fae2007-08-07 15:52:22 -04002239 u32 nritems;
Chris Mason3c69fae2007-08-07 15:52:22 -04002240 u64 search;
Chris Masona7175312009-01-22 09:23:10 -05002241 u64 target;
Chris Mason6b800532007-10-15 16:17:34 -04002242 u64 nread = 0;
Chris Mason5f39d392007-10-15 16:14:19 -04002243 struct extent_buffer *eb;
Chris Mason6b800532007-10-15 16:17:34 -04002244 u32 nr;
2245 u32 blocksize;
2246 u32 nscan = 0;
Chris Masondb945352007-10-15 16:15:53 -04002247
Chris Masona6b6e752007-10-15 16:22:39 -04002248 if (level != 1)
Chris Mason3c69fae2007-08-07 15:52:22 -04002249 return;
2250
Chris Mason6702ed42007-08-07 16:15:09 -04002251 if (!path->nodes[level])
2252 return;
2253
Chris Mason5f39d392007-10-15 16:14:19 -04002254 node = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04002255
Chris Mason3c69fae2007-08-07 15:52:22 -04002256 search = btrfs_node_blockptr(node, slot);
David Sterba707e8a02014-06-04 19:22:26 +02002257 blocksize = root->nodesize;
Daniel Dressler01d58472014-11-21 17:15:07 +09002258 eb = btrfs_find_tree_block(root->fs_info, search);
Chris Mason5f39d392007-10-15 16:14:19 -04002259 if (eb) {
2260 free_extent_buffer(eb);
Chris Mason3c69fae2007-08-07 15:52:22 -04002261 return;
2262 }
2263
Chris Masona7175312009-01-22 09:23:10 -05002264 target = search;
Chris Mason6b800532007-10-15 16:17:34 -04002265
Chris Mason5f39d392007-10-15 16:14:19 -04002266 nritems = btrfs_header_nritems(node);
Chris Mason6b800532007-10-15 16:17:34 -04002267 nr = slot;
Josef Bacik25b8b932011-06-08 14:36:54 -04002268
Chris Masond3977122009-01-05 21:25:51 -05002269 while (1) {
David Sterbae4058b52015-11-27 16:31:35 +01002270 if (path->reada == READA_BACK) {
Chris Mason6b800532007-10-15 16:17:34 -04002271 if (nr == 0)
2272 break;
2273 nr--;
David Sterbae4058b52015-11-27 16:31:35 +01002274 } else if (path->reada == READA_FORWARD) {
Chris Mason6b800532007-10-15 16:17:34 -04002275 nr++;
2276 if (nr >= nritems)
2277 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04002278 }
David Sterbae4058b52015-11-27 16:31:35 +01002279 if (path->reada == READA_BACK && objectid) {
Chris Mason01f46652007-12-21 16:24:26 -05002280 btrfs_node_key(node, &disk_key, nr);
2281 if (btrfs_disk_key_objectid(&disk_key) != objectid)
2282 break;
2283 }
Chris Mason6b800532007-10-15 16:17:34 -04002284 search = btrfs_node_blockptr(node, nr);
Chris Masona7175312009-01-22 09:23:10 -05002285 if ((search <= target && target - search <= 65536) ||
2286 (search > target && search - target <= 65536)) {
David Sterbad3e46fe2014-06-15 02:04:19 +02002287 readahead_tree_block(root, search);
Chris Mason6b800532007-10-15 16:17:34 -04002288 nread += blocksize;
2289 }
2290 nscan++;
Chris Masona7175312009-01-22 09:23:10 -05002291 if ((nread > 65536 || nscan > 32))
Chris Mason6b800532007-10-15 16:17:34 -04002292 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04002293 }
2294}
Chris Mason925baed2008-06-25 16:01:30 -04002295
Josef Bacik0b088512013-06-17 14:23:02 -04002296static noinline void reada_for_balance(struct btrfs_root *root,
2297 struct btrfs_path *path, int level)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002298{
2299 int slot;
2300 int nritems;
2301 struct extent_buffer *parent;
2302 struct extent_buffer *eb;
2303 u64 gen;
2304 u64 block1 = 0;
2305 u64 block2 = 0;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002306
Chris Mason8c594ea2009-04-20 15:50:10 -04002307 parent = path->nodes[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002308 if (!parent)
Josef Bacik0b088512013-06-17 14:23:02 -04002309 return;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002310
2311 nritems = btrfs_header_nritems(parent);
Chris Mason8c594ea2009-04-20 15:50:10 -04002312 slot = path->slots[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002313
2314 if (slot > 0) {
2315 block1 = btrfs_node_blockptr(parent, slot - 1);
2316 gen = btrfs_node_ptr_generation(parent, slot - 1);
Daniel Dressler01d58472014-11-21 17:15:07 +09002317 eb = btrfs_find_tree_block(root->fs_info, block1);
Chris Masonb9fab912012-05-06 07:23:47 -04002318 /*
2319 * if we get -eagain from btrfs_buffer_uptodate, we
2320 * don't want to return eagain here. That will loop
2321 * forever
2322 */
2323 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002324 block1 = 0;
2325 free_extent_buffer(eb);
2326 }
Chris Mason8c594ea2009-04-20 15:50:10 -04002327 if (slot + 1 < nritems) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05002328 block2 = btrfs_node_blockptr(parent, slot + 1);
2329 gen = btrfs_node_ptr_generation(parent, slot + 1);
Daniel Dressler01d58472014-11-21 17:15:07 +09002330 eb = btrfs_find_tree_block(root->fs_info, block2);
Chris Masonb9fab912012-05-06 07:23:47 -04002331 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002332 block2 = 0;
2333 free_extent_buffer(eb);
2334 }
Chris Mason8c594ea2009-04-20 15:50:10 -04002335
Josef Bacik0b088512013-06-17 14:23:02 -04002336 if (block1)
David Sterbad3e46fe2014-06-15 02:04:19 +02002337 readahead_tree_block(root, block1);
Josef Bacik0b088512013-06-17 14:23:02 -04002338 if (block2)
David Sterbad3e46fe2014-06-15 02:04:19 +02002339 readahead_tree_block(root, block2);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002340}
2341
2342
2343/*
Chris Masond3977122009-01-05 21:25:51 -05002344 * when we walk down the tree, it is usually safe to unlock the higher layers
2345 * in the tree. The exceptions are when our path goes through slot 0, because
2346 * operations on the tree might require changing key pointers higher up in the
2347 * tree.
Chris Masond352ac62008-09-29 15:18:18 -04002348 *
Chris Masond3977122009-01-05 21:25:51 -05002349 * callers might also have set path->keep_locks, which tells this code to keep
2350 * the lock if the path points to the last slot in the block. This is part of
2351 * walking through the tree, and selecting the next slot in the higher block.
Chris Masond352ac62008-09-29 15:18:18 -04002352 *
Chris Masond3977122009-01-05 21:25:51 -05002353 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
2354 * if lowest_unlock is 1, level 0 won't be unlocked
Chris Masond352ac62008-09-29 15:18:18 -04002355 */
Chris Masone02119d2008-09-05 16:13:11 -04002356static noinline void unlock_up(struct btrfs_path *path, int level,
Chris Masonf7c79f32012-03-19 15:54:38 -04002357 int lowest_unlock, int min_write_lock_level,
2358 int *write_lock_level)
Chris Mason925baed2008-06-25 16:01:30 -04002359{
2360 int i;
2361 int skip_level = level;
Chris Mason051e1b92008-06-25 16:01:30 -04002362 int no_skips = 0;
Chris Mason925baed2008-06-25 16:01:30 -04002363 struct extent_buffer *t;
2364
2365 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2366 if (!path->nodes[i])
2367 break;
2368 if (!path->locks[i])
2369 break;
Chris Mason051e1b92008-06-25 16:01:30 -04002370 if (!no_skips && path->slots[i] == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002371 skip_level = i + 1;
2372 continue;
2373 }
Chris Mason051e1b92008-06-25 16:01:30 -04002374 if (!no_skips && path->keep_locks) {
Chris Mason925baed2008-06-25 16:01:30 -04002375 u32 nritems;
2376 t = path->nodes[i];
2377 nritems = btrfs_header_nritems(t);
Chris Mason051e1b92008-06-25 16:01:30 -04002378 if (nritems < 1 || path->slots[i] >= nritems - 1) {
Chris Mason925baed2008-06-25 16:01:30 -04002379 skip_level = i + 1;
2380 continue;
2381 }
2382 }
Chris Mason051e1b92008-06-25 16:01:30 -04002383 if (skip_level < i && i >= lowest_unlock)
2384 no_skips = 1;
2385
Chris Mason925baed2008-06-25 16:01:30 -04002386 t = path->nodes[i];
2387 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -04002388 btrfs_tree_unlock_rw(t, path->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -04002389 path->locks[i] = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04002390 if (write_lock_level &&
2391 i > min_write_lock_level &&
2392 i <= *write_lock_level) {
2393 *write_lock_level = i - 1;
2394 }
Chris Mason925baed2008-06-25 16:01:30 -04002395 }
2396 }
2397}
2398
Chris Mason3c69fae2007-08-07 15:52:22 -04002399/*
Chris Masonb4ce94d2009-02-04 09:25:08 -05002400 * This releases any locks held in the path starting at level and
2401 * going all the way up to the root.
2402 *
2403 * btrfs_search_slot will keep the lock held on higher nodes in a few
2404 * corner cases, such as COW of the block at slot zero in the node. This
2405 * ignores those rules, and it should only be called when there are no
2406 * more updates to be done higher up in the tree.
2407 */
2408noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
2409{
2410 int i;
2411
Josef Bacik09a2a8f92013-04-05 16:51:15 -04002412 if (path->keep_locks)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002413 return;
2414
2415 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2416 if (!path->nodes[i])
Chris Mason12f4dac2009-02-04 09:31:42 -05002417 continue;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002418 if (!path->locks[i])
Chris Mason12f4dac2009-02-04 09:31:42 -05002419 continue;
Chris Masonbd681512011-07-16 15:23:14 -04002420 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002421 path->locks[i] = 0;
2422 }
2423}
2424
2425/*
Chris Masonc8c42862009-04-03 10:14:18 -04002426 * helper function for btrfs_search_slot. The goal is to find a block
2427 * in cache without setting the path to blocking. If we find the block
2428 * we return zero and the path is unchanged.
2429 *
2430 * If we can't find the block, we set the path blocking and do some
2431 * reada. -EAGAIN is returned and the search must be repeated.
2432 */
2433static int
2434read_block_for_search(struct btrfs_trans_handle *trans,
2435 struct btrfs_root *root, struct btrfs_path *p,
2436 struct extent_buffer **eb_ret, int level, int slot,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002437 struct btrfs_key *key, u64 time_seq)
Chris Masonc8c42862009-04-03 10:14:18 -04002438{
2439 u64 blocknr;
2440 u64 gen;
Chris Masonc8c42862009-04-03 10:14:18 -04002441 struct extent_buffer *b = *eb_ret;
2442 struct extent_buffer *tmp;
Chris Mason76a05b32009-05-14 13:24:30 -04002443 int ret;
Chris Masonc8c42862009-04-03 10:14:18 -04002444
2445 blocknr = btrfs_node_blockptr(b, slot);
2446 gen = btrfs_node_ptr_generation(b, slot);
Chris Masonc8c42862009-04-03 10:14:18 -04002447
Daniel Dressler01d58472014-11-21 17:15:07 +09002448 tmp = btrfs_find_tree_block(root->fs_info, blocknr);
Chris Masoncb449212010-10-24 11:01:27 -04002449 if (tmp) {
Chris Masonb9fab912012-05-06 07:23:47 -04002450 /* first we do an atomic uptodate check */
Josef Bacikbdf7c002013-06-17 13:44:48 -04002451 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
2452 *eb_ret = tmp;
2453 return 0;
Chris Masoncb449212010-10-24 11:01:27 -04002454 }
Josef Bacikbdf7c002013-06-17 13:44:48 -04002455
2456 /* the pages were up to date, but we failed
2457 * the generation number check. Do a full
2458 * read for the generation number that is correct.
2459 * We must do this without dropping locks so
2460 * we can trust our generation number
2461 */
2462 btrfs_set_path_blocking(p);
2463
2464 /* now we're allowed to do a blocking uptodate check */
2465 ret = btrfs_read_buffer(tmp, gen);
2466 if (!ret) {
2467 *eb_ret = tmp;
2468 return 0;
2469 }
2470 free_extent_buffer(tmp);
2471 btrfs_release_path(p);
2472 return -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04002473 }
2474
2475 /*
2476 * reduce lock contention at high levels
2477 * of the btree by dropping locks before
Chris Mason76a05b32009-05-14 13:24:30 -04002478 * we read. Don't release the lock on the current
2479 * level because we need to walk this node to figure
2480 * out which blocks to read.
Chris Masonc8c42862009-04-03 10:14:18 -04002481 */
Chris Mason8c594ea2009-04-20 15:50:10 -04002482 btrfs_unlock_up_safe(p, level + 1);
2483 btrfs_set_path_blocking(p);
2484
Chris Masoncb449212010-10-24 11:01:27 -04002485 free_extent_buffer(tmp);
David Sterbae4058b52015-11-27 16:31:35 +01002486 if (p->reada != READA_NONE)
Chris Masonc8c42862009-04-03 10:14:18 -04002487 reada_for_search(root, p, level, slot, key->objectid);
2488
David Sterbab3b4aa72011-04-21 01:20:15 +02002489 btrfs_release_path(p);
Chris Mason76a05b32009-05-14 13:24:30 -04002490
2491 ret = -EAGAIN;
David Sterbace86cd52014-06-15 01:07:32 +02002492 tmp = read_tree_block(root, blocknr, 0);
Liu Bo64c043d2015-05-25 17:30:15 +08002493 if (!IS_ERR(tmp)) {
Chris Mason76a05b32009-05-14 13:24:30 -04002494 /*
2495 * If the read above didn't mark this buffer up to date,
2496 * it will never end up being up to date. Set ret to EIO now
2497 * and give up so that our caller doesn't loop forever
2498 * on our EAGAINs.
2499 */
Chris Masonb9fab912012-05-06 07:23:47 -04002500 if (!btrfs_buffer_uptodate(tmp, 0, 0))
Chris Mason76a05b32009-05-14 13:24:30 -04002501 ret = -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04002502 free_extent_buffer(tmp);
Liu Boc871b0f2016-06-06 12:01:23 -07002503 } else {
2504 ret = PTR_ERR(tmp);
Chris Mason76a05b32009-05-14 13:24:30 -04002505 }
2506 return ret;
Chris Masonc8c42862009-04-03 10:14:18 -04002507}
2508
2509/*
2510 * helper function for btrfs_search_slot. This does all of the checks
2511 * for node-level blocks and does any balancing required based on
2512 * the ins_len.
2513 *
2514 * If no extra work was required, zero is returned. If we had to
2515 * drop the path, -EAGAIN is returned and btrfs_search_slot must
2516 * start over
2517 */
2518static int
2519setup_nodes_for_search(struct btrfs_trans_handle *trans,
2520 struct btrfs_root *root, struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -04002521 struct extent_buffer *b, int level, int ins_len,
2522 int *write_lock_level)
Chris Masonc8c42862009-04-03 10:14:18 -04002523{
2524 int ret;
2525 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
2526 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
2527 int sret;
2528
Chris Masonbd681512011-07-16 15:23:14 -04002529 if (*write_lock_level < level + 1) {
2530 *write_lock_level = level + 1;
2531 btrfs_release_path(p);
2532 goto again;
2533 }
2534
Chris Masonc8c42862009-04-03 10:14:18 -04002535 btrfs_set_path_blocking(p);
Josef Bacik0b088512013-06-17 14:23:02 -04002536 reada_for_balance(root, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002537 sret = split_node(trans, root, p, level);
Chris Masonbd681512011-07-16 15:23:14 -04002538 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonc8c42862009-04-03 10:14:18 -04002539
2540 BUG_ON(sret > 0);
2541 if (sret) {
2542 ret = sret;
2543 goto done;
2544 }
2545 b = p->nodes[level];
2546 } else if (ins_len < 0 && btrfs_header_nritems(b) <
Chris Masoncfbb9302009-05-18 10:41:58 -04002547 BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
Chris Masonc8c42862009-04-03 10:14:18 -04002548 int sret;
2549
Chris Masonbd681512011-07-16 15:23:14 -04002550 if (*write_lock_level < level + 1) {
2551 *write_lock_level = level + 1;
2552 btrfs_release_path(p);
2553 goto again;
2554 }
2555
Chris Masonc8c42862009-04-03 10:14:18 -04002556 btrfs_set_path_blocking(p);
Josef Bacik0b088512013-06-17 14:23:02 -04002557 reada_for_balance(root, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002558 sret = balance_level(trans, root, p, level);
Chris Masonbd681512011-07-16 15:23:14 -04002559 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonc8c42862009-04-03 10:14:18 -04002560
2561 if (sret) {
2562 ret = sret;
2563 goto done;
2564 }
2565 b = p->nodes[level];
2566 if (!b) {
David Sterbab3b4aa72011-04-21 01:20:15 +02002567 btrfs_release_path(p);
Chris Masonc8c42862009-04-03 10:14:18 -04002568 goto again;
2569 }
2570 BUG_ON(btrfs_header_nritems(b) == 1);
2571 }
2572 return 0;
2573
2574again:
2575 ret = -EAGAIN;
2576done:
2577 return ret;
2578}
2579
/*
 * Debug check used by key_search(): with CONFIG_BTRFS_ASSERT enabled,
 * assert that the first key stored in 'b' (first item key for level 0,
 * first pointer key otherwise) is byte-for-byte equal to the disk form of
 * 'key'.  Compiles to an empty function without CONFIG_BTRFS_ASSERT.
 */
static void key_search_validate(struct extent_buffer *b,
				struct btrfs_key *key,
				int level)
{
#ifdef CONFIG_BTRFS_ASSERT
	struct btrfs_disk_key disk_key;

	btrfs_cpu_key_to_disk(&disk_key, key);

	if (level != 0) {
		ASSERT(!memcmp_extent_buffer(b, &disk_key,
		    offsetof(struct btrfs_node, ptrs[0].key),
		    sizeof(disk_key)));
		return;
	}

	ASSERT(!memcmp_extent_buffer(b, &disk_key,
	    offsetof(struct btrfs_leaf, items[0].key),
	    sizeof(disk_key)));
#endif
}
2599
/*
 * Find 'key' in block 'b', reusing the result of the comparison made one
 * level up.
 *
 * When *prev_cmp is 0, no search is done: the first slot is validated
 * against 'key' (debug builds only) and *slot is set to 0.  Otherwise
 * bin_search() is run and its result is stored in *prev_cmp.
 *
 * Returns the value left in *prev_cmp.
 */
static int key_search(struct extent_buffer *b, struct btrfs_key *key,
		      int level, int *prev_cmp, int *slot)
{
	int cmp;

	if (*prev_cmp == 0) {
		key_search_validate(b, key, level);
		*slot = 0;
		return 0;
	}

	cmp = bin_search(b, key, level, slot);
	*prev_cmp = cmp;

	return cmp;
}
2613
David Sterba381cf652015-01-02 18:45:16 +01002614int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002615 u64 iobjectid, u64 ioff, u8 key_type,
2616 struct btrfs_key *found_key)
2617{
2618 int ret;
2619 struct btrfs_key key;
2620 struct extent_buffer *eb;
David Sterba381cf652015-01-02 18:45:16 +01002621
2622 ASSERT(path);
David Sterba1d4c08e2015-01-02 19:36:14 +01002623 ASSERT(found_key);
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002624
2625 key.type = key_type;
2626 key.objectid = iobjectid;
2627 key.offset = ioff;
2628
2629 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
David Sterba1d4c08e2015-01-02 19:36:14 +01002630 if (ret < 0)
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002631 return ret;
2632
2633 eb = path->nodes[0];
2634 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
2635 ret = btrfs_next_leaf(fs_root, path);
2636 if (ret)
2637 return ret;
2638 eb = path->nodes[0];
2639 }
2640
2641 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
2642 if (found_key->type != key.type ||
2643 found_key->objectid != key.objectid)
2644 return 1;
2645
2646 return 0;
2647}
2648
Chris Masonc8c42862009-04-03 10:14:18 -04002649/*
Chris Mason74123bd2007-02-02 11:05:29 -05002650 * look for key in the tree. path is filled in with nodes along the way
2651 * if key is found, we return zero and you can find the item in the leaf
2652 * level of the path (level 0)
2653 *
2654 * If the key isn't found, the path points to the slot where it should
Chris Masonaa5d6be2007-02-28 16:35:06 -05002655 * be inserted, and 1 is returned. If there are other errors during the
2656 * search a negative error number is returned.
Chris Mason97571fd2007-02-24 13:39:08 -05002657 *
2658 * if ins_len > 0, nodes and leaves will be split as we walk down the
2659 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
2660 * possible)
Chris Mason74123bd2007-02-02 11:05:29 -05002661 */
Chris Masone089f052007-03-16 16:20:31 -04002662int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2663 *root, struct btrfs_key *key, struct btrfs_path *p, int
2664 ins_len, int cow)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002665{
Chris Mason5f39d392007-10-15 16:14:19 -04002666 struct extent_buffer *b;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002667 int slot;
2668 int ret;
Yan Zheng33c66f42009-07-22 09:59:00 -04002669 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002670 int level;
Chris Mason925baed2008-06-25 16:01:30 -04002671 int lowest_unlock = 1;
Chris Masonbd681512011-07-16 15:23:14 -04002672 int root_lock;
2673 /* everything at write_lock_level or lower must be write locked */
2674 int write_lock_level = 0;
Chris Mason9f3a7422007-08-07 15:52:19 -04002675 u8 lowest_level = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04002676 int min_write_lock_level;
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002677 int prev_cmp;
Chris Mason9f3a7422007-08-07 15:52:19 -04002678
Chris Mason6702ed42007-08-07 16:15:09 -04002679 lowest_level = p->lowest_level;
Chris Mason323ac952008-10-01 19:05:46 -04002680 WARN_ON(lowest_level && ins_len > 0);
Chris Mason22b0ebd2007-03-30 08:47:31 -04002681 WARN_ON(p->nodes[0] != NULL);
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002682 BUG_ON(!cow && ins_len);
Josef Bacik25179202008-10-29 14:49:05 -04002683
Chris Masonbd681512011-07-16 15:23:14 -04002684 if (ins_len < 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002685 lowest_unlock = 2;
Chris Mason65b51a02008-08-01 15:11:20 -04002686
Chris Masonbd681512011-07-16 15:23:14 -04002687 /* when we are removing items, we might have to go up to level
2688 * two as we update tree pointers Make sure we keep write
2689 * for those levels as well
2690 */
2691 write_lock_level = 2;
2692 } else if (ins_len > 0) {
2693 /*
2694 * for inserting items, make sure we have a write lock on
2695 * level 1 so we can update keys
2696 */
2697 write_lock_level = 1;
2698 }
2699
2700 if (!cow)
2701 write_lock_level = -1;
2702
Josef Bacik09a2a8f92013-04-05 16:51:15 -04002703 if (cow && (p->keep_locks || p->lowest_level))
Chris Masonbd681512011-07-16 15:23:14 -04002704 write_lock_level = BTRFS_MAX_LEVEL;
2705
Chris Masonf7c79f32012-03-19 15:54:38 -04002706 min_write_lock_level = write_lock_level;
2707
Chris Masonbb803952007-03-01 12:04:21 -05002708again:
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002709 prev_cmp = -1;
Chris Masonbd681512011-07-16 15:23:14 -04002710 /*
2711 * we try very hard to do read locks on the root
2712 */
2713 root_lock = BTRFS_READ_LOCK;
2714 level = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002715 if (p->search_commit_root) {
Chris Masonbd681512011-07-16 15:23:14 -04002716 /*
2717 * the commit roots are read only
2718 * so we always do read locks
2719 */
Josef Bacik3f8a18c2014-03-28 17:16:01 -04002720 if (p->need_commit_sem)
2721 down_read(&root->fs_info->commit_root_sem);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002722 b = root->commit_root;
2723 extent_buffer_get(b);
Chris Masonbd681512011-07-16 15:23:14 -04002724 level = btrfs_header_level(b);
Josef Bacik3f8a18c2014-03-28 17:16:01 -04002725 if (p->need_commit_sem)
2726 up_read(&root->fs_info->commit_root_sem);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002727 if (!p->skip_locking)
Chris Masonbd681512011-07-16 15:23:14 -04002728 btrfs_tree_read_lock(b);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002729 } else {
Chris Masonbd681512011-07-16 15:23:14 -04002730 if (p->skip_locking) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002731 b = btrfs_root_node(root);
Chris Masonbd681512011-07-16 15:23:14 -04002732 level = btrfs_header_level(b);
2733 } else {
2734 /* we don't know the level of the root node
2735 * until we actually have it read locked
2736 */
2737 b = btrfs_read_lock_root_node(root);
2738 level = btrfs_header_level(b);
2739 if (level <= write_lock_level) {
2740 /* whoops, must trade for write lock */
2741 btrfs_tree_read_unlock(b);
2742 free_extent_buffer(b);
2743 b = btrfs_lock_root_node(root);
2744 root_lock = BTRFS_WRITE_LOCK;
2745
2746 /* the level might have changed, check again */
2747 level = btrfs_header_level(b);
2748 }
2749 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002750 }
Chris Masonbd681512011-07-16 15:23:14 -04002751 p->nodes[level] = b;
2752 if (!p->skip_locking)
2753 p->locks[level] = root_lock;
Chris Mason925baed2008-06-25 16:01:30 -04002754
Chris Masoneb60cea2007-02-02 09:18:22 -05002755 while (b) {
Chris Mason5f39d392007-10-15 16:14:19 -04002756 level = btrfs_header_level(b);
Chris Mason65b51a02008-08-01 15:11:20 -04002757
2758 /*
2759 * setup the path here so we can release it under lock
2760 * contention with the cow code
2761 */
Chris Mason02217ed2007-03-02 16:08:05 -05002762 if (cow) {
Chris Masonc8c42862009-04-03 10:14:18 -04002763 /*
2764 * if we don't really need to cow this block
2765 * then we don't want to set the path blocking,
2766 * so we test it here
2767 */
Jeff Mahoney64c12922016-06-08 00:36:38 -04002768 if (!should_cow_block(trans, root, b)) {
2769 trans->dirty = true;
Chris Mason65b51a02008-08-01 15:11:20 -04002770 goto cow_done;
Jeff Mahoney64c12922016-06-08 00:36:38 -04002771 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002772
Chris Masonbd681512011-07-16 15:23:14 -04002773 /*
2774 * must have write locks on this node and the
2775 * parent
2776 */
Josef Bacik5124e002012-11-07 13:44:13 -05002777 if (level > write_lock_level ||
2778 (level + 1 > write_lock_level &&
2779 level + 1 < BTRFS_MAX_LEVEL &&
2780 p->nodes[level + 1])) {
Chris Masonbd681512011-07-16 15:23:14 -04002781 write_lock_level = level + 1;
2782 btrfs_release_path(p);
2783 goto again;
2784 }
2785
Filipe Manana160f4082014-07-28 19:37:17 +01002786 btrfs_set_path_blocking(p);
Yan Zheng33c66f42009-07-22 09:59:00 -04002787 err = btrfs_cow_block(trans, root, b,
2788 p->nodes[level + 1],
2789 p->slots[level + 1], &b);
2790 if (err) {
Yan Zheng33c66f42009-07-22 09:59:00 -04002791 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04002792 goto done;
Chris Mason54aa1f42007-06-22 14:16:25 -04002793 }
Chris Mason02217ed2007-03-02 16:08:05 -05002794 }
Chris Mason65b51a02008-08-01 15:11:20 -04002795cow_done:
Chris Masoneb60cea2007-02-02 09:18:22 -05002796 p->nodes[level] = b;
Chris Masonbd681512011-07-16 15:23:14 -04002797 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002798
2799 /*
2800 * we have a lock on b and as long as we aren't changing
2801 * the tree, there is no way to for the items in b to change.
2802 * It is safe to drop the lock on our parent before we
2803 * go through the expensive btree search on b.
2804 *
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002805 * If we're inserting or deleting (ins_len != 0), then we might
2806 * be changing slot zero, which may require changing the parent.
2807 * So, we can't drop the lock until after we know which slot
2808 * we're operating on.
Chris Masonb4ce94d2009-02-04 09:25:08 -05002809 */
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002810 if (!ins_len && !p->keep_locks) {
2811 int u = level + 1;
2812
2813 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
2814 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
2815 p->locks[u] = 0;
2816 }
2817 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05002818
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002819 ret = key_search(b, key, level, &prev_cmp, &slot);
Liu Bo415b35a2016-06-17 19:16:21 -07002820 if (ret < 0)
2821 goto done;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002822
Chris Mason5f39d392007-10-15 16:14:19 -04002823 if (level != 0) {
Yan Zheng33c66f42009-07-22 09:59:00 -04002824 int dec = 0;
2825 if (ret && slot > 0) {
2826 dec = 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002827 slot -= 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04002828 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05002829 p->slots[level] = slot;
Yan Zheng33c66f42009-07-22 09:59:00 -04002830 err = setup_nodes_for_search(trans, root, p, b, level,
Chris Masonbd681512011-07-16 15:23:14 -04002831 ins_len, &write_lock_level);
Yan Zheng33c66f42009-07-22 09:59:00 -04002832 if (err == -EAGAIN)
Chris Masonc8c42862009-04-03 10:14:18 -04002833 goto again;
Yan Zheng33c66f42009-07-22 09:59:00 -04002834 if (err) {
2835 ret = err;
Chris Masonc8c42862009-04-03 10:14:18 -04002836 goto done;
Yan Zheng33c66f42009-07-22 09:59:00 -04002837 }
Chris Masonc8c42862009-04-03 10:14:18 -04002838 b = p->nodes[level];
2839 slot = p->slots[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002840
Chris Masonbd681512011-07-16 15:23:14 -04002841 /*
2842 * slot 0 is special, if we change the key
2843 * we have to update the parent pointer
2844 * which means we must have a write lock
2845 * on the parent
2846 */
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002847 if (slot == 0 && ins_len &&
Chris Masonbd681512011-07-16 15:23:14 -04002848 write_lock_level < level + 1) {
2849 write_lock_level = level + 1;
2850 btrfs_release_path(p);
2851 goto again;
2852 }
2853
Chris Masonf7c79f32012-03-19 15:54:38 -04002854 unlock_up(p, level, lowest_unlock,
2855 min_write_lock_level, &write_lock_level);
Chris Masonf9efa9c2008-06-25 16:14:04 -04002856
Chris Mason925baed2008-06-25 16:01:30 -04002857 if (level == lowest_level) {
Yan Zheng33c66f42009-07-22 09:59:00 -04002858 if (dec)
2859 p->slots[level]++;
Zheng Yan5b21f2e2008-09-26 10:05:38 -04002860 goto done;
Chris Mason925baed2008-06-25 16:01:30 -04002861 }
Chris Masonca7a79a2008-05-12 12:59:19 -04002862
Yan Zheng33c66f42009-07-22 09:59:00 -04002863 err = read_block_for_search(trans, root, p,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002864 &b, level, slot, key, 0);
Yan Zheng33c66f42009-07-22 09:59:00 -04002865 if (err == -EAGAIN)
Chris Masonc8c42862009-04-03 10:14:18 -04002866 goto again;
Yan Zheng33c66f42009-07-22 09:59:00 -04002867 if (err) {
2868 ret = err;
Chris Mason76a05b32009-05-14 13:24:30 -04002869 goto done;
Yan Zheng33c66f42009-07-22 09:59:00 -04002870 }
Chris Mason76a05b32009-05-14 13:24:30 -04002871
Chris Masonb4ce94d2009-02-04 09:25:08 -05002872 if (!p->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04002873 level = btrfs_header_level(b);
2874 if (level <= write_lock_level) {
2875 err = btrfs_try_tree_write_lock(b);
2876 if (!err) {
2877 btrfs_set_path_blocking(p);
2878 btrfs_tree_lock(b);
2879 btrfs_clear_path_blocking(p, b,
2880 BTRFS_WRITE_LOCK);
2881 }
2882 p->locks[level] = BTRFS_WRITE_LOCK;
2883 } else {
Chris Masonf82c4582014-11-19 10:25:09 -08002884 err = btrfs_tree_read_lock_atomic(b);
Chris Masonbd681512011-07-16 15:23:14 -04002885 if (!err) {
2886 btrfs_set_path_blocking(p);
2887 btrfs_tree_read_lock(b);
2888 btrfs_clear_path_blocking(p, b,
2889 BTRFS_READ_LOCK);
2890 }
2891 p->locks[level] = BTRFS_READ_LOCK;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002892 }
Chris Masonbd681512011-07-16 15:23:14 -04002893 p->nodes[level] = b;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002894 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05002895 } else {
2896 p->slots[level] = slot;
Yan Zheng87b29b22008-12-17 10:21:48 -05002897 if (ins_len > 0 &&
2898 btrfs_leaf_free_space(root, b) < ins_len) {
Chris Masonbd681512011-07-16 15:23:14 -04002899 if (write_lock_level < 1) {
2900 write_lock_level = 1;
2901 btrfs_release_path(p);
2902 goto again;
2903 }
2904
Chris Masonb4ce94d2009-02-04 09:25:08 -05002905 btrfs_set_path_blocking(p);
Yan Zheng33c66f42009-07-22 09:59:00 -04002906 err = split_leaf(trans, root, key,
2907 p, ins_len, ret == 0);
Chris Masonbd681512011-07-16 15:23:14 -04002908 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002909
Yan Zheng33c66f42009-07-22 09:59:00 -04002910 BUG_ON(err > 0);
2911 if (err) {
2912 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04002913 goto done;
2914 }
Chris Mason5c680ed2007-02-22 11:39:13 -05002915 }
Chris Mason459931e2008-12-10 09:10:46 -05002916 if (!p->search_for_split)
Chris Masonf7c79f32012-03-19 15:54:38 -04002917 unlock_up(p, level, lowest_unlock,
2918 min_write_lock_level, &write_lock_level);
Chris Mason65b51a02008-08-01 15:11:20 -04002919 goto done;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002920 }
2921 }
Chris Mason65b51a02008-08-01 15:11:20 -04002922 ret = 1;
2923done:
Chris Masonb4ce94d2009-02-04 09:25:08 -05002924 /*
2925 * we don't really know what they plan on doing with the path
2926 * from here on, so for now just mark it as blocking
2927 */
Chris Masonb9473432009-03-13 11:00:37 -04002928 if (!p->leave_spinning)
2929 btrfs_set_path_blocking(p);
Filipe Manana5f5bc6b2014-11-09 08:38:39 +00002930 if (ret < 0 && !p->skip_release_on_error)
David Sterbab3b4aa72011-04-21 01:20:15 +02002931 btrfs_release_path(p);
Chris Mason65b51a02008-08-01 15:11:20 -04002932 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002933}
2934
Chris Mason74123bd2007-02-02 11:05:29 -05002935/*
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002936 * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
2937 * current state of the tree together with the operations recorded in the tree
2938 * modification log to search for the key in a previous version of this tree, as
2939 * denoted by the time_seq parameter.
2940 *
2941 * Naturally, there is no support for insert, delete or cow operations.
2942 *
2943 * The resulting path and return value will be set up as if we called
2944 * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
2945 */
2946int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2947 struct btrfs_path *p, u64 time_seq)
2948{
2949 struct extent_buffer *b;
2950 int slot;
2951 int ret;
2952 int err;
2953 int level;
2954 int lowest_unlock = 1;
2955 u8 lowest_level = 0;
Josef Bacikd4b40872013-09-24 14:09:34 -04002956 int prev_cmp = -1;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002957
2958 lowest_level = p->lowest_level;
2959 WARN_ON(p->nodes[0] != NULL);
2960
2961 if (p->search_commit_root) {
2962 BUG_ON(time_seq);
2963 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2964 }
2965
2966again:
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002967 b = get_old_root(root, time_seq);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002968 level = btrfs_header_level(b);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002969 p->locks[level] = BTRFS_READ_LOCK;
2970
2971 while (b) {
2972 level = btrfs_header_level(b);
2973 p->nodes[level] = b;
2974 btrfs_clear_path_blocking(p, NULL, 0);
2975
2976 /*
2977 * we have a lock on b and as long as we aren't changing
2978 * the tree, there is no way to for the items in b to change.
2979 * It is safe to drop the lock on our parent before we
2980 * go through the expensive btree search on b.
2981 */
2982 btrfs_unlock_up_safe(p, level + 1);
2983
Josef Bacikd4b40872013-09-24 14:09:34 -04002984 /*
Nicholas D Steeves01327612016-05-19 21:18:45 -04002985 * Since we can unwind ebs we want to do a real search every
Josef Bacikd4b40872013-09-24 14:09:34 -04002986 * time.
2987 */
2988 prev_cmp = -1;
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002989 ret = key_search(b, key, level, &prev_cmp, &slot);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002990
2991 if (level != 0) {
2992 int dec = 0;
2993 if (ret && slot > 0) {
2994 dec = 1;
2995 slot -= 1;
2996 }
2997 p->slots[level] = slot;
2998 unlock_up(p, level, lowest_unlock, 0, NULL);
2999
3000 if (level == lowest_level) {
3001 if (dec)
3002 p->slots[level]++;
3003 goto done;
3004 }
3005
3006 err = read_block_for_search(NULL, root, p, &b, level,
3007 slot, key, time_seq);
3008 if (err == -EAGAIN)
3009 goto again;
3010 if (err) {
3011 ret = err;
3012 goto done;
3013 }
3014
3015 level = btrfs_header_level(b);
Chris Masonf82c4582014-11-19 10:25:09 -08003016 err = btrfs_tree_read_lock_atomic(b);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02003017 if (!err) {
3018 btrfs_set_path_blocking(p);
3019 btrfs_tree_read_lock(b);
3020 btrfs_clear_path_blocking(p, b,
3021 BTRFS_READ_LOCK);
3022 }
Josef Bacik9ec72672013-08-07 16:57:23 -04003023 b = tree_mod_log_rewind(root->fs_info, p, b, time_seq);
Josef Bacikdb7f3432013-08-07 14:54:37 -04003024 if (!b) {
3025 ret = -ENOMEM;
3026 goto done;
3027 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02003028 p->locks[level] = BTRFS_READ_LOCK;
3029 p->nodes[level] = b;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02003030 } else {
3031 p->slots[level] = slot;
3032 unlock_up(p, level, lowest_unlock, 0, NULL);
3033 goto done;
3034 }
3035 }
3036 ret = 1;
3037done:
3038 if (!p->leave_spinning)
3039 btrfs_set_path_blocking(p);
3040 if (ret < 0)
3041 btrfs_release_path(p);
3042
3043 return ret;
3044}
3045
3046/*
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003047 * helper to use instead of search slot if no exact match is needed but
3048 * instead the next or previous item should be returned.
3049 * When find_higher is true, the next higher item is returned, the next lower
3050 * otherwise.
3051 * When return_any and find_higher are both true, and no higher item is found,
3052 * return the next lower instead.
3053 * When return_any is true and find_higher is false, and no lower item is found,
3054 * return the next higher instead.
3055 * It returns 0 if any item is found, 1 if none is found (tree empty), and
3056 * < 0 on error
3057 */
3058int btrfs_search_slot_for_read(struct btrfs_root *root,
3059 struct btrfs_key *key, struct btrfs_path *p,
3060 int find_higher, int return_any)
3061{
3062 int ret;
3063 struct extent_buffer *leaf;
3064
3065again:
3066 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
3067 if (ret <= 0)
3068 return ret;
3069 /*
3070 * a return value of 1 means the path is at the position where the
3071 * item should be inserted. Normally this is the next bigger item,
3072 * but in case the previous item is the last in a leaf, path points
3073 * to the first free slot in the previous leaf, i.e. at an invalid
3074 * item.
3075 */
3076 leaf = p->nodes[0];
3077
3078 if (find_higher) {
3079 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
3080 ret = btrfs_next_leaf(root, p);
3081 if (ret <= 0)
3082 return ret;
3083 if (!return_any)
3084 return 1;
3085 /*
3086 * no higher item found, return the next
3087 * lower instead
3088 */
3089 return_any = 0;
3090 find_higher = 0;
3091 btrfs_release_path(p);
3092 goto again;
3093 }
3094 } else {
Arne Jansene6793762011-09-13 11:18:10 +02003095 if (p->slots[0] == 0) {
3096 ret = btrfs_prev_leaf(root, p);
3097 if (ret < 0)
3098 return ret;
3099 if (!ret) {
Filipe David Borba Manana23c6bf62014-01-11 21:28:54 +00003100 leaf = p->nodes[0];
3101 if (p->slots[0] == btrfs_header_nritems(leaf))
3102 p->slots[0]--;
Arne Jansene6793762011-09-13 11:18:10 +02003103 return 0;
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003104 }
Arne Jansene6793762011-09-13 11:18:10 +02003105 if (!return_any)
3106 return 1;
3107 /*
3108 * no lower item found, return the next
3109 * higher instead
3110 */
3111 return_any = 0;
3112 find_higher = 1;
3113 btrfs_release_path(p);
3114 goto again;
3115 } else {
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003116 --p->slots[0];
3117 }
3118 }
3119 return 0;
3120}
3121
3122/*
Chris Mason74123bd2007-02-02 11:05:29 -05003123 * adjust the pointers going up the tree, starting at level
3124 * making sure the right key of each node is points to 'key'.
3125 * This is used after shifting pointers to the left, so it stops
3126 * fixing up pointers when a given leaf/node is not in slot 0 of the
3127 * higher levels
Chris Masonaa5d6be2007-02-28 16:35:06 -05003128 *
Chris Mason74123bd2007-02-02 11:05:29 -05003129 */
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003130static void fixup_low_keys(struct btrfs_fs_info *fs_info,
3131 struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01003132 struct btrfs_disk_key *key, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003133{
3134 int i;
Chris Mason5f39d392007-10-15 16:14:19 -04003135 struct extent_buffer *t;
3136
Chris Mason234b63a2007-03-13 10:46:10 -04003137 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05003138 int tslot = path->slots[i];
Chris Masoneb60cea2007-02-02 09:18:22 -05003139 if (!path->nodes[i])
Chris Masonbe0e5c02007-01-26 15:51:26 -05003140 break;
Chris Mason5f39d392007-10-15 16:14:19 -04003141 t = path->nodes[i];
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003142 tree_mod_log_set_node_key(fs_info, t, tslot, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04003143 btrfs_set_node_key(t, key, tslot);
Chris Masond6025572007-03-30 14:27:56 -04003144 btrfs_mark_buffer_dirty(path->nodes[i]);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003145 if (tslot != 0)
3146 break;
3147 }
3148}
3149
Chris Mason74123bd2007-02-02 11:05:29 -05003150/*
Zheng Yan31840ae2008-09-23 13:14:14 -04003151 * update item key.
3152 *
3153 * This function isn't completely safe. It's the caller's responsibility
3154 * that the new key won't break the order
3155 */
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003156void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3157 struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01003158 struct btrfs_key *new_key)
Zheng Yan31840ae2008-09-23 13:14:14 -04003159{
3160 struct btrfs_disk_key disk_key;
3161 struct extent_buffer *eb;
3162 int slot;
3163
3164 eb = path->nodes[0];
3165 slot = path->slots[0];
3166 if (slot > 0) {
3167 btrfs_item_key(eb, &disk_key, slot - 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003168 BUG_ON(comp_keys(&disk_key, new_key) >= 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04003169 }
3170 if (slot < btrfs_header_nritems(eb) - 1) {
3171 btrfs_item_key(eb, &disk_key, slot + 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003172 BUG_ON(comp_keys(&disk_key, new_key) <= 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04003173 }
3174
3175 btrfs_cpu_key_to_disk(&disk_key, new_key);
3176 btrfs_set_item_key(eb, &disk_key, slot);
3177 btrfs_mark_buffer_dirty(eb);
3178 if (slot == 0)
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003179 fixup_low_keys(fs_info, path, &disk_key, 1);
Zheng Yan31840ae2008-09-23 13:14:14 -04003180}
3181
3182/*
Chris Mason74123bd2007-02-02 11:05:29 -05003183 * try to push data from one node into the next node left in the
Chris Mason79f95c82007-03-01 15:16:26 -05003184 * tree.
Chris Masonaa5d6be2007-02-28 16:35:06 -05003185 *
3186 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
3187 * error, and > 0 if there was no room in the left hand block.
Chris Mason74123bd2007-02-02 11:05:29 -05003188 */
Chris Mason98ed5172008-01-03 10:01:48 -05003189static int push_node_left(struct btrfs_trans_handle *trans,
3190 struct btrfs_root *root, struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -04003191 struct extent_buffer *src, int empty)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003192{
Chris Masonbe0e5c02007-01-26 15:51:26 -05003193 int push_items = 0;
Chris Masonbb803952007-03-01 12:04:21 -05003194 int src_nritems;
3195 int dst_nritems;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003196 int ret = 0;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003197
Chris Mason5f39d392007-10-15 16:14:19 -04003198 src_nritems = btrfs_header_nritems(src);
3199 dst_nritems = btrfs_header_nritems(dst);
Chris Mason123abc82007-03-14 14:14:43 -04003200 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
Chris Mason7bb86312007-12-11 09:25:06 -05003201 WARN_ON(btrfs_header_generation(src) != trans->transid);
3202 WARN_ON(btrfs_header_generation(dst) != trans->transid);
Chris Mason54aa1f42007-06-22 14:16:25 -04003203
Chris Masonbce4eae2008-04-24 14:42:46 -04003204 if (!empty && src_nritems <= 8)
Chris Mason971a1f62008-04-24 10:54:32 -04003205 return 1;
3206
Chris Masond3977122009-01-05 21:25:51 -05003207 if (push_items <= 0)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003208 return 1;
3209
Chris Masonbce4eae2008-04-24 14:42:46 -04003210 if (empty) {
Chris Mason971a1f62008-04-24 10:54:32 -04003211 push_items = min(src_nritems, push_items);
Chris Masonbce4eae2008-04-24 14:42:46 -04003212 if (push_items < src_nritems) {
3213 /* leave at least 8 pointers in the node if
3214 * we aren't going to empty it
3215 */
3216 if (src_nritems - push_items < 8) {
3217 if (push_items <= 8)
3218 return 1;
3219 push_items -= 8;
3220 }
3221 }
3222 } else
3223 push_items = min(src_nritems - 8, push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05003224
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003225 ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
3226 push_items);
3227 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003228 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003229 return ret;
3230 }
Chris Mason5f39d392007-10-15 16:14:19 -04003231 copy_extent_buffer(dst, src,
3232 btrfs_node_key_ptr_offset(dst_nritems),
3233 btrfs_node_key_ptr_offset(0),
Chris Masond3977122009-01-05 21:25:51 -05003234 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason5f39d392007-10-15 16:14:19 -04003235
Chris Masonbb803952007-03-01 12:04:21 -05003236 if (push_items < src_nritems) {
Jan Schmidt57911b82012-10-19 09:22:03 +02003237 /*
3238 * don't call tree_mod_log_eb_move here, key removal was already
3239 * fully logged by tree_mod_log_eb_copy above.
3240 */
Chris Mason5f39d392007-10-15 16:14:19 -04003241 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
3242 btrfs_node_key_ptr_offset(push_items),
3243 (src_nritems - push_items) *
3244 sizeof(struct btrfs_key_ptr));
Chris Masonbb803952007-03-01 12:04:21 -05003245 }
Chris Mason5f39d392007-10-15 16:14:19 -04003246 btrfs_set_header_nritems(src, src_nritems - push_items);
3247 btrfs_set_header_nritems(dst, dst_nritems + push_items);
3248 btrfs_mark_buffer_dirty(src);
3249 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04003250
Chris Masonbb803952007-03-01 12:04:21 -05003251 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003252}
3253
Chris Mason97571fd2007-02-24 13:39:08 -05003254/*
Chris Mason79f95c82007-03-01 15:16:26 -05003255 * try to push data from one node into the next node right in the
3256 * tree.
3257 *
3258 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
3259 * error, and > 0 if there was no room in the right hand block.
3260 *
3261 * this will only push up to 1/2 the contents of the left node over
3262 */
Chris Mason5f39d392007-10-15 16:14:19 -04003263static int balance_node_right(struct btrfs_trans_handle *trans,
3264 struct btrfs_root *root,
3265 struct extent_buffer *dst,
3266 struct extent_buffer *src)
Chris Mason79f95c82007-03-01 15:16:26 -05003267{
Chris Mason79f95c82007-03-01 15:16:26 -05003268 int push_items = 0;
3269 int max_push;
3270 int src_nritems;
3271 int dst_nritems;
3272 int ret = 0;
Chris Mason79f95c82007-03-01 15:16:26 -05003273
Chris Mason7bb86312007-12-11 09:25:06 -05003274 WARN_ON(btrfs_header_generation(src) != trans->transid);
3275 WARN_ON(btrfs_header_generation(dst) != trans->transid);
3276
Chris Mason5f39d392007-10-15 16:14:19 -04003277 src_nritems = btrfs_header_nritems(src);
3278 dst_nritems = btrfs_header_nritems(dst);
Chris Mason123abc82007-03-14 14:14:43 -04003279 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
Chris Masond3977122009-01-05 21:25:51 -05003280 if (push_items <= 0)
Chris Mason79f95c82007-03-01 15:16:26 -05003281 return 1;
Chris Masonbce4eae2008-04-24 14:42:46 -04003282
Chris Masond3977122009-01-05 21:25:51 -05003283 if (src_nritems < 4)
Chris Masonbce4eae2008-04-24 14:42:46 -04003284 return 1;
Chris Mason79f95c82007-03-01 15:16:26 -05003285
3286 max_push = src_nritems / 2 + 1;
3287 /* don't try to empty the node */
Chris Masond3977122009-01-05 21:25:51 -05003288 if (max_push >= src_nritems)
Chris Mason79f95c82007-03-01 15:16:26 -05003289 return 1;
Yan252c38f2007-08-29 09:11:44 -04003290
Chris Mason79f95c82007-03-01 15:16:26 -05003291 if (max_push < push_items)
3292 push_items = max_push;
3293
Jan Schmidtf2304752012-05-26 11:43:17 +02003294 tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003295 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
3296 btrfs_node_key_ptr_offset(0),
3297 (dst_nritems) *
3298 sizeof(struct btrfs_key_ptr));
Chris Masond6025572007-03-30 14:27:56 -04003299
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003300 ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
3301 src_nritems - push_items, push_items);
3302 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003303 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003304 return ret;
3305 }
Chris Mason5f39d392007-10-15 16:14:19 -04003306 copy_extent_buffer(dst, src,
3307 btrfs_node_key_ptr_offset(0),
3308 btrfs_node_key_ptr_offset(src_nritems - push_items),
Chris Masond3977122009-01-05 21:25:51 -05003309 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason79f95c82007-03-01 15:16:26 -05003310
Chris Mason5f39d392007-10-15 16:14:19 -04003311 btrfs_set_header_nritems(src, src_nritems - push_items);
3312 btrfs_set_header_nritems(dst, dst_nritems + push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05003313
Chris Mason5f39d392007-10-15 16:14:19 -04003314 btrfs_mark_buffer_dirty(src);
3315 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04003316
Chris Mason79f95c82007-03-01 15:16:26 -05003317 return ret;
3318}
3319
3320/*
Chris Mason97571fd2007-02-24 13:39:08 -05003321 * helper function to insert a new root level in the tree.
3322 * A new node is allocated, and a single item is inserted to
3323 * point to the existing root
Chris Masonaa5d6be2007-02-28 16:35:06 -05003324 *
3325 * returns zero on success or < 0 on failure.
Chris Mason97571fd2007-02-24 13:39:08 -05003326 */
Chris Masond3977122009-01-05 21:25:51 -05003327static noinline int insert_new_root(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04003328 struct btrfs_root *root,
Liu Bofdd99c72013-05-22 12:06:51 +00003329 struct btrfs_path *path, int level)
Chris Mason5c680ed2007-02-22 11:39:13 -05003330{
Chris Mason7bb86312007-12-11 09:25:06 -05003331 u64 lower_gen;
Chris Mason5f39d392007-10-15 16:14:19 -04003332 struct extent_buffer *lower;
3333 struct extent_buffer *c;
Chris Mason925baed2008-06-25 16:01:30 -04003334 struct extent_buffer *old;
Chris Mason5f39d392007-10-15 16:14:19 -04003335 struct btrfs_disk_key lower_key;
Chris Mason5c680ed2007-02-22 11:39:13 -05003336
3337 BUG_ON(path->nodes[level]);
3338 BUG_ON(path->nodes[level-1] != root->node);
3339
Chris Mason7bb86312007-12-11 09:25:06 -05003340 lower = path->nodes[level-1];
3341 if (level == 1)
3342 btrfs_item_key(lower, &lower_key, 0);
3343 else
3344 btrfs_node_key(lower, &lower_key, 0);
3345
David Sterba4d75f8a2014-06-15 01:54:12 +02003346 c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3347 &lower_key, level, root->node->start, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003348 if (IS_ERR(c))
3349 return PTR_ERR(c);
Chris Mason925baed2008-06-25 16:01:30 -04003350
Yan, Zhengf0486c62010-05-16 10:46:25 -04003351 root_add_used(root, root->nodesize);
3352
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003353 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
Chris Mason5f39d392007-10-15 16:14:19 -04003354 btrfs_set_header_nritems(c, 1);
3355 btrfs_set_header_level(c, level);
Chris Masondb945352007-10-15 16:15:53 -04003356 btrfs_set_header_bytenr(c, c->start);
Chris Mason5f39d392007-10-15 16:14:19 -04003357 btrfs_set_header_generation(c, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003358 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
Chris Mason5f39d392007-10-15 16:14:19 -04003359 btrfs_set_header_owner(c, root->root_key.objectid);
Chris Masond5719762007-03-23 10:01:08 -04003360
Ross Kirk0a4e5582013-09-24 10:12:38 +01003361 write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(),
Chris Mason5f39d392007-10-15 16:14:19 -04003362 BTRFS_FSID_SIZE);
Chris Masone17cade2008-04-15 15:41:47 -04003363
3364 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
Geert Uytterhoevenb308bc22013-08-20 13:20:15 +02003365 btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE);
Chris Masone17cade2008-04-15 15:41:47 -04003366
Chris Mason5f39d392007-10-15 16:14:19 -04003367 btrfs_set_node_key(c, &lower_key, 0);
Chris Masondb945352007-10-15 16:15:53 -04003368 btrfs_set_node_blockptr(c, 0, lower->start);
Chris Mason7bb86312007-12-11 09:25:06 -05003369 lower_gen = btrfs_header_generation(lower);
Zheng Yan31840ae2008-09-23 13:14:14 -04003370 WARN_ON(lower_gen != trans->transid);
Chris Mason7bb86312007-12-11 09:25:06 -05003371
3372 btrfs_set_node_ptr_generation(c, 0, lower_gen);
Chris Mason5f39d392007-10-15 16:14:19 -04003373
3374 btrfs_mark_buffer_dirty(c);
Chris Masond5719762007-03-23 10:01:08 -04003375
Chris Mason925baed2008-06-25 16:01:30 -04003376 old = root->node;
Liu Bofdd99c72013-05-22 12:06:51 +00003377 tree_mod_log_set_root_pointer(root, c, 0);
Chris Mason240f62c2011-03-23 14:54:42 -04003378 rcu_assign_pointer(root->node, c);
Chris Mason925baed2008-06-25 16:01:30 -04003379
3380 /* the super has an extra ref to root->node */
3381 free_extent_buffer(old);
3382
Chris Mason0b86a832008-03-24 15:01:56 -04003383 add_root_to_dirty_list(root);
Chris Mason5f39d392007-10-15 16:14:19 -04003384 extent_buffer_get(c);
3385 path->nodes[level] = c;
chandan95449a12015-01-15 12:22:03 +05303386 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Chris Mason5c680ed2007-02-22 11:39:13 -05003387 path->slots[level] = 0;
3388 return 0;
3389}
3390
Chris Mason74123bd2007-02-02 11:05:29 -05003391/*
3392 * worker function to insert a single pointer in a node.
3393 * the node should have enough room for the pointer already
Chris Mason97571fd2007-02-24 13:39:08 -05003394 *
Chris Mason74123bd2007-02-02 11:05:29 -05003395 * slot and level indicate where you want the key to go, and
3396 * blocknr is the block the key points to.
3397 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003398static void insert_ptr(struct btrfs_trans_handle *trans,
3399 struct btrfs_root *root, struct btrfs_path *path,
3400 struct btrfs_disk_key *key, u64 bytenr,
Jan Schmidtc3e06962012-06-21 11:01:06 +02003401 int slot, int level)
Chris Mason74123bd2007-02-02 11:05:29 -05003402{
Chris Mason5f39d392007-10-15 16:14:19 -04003403 struct extent_buffer *lower;
Chris Mason74123bd2007-02-02 11:05:29 -05003404 int nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003405 int ret;
Chris Mason5c680ed2007-02-22 11:39:13 -05003406
3407 BUG_ON(!path->nodes[level]);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003408 btrfs_assert_tree_locked(path->nodes[level]);
Chris Mason5f39d392007-10-15 16:14:19 -04003409 lower = path->nodes[level];
3410 nritems = btrfs_header_nritems(lower);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04003411 BUG_ON(slot > nritems);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003412 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
Chris Mason74123bd2007-02-02 11:05:29 -05003413 if (slot != nritems) {
Jan Schmidtc3e06962012-06-21 11:01:06 +02003414 if (level)
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003415 tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
3416 slot, nritems - slot);
Chris Mason5f39d392007-10-15 16:14:19 -04003417 memmove_extent_buffer(lower,
3418 btrfs_node_key_ptr_offset(slot + 1),
3419 btrfs_node_key_ptr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04003420 (nritems - slot) * sizeof(struct btrfs_key_ptr));
Chris Mason74123bd2007-02-02 11:05:29 -05003421 }
Jan Schmidtc3e06962012-06-21 11:01:06 +02003422 if (level) {
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003423 ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
Josef Bacikc8cc6342013-07-01 16:18:19 -04003424 MOD_LOG_KEY_ADD, GFP_NOFS);
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003425 BUG_ON(ret < 0);
3426 }
Chris Mason5f39d392007-10-15 16:14:19 -04003427 btrfs_set_node_key(lower, key, slot);
Chris Masondb945352007-10-15 16:15:53 -04003428 btrfs_set_node_blockptr(lower, slot, bytenr);
Chris Mason74493f72007-12-11 09:25:06 -05003429 WARN_ON(trans->transid == 0);
3430 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04003431 btrfs_set_header_nritems(lower, nritems + 1);
3432 btrfs_mark_buffer_dirty(lower);
Chris Mason74123bd2007-02-02 11:05:29 -05003433}
3434
Chris Mason97571fd2007-02-24 13:39:08 -05003435/*
3436 * split the node at the specified level in path in two.
3437 * The path is corrected to point to the appropriate node after the split
3438 *
3439 * Before splitting this tries to make some room in the node by pushing
3440 * left and right, if either one works, it returns right away.
Chris Masonaa5d6be2007-02-28 16:35:06 -05003441 *
3442 * returns 0 on success and < 0 on failure
Chris Mason97571fd2007-02-24 13:39:08 -05003443 */
Chris Masone02119d2008-09-05 16:13:11 -04003444static noinline int split_node(struct btrfs_trans_handle *trans,
3445 struct btrfs_root *root,
3446 struct btrfs_path *path, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003447{
Chris Mason5f39d392007-10-15 16:14:19 -04003448 struct extent_buffer *c;
3449 struct extent_buffer *split;
3450 struct btrfs_disk_key disk_key;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003451 int mid;
Chris Mason5c680ed2007-02-22 11:39:13 -05003452 int ret;
Chris Mason7518a232007-03-12 12:01:18 -04003453 u32 c_nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003454
Chris Mason5f39d392007-10-15 16:14:19 -04003455 c = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05003456 WARN_ON(btrfs_header_generation(c) != trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04003457 if (c == root->node) {
Jan Schmidtd9abbf12013-03-20 13:49:48 +00003458 /*
Jan Schmidt90f8d622013-04-13 13:19:53 +00003459 * trying to split the root, lets make a new one
3460 *
Liu Bofdd99c72013-05-22 12:06:51 +00003461 * tree mod log: We don't log_removal old root in
Jan Schmidt90f8d622013-04-13 13:19:53 +00003462 * insert_new_root, because that root buffer will be kept as a
3463 * normal node. We are going to log removal of half of the
3464 * elements below with tree_mod_log_eb_copy. We're holding a
3465 * tree lock on the buffer, which is why we cannot race with
3466 * other tree_mod_log users.
Jan Schmidtd9abbf12013-03-20 13:49:48 +00003467 */
Liu Bofdd99c72013-05-22 12:06:51 +00003468 ret = insert_new_root(trans, root, path, level + 1);
Chris Mason5c680ed2007-02-22 11:39:13 -05003469 if (ret)
3470 return ret;
Chris Masonb3612422009-05-13 19:12:15 -04003471 } else {
Chris Masone66f7092007-04-20 13:16:02 -04003472 ret = push_nodes_for_insert(trans, root, path, level);
Chris Mason5f39d392007-10-15 16:14:19 -04003473 c = path->nodes[level];
3474 if (!ret && btrfs_header_nritems(c) <
Chris Masonc448acf2008-04-24 09:34:34 -04003475 BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
Chris Masone66f7092007-04-20 13:16:02 -04003476 return 0;
Chris Mason54aa1f42007-06-22 14:16:25 -04003477 if (ret < 0)
3478 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003479 }
Chris Masone66f7092007-04-20 13:16:02 -04003480
Chris Mason5f39d392007-10-15 16:14:19 -04003481 c_nritems = btrfs_header_nritems(c);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003482 mid = (c_nritems + 1) / 2;
3483 btrfs_node_key(c, &disk_key, mid);
Chris Mason7bb86312007-12-11 09:25:06 -05003484
David Sterba4d75f8a2014-06-15 01:54:12 +02003485 split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3486 &disk_key, level, c->start, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003487 if (IS_ERR(split))
3488 return PTR_ERR(split);
Chris Mason54aa1f42007-06-22 14:16:25 -04003489
Yan, Zhengf0486c62010-05-16 10:46:25 -04003490 root_add_used(root, root->nodesize);
3491
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003492 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
Chris Mason5f39d392007-10-15 16:14:19 -04003493 btrfs_set_header_level(split, btrfs_header_level(c));
Chris Masondb945352007-10-15 16:15:53 -04003494 btrfs_set_header_bytenr(split, split->start);
Chris Mason5f39d392007-10-15 16:14:19 -04003495 btrfs_set_header_generation(split, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003496 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
Chris Mason5f39d392007-10-15 16:14:19 -04003497 btrfs_set_header_owner(split, root->root_key.objectid);
3498 write_extent_buffer(split, root->fs_info->fsid,
Ross Kirk0a4e5582013-09-24 10:12:38 +01003499 btrfs_header_fsid(), BTRFS_FSID_SIZE);
Chris Masone17cade2008-04-15 15:41:47 -04003500 write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
Geert Uytterhoevenb308bc22013-08-20 13:20:15 +02003501 btrfs_header_chunk_tree_uuid(split),
Chris Masone17cade2008-04-15 15:41:47 -04003502 BTRFS_UUID_SIZE);
Chris Mason5f39d392007-10-15 16:14:19 -04003503
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003504 ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
3505 mid, c_nritems - mid);
3506 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003507 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003508 return ret;
3509 }
Chris Mason5f39d392007-10-15 16:14:19 -04003510 copy_extent_buffer(split, c,
3511 btrfs_node_key_ptr_offset(0),
3512 btrfs_node_key_ptr_offset(mid),
3513 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
3514 btrfs_set_header_nritems(split, c_nritems - mid);
3515 btrfs_set_header_nritems(c, mid);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003516 ret = 0;
3517
Chris Mason5f39d392007-10-15 16:14:19 -04003518 btrfs_mark_buffer_dirty(c);
3519 btrfs_mark_buffer_dirty(split);
3520
Jeff Mahoney143bede2012-03-01 14:56:26 +01003521 insert_ptr(trans, root, path, &disk_key, split->start,
Jan Schmidtc3e06962012-06-21 11:01:06 +02003522 path->slots[level + 1] + 1, level + 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003523
Chris Mason5de08d72007-02-24 06:24:44 -05003524 if (path->slots[level] >= mid) {
Chris Mason5c680ed2007-02-22 11:39:13 -05003525 path->slots[level] -= mid;
Chris Mason925baed2008-06-25 16:01:30 -04003526 btrfs_tree_unlock(c);
Chris Mason5f39d392007-10-15 16:14:19 -04003527 free_extent_buffer(c);
3528 path->nodes[level] = split;
Chris Mason5c680ed2007-02-22 11:39:13 -05003529 path->slots[level + 1] += 1;
3530 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003531 btrfs_tree_unlock(split);
Chris Mason5f39d392007-10-15 16:14:19 -04003532 free_extent_buffer(split);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003533 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05003534 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003535}
3536
Chris Mason74123bd2007-02-02 11:05:29 -05003537/*
3538 * how many bytes are required to store the items in a leaf. start
3539 * and nr indicate which items in the leaf to check. This totals up the
3540 * space used both by the item structs and the item data
3541 */
Chris Mason5f39d392007-10-15 16:14:19 -04003542static int leaf_space_used(struct extent_buffer *l, int start, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003543{
Josef Bacik41be1f32012-10-15 13:43:18 -04003544 struct btrfs_item *start_item;
3545 struct btrfs_item *end_item;
3546 struct btrfs_map_token token;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003547 int data_len;
Chris Mason5f39d392007-10-15 16:14:19 -04003548 int nritems = btrfs_header_nritems(l);
Chris Masond4dbff92007-04-04 14:08:15 -04003549 int end = min(nritems, start + nr) - 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003550
3551 if (!nr)
3552 return 0;
Josef Bacik41be1f32012-10-15 13:43:18 -04003553 btrfs_init_map_token(&token);
Ross Kirkdd3cc162013-09-16 15:58:09 +01003554 start_item = btrfs_item_nr(start);
3555 end_item = btrfs_item_nr(end);
Josef Bacik41be1f32012-10-15 13:43:18 -04003556 data_len = btrfs_token_item_offset(l, start_item, &token) +
3557 btrfs_token_item_size(l, start_item, &token);
3558 data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04003559 data_len += sizeof(struct btrfs_item) * nr;
Chris Masond4dbff92007-04-04 14:08:15 -04003560 WARN_ON(data_len < 0);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003561 return data_len;
3562}
3563
Chris Mason74123bd2007-02-02 11:05:29 -05003564/*
Chris Masond4dbff92007-04-04 14:08:15 -04003565 * The space between the end of the leaf items and
3566 * the start of the leaf data. IOW, how much room
3567 * the leaf has left for both items and data
3568 */
Chris Masond3977122009-01-05 21:25:51 -05003569noinline int btrfs_leaf_free_space(struct btrfs_root *root,
Chris Masone02119d2008-09-05 16:13:11 -04003570 struct extent_buffer *leaf)
Chris Masond4dbff92007-04-04 14:08:15 -04003571{
Chris Mason5f39d392007-10-15 16:14:19 -04003572 int nritems = btrfs_header_nritems(leaf);
3573 int ret;
3574 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
3575 if (ret < 0) {
Frank Holtonefe120a2013-12-20 11:37:06 -05003576 btrfs_crit(root->fs_info,
3577 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
Jens Axboeae2f5412007-10-19 09:22:59 -04003578 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
Chris Mason5f39d392007-10-15 16:14:19 -04003579 leaf_space_used(leaf, 0, nritems), nritems);
3580 }
3581 return ret;
Chris Masond4dbff92007-04-04 14:08:15 -04003582}
3583
Chris Mason99d8f832010-07-07 10:51:48 -04003584/*
3585 * min slot controls the lowest index we're willing to push to the
3586 * right. We'll push up to and including min_slot, but no lower
3587 */
Chris Mason44871b12009-03-13 10:04:31 -04003588static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
3589 struct btrfs_root *root,
3590 struct btrfs_path *path,
3591 int data_size, int empty,
3592 struct extent_buffer *right,
Chris Mason99d8f832010-07-07 10:51:48 -04003593 int free_space, u32 left_nritems,
3594 u32 min_slot)
Chris Mason00ec4c52007-02-24 12:47:20 -05003595{
Chris Mason5f39d392007-10-15 16:14:19 -04003596 struct extent_buffer *left = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04003597 struct extent_buffer *upper = path->nodes[1];
Chris Masoncfed81a2012-03-03 07:40:03 -05003598 struct btrfs_map_token token;
Chris Mason5f39d392007-10-15 16:14:19 -04003599 struct btrfs_disk_key disk_key;
Chris Mason00ec4c52007-02-24 12:47:20 -05003600 int slot;
Chris Mason34a38212007-11-07 13:31:03 -05003601 u32 i;
Chris Mason00ec4c52007-02-24 12:47:20 -05003602 int push_space = 0;
3603 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04003604 struct btrfs_item *item;
Chris Mason34a38212007-11-07 13:31:03 -05003605 u32 nr;
Chris Mason7518a232007-03-12 12:01:18 -04003606 u32 right_nritems;
Chris Mason5f39d392007-10-15 16:14:19 -04003607 u32 data_end;
Chris Masondb945352007-10-15 16:15:53 -04003608 u32 this_item_size;
Chris Mason00ec4c52007-02-24 12:47:20 -05003609
Chris Masoncfed81a2012-03-03 07:40:03 -05003610 btrfs_init_map_token(&token);
3611
Chris Mason34a38212007-11-07 13:31:03 -05003612 if (empty)
3613 nr = 0;
3614 else
Chris Mason99d8f832010-07-07 10:51:48 -04003615 nr = max_t(u32, 1, min_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003616
Zheng Yan31840ae2008-09-23 13:14:14 -04003617 if (path->slots[0] >= left_nritems)
Yan Zheng87b29b22008-12-17 10:21:48 -05003618 push_space += data_size;
Zheng Yan31840ae2008-09-23 13:14:14 -04003619
Chris Mason44871b12009-03-13 10:04:31 -04003620 slot = path->slots[1];
Chris Mason34a38212007-11-07 13:31:03 -05003621 i = left_nritems - 1;
3622 while (i >= nr) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003623 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003624
Zheng Yan31840ae2008-09-23 13:14:14 -04003625 if (!empty && push_items > 0) {
3626 if (path->slots[0] > i)
3627 break;
3628 if (path->slots[0] == i) {
3629 int space = btrfs_leaf_free_space(root, left);
3630 if (space + push_space * 2 > free_space)
3631 break;
3632 }
3633 }
3634
Chris Mason00ec4c52007-02-24 12:47:20 -05003635 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05003636 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04003637
Chris Masondb945352007-10-15 16:15:53 -04003638 this_item_size = btrfs_item_size(left, item);
3639 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Mason00ec4c52007-02-24 12:47:20 -05003640 break;
Zheng Yan31840ae2008-09-23 13:14:14 -04003641
Chris Mason00ec4c52007-02-24 12:47:20 -05003642 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04003643 push_space += this_item_size + sizeof(*item);
Chris Mason34a38212007-11-07 13:31:03 -05003644 if (i == 0)
3645 break;
3646 i--;
Chris Masondb945352007-10-15 16:15:53 -04003647 }
Chris Mason5f39d392007-10-15 16:14:19 -04003648
Chris Mason925baed2008-06-25 16:01:30 -04003649 if (push_items == 0)
3650 goto out_unlock;
Chris Mason5f39d392007-10-15 16:14:19 -04003651
Julia Lawall6c1500f2012-11-03 20:30:18 +00003652 WARN_ON(!empty && push_items == left_nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003653
Chris Mason00ec4c52007-02-24 12:47:20 -05003654 /* push left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04003655 right_nritems = btrfs_header_nritems(right);
Chris Mason34a38212007-11-07 13:31:03 -05003656
Chris Mason5f39d392007-10-15 16:14:19 -04003657 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
Chris Mason123abc82007-03-14 14:14:43 -04003658 push_space -= leaf_data_end(root, left);
Chris Mason5f39d392007-10-15 16:14:19 -04003659
Chris Mason00ec4c52007-02-24 12:47:20 -05003660 /* make room in the right data area */
Chris Mason5f39d392007-10-15 16:14:19 -04003661 data_end = leaf_data_end(root, right);
3662 memmove_extent_buffer(right,
3663 btrfs_leaf_data(right) + data_end - push_space,
3664 btrfs_leaf_data(right) + data_end,
3665 BTRFS_LEAF_DATA_SIZE(root) - data_end);
3666
Chris Mason00ec4c52007-02-24 12:47:20 -05003667 /* copy from the left data area */
Chris Mason5f39d392007-10-15 16:14:19 -04003668 copy_extent_buffer(right, left, btrfs_leaf_data(right) +
Chris Masond6025572007-03-30 14:27:56 -04003669 BTRFS_LEAF_DATA_SIZE(root) - push_space,
3670 btrfs_leaf_data(left) + leaf_data_end(root, left),
3671 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04003672
3673 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
3674 btrfs_item_nr_offset(0),
3675 right_nritems * sizeof(struct btrfs_item));
3676
Chris Mason00ec4c52007-02-24 12:47:20 -05003677 /* copy the items from left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04003678 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
3679 btrfs_item_nr_offset(left_nritems - push_items),
3680 push_items * sizeof(struct btrfs_item));
Chris Mason00ec4c52007-02-24 12:47:20 -05003681
3682 /* update the item pointers */
Chris Mason7518a232007-03-12 12:01:18 -04003683 right_nritems += push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04003684 btrfs_set_header_nritems(right, right_nritems);
Chris Mason123abc82007-03-14 14:14:43 -04003685 push_space = BTRFS_LEAF_DATA_SIZE(root);
Chris Mason7518a232007-03-12 12:01:18 -04003686 for (i = 0; i < right_nritems; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003687 item = btrfs_item_nr(i);
Chris Masoncfed81a2012-03-03 07:40:03 -05003688 push_space -= btrfs_token_item_size(right, item, &token);
3689 btrfs_set_token_item_offset(right, item, push_space, &token);
Chris Masondb945352007-10-15 16:15:53 -04003690 }
3691
Chris Mason7518a232007-03-12 12:01:18 -04003692 left_nritems -= push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04003693 btrfs_set_header_nritems(left, left_nritems);
Chris Mason00ec4c52007-02-24 12:47:20 -05003694
Chris Mason34a38212007-11-07 13:31:03 -05003695 if (left_nritems)
3696 btrfs_mark_buffer_dirty(left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003697 else
Daniel Dressler01d58472014-11-21 17:15:07 +09003698 clean_tree_block(trans, root->fs_info, left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003699
Chris Mason5f39d392007-10-15 16:14:19 -04003700 btrfs_mark_buffer_dirty(right);
Chris Masona429e512007-04-18 16:15:28 -04003701
Chris Mason5f39d392007-10-15 16:14:19 -04003702 btrfs_item_key(right, &disk_key, 0);
3703 btrfs_set_node_key(upper, &disk_key, slot + 1);
Chris Masond6025572007-03-30 14:27:56 -04003704 btrfs_mark_buffer_dirty(upper);
Chris Mason02217ed2007-03-02 16:08:05 -05003705
Chris Mason00ec4c52007-02-24 12:47:20 -05003706 /* then fixup the leaf pointer in the path */
Chris Mason7518a232007-03-12 12:01:18 -04003707 if (path->slots[0] >= left_nritems) {
3708 path->slots[0] -= left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04003709 if (btrfs_header_nritems(path->nodes[0]) == 0)
Daniel Dressler01d58472014-11-21 17:15:07 +09003710 clean_tree_block(trans, root->fs_info, path->nodes[0]);
Chris Mason925baed2008-06-25 16:01:30 -04003711 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04003712 free_extent_buffer(path->nodes[0]);
3713 path->nodes[0] = right;
Chris Mason00ec4c52007-02-24 12:47:20 -05003714 path->slots[1] += 1;
3715 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003716 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04003717 free_extent_buffer(right);
Chris Mason00ec4c52007-02-24 12:47:20 -05003718 }
3719 return 0;
Chris Mason925baed2008-06-25 16:01:30 -04003720
3721out_unlock:
3722 btrfs_tree_unlock(right);
3723 free_extent_buffer(right);
3724 return 1;
Chris Mason00ec4c52007-02-24 12:47:20 -05003725}
Chris Mason925baed2008-06-25 16:01:30 -04003726
Chris Mason00ec4c52007-02-24 12:47:20 -05003727/*
Chris Mason44871b12009-03-13 10:04:31 -04003728 * push some data in the path leaf to the right, trying to free up at
3729 * least data_size bytes. returns zero if the push worked, nonzero otherwise
3730 *
3731 * returns 1 if the push failed because the other node didn't have enough
3732 * room, 0 if everything worked out and < 0 if there were major errors.
Chris Mason99d8f832010-07-07 10:51:48 -04003733 *
3734 * this will push starting from min_slot to the end of the leaf. It won't
3735 * push any slot lower than min_slot
Chris Mason44871b12009-03-13 10:04:31 -04003736 */
3737static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04003738 *root, struct btrfs_path *path,
3739 int min_data_size, int data_size,
3740 int empty, u32 min_slot)
Chris Mason44871b12009-03-13 10:04:31 -04003741{
3742 struct extent_buffer *left = path->nodes[0];
3743 struct extent_buffer *right;
3744 struct extent_buffer *upper;
3745 int slot;
3746 int free_space;
3747 u32 left_nritems;
3748 int ret;
3749
3750 if (!path->nodes[1])
3751 return 1;
3752
3753 slot = path->slots[1];
3754 upper = path->nodes[1];
3755 if (slot >= btrfs_header_nritems(upper) - 1)
3756 return 1;
3757
3758 btrfs_assert_tree_locked(path->nodes[1]);
3759
3760 right = read_node_slot(root, upper, slot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07003761 /*
3762 * slot + 1 is not valid or we fail to read the right node,
3763 * no big deal, just return.
3764 */
3765 if (IS_ERR(right))
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00003766 return 1;
3767
Chris Mason44871b12009-03-13 10:04:31 -04003768 btrfs_tree_lock(right);
3769 btrfs_set_lock_blocking(right);
3770
3771 free_space = btrfs_leaf_free_space(root, right);
3772 if (free_space < data_size)
3773 goto out_unlock;
3774
3775 /* cow and double check */
3776 ret = btrfs_cow_block(trans, root, right, upper,
3777 slot + 1, &right);
3778 if (ret)
3779 goto out_unlock;
3780
3781 free_space = btrfs_leaf_free_space(root, right);
3782 if (free_space < data_size)
3783 goto out_unlock;
3784
3785 left_nritems = btrfs_header_nritems(left);
3786 if (left_nritems == 0)
3787 goto out_unlock;
3788
Filipe David Borba Manana2ef1fed2013-12-04 22:17:39 +00003789 if (path->slots[0] == left_nritems && !empty) {
3790 /* Key greater than all keys in the leaf, right neighbor has
3791 * enough room for it and we're not emptying our leaf to delete
3792 * it, therefore use right neighbor to insert the new item and
3793 * no need to touch/dirty our left leaft. */
3794 btrfs_tree_unlock(left);
3795 free_extent_buffer(left);
3796 path->nodes[0] = right;
3797 path->slots[0] = 0;
3798 path->slots[1]++;
3799 return 0;
3800 }
3801
Chris Mason99d8f832010-07-07 10:51:48 -04003802 return __push_leaf_right(trans, root, path, min_data_size, empty,
3803 right, free_space, left_nritems, min_slot);
Chris Mason44871b12009-03-13 10:04:31 -04003804out_unlock:
3805 btrfs_tree_unlock(right);
3806 free_extent_buffer(right);
3807 return 1;
3808}
3809
3810/*
Chris Mason74123bd2007-02-02 11:05:29 -05003811 * push some data in the path leaf to the left, trying to free up at
3812 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04003813 *
3814 * max_slot can put a limit on how far into the leaf we'll push items. The
3815 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
3816 * items
Chris Mason74123bd2007-02-02 11:05:29 -05003817 */
Chris Mason44871b12009-03-13 10:04:31 -04003818static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3819 struct btrfs_root *root,
3820 struct btrfs_path *path, int data_size,
3821 int empty, struct extent_buffer *left,
Chris Mason99d8f832010-07-07 10:51:48 -04003822 int free_space, u32 right_nritems,
3823 u32 max_slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003824{
Chris Mason5f39d392007-10-15 16:14:19 -04003825 struct btrfs_disk_key disk_key;
3826 struct extent_buffer *right = path->nodes[0];
Chris Masonbe0e5c02007-01-26 15:51:26 -05003827 int i;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003828 int push_space = 0;
3829 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04003830 struct btrfs_item *item;
Chris Mason7518a232007-03-12 12:01:18 -04003831 u32 old_left_nritems;
Chris Mason34a38212007-11-07 13:31:03 -05003832 u32 nr;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003833 int ret = 0;
Chris Masondb945352007-10-15 16:15:53 -04003834 u32 this_item_size;
3835 u32 old_left_item_size;
Chris Masoncfed81a2012-03-03 07:40:03 -05003836 struct btrfs_map_token token;
3837
3838 btrfs_init_map_token(&token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003839
Chris Mason34a38212007-11-07 13:31:03 -05003840 if (empty)
Chris Mason99d8f832010-07-07 10:51:48 -04003841 nr = min(right_nritems, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003842 else
Chris Mason99d8f832010-07-07 10:51:48 -04003843 nr = min(right_nritems - 1, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003844
3845 for (i = 0; i < nr; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003846 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003847
Zheng Yan31840ae2008-09-23 13:14:14 -04003848 if (!empty && push_items > 0) {
3849 if (path->slots[0] < i)
3850 break;
3851 if (path->slots[0] == i) {
3852 int space = btrfs_leaf_free_space(root, right);
3853 if (space + push_space * 2 > free_space)
3854 break;
3855 }
3856 }
3857
Chris Masonbe0e5c02007-01-26 15:51:26 -05003858 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05003859 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04003860
3861 this_item_size = btrfs_item_size(right, item);
3862 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003863 break;
Chris Masondb945352007-10-15 16:15:53 -04003864
Chris Masonbe0e5c02007-01-26 15:51:26 -05003865 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04003866 push_space += this_item_size + sizeof(*item);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003867 }
Chris Masondb945352007-10-15 16:15:53 -04003868
Chris Masonbe0e5c02007-01-26 15:51:26 -05003869 if (push_items == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04003870 ret = 1;
3871 goto out;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003872 }
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05303873 WARN_ON(!empty && push_items == btrfs_header_nritems(right));
Chris Mason5f39d392007-10-15 16:14:19 -04003874
Chris Masonbe0e5c02007-01-26 15:51:26 -05003875 /* push data from right to left */
Chris Mason5f39d392007-10-15 16:14:19 -04003876 copy_extent_buffer(left, right,
3877 btrfs_item_nr_offset(btrfs_header_nritems(left)),
3878 btrfs_item_nr_offset(0),
3879 push_items * sizeof(struct btrfs_item));
3880
Chris Mason123abc82007-03-14 14:14:43 -04003881 push_space = BTRFS_LEAF_DATA_SIZE(root) -
Chris Masond3977122009-01-05 21:25:51 -05003882 btrfs_item_offset_nr(right, push_items - 1);
Chris Mason5f39d392007-10-15 16:14:19 -04003883
3884 copy_extent_buffer(left, right, btrfs_leaf_data(left) +
Chris Masond6025572007-03-30 14:27:56 -04003885 leaf_data_end(root, left) - push_space,
3886 btrfs_leaf_data(right) +
Chris Mason5f39d392007-10-15 16:14:19 -04003887 btrfs_item_offset_nr(right, push_items - 1),
Chris Masond6025572007-03-30 14:27:56 -04003888 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04003889 old_left_nritems = btrfs_header_nritems(left);
Yan Zheng87b29b22008-12-17 10:21:48 -05003890 BUG_ON(old_left_nritems <= 0);
Chris Masoneb60cea2007-02-02 09:18:22 -05003891
Chris Masondb945352007-10-15 16:15:53 -04003892 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
Chris Mason0783fcf2007-03-12 20:12:07 -04003893 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003894 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003895
Ross Kirkdd3cc162013-09-16 15:58:09 +01003896 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003897
Chris Masoncfed81a2012-03-03 07:40:03 -05003898 ioff = btrfs_token_item_offset(left, item, &token);
3899 btrfs_set_token_item_offset(left, item,
3900 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
3901 &token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003902 }
Chris Mason5f39d392007-10-15 16:14:19 -04003903 btrfs_set_header_nritems(left, old_left_nritems + push_items);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003904
3905 /* fixup right node */
Julia Lawall31b1a2b2012-11-03 10:58:34 +00003906 if (push_items > right_nritems)
3907 WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
Chris Masond3977122009-01-05 21:25:51 -05003908 right_nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003909
Chris Mason34a38212007-11-07 13:31:03 -05003910 if (push_items < right_nritems) {
3911 push_space = btrfs_item_offset_nr(right, push_items - 1) -
3912 leaf_data_end(root, right);
3913 memmove_extent_buffer(right, btrfs_leaf_data(right) +
3914 BTRFS_LEAF_DATA_SIZE(root) - push_space,
3915 btrfs_leaf_data(right) +
3916 leaf_data_end(root, right), push_space);
3917
3918 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
Chris Mason5f39d392007-10-15 16:14:19 -04003919 btrfs_item_nr_offset(push_items),
3920 (btrfs_header_nritems(right) - push_items) *
3921 sizeof(struct btrfs_item));
Chris Mason34a38212007-11-07 13:31:03 -05003922 }
Yaneef1c492007-11-26 10:58:13 -05003923 right_nritems -= push_items;
3924 btrfs_set_header_nritems(right, right_nritems);
Chris Mason123abc82007-03-14 14:14:43 -04003925 push_space = BTRFS_LEAF_DATA_SIZE(root);
Chris Mason5f39d392007-10-15 16:14:19 -04003926 for (i = 0; i < right_nritems; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003927 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003928
Chris Masoncfed81a2012-03-03 07:40:03 -05003929 push_space = push_space - btrfs_token_item_size(right,
3930 item, &token);
3931 btrfs_set_token_item_offset(right, item, push_space, &token);
Chris Masondb945352007-10-15 16:15:53 -04003932 }
Chris Masoneb60cea2007-02-02 09:18:22 -05003933
Chris Mason5f39d392007-10-15 16:14:19 -04003934 btrfs_mark_buffer_dirty(left);
Chris Mason34a38212007-11-07 13:31:03 -05003935 if (right_nritems)
3936 btrfs_mark_buffer_dirty(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003937 else
Daniel Dressler01d58472014-11-21 17:15:07 +09003938 clean_tree_block(trans, root->fs_info, right);
Chris Mason098f59c2007-05-11 11:33:21 -04003939
Chris Mason5f39d392007-10-15 16:14:19 -04003940 btrfs_item_key(right, &disk_key, 0);
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003941 fixup_low_keys(root->fs_info, path, &disk_key, 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003942
3943 /* then fixup the leaf pointer in the path */
3944 if (path->slots[0] < push_items) {
3945 path->slots[0] += old_left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04003946 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04003947 free_extent_buffer(path->nodes[0]);
3948 path->nodes[0] = left;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003949 path->slots[1] -= 1;
3950 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003951 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04003952 free_extent_buffer(left);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003953 path->slots[0] -= push_items;
3954 }
Chris Masoneb60cea2007-02-02 09:18:22 -05003955 BUG_ON(path->slots[0] < 0);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003956 return ret;
Chris Mason925baed2008-06-25 16:01:30 -04003957out:
3958 btrfs_tree_unlock(left);
3959 free_extent_buffer(left);
3960 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003961}
3962
Chris Mason74123bd2007-02-02 11:05:29 -05003963/*
Chris Mason44871b12009-03-13 10:04:31 -04003964 * push some data in the path leaf to the left, trying to free up at
3965 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04003966 *
3967 * max_slot can put a limit on how far into the leaf we'll push items. The
3968 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
3969 * items
Chris Mason44871b12009-03-13 10:04:31 -04003970 */
3971static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04003972 *root, struct btrfs_path *path, int min_data_size,
3973 int data_size, int empty, u32 max_slot)
Chris Mason44871b12009-03-13 10:04:31 -04003974{
3975 struct extent_buffer *right = path->nodes[0];
3976 struct extent_buffer *left;
3977 int slot;
3978 int free_space;
3979 u32 right_nritems;
3980 int ret = 0;
3981
3982 slot = path->slots[1];
3983 if (slot == 0)
3984 return 1;
3985 if (!path->nodes[1])
3986 return 1;
3987
3988 right_nritems = btrfs_header_nritems(right);
3989 if (right_nritems == 0)
3990 return 1;
3991
3992 btrfs_assert_tree_locked(path->nodes[1]);
3993
3994 left = read_node_slot(root, path->nodes[1], slot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07003995 /*
3996 * slot - 1 is not valid or we fail to read the left node,
3997 * no big deal, just return.
3998 */
3999 if (IS_ERR(left))
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00004000 return 1;
4001
Chris Mason44871b12009-03-13 10:04:31 -04004002 btrfs_tree_lock(left);
4003 btrfs_set_lock_blocking(left);
4004
4005 free_space = btrfs_leaf_free_space(root, left);
4006 if (free_space < data_size) {
4007 ret = 1;
4008 goto out;
4009 }
4010
4011 /* cow and double check */
4012 ret = btrfs_cow_block(trans, root, left,
4013 path->nodes[1], slot - 1, &left);
4014 if (ret) {
4015 /* we hit -ENOSPC, but it isn't fatal here */
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004016 if (ret == -ENOSPC)
4017 ret = 1;
Chris Mason44871b12009-03-13 10:04:31 -04004018 goto out;
4019 }
4020
4021 free_space = btrfs_leaf_free_space(root, left);
4022 if (free_space < data_size) {
4023 ret = 1;
4024 goto out;
4025 }
4026
Chris Mason99d8f832010-07-07 10:51:48 -04004027 return __push_leaf_left(trans, root, path, min_data_size,
4028 empty, left, free_space, right_nritems,
4029 max_slot);
Chris Mason44871b12009-03-13 10:04:31 -04004030out:
4031 btrfs_tree_unlock(left);
4032 free_extent_buffer(left);
4033 return ret;
4034}
4035
4036/*
Chris Mason74123bd2007-02-02 11:05:29 -05004037 * split the path's leaf in two, making sure there is at least data_size
4038 * available for the resulting leaf level of the path.
4039 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01004040static noinline void copy_for_split(struct btrfs_trans_handle *trans,
4041 struct btrfs_root *root,
4042 struct btrfs_path *path,
4043 struct extent_buffer *l,
4044 struct extent_buffer *right,
4045 int slot, int mid, int nritems)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004046{
Chris Masonbe0e5c02007-01-26 15:51:26 -05004047 int data_copy_size;
4048 int rt_data_off;
4049 int i;
Chris Masond4dbff92007-04-04 14:08:15 -04004050 struct btrfs_disk_key disk_key;
Chris Masoncfed81a2012-03-03 07:40:03 -05004051 struct btrfs_map_token token;
4052
4053 btrfs_init_map_token(&token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004054
Chris Mason5f39d392007-10-15 16:14:19 -04004055 nritems = nritems - mid;
4056 btrfs_set_header_nritems(right, nritems);
4057 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
4058
4059 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
4060 btrfs_item_nr_offset(mid),
4061 nritems * sizeof(struct btrfs_item));
4062
4063 copy_extent_buffer(right, l,
Chris Masond6025572007-03-30 14:27:56 -04004064 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
4065 data_copy_size, btrfs_leaf_data(l) +
4066 leaf_data_end(root, l), data_copy_size);
Chris Mason74123bd2007-02-02 11:05:29 -05004067
Chris Mason5f39d392007-10-15 16:14:19 -04004068 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
4069 btrfs_item_end_nr(l, mid);
4070
4071 for (i = 0; i < nritems; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01004072 struct btrfs_item *item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004073 u32 ioff;
4074
Chris Masoncfed81a2012-03-03 07:40:03 -05004075 ioff = btrfs_token_item_offset(right, item, &token);
4076 btrfs_set_token_item_offset(right, item,
4077 ioff + rt_data_off, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04004078 }
Chris Mason74123bd2007-02-02 11:05:29 -05004079
Chris Mason5f39d392007-10-15 16:14:19 -04004080 btrfs_set_header_nritems(l, mid);
Chris Mason5f39d392007-10-15 16:14:19 -04004081 btrfs_item_key(right, &disk_key, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004082 insert_ptr(trans, root, path, &disk_key, right->start,
Jan Schmidtc3e06962012-06-21 11:01:06 +02004083 path->slots[1] + 1, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04004084
4085 btrfs_mark_buffer_dirty(right);
4086 btrfs_mark_buffer_dirty(l);
Chris Masoneb60cea2007-02-02 09:18:22 -05004087 BUG_ON(path->slots[0] != slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004088
Chris Masonbe0e5c02007-01-26 15:51:26 -05004089 if (mid <= slot) {
Chris Mason925baed2008-06-25 16:01:30 -04004090 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04004091 free_extent_buffer(path->nodes[0]);
4092 path->nodes[0] = right;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004093 path->slots[0] -= mid;
4094 path->slots[1] += 1;
Chris Mason925baed2008-06-25 16:01:30 -04004095 } else {
4096 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04004097 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04004098 }
Chris Mason5f39d392007-10-15 16:14:19 -04004099
Chris Masoneb60cea2007-02-02 09:18:22 -05004100 BUG_ON(path->slots[0] < 0);
Chris Mason44871b12009-03-13 10:04:31 -04004101}
4102
4103/*
Chris Mason99d8f832010-07-07 10:51:48 -04004104 * double splits happen when we need to insert a big item in the middle
4105 * of a leaf. A double split can leave us with 3 mostly empty leaves:
4106 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
4107 * A B C
4108 *
4109 * We avoid this by trying to push the items on either side of our target
4110 * into the adjacent leaves. If all goes well we can avoid the double split
4111 * completely.
4112 */
4113static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
4114 struct btrfs_root *root,
4115 struct btrfs_path *path,
4116 int data_size)
4117{
4118 int ret;
4119 int progress = 0;
4120 int slot;
4121 u32 nritems;
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004122 int space_needed = data_size;
Chris Mason99d8f832010-07-07 10:51:48 -04004123
4124 slot = path->slots[0];
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004125 if (slot < btrfs_header_nritems(path->nodes[0]))
4126 space_needed -= btrfs_leaf_free_space(root, path->nodes[0]);
Chris Mason99d8f832010-07-07 10:51:48 -04004127
4128 /*
4129 * try to push all the items after our slot into the
4130 * right leaf
4131 */
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004132 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
Chris Mason99d8f832010-07-07 10:51:48 -04004133 if (ret < 0)
4134 return ret;
4135
4136 if (ret == 0)
4137 progress++;
4138
4139 nritems = btrfs_header_nritems(path->nodes[0]);
4140 /*
4141 * our goal is to get our slot at the start or end of a leaf. If
4142 * we've done so we're done
4143 */
4144 if (path->slots[0] == 0 || path->slots[0] == nritems)
4145 return 0;
4146
4147 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
4148 return 0;
4149
4150 /* try to push all the items before our slot into the next leaf */
4151 slot = path->slots[0];
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004152 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
Chris Mason99d8f832010-07-07 10:51:48 -04004153 if (ret < 0)
4154 return ret;
4155
4156 if (ret == 0)
4157 progress++;
4158
4159 if (progress)
4160 return 0;
4161 return 1;
4162}
4163
4164/*
Chris Mason44871b12009-03-13 10:04:31 -04004165 * split the path's leaf in two, making sure there is at least data_size
4166 * available for the resulting leaf level of the path.
4167 *
4168 * returns 0 if all went well and < 0 on failure.
4169 */
4170static noinline int split_leaf(struct btrfs_trans_handle *trans,
4171 struct btrfs_root *root,
4172 struct btrfs_key *ins_key,
4173 struct btrfs_path *path, int data_size,
4174 int extend)
4175{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004176 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04004177 struct extent_buffer *l;
4178 u32 nritems;
4179 int mid;
4180 int slot;
4181 struct extent_buffer *right;
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004182 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason44871b12009-03-13 10:04:31 -04004183 int ret = 0;
4184 int wret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004185 int split;
Chris Mason44871b12009-03-13 10:04:31 -04004186 int num_doubles = 0;
Chris Mason99d8f832010-07-07 10:51:48 -04004187 int tried_avoid_double = 0;
Chris Mason44871b12009-03-13 10:04:31 -04004188
Yan, Zhenga5719522009-09-24 09:17:31 -04004189 l = path->nodes[0];
4190 slot = path->slots[0];
4191 if (extend && data_size + btrfs_item_size_nr(l, slot) +
4192 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
4193 return -EOVERFLOW;
4194
Chris Mason44871b12009-03-13 10:04:31 -04004195 /* first try to make some room by pushing left and right */
Liu Bo33157e02013-05-22 12:07:06 +00004196 if (data_size && path->nodes[1]) {
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004197 int space_needed = data_size;
4198
4199 if (slot < btrfs_header_nritems(l))
4200 space_needed -= btrfs_leaf_free_space(root, l);
4201
4202 wret = push_leaf_right(trans, root, path, space_needed,
4203 space_needed, 0, 0);
Chris Mason44871b12009-03-13 10:04:31 -04004204 if (wret < 0)
4205 return wret;
4206 if (wret) {
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004207 wret = push_leaf_left(trans, root, path, space_needed,
4208 space_needed, 0, (u32)-1);
Chris Mason44871b12009-03-13 10:04:31 -04004209 if (wret < 0)
4210 return wret;
4211 }
4212 l = path->nodes[0];
4213
4214 /* did the pushes work? */
4215 if (btrfs_leaf_free_space(root, l) >= data_size)
4216 return 0;
4217 }
4218
4219 if (!path->nodes[1]) {
Liu Bofdd99c72013-05-22 12:06:51 +00004220 ret = insert_new_root(trans, root, path, 1);
Chris Mason44871b12009-03-13 10:04:31 -04004221 if (ret)
4222 return ret;
4223 }
4224again:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004225 split = 1;
Chris Mason44871b12009-03-13 10:04:31 -04004226 l = path->nodes[0];
4227 slot = path->slots[0];
4228 nritems = btrfs_header_nritems(l);
4229 mid = (nritems + 1) / 2;
4230
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004231 if (mid <= slot) {
4232 if (nritems == 1 ||
4233 leaf_space_used(l, mid, nritems - mid) + data_size >
4234 BTRFS_LEAF_DATA_SIZE(root)) {
4235 if (slot >= nritems) {
4236 split = 0;
4237 } else {
4238 mid = slot;
4239 if (mid != nritems &&
4240 leaf_space_used(l, mid, nritems - mid) +
4241 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
Chris Mason99d8f832010-07-07 10:51:48 -04004242 if (data_size && !tried_avoid_double)
4243 goto push_for_double;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004244 split = 2;
4245 }
4246 }
4247 }
4248 } else {
4249 if (leaf_space_used(l, 0, mid) + data_size >
4250 BTRFS_LEAF_DATA_SIZE(root)) {
4251 if (!extend && data_size && slot == 0) {
4252 split = 0;
4253 } else if ((extend || !data_size) && slot == 0) {
4254 mid = 1;
4255 } else {
4256 mid = slot;
4257 if (mid != nritems &&
4258 leaf_space_used(l, mid, nritems - mid) +
4259 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
Chris Mason99d8f832010-07-07 10:51:48 -04004260 if (data_size && !tried_avoid_double)
4261 goto push_for_double;
Dulshani Gunawardhana67871252013-10-31 10:33:04 +05304262 split = 2;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004263 }
4264 }
4265 }
4266 }
4267
4268 if (split == 0)
4269 btrfs_cpu_key_to_disk(&disk_key, ins_key);
4270 else
4271 btrfs_item_key(l, &disk_key, mid);
4272
David Sterba4d75f8a2014-06-15 01:54:12 +02004273 right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
4274 &disk_key, 0, l->start, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004275 if (IS_ERR(right))
Chris Mason44871b12009-03-13 10:04:31 -04004276 return PTR_ERR(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004277
David Sterba707e8a02014-06-04 19:22:26 +02004278 root_add_used(root, root->nodesize);
Chris Mason44871b12009-03-13 10:04:31 -04004279
4280 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
4281 btrfs_set_header_bytenr(right, right->start);
4282 btrfs_set_header_generation(right, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004283 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
Chris Mason44871b12009-03-13 10:04:31 -04004284 btrfs_set_header_owner(right, root->root_key.objectid);
4285 btrfs_set_header_level(right, 0);
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004286 write_extent_buffer(right, fs_info->fsid,
Ross Kirk0a4e5582013-09-24 10:12:38 +01004287 btrfs_header_fsid(), BTRFS_FSID_SIZE);
Chris Mason44871b12009-03-13 10:04:31 -04004288
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004289 write_extent_buffer(right, fs_info->chunk_tree_uuid,
Geert Uytterhoevenb308bc22013-08-20 13:20:15 +02004290 btrfs_header_chunk_tree_uuid(right),
Chris Mason44871b12009-03-13 10:04:31 -04004291 BTRFS_UUID_SIZE);
4292
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004293 if (split == 0) {
4294 if (mid <= slot) {
4295 btrfs_set_header_nritems(right, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004296 insert_ptr(trans, root, path, &disk_key, right->start,
Jan Schmidtc3e06962012-06-21 11:01:06 +02004297 path->slots[1] + 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004298 btrfs_tree_unlock(path->nodes[0]);
4299 free_extent_buffer(path->nodes[0]);
4300 path->nodes[0] = right;
4301 path->slots[0] = 0;
4302 path->slots[1] += 1;
4303 } else {
4304 btrfs_set_header_nritems(right, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004305 insert_ptr(trans, root, path, &disk_key, right->start,
Jan Schmidtc3e06962012-06-21 11:01:06 +02004306 path->slots[1], 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004307 btrfs_tree_unlock(path->nodes[0]);
4308 free_extent_buffer(path->nodes[0]);
4309 path->nodes[0] = right;
4310 path->slots[0] = 0;
Jeff Mahoney143bede2012-03-01 14:56:26 +01004311 if (path->slots[1] == 0)
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004312 fixup_low_keys(fs_info, path, &disk_key, 1);
Chris Mason44871b12009-03-13 10:04:31 -04004313 }
Liu Bo196e0242016-09-07 14:48:28 -07004314 /*
4315 * We create a new leaf 'right' for the required ins_len and
4316 * we'll do btrfs_mark_buffer_dirty() on this leaf after copying
4317 * the content of ins_len to 'right'.
4318 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004319 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04004320 }
4321
Jeff Mahoney143bede2012-03-01 14:56:26 +01004322 copy_for_split(trans, root, path, l, right, slot, mid, nritems);
Chris Mason44871b12009-03-13 10:04:31 -04004323
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004324 if (split == 2) {
Chris Masoncc0c5532007-10-25 15:42:57 -04004325 BUG_ON(num_doubles != 0);
4326 num_doubles++;
4327 goto again;
Chris Mason3326d1b2007-10-15 16:18:25 -04004328 }
Chris Mason44871b12009-03-13 10:04:31 -04004329
Jeff Mahoney143bede2012-03-01 14:56:26 +01004330 return 0;
Chris Mason99d8f832010-07-07 10:51:48 -04004331
4332push_for_double:
4333 push_for_double_split(trans, root, path, data_size);
4334 tried_avoid_double = 1;
4335 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
4336 return 0;
4337 goto again;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004338}
4339
Yan, Zhengad48fd752009-11-12 09:33:58 +00004340static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
4341 struct btrfs_root *root,
4342 struct btrfs_path *path, int ins_len)
Chris Mason459931e2008-12-10 09:10:46 -05004343{
Yan, Zhengad48fd752009-11-12 09:33:58 +00004344 struct btrfs_key key;
Chris Mason459931e2008-12-10 09:10:46 -05004345 struct extent_buffer *leaf;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004346 struct btrfs_file_extent_item *fi;
4347 u64 extent_len = 0;
4348 u32 item_size;
4349 int ret;
Chris Mason459931e2008-12-10 09:10:46 -05004350
4351 leaf = path->nodes[0];
Yan, Zhengad48fd752009-11-12 09:33:58 +00004352 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4353
4354 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
4355 key.type != BTRFS_EXTENT_CSUM_KEY);
4356
4357 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
4358 return 0;
Chris Mason459931e2008-12-10 09:10:46 -05004359
4360 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004361 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4362 fi = btrfs_item_ptr(leaf, path->slots[0],
4363 struct btrfs_file_extent_item);
4364 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
4365 }
David Sterbab3b4aa72011-04-21 01:20:15 +02004366 btrfs_release_path(path);
Chris Mason459931e2008-12-10 09:10:46 -05004367
Chris Mason459931e2008-12-10 09:10:46 -05004368 path->keep_locks = 1;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004369 path->search_for_split = 1;
4370 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Chris Mason459931e2008-12-10 09:10:46 -05004371 path->search_for_split = 0;
Filipe Mananaa8df6fe2015-01-20 12:40:53 +00004372 if (ret > 0)
4373 ret = -EAGAIN;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004374 if (ret < 0)
4375 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05004376
Yan, Zhengad48fd752009-11-12 09:33:58 +00004377 ret = -EAGAIN;
4378 leaf = path->nodes[0];
Filipe Mananaa8df6fe2015-01-20 12:40:53 +00004379 /* if our item isn't there, return now */
4380 if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
Yan, Zhengad48fd752009-11-12 09:33:58 +00004381 goto err;
4382
Chris Mason109f6ae2010-04-02 09:20:18 -04004383 /* the leaf has changed, it now has room. return now */
4384 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
4385 goto err;
4386
Yan, Zhengad48fd752009-11-12 09:33:58 +00004387 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4388 fi = btrfs_item_ptr(leaf, path->slots[0],
4389 struct btrfs_file_extent_item);
4390 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
4391 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05004392 }
4393
Chris Masonb9473432009-03-13 11:00:37 -04004394 btrfs_set_path_blocking(path);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004395 ret = split_leaf(trans, root, &key, path, ins_len, 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004396 if (ret)
4397 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05004398
Yan, Zhengad48fd752009-11-12 09:33:58 +00004399 path->keep_locks = 0;
Chris Masonb9473432009-03-13 11:00:37 -04004400 btrfs_unlock_up_safe(path, 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004401 return 0;
4402err:
4403 path->keep_locks = 0;
4404 return ret;
4405}
4406
4407static noinline int split_item(struct btrfs_trans_handle *trans,
4408 struct btrfs_root *root,
4409 struct btrfs_path *path,
4410 struct btrfs_key *new_key,
4411 unsigned long split_offset)
4412{
4413 struct extent_buffer *leaf;
4414 struct btrfs_item *item;
4415 struct btrfs_item *new_item;
4416 int slot;
4417 char *buf;
4418 u32 nritems;
4419 u32 item_size;
4420 u32 orig_offset;
4421 struct btrfs_disk_key disk_key;
4422
Chris Masonb9473432009-03-13 11:00:37 -04004423 leaf = path->nodes[0];
4424 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
4425
Chris Masonb4ce94d2009-02-04 09:25:08 -05004426 btrfs_set_path_blocking(path);
4427
Ross Kirkdd3cc162013-09-16 15:58:09 +01004428 item = btrfs_item_nr(path->slots[0]);
Chris Mason459931e2008-12-10 09:10:46 -05004429 orig_offset = btrfs_item_offset(leaf, item);
4430 item_size = btrfs_item_size(leaf, item);
4431
Chris Mason459931e2008-12-10 09:10:46 -05004432 buf = kmalloc(item_size, GFP_NOFS);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004433 if (!buf)
4434 return -ENOMEM;
4435
Chris Mason459931e2008-12-10 09:10:46 -05004436 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
4437 path->slots[0]), item_size);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004438
Chris Mason459931e2008-12-10 09:10:46 -05004439 slot = path->slots[0] + 1;
Chris Mason459931e2008-12-10 09:10:46 -05004440 nritems = btrfs_header_nritems(leaf);
Chris Mason459931e2008-12-10 09:10:46 -05004441 if (slot != nritems) {
4442 /* shift the items */
4443 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
Yan, Zhengad48fd752009-11-12 09:33:58 +00004444 btrfs_item_nr_offset(slot),
4445 (nritems - slot) * sizeof(struct btrfs_item));
Chris Mason459931e2008-12-10 09:10:46 -05004446 }
4447
4448 btrfs_cpu_key_to_disk(&disk_key, new_key);
4449 btrfs_set_item_key(leaf, &disk_key, slot);
4450
Ross Kirkdd3cc162013-09-16 15:58:09 +01004451 new_item = btrfs_item_nr(slot);
Chris Mason459931e2008-12-10 09:10:46 -05004452
4453 btrfs_set_item_offset(leaf, new_item, orig_offset);
4454 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
4455
4456 btrfs_set_item_offset(leaf, item,
4457 orig_offset + item_size - split_offset);
4458 btrfs_set_item_size(leaf, item, split_offset);
4459
4460 btrfs_set_header_nritems(leaf, nritems + 1);
4461
4462 /* write the data for the start of the original item */
4463 write_extent_buffer(leaf, buf,
4464 btrfs_item_ptr_offset(leaf, path->slots[0]),
4465 split_offset);
4466
4467 /* write the data for the new item */
4468 write_extent_buffer(leaf, buf + split_offset,
4469 btrfs_item_ptr_offset(leaf, slot),
4470 item_size - split_offset);
4471 btrfs_mark_buffer_dirty(leaf);
4472
Yan, Zhengad48fd752009-11-12 09:33:58 +00004473 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
Chris Mason459931e2008-12-10 09:10:46 -05004474 kfree(buf);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004475 return 0;
4476}
4477
4478/*
4479 * This function splits a single item into two items,
4480 * giving 'new_key' to the new item and splitting the
4481 * old one at split_offset (from the start of the item).
4482 *
4483 * The path may be released by this operation. After
4484 * the split, the path is pointing to the old item. The
4485 * new item is going to be in the same node as the old one.
4486 *
4487 * Note, the item being split must be smaller enough to live alone on
4488 * a tree block with room for one extra struct btrfs_item
4489 *
4490 * This allows us to split the item in place, keeping a lock on the
4491 * leaf the entire time.
4492 */
4493int btrfs_split_item(struct btrfs_trans_handle *trans,
4494 struct btrfs_root *root,
4495 struct btrfs_path *path,
4496 struct btrfs_key *new_key,
4497 unsigned long split_offset)
4498{
4499 int ret;
4500 ret = setup_leaf_for_split(trans, root, path,
4501 sizeof(struct btrfs_item));
4502 if (ret)
4503 return ret;
4504
4505 ret = split_item(trans, root, path, new_key, split_offset);
Chris Mason459931e2008-12-10 09:10:46 -05004506 return ret;
4507}
4508
4509/*
Yan, Zhengad48fd752009-11-12 09:33:58 +00004510 * This function duplicate a item, giving 'new_key' to the new item.
4511 * It guarantees both items live in the same tree leaf and the new item
4512 * is contiguous with the original item.
4513 *
4514 * This allows us to split file extent in place, keeping a lock on the
4515 * leaf the entire time.
4516 */
4517int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4518 struct btrfs_root *root,
4519 struct btrfs_path *path,
4520 struct btrfs_key *new_key)
4521{
4522 struct extent_buffer *leaf;
4523 int ret;
4524 u32 item_size;
4525
4526 leaf = path->nodes[0];
4527 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4528 ret = setup_leaf_for_split(trans, root, path,
4529 item_size + sizeof(struct btrfs_item));
4530 if (ret)
4531 return ret;
4532
4533 path->slots[0]++;
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004534 setup_items_for_insert(root, path, new_key, &item_size,
Jeff Mahoney143bede2012-03-01 14:56:26 +01004535 item_size, item_size +
4536 sizeof(struct btrfs_item), 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004537 leaf = path->nodes[0];
4538 memcpy_extent_buffer(leaf,
4539 btrfs_item_ptr_offset(leaf, path->slots[0]),
4540 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
4541 item_size);
4542 return 0;
4543}
4544
4545/*
Chris Masond352ac62008-09-29 15:18:18 -04004546 * make the item pointed to by the path smaller. new_size indicates
4547 * how small to make it, and from_end tells us if we just chop bytes
4548 * off the end of the item or if we shift the item to chop bytes off
4549 * the front.
4550 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004551void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01004552 u32 new_size, int from_end)
Chris Masonb18c6682007-04-17 13:26:50 -04004553{
Chris Masonb18c6682007-04-17 13:26:50 -04004554 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04004555 struct extent_buffer *leaf;
4556 struct btrfs_item *item;
Chris Masonb18c6682007-04-17 13:26:50 -04004557 u32 nritems;
4558 unsigned int data_end;
4559 unsigned int old_data_start;
4560 unsigned int old_size;
4561 unsigned int size_diff;
4562 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05004563 struct btrfs_map_token token;
4564
4565 btrfs_init_map_token(&token);
Chris Masonb18c6682007-04-17 13:26:50 -04004566
Chris Mason5f39d392007-10-15 16:14:19 -04004567 leaf = path->nodes[0];
Chris Mason179e29e2007-11-01 11:28:41 -04004568 slot = path->slots[0];
4569
4570 old_size = btrfs_item_size_nr(leaf, slot);
4571 if (old_size == new_size)
Jeff Mahoney143bede2012-03-01 14:56:26 +01004572 return;
Chris Masonb18c6682007-04-17 13:26:50 -04004573
Chris Mason5f39d392007-10-15 16:14:19 -04004574 nritems = btrfs_header_nritems(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004575 data_end = leaf_data_end(root, leaf);
4576
Chris Mason5f39d392007-10-15 16:14:19 -04004577 old_data_start = btrfs_item_offset_nr(leaf, slot);
Chris Mason179e29e2007-11-01 11:28:41 -04004578
Chris Masonb18c6682007-04-17 13:26:50 -04004579 size_diff = old_size - new_size;
4580
4581 BUG_ON(slot < 0);
4582 BUG_ON(slot >= nritems);
4583
4584 /*
4585 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4586 */
4587 /* first correct the data pointers */
4588 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004589 u32 ioff;
Ross Kirkdd3cc162013-09-16 15:58:09 +01004590 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004591
Chris Masoncfed81a2012-03-03 07:40:03 -05004592 ioff = btrfs_token_item_offset(leaf, item, &token);
4593 btrfs_set_token_item_offset(leaf, item,
4594 ioff + size_diff, &token);
Chris Masonb18c6682007-04-17 13:26:50 -04004595 }
Chris Masondb945352007-10-15 16:15:53 -04004596
Chris Masonb18c6682007-04-17 13:26:50 -04004597 /* shift the data */
Chris Mason179e29e2007-11-01 11:28:41 -04004598 if (from_end) {
4599 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
4600 data_end + size_diff, btrfs_leaf_data(leaf) +
4601 data_end, old_data_start + new_size - data_end);
4602 } else {
4603 struct btrfs_disk_key disk_key;
4604 u64 offset;
4605
4606 btrfs_item_key(leaf, &disk_key, slot);
4607
4608 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
4609 unsigned long ptr;
4610 struct btrfs_file_extent_item *fi;
4611
4612 fi = btrfs_item_ptr(leaf, slot,
4613 struct btrfs_file_extent_item);
4614 fi = (struct btrfs_file_extent_item *)(
4615 (unsigned long)fi - size_diff);
4616
4617 if (btrfs_file_extent_type(leaf, fi) ==
4618 BTRFS_FILE_EXTENT_INLINE) {
4619 ptr = btrfs_item_ptr_offset(leaf, slot);
4620 memmove_extent_buffer(leaf, ptr,
Chris Masond3977122009-01-05 21:25:51 -05004621 (unsigned long)fi,
David Sterba7ec20af2014-07-24 17:34:58 +02004622 BTRFS_FILE_EXTENT_INLINE_DATA_START);
Chris Mason179e29e2007-11-01 11:28:41 -04004623 }
4624 }
4625
4626 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
4627 data_end + size_diff, btrfs_leaf_data(leaf) +
4628 data_end, old_data_start - data_end);
4629
4630 offset = btrfs_disk_key_offset(&disk_key);
4631 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4632 btrfs_set_item_key(leaf, &disk_key, slot);
4633 if (slot == 0)
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004634 fixup_low_keys(root->fs_info, path, &disk_key, 1);
Chris Mason179e29e2007-11-01 11:28:41 -04004635 }
Chris Mason5f39d392007-10-15 16:14:19 -04004636
Ross Kirkdd3cc162013-09-16 15:58:09 +01004637 item = btrfs_item_nr(slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004638 btrfs_set_item_size(leaf, item, new_size);
4639 btrfs_mark_buffer_dirty(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004640
Chris Mason5f39d392007-10-15 16:14:19 -04004641 if (btrfs_leaf_free_space(root, leaf) < 0) {
4642 btrfs_print_leaf(root, leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004643 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004644 }
Chris Masonb18c6682007-04-17 13:26:50 -04004645}
4646
Chris Masond352ac62008-09-29 15:18:18 -04004647/*
Stefan Behrens8f69dbd2013-05-07 10:23:30 +00004648 * make the item pointed to by the path bigger, data_size is the added size.
Chris Masond352ac62008-09-29 15:18:18 -04004649 */
Tsutomu Itoh4b90c682013-04-16 05:18:49 +00004650void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01004651 u32 data_size)
Chris Mason6567e832007-04-16 09:22:45 -04004652{
Chris Mason6567e832007-04-16 09:22:45 -04004653 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04004654 struct extent_buffer *leaf;
4655 struct btrfs_item *item;
Chris Mason6567e832007-04-16 09:22:45 -04004656 u32 nritems;
4657 unsigned int data_end;
4658 unsigned int old_data;
4659 unsigned int old_size;
4660 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05004661 struct btrfs_map_token token;
4662
4663 btrfs_init_map_token(&token);
Chris Mason6567e832007-04-16 09:22:45 -04004664
Chris Mason5f39d392007-10-15 16:14:19 -04004665 leaf = path->nodes[0];
Chris Mason6567e832007-04-16 09:22:45 -04004666
Chris Mason5f39d392007-10-15 16:14:19 -04004667 nritems = btrfs_header_nritems(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004668 data_end = leaf_data_end(root, leaf);
4669
Chris Mason5f39d392007-10-15 16:14:19 -04004670 if (btrfs_leaf_free_space(root, leaf) < data_size) {
4671 btrfs_print_leaf(root, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004672 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004673 }
Chris Mason6567e832007-04-16 09:22:45 -04004674 slot = path->slots[0];
Chris Mason5f39d392007-10-15 16:14:19 -04004675 old_data = btrfs_item_end_nr(leaf, slot);
Chris Mason6567e832007-04-16 09:22:45 -04004676
4677 BUG_ON(slot < 0);
Chris Mason3326d1b2007-10-15 16:18:25 -04004678 if (slot >= nritems) {
4679 btrfs_print_leaf(root, leaf);
Frank Holtonefe120a2013-12-20 11:37:06 -05004680 btrfs_crit(root->fs_info, "slot %d too large, nritems %d",
Chris Masond3977122009-01-05 21:25:51 -05004681 slot, nritems);
Chris Mason3326d1b2007-10-15 16:18:25 -04004682 BUG_ON(1);
4683 }
Chris Mason6567e832007-04-16 09:22:45 -04004684
4685 /*
4686 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4687 */
4688 /* first correct the data pointers */
4689 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004690 u32 ioff;
Ross Kirkdd3cc162013-09-16 15:58:09 +01004691 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004692
Chris Masoncfed81a2012-03-03 07:40:03 -05004693 ioff = btrfs_token_item_offset(leaf, item, &token);
4694 btrfs_set_token_item_offset(leaf, item,
4695 ioff - data_size, &token);
Chris Mason6567e832007-04-16 09:22:45 -04004696 }
Chris Mason5f39d392007-10-15 16:14:19 -04004697
Chris Mason6567e832007-04-16 09:22:45 -04004698 /* shift the data */
Chris Mason5f39d392007-10-15 16:14:19 -04004699 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
Chris Mason6567e832007-04-16 09:22:45 -04004700 data_end - data_size, btrfs_leaf_data(leaf) +
4701 data_end, old_data - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04004702
Chris Mason6567e832007-04-16 09:22:45 -04004703 data_end = old_data;
Chris Mason5f39d392007-10-15 16:14:19 -04004704 old_size = btrfs_item_size_nr(leaf, slot);
Ross Kirkdd3cc162013-09-16 15:58:09 +01004705 item = btrfs_item_nr(slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004706 btrfs_set_item_size(leaf, item, old_size + data_size);
4707 btrfs_mark_buffer_dirty(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004708
Chris Mason5f39d392007-10-15 16:14:19 -04004709 if (btrfs_leaf_free_space(root, leaf) < 0) {
4710 btrfs_print_leaf(root, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004711 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004712 }
Chris Mason6567e832007-04-16 09:22:45 -04004713}
4714
Chris Mason74123bd2007-02-02 11:05:29 -05004715/*
Chris Mason44871b12009-03-13 10:04:31 -04004716 * this is a helper for btrfs_insert_empty_items, the main goal here is
4717 * to save stack depth by doing the bulk of the work in a function
4718 * that doesn't call btrfs_search_slot
Chris Mason74123bd2007-02-02 11:05:29 -05004719 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004720void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01004721 struct btrfs_key *cpu_key, u32 *data_size,
4722 u32 total_data, u32 total_size, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004723{
Chris Mason5f39d392007-10-15 16:14:19 -04004724 struct btrfs_item *item;
Chris Mason9c583092008-01-29 15:15:18 -05004725 int i;
Chris Mason7518a232007-03-12 12:01:18 -04004726 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004727 unsigned int data_end;
Chris Masone2fa7222007-03-12 16:22:34 -04004728 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04004729 struct extent_buffer *leaf;
4730 int slot;
Chris Masoncfed81a2012-03-03 07:40:03 -05004731 struct btrfs_map_token token;
4732
Filipe Manana24cdc842014-07-28 19:34:35 +01004733 if (path->slots[0] == 0) {
4734 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004735 fixup_low_keys(root->fs_info, path, &disk_key, 1);
Filipe Manana24cdc842014-07-28 19:34:35 +01004736 }
4737 btrfs_unlock_up_safe(path, 1);
4738
Chris Masoncfed81a2012-03-03 07:40:03 -05004739 btrfs_init_map_token(&token);
Chris Masone2fa7222007-03-12 16:22:34 -04004740
Chris Mason5f39d392007-10-15 16:14:19 -04004741 leaf = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04004742 slot = path->slots[0];
Chris Mason74123bd2007-02-02 11:05:29 -05004743
Chris Mason5f39d392007-10-15 16:14:19 -04004744 nritems = btrfs_header_nritems(leaf);
Chris Mason123abc82007-03-14 14:14:43 -04004745 data_end = leaf_data_end(root, leaf);
Chris Masoneb60cea2007-02-02 09:18:22 -05004746
Chris Masonf25956c2008-09-12 15:32:53 -04004747 if (btrfs_leaf_free_space(root, leaf) < total_size) {
Chris Mason3326d1b2007-10-15 16:18:25 -04004748 btrfs_print_leaf(root, leaf);
Jeff Mahoney5d163e02016-09-20 10:05:00 -04004749 btrfs_crit(root->fs_info,
4750 "not enough freespace need %u have %d",
4751 total_size, btrfs_leaf_free_space(root, leaf));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004752 BUG();
Chris Masond4dbff92007-04-04 14:08:15 -04004753 }
Chris Mason5f39d392007-10-15 16:14:19 -04004754
Chris Masonbe0e5c02007-01-26 15:51:26 -05004755 if (slot != nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04004756 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004757
Chris Mason5f39d392007-10-15 16:14:19 -04004758 if (old_data < data_end) {
4759 btrfs_print_leaf(root, leaf);
Jeff Mahoney5d163e02016-09-20 10:05:00 -04004760 btrfs_crit(root->fs_info,
4761 "slot %d old_data %d data_end %d",
4762 slot, old_data, data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04004763 BUG_ON(1);
4764 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004765 /*
4766 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4767 */
4768 /* first correct the data pointers */
Chris Mason0783fcf2007-03-12 20:12:07 -04004769 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004770 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04004771
Jeff Mahoney62e85572016-09-20 10:05:01 -04004772 item = btrfs_item_nr(i);
Chris Masoncfed81a2012-03-03 07:40:03 -05004773 ioff = btrfs_token_item_offset(leaf, item, &token);
4774 btrfs_set_token_item_offset(leaf, item,
4775 ioff - total_data, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04004776 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004777 /* shift the items */
Chris Mason9c583092008-01-29 15:15:18 -05004778 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
Chris Mason5f39d392007-10-15 16:14:19 -04004779 btrfs_item_nr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04004780 (nritems - slot) * sizeof(struct btrfs_item));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004781
4782 /* shift the data */
Chris Mason5f39d392007-10-15 16:14:19 -04004783 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
Chris Mason9c583092008-01-29 15:15:18 -05004784 data_end - total_data, btrfs_leaf_data(leaf) +
Chris Masond6025572007-03-30 14:27:56 -04004785 data_end, old_data - data_end);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004786 data_end = old_data;
4787 }
Chris Mason5f39d392007-10-15 16:14:19 -04004788
Chris Mason62e27492007-03-15 12:56:47 -04004789 /* setup the item for the new data */
Chris Mason9c583092008-01-29 15:15:18 -05004790 for (i = 0; i < nr; i++) {
4791 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
4792 btrfs_set_item_key(leaf, &disk_key, slot + i);
Ross Kirkdd3cc162013-09-16 15:58:09 +01004793 item = btrfs_item_nr(slot + i);
Chris Masoncfed81a2012-03-03 07:40:03 -05004794 btrfs_set_token_item_offset(leaf, item,
4795 data_end - data_size[i], &token);
Chris Mason9c583092008-01-29 15:15:18 -05004796 data_end -= data_size[i];
Chris Masoncfed81a2012-03-03 07:40:03 -05004797 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
Chris Mason9c583092008-01-29 15:15:18 -05004798 }
Chris Mason44871b12009-03-13 10:04:31 -04004799
Chris Mason9c583092008-01-29 15:15:18 -05004800 btrfs_set_header_nritems(leaf, nritems + nr);
Chris Masonb9473432009-03-13 11:00:37 -04004801 btrfs_mark_buffer_dirty(leaf);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004802
Chris Mason5f39d392007-10-15 16:14:19 -04004803 if (btrfs_leaf_free_space(root, leaf) < 0) {
4804 btrfs_print_leaf(root, leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004805 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004806 }
Chris Mason44871b12009-03-13 10:04:31 -04004807}
4808
4809/*
4810 * Given a key and some data, insert items into the tree.
4811 * This does all the path init required, making room in the tree if needed.
4812 */
4813int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4814 struct btrfs_root *root,
4815 struct btrfs_path *path,
4816 struct btrfs_key *cpu_key, u32 *data_size,
4817 int nr)
4818{
Chris Mason44871b12009-03-13 10:04:31 -04004819 int ret = 0;
4820 int slot;
4821 int i;
4822 u32 total_size = 0;
4823 u32 total_data = 0;
4824
4825 for (i = 0; i < nr; i++)
4826 total_data += data_size[i];
4827
4828 total_size = total_data + (nr * sizeof(struct btrfs_item));
4829 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
4830 if (ret == 0)
4831 return -EEXIST;
4832 if (ret < 0)
Jeff Mahoney143bede2012-03-01 14:56:26 +01004833 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04004834
Chris Mason44871b12009-03-13 10:04:31 -04004835 slot = path->slots[0];
4836 BUG_ON(slot < 0);
4837
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004838 setup_items_for_insert(root, path, cpu_key, data_size,
Chris Mason44871b12009-03-13 10:04:31 -04004839 total_data, total_size, nr);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004840 return 0;
Chris Mason62e27492007-03-15 12:56:47 -04004841}
4842
4843/*
4844 * Given a key and some data, insert an item into the tree.
4845 * This does all the path init required, making room in the tree if needed.
4846 */
Chris Masone089f052007-03-16 16:20:31 -04004847int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
4848 *root, struct btrfs_key *cpu_key, void *data, u32
4849 data_size)
Chris Mason62e27492007-03-15 12:56:47 -04004850{
4851 int ret = 0;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004852 struct btrfs_path *path;
Chris Mason5f39d392007-10-15 16:14:19 -04004853 struct extent_buffer *leaf;
4854 unsigned long ptr;
Chris Mason62e27492007-03-15 12:56:47 -04004855
Chris Mason2c90e5d2007-04-02 10:50:19 -04004856 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00004857 if (!path)
4858 return -ENOMEM;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004859 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
Chris Mason62e27492007-03-15 12:56:47 -04004860 if (!ret) {
Chris Mason5f39d392007-10-15 16:14:19 -04004861 leaf = path->nodes[0];
4862 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4863 write_extent_buffer(leaf, data, ptr, data_size);
4864 btrfs_mark_buffer_dirty(leaf);
Chris Mason62e27492007-03-15 12:56:47 -04004865 }
Chris Mason2c90e5d2007-04-02 10:50:19 -04004866 btrfs_free_path(path);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004867 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004868}
4869
Chris Mason74123bd2007-02-02 11:05:29 -05004870/*
Chris Mason5de08d72007-02-24 06:24:44 -05004871 * delete the pointer from a given node.
Chris Mason74123bd2007-02-02 11:05:29 -05004872 *
Chris Masond352ac62008-09-29 15:18:18 -04004873 * the tree should have been previously balanced so the deletion does not
4874 * empty a node.
Chris Mason74123bd2007-02-02 11:05:29 -05004875 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004876static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4877 int level, int slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004878{
Chris Mason5f39d392007-10-15 16:14:19 -04004879 struct extent_buffer *parent = path->nodes[level];
Chris Mason7518a232007-03-12 12:01:18 -04004880 u32 nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004881 int ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004882
Chris Mason5f39d392007-10-15 16:14:19 -04004883 nritems = btrfs_header_nritems(parent);
Chris Masond3977122009-01-05 21:25:51 -05004884 if (slot != nritems - 1) {
Liu Bo0e411ec2012-10-19 09:50:54 +00004885 if (level)
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004886 tree_mod_log_eb_move(root->fs_info, parent, slot,
4887 slot + 1, nritems - slot - 1);
Chris Mason5f39d392007-10-15 16:14:19 -04004888 memmove_extent_buffer(parent,
4889 btrfs_node_key_ptr_offset(slot),
4890 btrfs_node_key_ptr_offset(slot + 1),
Chris Masond6025572007-03-30 14:27:56 -04004891 sizeof(struct btrfs_key_ptr) *
4892 (nritems - slot - 1));
Chris Mason57ba86c2012-12-18 19:35:32 -05004893 } else if (level) {
4894 ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
Josef Bacikc8cc6342013-07-01 16:18:19 -04004895 MOD_LOG_KEY_REMOVE, GFP_NOFS);
Chris Mason57ba86c2012-12-18 19:35:32 -05004896 BUG_ON(ret < 0);
Chris Masonbb803952007-03-01 12:04:21 -05004897 }
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004898
Chris Mason7518a232007-03-12 12:01:18 -04004899 nritems--;
Chris Mason5f39d392007-10-15 16:14:19 -04004900 btrfs_set_header_nritems(parent, nritems);
Chris Mason7518a232007-03-12 12:01:18 -04004901 if (nritems == 0 && parent == root->node) {
Chris Mason5f39d392007-10-15 16:14:19 -04004902 BUG_ON(btrfs_header_level(root->node) != 1);
Chris Masonbb803952007-03-01 12:04:21 -05004903 /* just turn the root into a leaf and break */
Chris Mason5f39d392007-10-15 16:14:19 -04004904 btrfs_set_header_level(root->node, 0);
Chris Masonbb803952007-03-01 12:04:21 -05004905 } else if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004906 struct btrfs_disk_key disk_key;
4907
4908 btrfs_node_key(parent, &disk_key, 0);
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004909 fixup_low_keys(root->fs_info, path, &disk_key, level + 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004910 }
Chris Masond6025572007-03-30 14:27:56 -04004911 btrfs_mark_buffer_dirty(parent);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004912}
4913
Chris Mason74123bd2007-02-02 11:05:29 -05004914/*
Chris Mason323ac952008-10-01 19:05:46 -04004915 * a helper function to delete the leaf pointed to by path->slots[1] and
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004916 * path->nodes[1].
Chris Mason323ac952008-10-01 19:05:46 -04004917 *
4918 * This deletes the pointer in path->nodes[1] and frees the leaf
4919 * block extent. zero is returned if it all worked out, < 0 otherwise.
4920 *
4921 * The path must have already been setup for deleting the leaf, including
4922 * all the proper balancing. path->nodes[1] must be locked.
4923 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01004924static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4925 struct btrfs_root *root,
4926 struct btrfs_path *path,
4927 struct extent_buffer *leaf)
Chris Mason323ac952008-10-01 19:05:46 -04004928{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004929 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004930 del_ptr(root, path, 1, path->slots[1]);
Chris Mason323ac952008-10-01 19:05:46 -04004931
Chris Mason4d081c42009-02-04 09:31:28 -05004932 /*
4933 * btrfs_free_extent is expensive, we want to make sure we
4934 * aren't holding any locks when we call it
4935 */
4936 btrfs_unlock_up_safe(path, 0);
4937
Yan, Zhengf0486c62010-05-16 10:46:25 -04004938 root_sub_used(root, leaf->len);
4939
Josef Bacik3083ee22012-03-09 16:01:49 -05004940 extent_buffer_get(leaf);
Jan Schmidt5581a512012-05-16 17:04:52 +02004941 btrfs_free_tree_block(trans, root, leaf, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05004942 free_extent_buffer_stale(leaf);
Chris Mason323ac952008-10-01 19:05:46 -04004943}
4944/*
Chris Mason74123bd2007-02-02 11:05:29 -05004945 * delete the item at the leaf level in path. If that empties
4946 * the leaf, remove it from the tree
4947 */
Chris Mason85e21ba2008-01-29 15:11:36 -05004948int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4949 struct btrfs_path *path, int slot, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004950{
Chris Mason5f39d392007-10-15 16:14:19 -04004951 struct extent_buffer *leaf;
4952 struct btrfs_item *item;
Alexandru Moisece0eac22015-08-23 16:01:42 +00004953 u32 last_off;
4954 u32 dsize = 0;
Chris Masonaa5d6be2007-02-28 16:35:06 -05004955 int ret = 0;
4956 int wret;
Chris Mason85e21ba2008-01-29 15:11:36 -05004957 int i;
Chris Mason7518a232007-03-12 12:01:18 -04004958 u32 nritems;
Chris Masoncfed81a2012-03-03 07:40:03 -05004959 struct btrfs_map_token token;
4960
4961 btrfs_init_map_token(&token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004962
Chris Mason5f39d392007-10-15 16:14:19 -04004963 leaf = path->nodes[0];
Chris Mason85e21ba2008-01-29 15:11:36 -05004964 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
4965
4966 for (i = 0; i < nr; i++)
4967 dsize += btrfs_item_size_nr(leaf, slot + i);
4968
Chris Mason5f39d392007-10-15 16:14:19 -04004969 nritems = btrfs_header_nritems(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004970
Chris Mason85e21ba2008-01-29 15:11:36 -05004971 if (slot + nr != nritems) {
Chris Mason123abc82007-03-14 14:14:43 -04004972 int data_end = leaf_data_end(root, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04004973
4974 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
Chris Masond6025572007-03-30 14:27:56 -04004975 data_end + dsize,
4976 btrfs_leaf_data(leaf) + data_end,
Chris Mason85e21ba2008-01-29 15:11:36 -05004977 last_off - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04004978
Chris Mason85e21ba2008-01-29 15:11:36 -05004979 for (i = slot + nr; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004980 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04004981
Ross Kirkdd3cc162013-09-16 15:58:09 +01004982 item = btrfs_item_nr(i);
Chris Masoncfed81a2012-03-03 07:40:03 -05004983 ioff = btrfs_token_item_offset(leaf, item, &token);
4984 btrfs_set_token_item_offset(leaf, item,
4985 ioff + dsize, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04004986 }
Chris Masondb945352007-10-15 16:15:53 -04004987
Chris Mason5f39d392007-10-15 16:14:19 -04004988 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
Chris Mason85e21ba2008-01-29 15:11:36 -05004989 btrfs_item_nr_offset(slot + nr),
Chris Masond6025572007-03-30 14:27:56 -04004990 sizeof(struct btrfs_item) *
Chris Mason85e21ba2008-01-29 15:11:36 -05004991 (nritems - slot - nr));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004992 }
Chris Mason85e21ba2008-01-29 15:11:36 -05004993 btrfs_set_header_nritems(leaf, nritems - nr);
4994 nritems -= nr;
Chris Mason5f39d392007-10-15 16:14:19 -04004995
Chris Mason74123bd2007-02-02 11:05:29 -05004996 /* delete the leaf if we've emptied it */
Chris Mason7518a232007-03-12 12:01:18 -04004997 if (nritems == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004998 if (leaf == root->node) {
4999 btrfs_set_header_level(leaf, 0);
Chris Mason9a8dd152007-02-23 08:38:36 -05005000 } else {
Yan, Zhengf0486c62010-05-16 10:46:25 -04005001 btrfs_set_path_blocking(path);
Daniel Dressler01d58472014-11-21 17:15:07 +09005002 clean_tree_block(trans, root->fs_info, leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01005003 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason9a8dd152007-02-23 08:38:36 -05005004 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05005005 } else {
Chris Mason7518a232007-03-12 12:01:18 -04005006 int used = leaf_space_used(leaf, 0, nritems);
Chris Masonaa5d6be2007-02-28 16:35:06 -05005007 if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04005008 struct btrfs_disk_key disk_key;
5009
5010 btrfs_item_key(leaf, &disk_key, 0);
Daniel Dresslerb7a03652014-11-12 13:43:09 +09005011 fixup_low_keys(root->fs_info, path, &disk_key, 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05005012 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05005013
Chris Mason74123bd2007-02-02 11:05:29 -05005014 /* delete the leaf if it is mostly empty */
Yan Zhengd717aa12009-07-24 12:42:46 -04005015 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05005016 /* push_leaf_left fixes the path.
5017 * make sure the path still points to our leaf
5018 * for possible call to del_ptr below
5019 */
Chris Mason4920c9a2007-01-26 16:38:42 -05005020 slot = path->slots[1];
Chris Mason5f39d392007-10-15 16:14:19 -04005021 extent_buffer_get(leaf);
5022
Chris Masonb9473432009-03-13 11:00:37 -04005023 btrfs_set_path_blocking(path);
Chris Mason99d8f832010-07-07 10:51:48 -04005024 wret = push_leaf_left(trans, root, path, 1, 1,
5025 1, (u32)-1);
Chris Mason54aa1f42007-06-22 14:16:25 -04005026 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05005027 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04005028
5029 if (path->nodes[0] == leaf &&
5030 btrfs_header_nritems(leaf)) {
Chris Mason99d8f832010-07-07 10:51:48 -04005031 wret = push_leaf_right(trans, root, path, 1,
5032 1, 1, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04005033 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05005034 ret = wret;
5035 }
Chris Mason5f39d392007-10-15 16:14:19 -04005036
5037 if (btrfs_header_nritems(leaf) == 0) {
Chris Mason323ac952008-10-01 19:05:46 -04005038 path->slots[1] = slot;
Jeff Mahoney143bede2012-03-01 14:56:26 +01005039 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04005040 free_extent_buffer(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01005041 ret = 0;
Chris Mason5de08d72007-02-24 06:24:44 -05005042 } else {
Chris Mason925baed2008-06-25 16:01:30 -04005043 /* if we're still in the path, make sure
5044 * we're dirty. Otherwise, one of the
5045 * push_leaf functions must have already
5046 * dirtied this buffer
5047 */
5048 if (path->nodes[0] == leaf)
5049 btrfs_mark_buffer_dirty(leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04005050 free_extent_buffer(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005051 }
Chris Masond5719762007-03-23 10:01:08 -04005052 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04005053 btrfs_mark_buffer_dirty(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005054 }
5055 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05005056 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05005057}
5058
Chris Mason97571fd2007-02-24 13:39:08 -05005059/*
Chris Mason925baed2008-06-25 16:01:30 -04005060 * search the tree again to find a leaf with lesser keys
Chris Mason7bb86312007-12-11 09:25:06 -05005061 * returns 0 if it found something or 1 if there are no lesser leaves.
5062 * returns < 0 on io errors.
Chris Masond352ac62008-09-29 15:18:18 -04005063 *
5064 * This may release the path, and so you may lose any locks held at the
5065 * time you call it.
Chris Mason7bb86312007-12-11 09:25:06 -05005066 */
Josef Bacik16e75492013-10-22 12:18:51 -04005067int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Mason7bb86312007-12-11 09:25:06 -05005068{
Chris Mason925baed2008-06-25 16:01:30 -04005069 struct btrfs_key key;
5070 struct btrfs_disk_key found_key;
5071 int ret;
Chris Mason7bb86312007-12-11 09:25:06 -05005072
Chris Mason925baed2008-06-25 16:01:30 -04005073 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
Chris Mason7bb86312007-12-11 09:25:06 -05005074
Filipe David Borba Mananae8b0d722013-10-15 00:12:27 +01005075 if (key.offset > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005076 key.offset--;
Filipe David Borba Mananae8b0d722013-10-15 00:12:27 +01005077 } else if (key.type > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005078 key.type--;
Filipe David Borba Mananae8b0d722013-10-15 00:12:27 +01005079 key.offset = (u64)-1;
5080 } else if (key.objectid > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005081 key.objectid--;
Filipe David Borba Mananae8b0d722013-10-15 00:12:27 +01005082 key.type = (u8)-1;
5083 key.offset = (u64)-1;
5084 } else {
Chris Mason925baed2008-06-25 16:01:30 -04005085 return 1;
Filipe David Borba Mananae8b0d722013-10-15 00:12:27 +01005086 }
Chris Mason7bb86312007-12-11 09:25:06 -05005087
David Sterbab3b4aa72011-04-21 01:20:15 +02005088 btrfs_release_path(path);
Chris Mason925baed2008-06-25 16:01:30 -04005089 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5090 if (ret < 0)
5091 return ret;
5092 btrfs_item_key(path->nodes[0], &found_key, 0);
5093 ret = comp_keys(&found_key, &key);
Filipe Manana337c6f62014-06-09 13:22:13 +01005094 /*
5095 * We might have had an item with the previous key in the tree right
5096 * before we released our path. And after we released our path, that
5097 * item might have been pushed to the first slot (0) of the leaf we
5098 * were holding due to a tree balance. Alternatively, an item with the
5099 * previous key can exist as the only element of a leaf (big fat item).
5100 * Therefore account for these 2 cases, so that our callers (like
5101 * btrfs_previous_item) don't miss an existing item with a key matching
5102 * the previous key we computed above.
5103 */
5104 if (ret <= 0)
Chris Mason925baed2008-06-25 16:01:30 -04005105 return 0;
5106 return 1;
Chris Mason7bb86312007-12-11 09:25:06 -05005107}
5108
Chris Mason3f157a22008-06-25 16:01:31 -04005109/*
5110 * A helper function to walk down the tree starting at min_key, and looking
Eric Sandeende78b512013-01-31 18:21:12 +00005111 * for nodes or leaves that are have a minimum transaction id.
5112 * This is used by the btree defrag code, and tree logging
Chris Mason3f157a22008-06-25 16:01:31 -04005113 *
5114 * This does not cow, but it does stuff the starting key it finds back
5115 * into min_key, so you can call btrfs_search_slot with cow=1 on the
5116 * key and get a writable path.
5117 *
5118 * This does lock as it descends, and path->keep_locks should be set
5119 * to 1 by the caller.
5120 *
5121 * This honors path->lowest_level to prevent descent past a given level
5122 * of the tree.
5123 *
Chris Masond352ac62008-09-29 15:18:18 -04005124 * min_trans indicates the oldest transaction that you are interested
5125 * in walking through. Any nodes or leaves older than min_trans are
5126 * skipped over (without reading them).
5127 *
Chris Mason3f157a22008-06-25 16:01:31 -04005128 * returns zero if something useful was found, < 0 on error and 1 if there
5129 * was nothing in the tree that matched the search criteria.
5130 */
5131int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
Eric Sandeende78b512013-01-31 18:21:12 +00005132 struct btrfs_path *path,
Chris Mason3f157a22008-06-25 16:01:31 -04005133 u64 min_trans)
5134{
5135 struct extent_buffer *cur;
5136 struct btrfs_key found_key;
5137 int slot;
Yan96524802008-07-24 12:19:49 -04005138 int sret;
Chris Mason3f157a22008-06-25 16:01:31 -04005139 u32 nritems;
5140 int level;
5141 int ret = 1;
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005142 int keep_locks = path->keep_locks;
Chris Mason3f157a22008-06-25 16:01:31 -04005143
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005144 path->keep_locks = 1;
Chris Mason3f157a22008-06-25 16:01:31 -04005145again:
Chris Masonbd681512011-07-16 15:23:14 -04005146 cur = btrfs_read_lock_root_node(root);
Chris Mason3f157a22008-06-25 16:01:31 -04005147 level = btrfs_header_level(cur);
Chris Masone02119d2008-09-05 16:13:11 -04005148 WARN_ON(path->nodes[level]);
Chris Mason3f157a22008-06-25 16:01:31 -04005149 path->nodes[level] = cur;
Chris Masonbd681512011-07-16 15:23:14 -04005150 path->locks[level] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04005151
5152 if (btrfs_header_generation(cur) < min_trans) {
5153 ret = 1;
5154 goto out;
5155 }
Chris Masond3977122009-01-05 21:25:51 -05005156 while (1) {
Chris Mason3f157a22008-06-25 16:01:31 -04005157 nritems = btrfs_header_nritems(cur);
5158 level = btrfs_header_level(cur);
Yan96524802008-07-24 12:19:49 -04005159 sret = bin_search(cur, min_key, level, &slot);
Chris Mason3f157a22008-06-25 16:01:31 -04005160
Chris Mason323ac952008-10-01 19:05:46 -04005161 /* at the lowest level, we're done, setup the path and exit */
5162 if (level == path->lowest_level) {
Chris Masone02119d2008-09-05 16:13:11 -04005163 if (slot >= nritems)
5164 goto find_next_key;
Chris Mason3f157a22008-06-25 16:01:31 -04005165 ret = 0;
5166 path->slots[level] = slot;
5167 btrfs_item_key_to_cpu(cur, &found_key, slot);
5168 goto out;
5169 }
Yan96524802008-07-24 12:19:49 -04005170 if (sret && slot > 0)
5171 slot--;
Chris Mason3f157a22008-06-25 16:01:31 -04005172 /*
Eric Sandeende78b512013-01-31 18:21:12 +00005173 * check this node pointer against the min_trans parameters.
5174 * If it is too old, old, skip to the next one.
Chris Mason3f157a22008-06-25 16:01:31 -04005175 */
Chris Masond3977122009-01-05 21:25:51 -05005176 while (slot < nritems) {
Chris Mason3f157a22008-06-25 16:01:31 -04005177 u64 gen;
Chris Masone02119d2008-09-05 16:13:11 -04005178
Chris Mason3f157a22008-06-25 16:01:31 -04005179 gen = btrfs_node_ptr_generation(cur, slot);
5180 if (gen < min_trans) {
5181 slot++;
5182 continue;
5183 }
Eric Sandeende78b512013-01-31 18:21:12 +00005184 break;
Chris Mason3f157a22008-06-25 16:01:31 -04005185 }
Chris Masone02119d2008-09-05 16:13:11 -04005186find_next_key:
Chris Mason3f157a22008-06-25 16:01:31 -04005187 /*
5188 * we didn't find a candidate key in this node, walk forward
5189 * and find another one
5190 */
5191 if (slot >= nritems) {
Chris Masone02119d2008-09-05 16:13:11 -04005192 path->slots[level] = slot;
Chris Masonb4ce94d2009-02-04 09:25:08 -05005193 btrfs_set_path_blocking(path);
Chris Masone02119d2008-09-05 16:13:11 -04005194 sret = btrfs_find_next_key(root, path, min_key, level,
Eric Sandeende78b512013-01-31 18:21:12 +00005195 min_trans);
Chris Masone02119d2008-09-05 16:13:11 -04005196 if (sret == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02005197 btrfs_release_path(path);
Chris Mason3f157a22008-06-25 16:01:31 -04005198 goto again;
5199 } else {
5200 goto out;
5201 }
5202 }
5203 /* save our key for returning back */
5204 btrfs_node_key_to_cpu(cur, &found_key, slot);
5205 path->slots[level] = slot;
5206 if (level == path->lowest_level) {
5207 ret = 0;
Chris Mason3f157a22008-06-25 16:01:31 -04005208 goto out;
5209 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05005210 btrfs_set_path_blocking(path);
Chris Mason3f157a22008-06-25 16:01:31 -04005211 cur = read_node_slot(root, cur, slot);
Liu Bofb770ae2016-07-05 12:10:14 -07005212 if (IS_ERR(cur)) {
5213 ret = PTR_ERR(cur);
5214 goto out;
5215 }
Chris Mason3f157a22008-06-25 16:01:31 -04005216
Chris Masonbd681512011-07-16 15:23:14 -04005217 btrfs_tree_read_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -05005218
Chris Masonbd681512011-07-16 15:23:14 -04005219 path->locks[level - 1] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04005220 path->nodes[level - 1] = cur;
Chris Masonf7c79f32012-03-19 15:54:38 -04005221 unlock_up(path, level, 1, 0, NULL);
Chris Masonbd681512011-07-16 15:23:14 -04005222 btrfs_clear_path_blocking(path, NULL, 0);
Chris Mason3f157a22008-06-25 16:01:31 -04005223 }
5224out:
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005225 path->keep_locks = keep_locks;
5226 if (ret == 0) {
5227 btrfs_unlock_up_safe(path, path->lowest_level + 1);
5228 btrfs_set_path_blocking(path);
Chris Mason3f157a22008-06-25 16:01:31 -04005229 memcpy(min_key, &found_key, sizeof(found_key));
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005230 }
Chris Mason3f157a22008-06-25 16:01:31 -04005231 return ret;
5232}
5233
Liu Bofb770ae2016-07-05 12:10:14 -07005234static int tree_move_down(struct btrfs_root *root,
Alexander Block70698302012-06-05 21:07:48 +02005235 struct btrfs_path *path,
5236 int *level, int root_level)
5237{
Liu Bofb770ae2016-07-05 12:10:14 -07005238 struct extent_buffer *eb;
5239
Chris Mason74dd17f2012-08-07 16:25:13 -04005240 BUG_ON(*level == 0);
Liu Bofb770ae2016-07-05 12:10:14 -07005241 eb = read_node_slot(root, path->nodes[*level], path->slots[*level]);
5242 if (IS_ERR(eb))
5243 return PTR_ERR(eb);
5244
5245 path->nodes[*level - 1] = eb;
Alexander Block70698302012-06-05 21:07:48 +02005246 path->slots[*level - 1] = 0;
5247 (*level)--;
Liu Bofb770ae2016-07-05 12:10:14 -07005248 return 0;
Alexander Block70698302012-06-05 21:07:48 +02005249}
5250
5251static int tree_move_next_or_upnext(struct btrfs_root *root,
5252 struct btrfs_path *path,
5253 int *level, int root_level)
5254{
5255 int ret = 0;
5256 int nritems;
5257 nritems = btrfs_header_nritems(path->nodes[*level]);
5258
5259 path->slots[*level]++;
5260
Chris Mason74dd17f2012-08-07 16:25:13 -04005261 while (path->slots[*level] >= nritems) {
Alexander Block70698302012-06-05 21:07:48 +02005262 if (*level == root_level)
5263 return -1;
5264
5265 /* move upnext */
5266 path->slots[*level] = 0;
5267 free_extent_buffer(path->nodes[*level]);
5268 path->nodes[*level] = NULL;
5269 (*level)++;
5270 path->slots[*level]++;
5271
5272 nritems = btrfs_header_nritems(path->nodes[*level]);
5273 ret = 1;
5274 }
5275 return ret;
5276}
5277
5278/*
5279 * Returns 1 if it had to move up and next. 0 is returned if it moved only next
5280 * or down.
5281 */
5282static int tree_advance(struct btrfs_root *root,
5283 struct btrfs_path *path,
5284 int *level, int root_level,
5285 int allow_down,
5286 struct btrfs_key *key)
5287{
5288 int ret;
5289
5290 if (*level == 0 || !allow_down) {
5291 ret = tree_move_next_or_upnext(root, path, level, root_level);
5292 } else {
Liu Bofb770ae2016-07-05 12:10:14 -07005293 ret = tree_move_down(root, path, level, root_level);
Alexander Block70698302012-06-05 21:07:48 +02005294 }
5295 if (ret >= 0) {
5296 if (*level == 0)
5297 btrfs_item_key_to_cpu(path->nodes[*level], key,
5298 path->slots[*level]);
5299 else
5300 btrfs_node_key_to_cpu(path->nodes[*level], key,
5301 path->slots[*level]);
5302 }
5303 return ret;
5304}
5305
5306static int tree_compare_item(struct btrfs_root *left_root,
5307 struct btrfs_path *left_path,
5308 struct btrfs_path *right_path,
5309 char *tmp_buf)
5310{
5311 int cmp;
5312 int len1, len2;
5313 unsigned long off1, off2;
5314
5315 len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]);
5316 len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]);
5317 if (len1 != len2)
5318 return 1;
5319
5320 off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
5321 off2 = btrfs_item_ptr_offset(right_path->nodes[0],
5322 right_path->slots[0]);
5323
5324 read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
5325
5326 cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
5327 if (cmp)
5328 return 1;
5329 return 0;
5330}
5331
5332#define ADVANCE 1
5333#define ADVANCE_ONLY_NEXT -1
5334
5335/*
5336 * This function compares two trees and calls the provided callback for
5337 * every changed/new/deleted item it finds.
5338 * If shared tree blocks are encountered, whole subtrees are skipped, making
5339 * the compare pretty fast on snapshotted subvolumes.
5340 *
5341 * This currently works on commit roots only. As commit roots are read only,
5342 * we don't do any locking. The commit roots are protected with transactions.
5343 * Transactions are ended and rejoined when a commit is tried in between.
5344 *
5345 * This function checks for modifications done to the trees while comparing.
5346 * If it detects a change, it aborts immediately.
5347 */
5348int btrfs_compare_trees(struct btrfs_root *left_root,
5349 struct btrfs_root *right_root,
5350 btrfs_changed_cb_t changed_cb, void *ctx)
5351{
5352 int ret;
5353 int cmp;
Alexander Block70698302012-06-05 21:07:48 +02005354 struct btrfs_path *left_path = NULL;
5355 struct btrfs_path *right_path = NULL;
5356 struct btrfs_key left_key;
5357 struct btrfs_key right_key;
5358 char *tmp_buf = NULL;
5359 int left_root_level;
5360 int right_root_level;
5361 int left_level;
5362 int right_level;
5363 int left_end_reached;
5364 int right_end_reached;
5365 int advance_left;
5366 int advance_right;
5367 u64 left_blockptr;
5368 u64 right_blockptr;
Filipe Manana6baa4292014-02-20 21:15:25 +00005369 u64 left_gen;
5370 u64 right_gen;
Alexander Block70698302012-06-05 21:07:48 +02005371
5372 left_path = btrfs_alloc_path();
5373 if (!left_path) {
5374 ret = -ENOMEM;
5375 goto out;
5376 }
5377 right_path = btrfs_alloc_path();
5378 if (!right_path) {
5379 ret = -ENOMEM;
5380 goto out;
5381 }
5382
David Sterba8f282f72016-03-30 16:01:12 +02005383 tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL | __GFP_NOWARN);
Alexander Block70698302012-06-05 21:07:48 +02005384 if (!tmp_buf) {
David Sterba8f282f72016-03-30 16:01:12 +02005385 tmp_buf = vmalloc(left_root->nodesize);
5386 if (!tmp_buf) {
5387 ret = -ENOMEM;
5388 goto out;
5389 }
Alexander Block70698302012-06-05 21:07:48 +02005390 }
5391
5392 left_path->search_commit_root = 1;
5393 left_path->skip_locking = 1;
5394 right_path->search_commit_root = 1;
5395 right_path->skip_locking = 1;
5396
Alexander Block70698302012-06-05 21:07:48 +02005397 /*
5398 * Strategy: Go to the first items of both trees. Then do
5399 *
5400 * If both trees are at level 0
5401 * Compare keys of current items
5402 * If left < right treat left item as new, advance left tree
5403 * and repeat
5404 * If left > right treat right item as deleted, advance right tree
5405 * and repeat
5406 * If left == right do deep compare of items, treat as changed if
5407 * needed, advance both trees and repeat
5408 * If both trees are at the same level but not at level 0
5409 * Compare keys of current nodes/leafs
5410 * If left < right advance left tree and repeat
5411 * If left > right advance right tree and repeat
5412 * If left == right compare blockptrs of the next nodes/leafs
5413 * If they match advance both trees but stay at the same level
5414 * and repeat
5415 * If they don't match advance both trees while allowing to go
5416 * deeper and repeat
5417 * If tree levels are different
5418 * Advance the tree that needs it and repeat
5419 *
5420 * Advancing a tree means:
5421 * If we are at level 0, try to go to the next slot. If that's not
5422 * possible, go one level up and repeat. Stop when we found a level
5423 * where we could go to the next slot. We may at this point be on a
5424 * node or a leaf.
5425 *
5426 * If we are not at level 0 and not on shared tree blocks, go one
5427 * level deeper.
5428 *
5429 * If we are not at level 0 and on shared tree blocks, go one slot to
5430 * the right if possible or go up and right.
5431 */
5432
Josef Bacik3f8a18c2014-03-28 17:16:01 -04005433 down_read(&left_root->fs_info->commit_root_sem);
Alexander Block70698302012-06-05 21:07:48 +02005434 left_level = btrfs_header_level(left_root->commit_root);
5435 left_root_level = left_level;
5436 left_path->nodes[left_level] = left_root->commit_root;
5437 extent_buffer_get(left_path->nodes[left_level]);
5438
5439 right_level = btrfs_header_level(right_root->commit_root);
5440 right_root_level = right_level;
5441 right_path->nodes[right_level] = right_root->commit_root;
5442 extent_buffer_get(right_path->nodes[right_level]);
Josef Bacik3f8a18c2014-03-28 17:16:01 -04005443 up_read(&left_root->fs_info->commit_root_sem);
Alexander Block70698302012-06-05 21:07:48 +02005444
5445 if (left_level == 0)
5446 btrfs_item_key_to_cpu(left_path->nodes[left_level],
5447 &left_key, left_path->slots[left_level]);
5448 else
5449 btrfs_node_key_to_cpu(left_path->nodes[left_level],
5450 &left_key, left_path->slots[left_level]);
5451 if (right_level == 0)
5452 btrfs_item_key_to_cpu(right_path->nodes[right_level],
5453 &right_key, right_path->slots[right_level]);
5454 else
5455 btrfs_node_key_to_cpu(right_path->nodes[right_level],
5456 &right_key, right_path->slots[right_level]);
5457
5458 left_end_reached = right_end_reached = 0;
5459 advance_left = advance_right = 0;
5460
5461 while (1) {
Alexander Block70698302012-06-05 21:07:48 +02005462 if (advance_left && !left_end_reached) {
5463 ret = tree_advance(left_root, left_path, &left_level,
5464 left_root_level,
5465 advance_left != ADVANCE_ONLY_NEXT,
5466 &left_key);
Liu Bofb770ae2016-07-05 12:10:14 -07005467 if (ret == -1)
Alexander Block70698302012-06-05 21:07:48 +02005468 left_end_reached = ADVANCE;
Liu Bofb770ae2016-07-05 12:10:14 -07005469 else if (ret < 0)
5470 goto out;
Alexander Block70698302012-06-05 21:07:48 +02005471 advance_left = 0;
5472 }
5473 if (advance_right && !right_end_reached) {
5474 ret = tree_advance(right_root, right_path, &right_level,
5475 right_root_level,
5476 advance_right != ADVANCE_ONLY_NEXT,
5477 &right_key);
Liu Bofb770ae2016-07-05 12:10:14 -07005478 if (ret == -1)
Alexander Block70698302012-06-05 21:07:48 +02005479 right_end_reached = ADVANCE;
Liu Bofb770ae2016-07-05 12:10:14 -07005480 else if (ret < 0)
5481 goto out;
Alexander Block70698302012-06-05 21:07:48 +02005482 advance_right = 0;
5483 }
5484
5485 if (left_end_reached && right_end_reached) {
5486 ret = 0;
5487 goto out;
5488 } else if (left_end_reached) {
5489 if (right_level == 0) {
5490 ret = changed_cb(left_root, right_root,
5491 left_path, right_path,
5492 &right_key,
5493 BTRFS_COMPARE_TREE_DELETED,
5494 ctx);
5495 if (ret < 0)
5496 goto out;
5497 }
5498 advance_right = ADVANCE;
5499 continue;
5500 } else if (right_end_reached) {
5501 if (left_level == 0) {
5502 ret = changed_cb(left_root, right_root,
5503 left_path, right_path,
5504 &left_key,
5505 BTRFS_COMPARE_TREE_NEW,
5506 ctx);
5507 if (ret < 0)
5508 goto out;
5509 }
5510 advance_left = ADVANCE;
5511 continue;
5512 }
5513
5514 if (left_level == 0 && right_level == 0) {
5515 cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
5516 if (cmp < 0) {
5517 ret = changed_cb(left_root, right_root,
5518 left_path, right_path,
5519 &left_key,
5520 BTRFS_COMPARE_TREE_NEW,
5521 ctx);
5522 if (ret < 0)
5523 goto out;
5524 advance_left = ADVANCE;
5525 } else if (cmp > 0) {
5526 ret = changed_cb(left_root, right_root,
5527 left_path, right_path,
5528 &right_key,
5529 BTRFS_COMPARE_TREE_DELETED,
5530 ctx);
5531 if (ret < 0)
5532 goto out;
5533 advance_right = ADVANCE;
5534 } else {
Fabian Frederickb99d9a62014-09-25 19:35:02 +02005535 enum btrfs_compare_tree_result result;
Josef Bacikba5e8f22013-08-16 16:52:55 -04005536
Chris Mason74dd17f2012-08-07 16:25:13 -04005537 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
Alexander Block70698302012-06-05 21:07:48 +02005538 ret = tree_compare_item(left_root, left_path,
5539 right_path, tmp_buf);
Josef Bacikba5e8f22013-08-16 16:52:55 -04005540 if (ret)
Fabian Frederickb99d9a62014-09-25 19:35:02 +02005541 result = BTRFS_COMPARE_TREE_CHANGED;
Josef Bacikba5e8f22013-08-16 16:52:55 -04005542 else
Fabian Frederickb99d9a62014-09-25 19:35:02 +02005543 result = BTRFS_COMPARE_TREE_SAME;
Josef Bacikba5e8f22013-08-16 16:52:55 -04005544 ret = changed_cb(left_root, right_root,
5545 left_path, right_path,
Fabian Frederickb99d9a62014-09-25 19:35:02 +02005546 &left_key, result, ctx);
Josef Bacikba5e8f22013-08-16 16:52:55 -04005547 if (ret < 0)
5548 goto out;
Alexander Block70698302012-06-05 21:07:48 +02005549 advance_left = ADVANCE;
5550 advance_right = ADVANCE;
5551 }
5552 } else if (left_level == right_level) {
5553 cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
5554 if (cmp < 0) {
5555 advance_left = ADVANCE;
5556 } else if (cmp > 0) {
5557 advance_right = ADVANCE;
5558 } else {
5559 left_blockptr = btrfs_node_blockptr(
5560 left_path->nodes[left_level],
5561 left_path->slots[left_level]);
5562 right_blockptr = btrfs_node_blockptr(
5563 right_path->nodes[right_level],
5564 right_path->slots[right_level]);
Filipe Manana6baa4292014-02-20 21:15:25 +00005565 left_gen = btrfs_node_ptr_generation(
5566 left_path->nodes[left_level],
5567 left_path->slots[left_level]);
5568 right_gen = btrfs_node_ptr_generation(
5569 right_path->nodes[right_level],
5570 right_path->slots[right_level]);
5571 if (left_blockptr == right_blockptr &&
5572 left_gen == right_gen) {
Alexander Block70698302012-06-05 21:07:48 +02005573 /*
5574 * As we're on a shared block, don't
5575 * allow to go deeper.
5576 */
5577 advance_left = ADVANCE_ONLY_NEXT;
5578 advance_right = ADVANCE_ONLY_NEXT;
5579 } else {
5580 advance_left = ADVANCE;
5581 advance_right = ADVANCE;
5582 }
5583 }
5584 } else if (left_level < right_level) {
5585 advance_right = ADVANCE;
5586 } else {
5587 advance_left = ADVANCE;
5588 }
5589 }
5590
5591out:
5592 btrfs_free_path(left_path);
5593 btrfs_free_path(right_path);
David Sterba8f282f72016-03-30 16:01:12 +02005594 kvfree(tmp_buf);
Alexander Block70698302012-06-05 21:07:48 +02005595 return ret;
5596}
5597
Chris Mason3f157a22008-06-25 16:01:31 -04005598/*
5599 * this is similar to btrfs_next_leaf, but does not try to preserve
5600 * and fixup the path. It looks for and returns the next key in the
Eric Sandeende78b512013-01-31 18:21:12 +00005601 * tree based on the current path and the min_trans parameters.
Chris Mason3f157a22008-06-25 16:01:31 -04005602 *
5603 * 0 is returned if another key is found, < 0 if there are any errors
5604 * and 1 is returned if there are no higher keys in the tree
5605 *
5606 * path->keep_locks should be set to 1 on the search made before
5607 * calling this function.
5608 */
Chris Masone7a84562008-06-25 16:01:31 -04005609int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
Eric Sandeende78b512013-01-31 18:21:12 +00005610 struct btrfs_key *key, int level, u64 min_trans)
Chris Masone7a84562008-06-25 16:01:31 -04005611{
Chris Masone7a84562008-06-25 16:01:31 -04005612 int slot;
5613 struct extent_buffer *c;
5614
Chris Mason934d3752008-12-08 16:43:10 -05005615 WARN_ON(!path->keep_locks);
Chris Masond3977122009-01-05 21:25:51 -05005616 while (level < BTRFS_MAX_LEVEL) {
Chris Masone7a84562008-06-25 16:01:31 -04005617 if (!path->nodes[level])
5618 return 1;
5619
5620 slot = path->slots[level] + 1;
5621 c = path->nodes[level];
Chris Mason3f157a22008-06-25 16:01:31 -04005622next:
Chris Masone7a84562008-06-25 16:01:31 -04005623 if (slot >= btrfs_header_nritems(c)) {
Yan Zheng33c66f42009-07-22 09:59:00 -04005624 int ret;
5625 int orig_lowest;
5626 struct btrfs_key cur_key;
5627 if (level + 1 >= BTRFS_MAX_LEVEL ||
5628 !path->nodes[level + 1])
Chris Masone7a84562008-06-25 16:01:31 -04005629 return 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04005630
5631 if (path->locks[level + 1]) {
5632 level++;
5633 continue;
5634 }
5635
5636 slot = btrfs_header_nritems(c) - 1;
5637 if (level == 0)
5638 btrfs_item_key_to_cpu(c, &cur_key, slot);
5639 else
5640 btrfs_node_key_to_cpu(c, &cur_key, slot);
5641
5642 orig_lowest = path->lowest_level;
David Sterbab3b4aa72011-04-21 01:20:15 +02005643 btrfs_release_path(path);
Yan Zheng33c66f42009-07-22 09:59:00 -04005644 path->lowest_level = level;
5645 ret = btrfs_search_slot(NULL, root, &cur_key, path,
5646 0, 0);
5647 path->lowest_level = orig_lowest;
5648 if (ret < 0)
5649 return ret;
5650
5651 c = path->nodes[level];
5652 slot = path->slots[level];
5653 if (ret == 0)
5654 slot++;
5655 goto next;
Chris Masone7a84562008-06-25 16:01:31 -04005656 }
Yan Zheng33c66f42009-07-22 09:59:00 -04005657
Chris Masone7a84562008-06-25 16:01:31 -04005658 if (level == 0)
5659 btrfs_item_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04005660 else {
Chris Mason3f157a22008-06-25 16:01:31 -04005661 u64 gen = btrfs_node_ptr_generation(c, slot);
5662
Chris Mason3f157a22008-06-25 16:01:31 -04005663 if (gen < min_trans) {
5664 slot++;
5665 goto next;
5666 }
Chris Masone7a84562008-06-25 16:01:31 -04005667 btrfs_node_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04005668 }
Chris Masone7a84562008-06-25 16:01:31 -04005669 return 0;
5670 }
5671 return 1;
5672}
5673
Chris Mason7bb86312007-12-11 09:25:06 -05005674/*
Chris Mason925baed2008-06-25 16:01:30 -04005675 * search the tree again to find a leaf with greater keys
Chris Mason0f70abe2007-02-28 16:46:22 -05005676 * returns 0 if it found something or 1 if there are no greater leaves.
5677 * returns < 0 on io errors.
Chris Mason97571fd2007-02-24 13:39:08 -05005678 */
Chris Mason234b63a2007-03-13 10:46:10 -04005679int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Masond97e63b2007-02-20 16:40:44 -05005680{
Jan Schmidt3d7806e2012-06-11 08:29:29 +02005681 return btrfs_next_old_leaf(root, path, 0);
5682}
5683
5684int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
5685 u64 time_seq)
5686{
Chris Masond97e63b2007-02-20 16:40:44 -05005687 int slot;
Chris Mason8e73f272009-04-03 10:14:18 -04005688 int level;
Chris Mason5f39d392007-10-15 16:14:19 -04005689 struct extent_buffer *c;
Chris Mason8e73f272009-04-03 10:14:18 -04005690 struct extent_buffer *next;
Chris Mason925baed2008-06-25 16:01:30 -04005691 struct btrfs_key key;
5692 u32 nritems;
5693 int ret;
Chris Mason8e73f272009-04-03 10:14:18 -04005694 int old_spinning = path->leave_spinning;
Chris Masonbd681512011-07-16 15:23:14 -04005695 int next_rw_lock = 0;
Chris Mason925baed2008-06-25 16:01:30 -04005696
5697 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Masond3977122009-01-05 21:25:51 -05005698 if (nritems == 0)
Chris Mason925baed2008-06-25 16:01:30 -04005699 return 1;
Chris Mason925baed2008-06-25 16:01:30 -04005700
Chris Mason8e73f272009-04-03 10:14:18 -04005701 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
5702again:
5703 level = 1;
5704 next = NULL;
Chris Masonbd681512011-07-16 15:23:14 -04005705 next_rw_lock = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02005706 btrfs_release_path(path);
Chris Mason8e73f272009-04-03 10:14:18 -04005707
Chris Masona2135012008-06-25 16:01:30 -04005708 path->keep_locks = 1;
Chris Mason31533fb2011-07-26 16:01:59 -04005709 path->leave_spinning = 1;
Chris Mason8e73f272009-04-03 10:14:18 -04005710
Jan Schmidt3d7806e2012-06-11 08:29:29 +02005711 if (time_seq)
5712 ret = btrfs_search_old_slot(root, &key, path, time_seq);
5713 else
5714 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
Chris Mason925baed2008-06-25 16:01:30 -04005715 path->keep_locks = 0;
5716
5717 if (ret < 0)
5718 return ret;
5719
Chris Masona2135012008-06-25 16:01:30 -04005720 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Mason168fd7d2008-06-25 16:01:30 -04005721 /*
5722 * by releasing the path above we dropped all our locks. A balance
5723 * could have added more items next to the key that used to be
5724 * at the very end of the block. So, check again here and
5725 * advance the path if there are now more items available.
5726 */
Chris Masona2135012008-06-25 16:01:30 -04005727 if (nritems > 0 && path->slots[0] < nritems - 1) {
Yan Zhenge457afe2009-07-22 09:59:00 -04005728 if (ret == 0)
5729 path->slots[0]++;
Chris Mason8e73f272009-04-03 10:14:18 -04005730 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04005731 goto done;
5732 }
Liu Bo0b43e042014-06-09 11:04:49 +08005733 /*
5734 * So the above check misses one case:
5735 * - after releasing the path above, someone has removed the item that
5736 * used to be at the very end of the block, and balance between leafs
5737 * gets another one with bigger key.offset to replace it.
5738 *
5739 * This one should be returned as well, or we can get leaf corruption
5740 * later(esp. in __btrfs_drop_extents()).
5741 *
5742 * And a bit more explanation about this check,
5743 * with ret > 0, the key isn't found, the path points to the slot
5744 * where it should be inserted, so the path->slots[0] item must be the
5745 * bigger one.
5746 */
5747 if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
5748 ret = 0;
5749 goto done;
5750 }
Chris Masond97e63b2007-02-20 16:40:44 -05005751
Chris Masond3977122009-01-05 21:25:51 -05005752 while (level < BTRFS_MAX_LEVEL) {
Chris Mason8e73f272009-04-03 10:14:18 -04005753 if (!path->nodes[level]) {
5754 ret = 1;
5755 goto done;
5756 }
Chris Mason5f39d392007-10-15 16:14:19 -04005757
Chris Masond97e63b2007-02-20 16:40:44 -05005758 slot = path->slots[level] + 1;
5759 c = path->nodes[level];
Chris Mason5f39d392007-10-15 16:14:19 -04005760 if (slot >= btrfs_header_nritems(c)) {
Chris Masond97e63b2007-02-20 16:40:44 -05005761 level++;
Chris Mason8e73f272009-04-03 10:14:18 -04005762 if (level == BTRFS_MAX_LEVEL) {
5763 ret = 1;
5764 goto done;
5765 }
Chris Masond97e63b2007-02-20 16:40:44 -05005766 continue;
5767 }
Chris Mason5f39d392007-10-15 16:14:19 -04005768
Chris Mason925baed2008-06-25 16:01:30 -04005769 if (next) {
Chris Masonbd681512011-07-16 15:23:14 -04005770 btrfs_tree_unlock_rw(next, next_rw_lock);
Chris Mason5f39d392007-10-15 16:14:19 -04005771 free_extent_buffer(next);
Chris Mason925baed2008-06-25 16:01:30 -04005772 }
Chris Mason5f39d392007-10-15 16:14:19 -04005773
Chris Mason8e73f272009-04-03 10:14:18 -04005774 next = c;
Chris Masonbd681512011-07-16 15:23:14 -04005775 next_rw_lock = path->locks[level];
Chris Mason8e73f272009-04-03 10:14:18 -04005776 ret = read_block_for_search(NULL, root, path, &next, level,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02005777 slot, &key, 0);
Chris Mason8e73f272009-04-03 10:14:18 -04005778 if (ret == -EAGAIN)
5779 goto again;
Chris Mason5f39d392007-10-15 16:14:19 -04005780
Chris Mason76a05b32009-05-14 13:24:30 -04005781 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02005782 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04005783 goto done;
5784 }
5785
Chris Mason5cd57b22008-06-25 16:01:30 -04005786 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04005787 ret = btrfs_try_tree_read_lock(next);
Jan Schmidtd42244a2012-06-22 14:51:15 +02005788 if (!ret && time_seq) {
5789 /*
5790 * If we don't get the lock, we may be racing
5791 * with push_leaf_left, holding that lock while
5792 * itself waiting for the leaf we've currently
5793 * locked. To solve this situation, we give up
5794 * on our lock and cycle.
5795 */
Jan Schmidtcf538832012-07-04 15:42:48 +02005796 free_extent_buffer(next);
Jan Schmidtd42244a2012-06-22 14:51:15 +02005797 btrfs_release_path(path);
5798 cond_resched();
5799 goto again;
5800 }
Chris Mason8e73f272009-04-03 10:14:18 -04005801 if (!ret) {
5802 btrfs_set_path_blocking(path);
Chris Masonbd681512011-07-16 15:23:14 -04005803 btrfs_tree_read_lock(next);
Chris Mason31533fb2011-07-26 16:01:59 -04005804 btrfs_clear_path_blocking(path, next,
Chris Masonbd681512011-07-16 15:23:14 -04005805 BTRFS_READ_LOCK);
Chris Mason8e73f272009-04-03 10:14:18 -04005806 }
Chris Mason31533fb2011-07-26 16:01:59 -04005807 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04005808 }
Chris Masond97e63b2007-02-20 16:40:44 -05005809 break;
5810 }
5811 path->slots[level] = slot;
Chris Masond3977122009-01-05 21:25:51 -05005812 while (1) {
Chris Masond97e63b2007-02-20 16:40:44 -05005813 level--;
5814 c = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04005815 if (path->locks[level])
Chris Masonbd681512011-07-16 15:23:14 -04005816 btrfs_tree_unlock_rw(c, path->locks[level]);
Chris Mason8e73f272009-04-03 10:14:18 -04005817
Chris Mason5f39d392007-10-15 16:14:19 -04005818 free_extent_buffer(c);
Chris Masond97e63b2007-02-20 16:40:44 -05005819 path->nodes[level] = next;
5820 path->slots[level] = 0;
Chris Masona74a4b92008-06-25 16:01:31 -04005821 if (!path->skip_locking)
Chris Masonbd681512011-07-16 15:23:14 -04005822 path->locks[level] = next_rw_lock;
Chris Masond97e63b2007-02-20 16:40:44 -05005823 if (!level)
5824 break;
Chris Masonb4ce94d2009-02-04 09:25:08 -05005825
Chris Mason8e73f272009-04-03 10:14:18 -04005826 ret = read_block_for_search(NULL, root, path, &next, level,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02005827 0, &key, 0);
Chris Mason8e73f272009-04-03 10:14:18 -04005828 if (ret == -EAGAIN)
5829 goto again;
5830
Chris Mason76a05b32009-05-14 13:24:30 -04005831 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02005832 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04005833 goto done;
5834 }
5835
Chris Mason5cd57b22008-06-25 16:01:30 -04005836 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04005837 ret = btrfs_try_tree_read_lock(next);
Chris Mason8e73f272009-04-03 10:14:18 -04005838 if (!ret) {
5839 btrfs_set_path_blocking(path);
Chris Masonbd681512011-07-16 15:23:14 -04005840 btrfs_tree_read_lock(next);
Chris Mason31533fb2011-07-26 16:01:59 -04005841 btrfs_clear_path_blocking(path, next,
Chris Masonbd681512011-07-16 15:23:14 -04005842 BTRFS_READ_LOCK);
Chris Mason8e73f272009-04-03 10:14:18 -04005843 }
Chris Mason31533fb2011-07-26 16:01:59 -04005844 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04005845 }
Chris Masond97e63b2007-02-20 16:40:44 -05005846 }
Chris Mason8e73f272009-04-03 10:14:18 -04005847 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04005848done:
Chris Masonf7c79f32012-03-19 15:54:38 -04005849 unlock_up(path, 0, 1, 0, NULL);
Chris Mason8e73f272009-04-03 10:14:18 -04005850 path->leave_spinning = old_spinning;
5851 if (!old_spinning)
5852 btrfs_set_path_blocking(path);
5853
5854 return ret;
Chris Masond97e63b2007-02-20 16:40:44 -05005855}
Chris Mason0b86a832008-03-24 15:01:56 -04005856
Chris Mason3f157a22008-06-25 16:01:31 -04005857/*
5858 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
5859 * searching until it gets past min_objectid or finds an item of 'type'
5860 *
5861 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5862 */
Chris Mason0b86a832008-03-24 15:01:56 -04005863int btrfs_previous_item(struct btrfs_root *root,
5864 struct btrfs_path *path, u64 min_objectid,
5865 int type)
5866{
5867 struct btrfs_key found_key;
5868 struct extent_buffer *leaf;
Chris Masone02119d2008-09-05 16:13:11 -04005869 u32 nritems;
Chris Mason0b86a832008-03-24 15:01:56 -04005870 int ret;
5871
Chris Masond3977122009-01-05 21:25:51 -05005872 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04005873 if (path->slots[0] == 0) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05005874 btrfs_set_path_blocking(path);
Chris Mason0b86a832008-03-24 15:01:56 -04005875 ret = btrfs_prev_leaf(root, path);
5876 if (ret != 0)
5877 return ret;
5878 } else {
5879 path->slots[0]--;
5880 }
5881 leaf = path->nodes[0];
Chris Masone02119d2008-09-05 16:13:11 -04005882 nritems = btrfs_header_nritems(leaf);
5883 if (nritems == 0)
5884 return 1;
5885 if (path->slots[0] == nritems)
5886 path->slots[0]--;
5887
Chris Mason0b86a832008-03-24 15:01:56 -04005888 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Chris Masone02119d2008-09-05 16:13:11 -04005889 if (found_key.objectid < min_objectid)
5890 break;
Yan Zheng0a4eefb2009-07-24 11:06:53 -04005891 if (found_key.type == type)
5892 return 0;
Chris Masone02119d2008-09-05 16:13:11 -04005893 if (found_key.objectid == min_objectid &&
5894 found_key.type < type)
5895 break;
Chris Mason0b86a832008-03-24 15:01:56 -04005896 }
5897 return 1;
5898}
Wang Shilongade2e0b2014-01-12 21:38:33 +08005899
5900/*
5901 * search in extent tree to find a previous Metadata/Data extent item with
5902 * min objecitd.
5903 *
5904 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5905 */
5906int btrfs_previous_extent_item(struct btrfs_root *root,
5907 struct btrfs_path *path, u64 min_objectid)
5908{
5909 struct btrfs_key found_key;
5910 struct extent_buffer *leaf;
5911 u32 nritems;
5912 int ret;
5913
5914 while (1) {
5915 if (path->slots[0] == 0) {
5916 btrfs_set_path_blocking(path);
5917 ret = btrfs_prev_leaf(root, path);
5918 if (ret != 0)
5919 return ret;
5920 } else {
5921 path->slots[0]--;
5922 }
5923 leaf = path->nodes[0];
5924 nritems = btrfs_header_nritems(leaf);
5925 if (nritems == 0)
5926 return 1;
5927 if (path->slots[0] == nritems)
5928 path->slots[0]--;
5929
5930 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5931 if (found_key.objectid < min_objectid)
5932 break;
5933 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
5934 found_key.type == BTRFS_METADATA_ITEM_KEY)
5935 return 0;
5936 if (found_key.objectid == min_objectid &&
5937 found_key.type < BTRFS_EXTENT_ITEM_KEY)
5938 break;
5939 }
5940 return 1;
5941}