/*
 * Copyright (C) 2009 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * delayed back reference update tracking. For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing. This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
			  struct btrfs_delayed_tree_ref *ref1)
{
	if (ref1->root < ref2->root)
		return -1;
	if (ref1->root > ref2->root)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
			  struct btrfs_delayed_data_ref *ref1)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * entries in the rb tree are ordered by the byte number of the extent,
 * type of the delayed backrefs and content of delayed backrefs.
 */
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
		      struct btrfs_delayed_ref_node *ref1,
		      bool compare_seq)
{
	if (ref1->bytenr < ref2->bytenr)
		return -1;
	if (ref1->bytenr > ref2->bytenr)
		return 1;
	if (ref1->is_head && ref2->is_head)
		return 0;
	if (ref2->is_head)
		return -1;
	if (ref1->is_head)
		return 1;
	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	/* merging of sequenced refs is not allowed */
	if (compare_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
				      btrfs_delayed_node_to_tree_ref(ref1));
	} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
		return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
				      btrfs_delayed_node_to_data_ref(ref1));
	}
	BUG();
	return 0;
}

/*
 * insert a new ref into the rbtree. This returns any existing refs
 * for the same (bytenr,parent) tuple, or NULL if the new node was properly
 * inserted.
 */
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
						  struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	struct btrfs_delayed_ref_node *ins;
	int cmp;

	ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 rb_node);

		cmp = comp_entry(entry, ins, 1);
		if (cmp < 0)
			p = &(*p)->rb_left;
		else if (cmp > 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color(node, root);
	return NULL;
}

/*
 * find a head entry based on bytenr. This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If return_bigger is given, the next bigger entry is returned if no exact
 * match is found.
 */
static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
						    u64 bytenr,
						    struct btrfs_delayed_ref_node **last,
						    int return_bigger)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_node *entry;
	int cmp = 0;

again:
	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);
		if (last)
			*last = entry;

		if (bytenr < entry->bytenr)
			cmp = -1;
		else if (bytenr > entry->bytenr)
			cmp = 1;
		else if (!btrfs_delayed_ref_is_head(entry))
			cmp = 1;
		else
			cmp = 0;

		if (cmp < 0)
			n = n->rb_left;
		else if (cmp > 0)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (cmp > 0) {
			n = rb_next(&entry->rb_node);
			if (!n)
				n = rb_first(root);
			entry = rb_entry(n, struct btrfs_delayed_ref_node,
					 rb_node);
			bytenr = entry->bytenr;
			return_bigger = 0;
			goto again;
		}
		return entry;
	}
	return NULL;
}

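/*
 * lock the mutex on a delayed ref head. The delayed_refs spinlock must
 * already be held; if the mutex is contended we drop the spinlock, sleep
 * on the mutex and then retake the spinlock. Returns 0 with the mutex
 * held, or -EAGAIN if the head was run and removed from the tree while
 * we waited, in which case the caller must look it up again.
 */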
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
			   struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	assert_spin_locked(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	atomic_inc(&head->node.refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (!head->node.in_tree) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref(&head->node);
	return 0;
}

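/*
 * remove a delayed ref from the rbtree, drop the reference the tree held
 * on it and update the pending entry counts on the ref root and the
 * transaction handle.
 */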
static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_node *ref)
{
	rb_erase(&ref->rb_node, &delayed_refs->root);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	delayed_refs->num_entries--;
	if (trans->delayed_ref_updates)
		trans->delayed_ref_updates--;
}

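/*
 * walk backwards from @ref over delayed refs queued for the same bytenr
 * and fold compatible entries into it: matching actions add their ref_mod,
 * opposite actions subtract it, and any node whose count reaches zero is
 * dropped. Entries with a seq at or beyond @seq are left alone so tree
 * mod log users still see them. Returns the number of refs merged away.
 */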
static int merge_ref(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_root *delayed_refs,
		     struct btrfs_delayed_ref_node *ref, u64 seq)
{
	struct rb_node *node;
	int merged = 0;
	int mod = 0;
	int done = 0;

	node = rb_prev(&ref->rb_node);
	while (node) {
		struct btrfs_delayed_ref_node *next;

		next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
		node = rb_prev(node);
		if (next->bytenr != ref->bytenr)
			break;
		if (seq && next->seq >= seq)
			break;
		if (comp_entry(ref, next, 0))
			continue;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				struct btrfs_delayed_ref_node *tmp;

				tmp = ref;
				ref = next;
				next = tmp;
				done = 1;
			}
			mod = -next->ref_mod;
		}

		merged++;
		drop_delayed_ref(trans, delayed_refs, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, ref);
			break;
		} else {
			/*
			 * You can't have multiples of the same ref on a tree
			 * block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}

		if (done)
			break;
		node = rb_prev(&ref->rb_node);
	}

	return merged;
}

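/*
 * merge the delayed refs queued behind @head. The lowest seq still needed
 * by a tree mod log user (if any) is looked up first so refs that must
 * stay visible to backref walkers are not merged away.
 */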
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct rb_node *node;
	u64 seq = 0;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *elem;

		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		seq = elem->seq;
	}
	spin_unlock(&fs_info->tree_mod_seq_lock);

	node = rb_prev(&head->node.rb_node);
	while (node) {
		struct btrfs_delayed_ref_node *ref;

		ref = rb_entry(node, struct btrfs_delayed_ref_node,
			       rb_node);
		if (ref->bytenr != head->node.bytenr)
			break;

		/* We can't merge refs that are outside of our seq count */
		if (seq && ref->seq >= seq)
			break;
		if (merge_ref(trans, delayed_refs, ref, seq))
			node = rb_prev(&head->node.rb_node);
		else
			node = rb_prev(node);
	}
}

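/*
 * check whether a delayed ref with the given seq must be held back because
 * a tree mod log user still needs to see the older state. Returns 1 if
 * the ref should wait, 0 if it is safe to process now.
 */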
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
			    struct btrfs_delayed_ref_root *delayed_refs,
			    u64 seq)
{
	struct seq_list *elem;
	int ret = 0;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		if (seq >= elem->seq) {
			pr_debug("holding back delayed_ref %llu, lowest is "
				 "%llu (%p)\n", seq, elem->seq, delayed_refs);
			ret = 1;
		}
	}

	spin_unlock(&fs_info->tree_mod_seq_lock);
	return ret;
}

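/*
 * gather a cluster of up to 32 delayed ref heads, starting at @start, so
 * that refs likely to land in the same extent tree leaf are processed
 * together. Returns 0 if any heads were added to @cluster, 1 if there
 * was nothing left to do.
 */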
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
			   struct list_head *cluster, u64 start)
{
	int count = 0;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct rb_node *node;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *head;

	delayed_refs = &trans->transaction->delayed_refs;
	if (start == 0) {
		node = rb_first(&delayed_refs->root);
	} else {
		ref = NULL;
		find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
		if (ref) {
			node = &ref->rb_node;
		} else
			node = rb_first(&delayed_refs->root);
	}
again:
	while (node && count < 32) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
		if (btrfs_delayed_ref_is_head(ref)) {
			head = btrfs_delayed_node_to_head(ref);
			if (list_empty(&head->cluster)) {
				list_add_tail(&head->cluster, cluster);
				delayed_refs->run_delayed_start =
					head->node.bytenr;
				count++;

				WARN_ON(delayed_refs->num_heads_ready == 0);
				delayed_refs->num_heads_ready--;
			} else if (count) {
				/* the goal of the clustering is to find extents
				 * that are likely to end up in the same extent
				 * leaf on disk. So, we don't want them spread
				 * all over the tree. Stop now if we've hit
				 * a head that was already in use
				 */
				break;
			}
		}
		node = rb_next(node);
	}
	if (count) {
		return 0;
	} else if (start) {
		/*
		 * we've gone to the end of the rbtree without finding any
		 * clusters. start from the beginning and try again
		 */
		start = 0;
		node = rb_first(&delayed_refs->root);
		goto again;
	}
	return 1;
}

/*
 * helper function to update an extent delayed ref in the
 * rbtree. existing and update must both have the same
 * bytenr and parent
 *
 * This may free existing if the update cancels out whatever
 * operation it was doing.
 */
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
		    struct btrfs_delayed_ref_root *delayed_refs,
		    struct btrfs_delayed_ref_node *existing,
		    struct btrfs_delayed_ref_node *update)
{
	if (update->action != existing->action) {
		/*
		 * this is effectively undoing either an add or a
		 * drop. We decrement the ref_mod, and if it goes
		 * down to zero we just delete the entry without
		 * ever changing the extent allocation tree.
		 */
		existing->ref_mod--;
		if (existing->ref_mod == 0)
			drop_delayed_ref(trans, delayed_refs, existing);
		else
			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
	} else {
		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
		/*
		 * the action on the existing ref matches
		 * the action on the ref we're trying to add.
		 * Bump the ref_mod by one so the backref that
		 * is eventually added/removed has the correct
		 * reference count
		 */
		existing->ref_mod += update->ref_mod;
	}
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
			 struct btrfs_delayed_ref_node *update)
{
	struct btrfs_delayed_ref_head *existing_ref;
	struct btrfs_delayed_ref_head *ref;

	existing_ref = btrfs_delayed_node_to_head(existing);
	ref = btrfs_delayed_node_to_head(update);
	BUG_ON(existing_ref->is_data != ref->is_data);

	if (ref->must_insert_reserved) {
		/* if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
		 * with an existing head ref without
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing_ref->must_insert_reserved = ref->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;

	}

	if (ref->extent_op) {
		if (!existing_ref->extent_op) {
			existing_ref->extent_op = ref->extent_op;
		} else {
			if (ref->extent_op->update_key) {
				memcpy(&existing_ref->extent_op->key,
				       &ref->extent_op->key,
				       sizeof(ref->extent_op->key));
				existing_ref->extent_op->update_key = 1;
			}
			if (ref->extent_op->update_flags) {
				existing_ref->extent_op->flags_to_set |=
					ref->extent_op->flags_to_set;
				existing_ref->extent_op->update_flags = 1;
			}
			btrfs_free_delayed_extent_op(ref->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation
	 */
	existing->ref_mod += update->ref_mod;
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
					  struct btrfs_trans_handle *trans,
					  struct btrfs_delayed_ref_node *ref,
					  u64 bytenr, u64 num_bytes,
					  int action, int is_data)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_ref_head *head_ref = NULL;
	struct btrfs_delayed_ref_root *delayed_refs;
	int count_mod = 1;
	int must_insert_reserved = 0;

	/*
	 * the head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
	 * the reserved accounting when the extent is finally added, or
	 * if a later modification deletes the delayed ref without ever
	 * inserting the extent into the extent allocation tree.
	 * ref->must_insert_reserved is the flag used to record
	 * that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = count_mod;
	ref->type = 0;
	ref->action = 0;
	ref->is_head = 1;
	ref->in_tree = 1;
	ref->seq = 0;

	head_ref = btrfs_delayed_node_to_head(ref);
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;

	INIT_LIST_HEAD(&head_ref->cluster);
	mutex_init(&head_ref->mutex);

	trace_btrfs_delayed_ref_head(ref, head_ref, action);

	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

	if (existing) {
		update_existing_head_ref(existing, ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
	} else {
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
					  struct btrfs_trans_handle *trans,
					  struct btrfs_delayed_ref_node *ref,
					  u64 bytenr, u64 num_bytes, u64 parent,
					  u64 ref_root, int level, int action,
					  int for_cow)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_tree_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;

	if (need_ref_seq(for_cow, ref_root))
		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
	ref->seq = seq;

	full_ref = btrfs_delayed_node_to_tree_ref(ref);
	full_ref->parent = parent;
	full_ref->root = ref_root;
	if (parent)
		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
	full_ref->level = level;

	trace_btrfs_delayed_tree_ref(ref, full_ref, action);

	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

	if (existing) {
		update_existing_ref(trans, delayed_refs, existing, ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
	} else {
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
					  struct btrfs_trans_handle *trans,
					  struct btrfs_delayed_ref_node *ref,
					  u64 bytenr, u64 num_bytes, u64 parent,
					  u64 ref_root, u64 owner, u64 offset,
					  int action, int for_cow)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_data_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;

	if (need_ref_seq(for_cow, ref_root))
		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
	ref->seq = seq;

	full_ref = btrfs_delayed_node_to_data_ref(ref);
	full_ref->parent = parent;
	full_ref->root = ref_root;
	if (parent)
		ref->type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref->type = BTRFS_EXTENT_DATA_REF_KEY;

	full_ref->objectid = owner;
	full_ref->offset = offset;

	trace_btrfs_delayed_data_ref(ref, full_ref, action);

	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

	if (existing) {
		update_existing_ref(trans, delayed_refs, existing, ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
	} else {
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
}

/*
 * add a delayed tree ref. This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 ref_root, int level, int action,
			       struct btrfs_delayed_extent_op *extent_op,
			       int for_cow)
{
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	BUG_ON(extent_op && extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
		return -ENOMEM;
	}

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
			     num_bytes, action, 0);

	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
			     num_bytes, parent, ref_root, level, action,
			     for_cow);
	spin_unlock(&delayed_refs->lock);
	if (need_ref_seq(for_cow, ref_root))
		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

	return 0;
}

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
			       u64 parent, u64 ref_root,
			       u64 owner, u64 offset, int action,
			       struct btrfs_delayed_extent_op *extent_op,
			       int for_cow)
{
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	BUG_ON(extent_op && !extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
			     num_bytes, action, 1);

	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
			     num_bytes, parent, ref_root, owner, offset,
			     action, for_cow);
	spin_unlock(&delayed_refs->lock);
	if (need_ref_seq(for_cow, ref_root))
		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

	return 0;
}

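/*
 * record an extent op (a flag and/or key update) against an extent without
 * changing its reference count. Only a head node is queued; the op is
 * applied when the delayed refs for this bytenr are run.
 */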
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
				struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
			     num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
			     extent_op->is_data);

	spin_unlock(&delayed_refs->lock);
	return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
	if (ref)
		return btrfs_delayed_node_to_head(ref);
	return NULL;
}

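/* destroy the slab caches used for delayed ref tracking */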
void btrfs_delayed_ref_exit(void)
{
	if (btrfs_delayed_ref_head_cachep)
		kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	if (btrfs_delayed_tree_ref_cachep)
		kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	if (btrfs_delayed_data_ref_cachep)
		kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	if (btrfs_delayed_extent_op_cachep)
		kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

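/*
 * create the slab caches used for delayed ref tracking. Returns 0 on
 * success or -ENOMEM if any cache could not be created.
 */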
int btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}