/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

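/*
 * Rough sketch of the life cycle (a hedged summary, not a spec):
 *
 *   btrfs_add_delayed_tree_ref()/btrfs_add_delayed_data_ref() allocate a
 *   head ref plus a tree/data ref from the caches above and insert both
 *   into the per-transaction rbtree under delayed_refs->lock.
 *
 *   Later, the delayed ref processing code (btrfs_run_delayed_refs() in
 *   extent-tree.c) pulls clusters of heads via btrfs_find_ref_cluster()
 *   and applies the accumulated ref_mod deltas to the extent allocation
 *   tree before the transaction commits.
 */
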
/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
			  struct btrfs_delayed_tree_ref *ref1)
{
	if (ref1->root < ref2->root)
		return -1;
	if (ref1->root > ref2->root)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
			  struct btrfs_delayed_data_ref *ref1)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * entries in the rb tree are ordered by the byte number of the extent,
 * type of the delayed backrefs and content of delayed backrefs.
 */
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
		      struct btrfs_delayed_ref_node *ref1,
		      bool compare_seq)
{
	if (ref1->bytenr < ref2->bytenr)
		return -1;
	if (ref1->bytenr > ref2->bytenr)
		return 1;
	if (ref1->is_head && ref2->is_head)
		return 0;
	if (ref2->is_head)
		return -1;
	if (ref1->is_head)
		return 1;
	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	/* merging of sequenced refs is not allowed */
	if (compare_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
				      btrfs_delayed_node_to_tree_ref(ref1));
	} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
		return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
				      btrfs_delayed_node_to_data_ref(ref1));
	}
	BUG();
	return 0;
}

/*
 * insert a new ref into the rbtree.  This returns any existing refs
 * for the same (bytenr,parent) tuple, or NULL if the new node was properly
 * inserted.
 */
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
						  struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	struct btrfs_delayed_ref_node *ins;
	int cmp;

	ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 rb_node);

		cmp = comp_entry(entry, ins, 1);
		if (cmp < 0)
			p = &(*p)->rb_left;
		else if (cmp > 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color(node, root);
	return NULL;
}

/*
 * find a head entry based on bytenr.  This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If return_bigger is given, the next bigger entry is returned if no exact
 * match is found.
 */
static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
				  u64 bytenr,
				  struct btrfs_delayed_ref_node **last,
				  int return_bigger)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_node *entry;
	int cmp = 0;

again:
	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);
		if (last)
			*last = entry;

		if (bytenr < entry->bytenr)
			cmp = -1;
		else if (bytenr > entry->bytenr)
			cmp = 1;
		else if (!btrfs_delayed_ref_is_head(entry))
			cmp = 1;
		else
			cmp = 0;

		if (cmp < 0)
			n = n->rb_left;
		else if (cmp > 0)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (cmp > 0) {
			n = rb_next(&entry->rb_node);
			if (!n)
				n = rb_first(root);
			entry = rb_entry(n, struct btrfs_delayed_ref_node,
					 rb_node);
			bytenr = entry->bytenr;
			return_bigger = 0;
			goto again;
		}
		return entry;
	}
	return NULL;
}

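/*
 * Take head->mutex without deadlocking against delayed_refs->lock, which
 * the caller already holds: if the trylock fails we pin the head, drop the
 * spinlock, sleep on the mutex and then retake the spinlock.  Returns 0
 * with both locks held, or -EAGAIN if the head was run and removed from
 * the tree while we slept, in which case the caller must look it up again.
 */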
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
			   struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	assert_spin_locked(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	atomic_inc(&head->node.refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (!head->node.in_tree) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref(&head->node);
	return 0;
}

static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_node *ref)
{
	rb_erase(&ref->rb_node, &delayed_refs->root);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	delayed_refs->num_entries--;
	if (trans->delayed_ref_updates)
		trans->delayed_ref_updates--;
}

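/*
 * Walk backwards from @ref over entries with the same bytenr and fold any
 * entry that compares equal (ignoring seq) into @ref: matching actions add
 * their ref_mod, opposing actions subtract it, and a ref whose ref_mod
 * drops to zero is deleted outright.  Entries at or beyond @seq are left
 * alone.  Returns the number of entries that were merged away.
 */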
static int merge_ref(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_root *delayed_refs,
		     struct btrfs_delayed_ref_node *ref, u64 seq)
{
	struct rb_node *node;
	int merged = 0;
	int mod = 0;
	int done = 0;

	node = rb_prev(&ref->rb_node);
	while (node) {
		struct btrfs_delayed_ref_node *next;

		next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
		node = rb_prev(node);
		if (next->bytenr != ref->bytenr)
			break;
		if (seq && next->seq >= seq)
			break;
		if (comp_entry(ref, next, 0))
			continue;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				struct btrfs_delayed_ref_node *tmp;

				tmp = ref;
				ref = next;
				next = tmp;
				done = 1;
			}
			mod = -next->ref_mod;
		}

		merged++;
		drop_delayed_ref(trans, delayed_refs, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, ref);
			break;
		} else {
			/*
			 * You can't have multiples of the same ref on a tree
			 * block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}

		if (done)
			break;
		node = rb_prev(&ref->rb_node);
	}

	return merged;
}

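/*
 * Merge all mergeable delayed refs hanging off @head.  Refs younger than
 * the oldest tree mod seq still in use (e.g. by a running backref walk)
 * are skipped so their individual history stays visible.  For example, an
 * ADD followed by a DROP of the same backref with no seq holder in between
 * simply cancels out, and both entries are removed before they are run.
 */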
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct rb_node *node;
	u64 seq = 0;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *elem;

		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		seq = elem->seq;
	}
	spin_unlock(&fs_info->tree_mod_seq_lock);

	node = rb_prev(&head->node.rb_node);
	while (node) {
		struct btrfs_delayed_ref_node *ref;

		ref = rb_entry(node, struct btrfs_delayed_ref_node,
			       rb_node);
		if (ref->bytenr != head->node.bytenr)
			break;

		/* We can't merge refs that are outside of our seq count */
		if (seq && ref->seq >= seq)
			break;
		if (merge_ref(trans, delayed_refs, ref, seq))
			node = rb_prev(&head->node.rb_node);
		else
			node = rb_prev(node);
	}
}

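/*
 * Returns 1 if a delayed ref with sequence number @seq must be held back
 * because some tree mod log user registered an older (or equal) sequence
 * number and may still need to see the pre-change state; returns 0 when it
 * is safe to run the ref.
 */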
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
			    struct btrfs_delayed_ref_root *delayed_refs,
			    u64 seq)
{
	struct seq_list *elem;
	int ret = 0;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		if (seq >= elem->seq) {
			pr_debug("holding back delayed_ref %llu, lowest is "
				 "%llu (%p)\n", seq, elem->seq, delayed_refs);
			ret = 1;
		}
	}

	spin_unlock(&fs_info->tree_mod_seq_lock);
	return ret;
}

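/*
 * Gather up to 32 delayed ref heads into @cluster, starting just after
 * @start and wrapping around to the beginning of the rbtree if needed.  A
 * head that is already on another cluster ends the scan once at least one
 * head has been collected.  Returns 0 if something was added to @cluster
 * and 1 if there was nothing left to cluster.
 */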
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
			   struct list_head *cluster, u64 start)
{
	int count = 0;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct rb_node *node;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *head;

	delayed_refs = &trans->transaction->delayed_refs;
	if (start == 0) {
		node = rb_first(&delayed_refs->root);
	} else {
		ref = NULL;
		find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
		if (ref) {
			node = &ref->rb_node;
		} else
			node = rb_first(&delayed_refs->root);
	}
again:
	while (node && count < 32) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
		if (btrfs_delayed_ref_is_head(ref)) {
			head = btrfs_delayed_node_to_head(ref);
			if (list_empty(&head->cluster)) {
				list_add_tail(&head->cluster, cluster);
				delayed_refs->run_delayed_start =
					head->node.bytenr;
				count++;

				WARN_ON(delayed_refs->num_heads_ready == 0);
				delayed_refs->num_heads_ready--;
			} else if (count) {
				/* the goal of the clustering is to find extents
				 * that are likely to end up in the same extent
				 * leaf on disk.  So, we don't want them spread
				 * all over the tree.  Stop now if we've hit
				 * a head that was already in use
				 */
				break;
			}
		}
		node = rb_next(node);
	}
	if (count) {
		return 0;
	} else if (start) {
		/*
		 * we've gone to the end of the rbtree without finding any
		 * clusters.  start from the beginning and try again
		 */
		start = 0;
		node = rb_first(&delayed_refs->root);
		goto again;
	}
	return 1;
}

void btrfs_release_ref_cluster(struct list_head *cluster)
{
	struct list_head *pos, *q;

	list_for_each_safe(pos, q, cluster)
		list_del_init(pos);
}

/*
 * helper function to update an extent delayed ref in the
 * rbtree.  existing and update must both have the same
 * bytenr and parent
 *
 * This may free existing if the update cancels out whatever
 * operation it was doing.
 */
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
		    struct btrfs_delayed_ref_root *delayed_refs,
		    struct btrfs_delayed_ref_node *existing,
		    struct btrfs_delayed_ref_node *update)
{
	if (update->action != existing->action) {
		/*
		 * this is effectively undoing either an add or a
		 * drop.  We decrement the ref_mod, and if it goes
		 * down to zero we just delete the entry without
		 * ever changing the extent allocation tree.
		 */
		existing->ref_mod--;
		if (existing->ref_mod == 0)
			drop_delayed_ref(trans, delayed_refs, existing);
		else
			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
	} else {
		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
		/*
		 * the action on the existing ref matches
		 * the action on the ref we're trying to add.
		 * Bump the ref_mod by one so the backref that
		 * is eventually added/removed has the correct
		 * reference count
		 */
		existing->ref_mod += update->ref_mod;
	}
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
			 struct btrfs_delayed_ref_node *update)
{
	struct btrfs_delayed_ref_head *existing_ref;
	struct btrfs_delayed_ref_head *ref;

	existing_ref = btrfs_delayed_node_to_head(existing);
	ref = btrfs_delayed_node_to_head(update);
	BUG_ON(existing_ref->is_data != ref->is_data);

	if (ref->must_insert_reserved) {
		/* if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
		 * with an existing head ref without
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing_ref->must_insert_reserved = ref->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;

	}

	if (ref->extent_op) {
		if (!existing_ref->extent_op) {
			existing_ref->extent_op = ref->extent_op;
		} else {
			if (ref->extent_op->update_key) {
				memcpy(&existing_ref->extent_op->key,
				       &ref->extent_op->key,
				       sizeof(ref->extent_op->key));
				existing_ref->extent_op->update_key = 1;
			}
			if (ref->extent_op->update_flags) {
				existing_ref->extent_op->flags_to_set |=
					ref->extent_op->flags_to_set;
				existing_ref->extent_op->update_flags = 1;
			}
			btrfs_free_delayed_extent_op(ref->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation
	 */
	existing->ref_mod += update->ref_mod;
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
					  struct btrfs_trans_handle *trans,
					  struct btrfs_delayed_ref_node *ref,
					  u64 bytenr, u64 num_bytes,
					  int action, int is_data)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_ref_head *head_ref = NULL;
	struct btrfs_delayed_ref_root *delayed_refs;
	int count_mod = 1;
	int must_insert_reserved = 0;

	/*
	 * the head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
	 * the reserved accounting when the extent is finally added, or
	 * if a later modification deletes the delayed ref without ever
	 * inserting the extent into the extent allocation tree.
	 * ref->must_insert_reserved is the flag used to record
	 * that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = count_mod;
	ref->type = 0;
	ref->action = 0;
	ref->is_head = 1;
	ref->in_tree = 1;
	ref->seq = 0;

	head_ref = btrfs_delayed_node_to_head(ref);
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;

	INIT_LIST_HEAD(&head_ref->cluster);
	mutex_init(&head_ref->mutex);

	trace_btrfs_delayed_ref_head(ref, head_ref, action);

	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

	if (existing) {
		update_existing_head_ref(existing, ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
	} else {
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
					  struct btrfs_trans_handle *trans,
					  struct btrfs_delayed_ref_node *ref,
					  u64 bytenr, u64 num_bytes, u64 parent,
					  u64 ref_root, int level, int action,
					  int for_cow)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_tree_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;

	if (need_ref_seq(for_cow, ref_root))
		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
	ref->seq = seq;

	full_ref = btrfs_delayed_node_to_tree_ref(ref);
	full_ref->parent = parent;
	full_ref->root = ref_root;
	if (parent)
		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
	full_ref->level = level;

	trace_btrfs_delayed_tree_ref(ref, full_ref, action);

	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

	if (existing) {
		update_existing_ref(trans, delayed_refs, existing, ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
	} else {
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
					  struct btrfs_trans_handle *trans,
					  struct btrfs_delayed_ref_node *ref,
					  u64 bytenr, u64 num_bytes, u64 parent,
					  u64 ref_root, u64 owner, u64 offset,
					  int action, int for_cow)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_data_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;

	if (need_ref_seq(for_cow, ref_root))
		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
	ref->seq = seq;

	full_ref = btrfs_delayed_node_to_data_ref(ref);
	full_ref->parent = parent;
	full_ref->root = ref_root;
	if (parent)
		ref->type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref->type = BTRFS_EXTENT_DATA_REF_KEY;

	full_ref->objectid = owner;
	full_ref->offset = offset;

	trace_btrfs_delayed_data_ref(ref, full_ref, action);

	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

	if (existing) {
		update_existing_ref(trans, delayed_refs, existing, ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
	} else {
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 ref_root, int level, int action,
			       struct btrfs_delayed_extent_op *extent_op,
			       int for_cow)
{
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	BUG_ON(extent_op && extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
		return -ENOMEM;
	}

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
			     num_bytes, action, 0);

	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
			     num_bytes, parent, ref_root, level, action,
			     for_cow);
	spin_unlock(&delayed_refs->lock);
	if (need_ref_seq(for_cow, ref_root))
		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

	return 0;
}

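/*
 * Illustrative call only (the surrounding variable names here are
 * hypothetical): a caller that just allocated a new tree block would
 * typically queue its backref roughly as
 *
 *	ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, bytenr,
 *					 root->leafsize, 0, root_objectid,
 *					 level, BTRFS_ADD_DELAYED_EXTENT,
 *					 NULL, for_cow);
 *
 * with parent == 0 for a ref keyed by root objectid and a non-zero parent
 * for a shared (full backref) block.
 */
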
/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
			       struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
			       u64 parent, u64 ref_root,
			       u64 owner, u64 offset, int action,
			       struct btrfs_delayed_extent_op *extent_op,
			       int for_cow)
{
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	BUG_ON(extent_op && !extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
			     num_bytes, action, 1);

	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
			     num_bytes, parent, ref_root, owner, offset,
			     action, for_cow);
	spin_unlock(&delayed_refs->lock);
	if (need_ref_seq(for_cow, ref_root))
		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

	return 0;
}

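/*
 * Queue only a head node carrying an extent_op (a deferred flags and/or
 * key update for the extent item).  BTRFS_UPDATE_DELAYED_HEAD keeps
 * count_mod at zero, so this changes no reference counts; it just records
 * work to do when the head is run.
 */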
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
				struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
			     num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
			     extent_op->is_data);

	spin_unlock(&delayed_refs->lock);
	return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
	if (ref)
		return btrfs_delayed_node_to_head(ref);
	return NULL;
}

void btrfs_delayed_ref_exit(void)
{
	if (btrfs_delayed_ref_head_cachep)
		kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	if (btrfs_delayed_tree_ref_cachep)
		kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	if (btrfs_delayed_data_ref_cachep)
		kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	if (btrfs_delayed_extent_op_cachep)
		kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}