/*
 * Copyright (C) 2009 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"

/*
 * delayed back reference update tracking. For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing. This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

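/*
 * Illustrative summary of the flow (not a promise of the exact call graph):
 * callers such as the extent allocator queue work with
 * btrfs_add_delayed_tree_ref() or btrfs_add_delayed_data_ref(), which insert
 * a ref head plus a ref node into the per-transaction rbtree.  Later,
 * btrfs_run_delayed_refs() in extent-tree.c pulls clusters of heads via
 * btrfs_find_ref_cluster() and applies the accumulated modifications to the
 * extent allocation tree.
 */
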
/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
			  struct btrfs_delayed_tree_ref *ref1)
{
	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
			  struct btrfs_delayed_data_ref *ref1)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * entries in the rb tree are ordered by the byte number of the extent,
 * type of the delayed backrefs and content of delayed backrefs.
 */
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
		      struct btrfs_delayed_ref_node *ref1)
{
	if (ref1->bytenr < ref2->bytenr)
		return -1;
	if (ref1->bytenr > ref2->bytenr)
		return 1;
	if (ref1->is_head && ref2->is_head)
		return 0;
	if (ref2->is_head)
		return -1;
	if (ref1->is_head)
		return 1;
	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
				      btrfs_delayed_node_to_tree_ref(ref1));
	} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
		return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
				      btrfs_delayed_node_to_data_ref(ref1));
	}
	BUG();
	return 0;
}

/*
 * insert a new ref into the rbtree.  This returns the existing ref that
 * compares equal to the new node (same bytenr, type and backref content),
 * or NULL if the new node was properly inserted.
 */
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
						  struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	struct btrfs_delayed_ref_node *ins;
	int cmp;

	ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 rb_node);

		cmp = comp_entry(entry, ins);
		if (cmp < 0)
			p = &(*p)->rb_left;
		else if (cmp > 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color(node, root);
	return NULL;
}

/*
 * find a head entry based on bytenr. This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If 'last' is non-NULL it is set to the last entry examined by the
 * search, whether or not a head was found.
 */
static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
						    u64 bytenr,
						    struct btrfs_delayed_ref_node **last)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	int cmp;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);
		if (last)
			*last = entry;

		if (bytenr < entry->bytenr)
			cmp = -1;
		else if (bytenr > entry->bytenr)
			cmp = 1;
		else if (!btrfs_delayed_ref_is_head(entry))
			cmp = 1;
		else
			cmp = 0;

		if (cmp < 0)
			n = n->rb_left;
		else if (cmp > 0)
			n = n->rb_right;
		else
			return entry;
	}
	return NULL;
}

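/*
 * take the mutex on a delayed ref head.  Summarizing the code below: if the
 * mutex is contended we drop delayed_refs->lock, sleep on the mutex and then
 * re-take the spinlock.  If the head was removed from the tree while we
 * slept, -EAGAIN is returned so the caller can look the head up again.
 * Returns 0 with the mutex held on success.
 */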
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
			   struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	assert_spin_locked(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	atomic_inc(&head->node.refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (!head->node.in_tree) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref(&head->node);
	return 0;
}

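/*
 * gather a cluster of up to 32 delayed ref heads into 'cluster', beginning
 * with the first head at or after 'start' in bytenr order (wrapping back to
 * the beginning of the rbtree if nothing is found past 'start').  A head
 * that already belongs to another cluster ends the run once at least one
 * head has been collected, so heads likely to land in the same extent tree
 * leaf stay together.  Returns 0 if any heads were added, 1 otherwise.
 * Callers are expected to hold delayed_refs->lock.
 */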
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
			   struct list_head *cluster, u64 start)
{
	int count = 0;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct rb_node *node;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *head;

	delayed_refs = &trans->transaction->delayed_refs;
	if (start == 0) {
		node = rb_first(&delayed_refs->root);
	} else {
		ref = NULL;
		find_ref_head(&delayed_refs->root, start, &ref);
		if (ref) {
			struct btrfs_delayed_ref_node *tmp;

			node = rb_prev(&ref->rb_node);
			while (node) {
				tmp = rb_entry(node,
					       struct btrfs_delayed_ref_node,
					       rb_node);
				if (tmp->bytenr < start)
					break;
				ref = tmp;
				node = rb_prev(&ref->rb_node);
			}
			node = &ref->rb_node;
		} else
			node = rb_first(&delayed_refs->root);
	}
again:
	while (node && count < 32) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
		if (btrfs_delayed_ref_is_head(ref)) {
			head = btrfs_delayed_node_to_head(ref);
			if (list_empty(&head->cluster)) {
				list_add_tail(&head->cluster, cluster);
				delayed_refs->run_delayed_start =
					head->node.bytenr;
				count++;

				WARN_ON(delayed_refs->num_heads_ready == 0);
				delayed_refs->num_heads_ready--;
			} else if (count) {
				/* the goal of the clustering is to find extents
				 * that are likely to end up in the same extent
				 * leaf on disk.  So, we don't want them spread
				 * all over the tree.  Stop now if we've hit
				 * a head that was already in use
				 */
				break;
			}
		}
		node = rb_next(node);
	}
	if (count) {
		return 0;
	} else if (start) {
		/*
		 * we've gone to the end of the rbtree without finding any
		 * clusters.  start from the beginning and try again
		 */
		start = 0;
		node = rb_first(&delayed_refs->root);
		goto again;
	}
	return 1;
}

/*
 * This checks to see if there are any delayed refs in the
 * rbtree for a given bytenr.  It returns one if it finds any
 * and zero otherwise.
 *
 * If it only finds a head node, it returns 0.
 *
 * The idea is to use this when deciding if you can safely delete an
 * extent from the extent allocation tree.  There may be a pending
 * ref in the rbtree that adds or removes references, so as long as this
 * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
 * allocation tree.
 */
int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
{
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct rb_node *prev_node;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
	if (ref) {
		prev_node = rb_prev(&ref->rb_node);
		if (!prev_node)
			goto out;
		ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
			       rb_node);
		if (ref->bytenr == bytenr)
			ret = 1;
	}
out:
	spin_unlock(&delayed_refs->lock);
	return ret;
}

/*
 * helper function to look up the reference count and flags of an extent.
 *
 * the head node for a delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree.  the head
 * node may also store the extent flags to set.  This way you can check
 * to see what the reference count and extent flags will be once all of
 * the delayed refs have been processed.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;
	delayed_refs = &trans->transaction->delayed_refs;
again:
	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	spin_lock(&delayed_refs->lock);
	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
	if (ref) {
		head = btrfs_delayed_node_to_head(ref);
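		/*
		 * if someone else holds this head's mutex they are actively
		 * running its delayed refs.  Drop our locks and the path,
		 * wait for them to finish, and retry the whole lookup since
		 * the on-disk extent item may have changed underneath us.
		 */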
		if (!mutex_trylock(&head->mutex)) {
			atomic_inc(&ref->refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(root->fs_info->extent_root, path);

			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(ref);
			goto again;
		}
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += ref->ref_mod;
		mutex_unlock(&head->mutex);
	}
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out:
	spin_unlock(&delayed_refs->lock);
	btrfs_free_path(path);
	return ret;
}

/*
 * helper function to update an extent delayed ref in the
 * rbtree.  existing and update must both have the same
 * bytenr and parent
 *
 * This may free existing if the update cancels out whatever
 * operation it was doing.
 */
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
		    struct btrfs_delayed_ref_root *delayed_refs,
		    struct btrfs_delayed_ref_node *existing,
		    struct btrfs_delayed_ref_node *update)
{
	if (update->action != existing->action) {
		/*
		 * this is effectively undoing either an add or a
		 * drop.  We decrement the ref_mod, and if it goes
		 * down to zero we just delete the entry without
		 * ever changing the extent allocation tree.
		 */
		existing->ref_mod--;
		if (existing->ref_mod == 0) {
			rb_erase(&existing->rb_node,
				 &delayed_refs->root);
			existing->in_tree = 0;
			btrfs_put_delayed_ref(existing);
			delayed_refs->num_entries--;
			if (trans->delayed_ref_updates)
				trans->delayed_ref_updates--;
		} else {
			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
	} else {
		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
		/*
		 * the action on the existing ref matches
		 * the action on the ref we're trying to add.
		 * Bump the ref_mod by one so the backref that
		 * is eventually added/removed has the correct
		 * reference count
		 */
		existing->ref_mod += update->ref_mod;
	}
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
			 struct btrfs_delayed_ref_node *update)
{
	struct btrfs_delayed_ref_head *existing_ref;
	struct btrfs_delayed_ref_head *ref;

	existing_ref = btrfs_delayed_node_to_head(existing);
	ref = btrfs_delayed_node_to_head(update);
	BUG_ON(existing_ref->is_data != ref->is_data);

	if (ref->must_insert_reserved) {
		/* if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
		 * with an existing head ref without
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing_ref->must_insert_reserved = ref->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;

	}

	if (ref->extent_op) {
		if (!existing_ref->extent_op) {
			existing_ref->extent_op = ref->extent_op;
		} else {
			if (ref->extent_op->update_key) {
				memcpy(&existing_ref->extent_op->key,
				       &ref->extent_op->key,
				       sizeof(ref->extent_op->key));
				existing_ref->extent_op->update_key = 1;
			}
			if (ref->extent_op->update_flags) {
				existing_ref->extent_op->flags_to_set |=
					ref->extent_op->flags_to_set;
				existing_ref->extent_op->update_flags = 1;
			}
			kfree(ref->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation
	 */
	existing->ref_mod += update->ref_mod;
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
					 struct btrfs_delayed_ref_node *ref,
					 u64 bytenr, u64 num_bytes,
					 int action, int is_data)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_ref_head *head_ref = NULL;
	struct btrfs_delayed_ref_root *delayed_refs;
	int count_mod = 1;
	int must_insert_reserved = 0;

	/*
	 * the head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
	 * the reserved accounting when the extent is finally added, or
	 * if a later modification deletes the delayed ref without ever
	 * inserting the extent into the extent allocation tree.
	 * ref->must_insert_reserved is the flag used to record
	 * that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = count_mod;
	ref->type = 0;
	ref->action = 0;
	ref->is_head = 1;
	ref->in_tree = 1;

	head_ref = btrfs_delayed_node_to_head(ref);
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;

	INIT_LIST_HEAD(&head_ref->cluster);
	mutex_init(&head_ref->mutex);

	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

	if (existing) {
		update_existing_head_ref(existing, ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kfree(ref);
	} else {
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
	return 0;
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
					 struct btrfs_delayed_ref_node *ref,
					 u64 bytenr, u64 num_bytes, u64 parent,
					 u64 ref_root, int level, int action)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_tree_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;

	full_ref = btrfs_delayed_node_to_tree_ref(ref);
	if (parent) {
		full_ref->parent = parent;
		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
	} else {
		full_ref->root = ref_root;
		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
	}
	full_ref->level = level;

	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

	if (existing) {
		update_existing_ref(trans, delayed_refs, existing, ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kfree(ref);
	} else {
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
	return 0;
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
					 struct btrfs_delayed_ref_node *ref,
					 u64 bytenr, u64 num_bytes, u64 parent,
					 u64 ref_root, u64 owner, u64 offset,
					 int action)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_data_ref *full_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;

	full_ref = btrfs_delayed_node_to_data_ref(ref);
	if (parent) {
		full_ref->parent = parent;
		ref->type = BTRFS_SHARED_DATA_REF_KEY;
	} else {
		full_ref->root = ref_root;
		ref->type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	full_ref->objectid = owner;
	full_ref->offset = offset;

	existing = tree_insert(&delayed_refs->root, &ref->rb_node);

	if (existing) {
		update_existing_ref(trans, delayed_refs, existing, ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kfree(ref);
	} else {
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
	return 0;
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 ref_root, int level, int action,
			       struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	BUG_ON(extent_op && extent_op->is_data);
	ref = kmalloc(sizeof(*ref), GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
	if (!head_ref) {
		kfree(ref);
		return -ENOMEM;
	}

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
				   action, 0);
	BUG_ON(ret);

	ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
				   parent, ref_root, level, action);
	BUG_ON(ret);
	spin_unlock(&delayed_refs->lock);
	return 0;
}

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
			       u64 parent, u64 ref_root,
			       u64 owner, u64 offset, int action,
			       struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	BUG_ON(extent_op && !extent_op->is_data);
	ref = kmalloc(sizeof(*ref), GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
	if (!head_ref) {
		kfree(ref);
		return -ENOMEM;
	}

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
				   action, 1);
	BUG_ON(ret);

	ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
				   parent, ref_root, owner, offset, action);
	BUG_ON(ret);
	spin_unlock(&delayed_refs->lock);
	return 0;
}

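/*
 * record an extent_op (a pending key and/or flags update) against an extent
 * without changing its reference count.  This just adds or updates a ref
 * head with BTRFS_UPDATE_DELAYED_HEAD, so the extent_op is applied when the
 * delayed refs for this bytenr are run.
 */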
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
				   extent_op->is_data);
	BUG_ON(ret);

	spin_unlock(&delayed_refs->lock);
	return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
	if (ref)
		return btrfs_delayed_node_to_head(ref);
	return NULL;
}

/*
 * add a delayed ref to the tree.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 *
 * The main point of this call is to add and remove a backreference in a single
 * shot, taking the lock only once, and only searching for the head node once.
 *
 * It is the same as doing a ref add and delete in two separate calls.
 */
#if 0
int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
			     u64 bytenr, u64 num_bytes, u64 orig_parent,
			     u64 parent, u64 orig_ref_root, u64 ref_root,
			     u64 orig_ref_generation, u64 ref_generation,
			     u64 owner_objectid, int pin)
{
	struct btrfs_delayed_ref *ref;
	struct btrfs_delayed_ref *old_ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	ref = kmalloc(sizeof(*ref), GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
	if (!old_ref) {
		kfree(ref);
		return -ENOMEM;
	}

	/*
	 * the parent = 0 case comes from cases where we don't actually
	 * know the parent yet.  It will get updated later via an add/drop
	 * pair.
	 */
	if (parent == 0)
		parent = bytenr;
	if (orig_parent == 0)
		orig_parent = bytenr;

	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
	if (!head_ref) {
		kfree(ref);
		kfree(old_ref);
		return -ENOMEM;
	}
	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
				      (u64)-1, 0, 0, 0,
				      BTRFS_UPDATE_DELAYED_HEAD, 0);
	BUG_ON(ret);

	ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
				      parent, ref_root, ref_generation,
				      owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
	BUG_ON(ret);

	ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
				      orig_parent, orig_ref_root,
				      orig_ref_generation, owner_objectid,
				      BTRFS_DROP_DELAYED_REF, pin);
	BUG_ON(ret);
	spin_unlock(&delayed_refs->lock);
	return 0;
}
#endif