blob: 39adb68a653f7a6fe982a7ff5401097c1ffe8b20 [file] [log] [blame]
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001/*
2 * Copyright (C) 2009 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090024#include <linux/slab.h>
Yan Zheng5d4f98a2009-06-10 10:45:14 -040025#include "ctree.h"
26#include "disk-io.h"
27#include "transaction.h"
28#include "volumes.h"
29#include "locking.h"
30#include "btrfs_inode.h"
31#include "async-thread.h"
32
33/*
34 * backref_node, mapping_node and tree_block start with this
35 */
36struct tree_entry {
37 struct rb_node rb_node;
38 u64 bytenr;
39};
40
41/*
42 * present a tree block in the backref cache
43 */
44struct backref_node {
45 struct rb_node rb_node;
46 u64 bytenr;
Yan, Zheng3fd0a552010-05-16 10:49:59 -040047
48 u64 new_bytenr;
49 /* objectid of tree block owner, can be not uptodate */
Yan Zheng5d4f98a2009-06-10 10:45:14 -040050 u64 owner;
Yan, Zheng3fd0a552010-05-16 10:49:59 -040051 /* link to pending, changed or detached list */
52 struct list_head list;
Yan Zheng5d4f98a2009-06-10 10:45:14 -040053 /* list of upper level blocks reference this block */
54 struct list_head upper;
55 /* list of child blocks in the cache */
56 struct list_head lower;
57 /* NULL if this node is not tree root */
58 struct btrfs_root *root;
59 /* extent buffer got by COW the block */
60 struct extent_buffer *eb;
61 /* level of tree block */
62 unsigned int level:8;
Yan, Zheng3fd0a552010-05-16 10:49:59 -040063 /* is the block in non-reference counted tree */
64 unsigned int cowonly:1;
65 /* 1 if no child node in the cache */
Yan Zheng5d4f98a2009-06-10 10:45:14 -040066 unsigned int lowest:1;
67 /* is the extent buffer locked */
68 unsigned int locked:1;
69 /* has the block been processed */
70 unsigned int processed:1;
71 /* have backrefs of this block been checked */
72 unsigned int checked:1;
Yan, Zheng3fd0a552010-05-16 10:49:59 -040073 /*
74 * 1 if corresponding block has been cowed but some upper
75 * level block pointers may not point to the new location
76 */
77 unsigned int pending:1;
78 /*
79 * 1 if the backref node isn't connected to any other
80 * backref node.
81 */
82 unsigned int detached:1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -040083};
84
85/*
86 * present a block pointer in the backref cache
87 */
88struct backref_edge {
89 struct list_head list[2];
90 struct backref_node *node[2];
Yan Zheng5d4f98a2009-06-10 10:45:14 -040091};
92
93#define LOWER 0
94#define UPPER 1
95
96struct backref_cache {
97 /* red black tree of all backref nodes in the cache */
98 struct rb_root rb_root;
Yan, Zheng3fd0a552010-05-16 10:49:59 -040099 /* for passing backref nodes to btrfs_reloc_cow_block */
100 struct backref_node *path[BTRFS_MAX_LEVEL];
101 /*
102 * list of blocks that have been cowed but some block
103 * pointers in upper level blocks may not reflect the
104 * new location
105 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400106 struct list_head pending[BTRFS_MAX_LEVEL];
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400107 /* list of backref nodes with no child node */
108 struct list_head leaves;
109 /* list of blocks that have been cowed in current transaction */
110 struct list_head changed;
111 /* list of detached backref node. */
112 struct list_head detached;
113
114 u64 last_trans;
115
116 int nr_nodes;
117 int nr_edges;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400118};
119
120/*
121 * map address of tree root to tree
122 */
123struct mapping_node {
124 struct rb_node rb_node;
125 u64 bytenr;
126 void *data;
127};
128
129struct mapping_tree {
130 struct rb_root rb_root;
131 spinlock_t lock;
132};
133
134/*
135 * present a tree block to process
136 */
137struct tree_block {
138 struct rb_node rb_node;
139 u64 bytenr;
140 struct btrfs_key key;
141 unsigned int level:8;
142 unsigned int key_ready:1;
143};
144
Yan, Zheng0257bb82009-09-24 09:17:31 -0400145#define MAX_EXTENTS 128
146
147struct file_extent_cluster {
148 u64 start;
149 u64 end;
150 u64 boundary[MAX_EXTENTS];
151 unsigned int nr;
152};
153
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400154struct reloc_control {
155 /* block group to relocate */
156 struct btrfs_block_group_cache *block_group;
157 /* extent tree */
158 struct btrfs_root *extent_root;
159 /* inode for moving data */
160 struct inode *data_inode;
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400161
162 struct btrfs_block_rsv *block_rsv;
163
164 struct backref_cache backref_cache;
165
166 struct file_extent_cluster cluster;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400167 /* tree blocks have been processed */
168 struct extent_io_tree processed_blocks;
169 /* map start of tree root to corresponding reloc tree */
170 struct mapping_tree reloc_root_tree;
171 /* list of reloc trees */
172 struct list_head reloc_roots;
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400173 /* size of metadata reservation for merging reloc trees */
174 u64 merging_rsv_size;
175 /* size of relocated tree nodes */
176 u64 nodes_relocated;
177
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400178 u64 search_start;
179 u64 extents_found;
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400180
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400181 unsigned int stage:8;
182 unsigned int create_reloc_tree:1;
183 unsigned int merge_reloc_tree:1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400184 unsigned int found_file_extent:1;
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400185 unsigned int commit_transaction:1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400186};
187
188/* stages of data relocation */
189#define MOVE_DATA_EXTENTS 0
190#define UPDATE_DATA_PTRS 1
191
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400192static void remove_backref_node(struct backref_cache *cache,
193 struct backref_node *node);
194static void __mark_block_processed(struct reloc_control *rc,
195 struct backref_node *node);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400196
197static void mapping_tree_init(struct mapping_tree *tree)
198{
Eric Paris6bef4d32010-02-23 19:43:04 +0000199 tree->rb_root = RB_ROOT;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400200 spin_lock_init(&tree->lock);
201}
202
203static void backref_cache_init(struct backref_cache *cache)
204{
205 int i;
Eric Paris6bef4d32010-02-23 19:43:04 +0000206 cache->rb_root = RB_ROOT;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400207 for (i = 0; i < BTRFS_MAX_LEVEL; i++)
208 INIT_LIST_HEAD(&cache->pending[i]);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400209 INIT_LIST_HEAD(&cache->changed);
210 INIT_LIST_HEAD(&cache->detached);
211 INIT_LIST_HEAD(&cache->leaves);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400212}
213
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400214static void backref_cache_cleanup(struct backref_cache *cache)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400215{
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400216 struct backref_node *node;
217 int i;
218
219 while (!list_empty(&cache->detached)) {
220 node = list_entry(cache->detached.next,
221 struct backref_node, list);
222 remove_backref_node(cache, node);
223 }
224
225 while (!list_empty(&cache->leaves)) {
226 node = list_entry(cache->leaves.next,
227 struct backref_node, lower);
228 remove_backref_node(cache, node);
229 }
230
231 cache->last_trans = 0;
232
233 for (i = 0; i < BTRFS_MAX_LEVEL; i++)
234 BUG_ON(!list_empty(&cache->pending[i]));
235 BUG_ON(!list_empty(&cache->changed));
236 BUG_ON(!list_empty(&cache->detached));
237 BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
238 BUG_ON(cache->nr_nodes);
239 BUG_ON(cache->nr_edges);
240}
241
242static struct backref_node *alloc_backref_node(struct backref_cache *cache)
243{
244 struct backref_node *node;
245
246 node = kzalloc(sizeof(*node), GFP_NOFS);
247 if (node) {
248 INIT_LIST_HEAD(&node->list);
249 INIT_LIST_HEAD(&node->upper);
250 INIT_LIST_HEAD(&node->lower);
251 RB_CLEAR_NODE(&node->rb_node);
252 cache->nr_nodes++;
253 }
254 return node;
255}
256
257static void free_backref_node(struct backref_cache *cache,
258 struct backref_node *node)
259{
260 if (node) {
261 cache->nr_nodes--;
262 kfree(node);
263 }
264}
265
266static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
267{
268 struct backref_edge *edge;
269
270 edge = kzalloc(sizeof(*edge), GFP_NOFS);
271 if (edge)
272 cache->nr_edges++;
273 return edge;
274}
275
276static void free_backref_edge(struct backref_cache *cache,
277 struct backref_edge *edge)
278{
279 if (edge) {
280 cache->nr_edges--;
281 kfree(edge);
282 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400283}
284
285static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
286 struct rb_node *node)
287{
288 struct rb_node **p = &root->rb_node;
289 struct rb_node *parent = NULL;
290 struct tree_entry *entry;
291
292 while (*p) {
293 parent = *p;
294 entry = rb_entry(parent, struct tree_entry, rb_node);
295
296 if (bytenr < entry->bytenr)
297 p = &(*p)->rb_left;
298 else if (bytenr > entry->bytenr)
299 p = &(*p)->rb_right;
300 else
301 return parent;
302 }
303
304 rb_link_node(node, parent, p);
305 rb_insert_color(node, root);
306 return NULL;
307}
308
309static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
310{
311 struct rb_node *n = root->rb_node;
312 struct tree_entry *entry;
313
314 while (n) {
315 entry = rb_entry(n, struct tree_entry, rb_node);
316
317 if (bytenr < entry->bytenr)
318 n = n->rb_left;
319 else if (bytenr > entry->bytenr)
320 n = n->rb_right;
321 else
322 return n;
323 }
324 return NULL;
325}
326
327/*
328 * walk up backref nodes until reach node presents tree root
329 */
330static struct backref_node *walk_up_backref(struct backref_node *node,
331 struct backref_edge *edges[],
332 int *index)
333{
334 struct backref_edge *edge;
335 int idx = *index;
336
337 while (!list_empty(&node->upper)) {
338 edge = list_entry(node->upper.next,
339 struct backref_edge, list[LOWER]);
340 edges[idx++] = edge;
341 node = edge->node[UPPER];
342 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400343 BUG_ON(node->detached);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400344 *index = idx;
345 return node;
346}
347
348/*
349 * walk down backref nodes to find start of next reference path
350 */
351static struct backref_node *walk_down_backref(struct backref_edge *edges[],
352 int *index)
353{
354 struct backref_edge *edge;
355 struct backref_node *lower;
356 int idx = *index;
357
358 while (idx > 0) {
359 edge = edges[idx - 1];
360 lower = edge->node[LOWER];
361 if (list_is_last(&edge->list[LOWER], &lower->upper)) {
362 idx--;
363 continue;
364 }
365 edge = list_entry(edge->list[LOWER].next,
366 struct backref_edge, list[LOWER]);
367 edges[idx - 1] = edge;
368 *index = idx;
369 return edge->node[UPPER];
370 }
371 *index = 0;
372 return NULL;
373}
374
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400375static void unlock_node_buffer(struct backref_node *node)
376{
377 if (node->locked) {
378 btrfs_tree_unlock(node->eb);
379 node->locked = 0;
380 }
381}
382
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400383static void drop_node_buffer(struct backref_node *node)
384{
385 if (node->eb) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400386 unlock_node_buffer(node);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400387 free_extent_buffer(node->eb);
388 node->eb = NULL;
389 }
390}
391
392static void drop_backref_node(struct backref_cache *tree,
393 struct backref_node *node)
394{
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400395 BUG_ON(!list_empty(&node->upper));
396
397 drop_node_buffer(node);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400398 list_del(&node->list);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400399 list_del(&node->lower);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400400 if (!RB_EMPTY_NODE(&node->rb_node))
401 rb_erase(&node->rb_node, &tree->rb_root);
402 free_backref_node(tree, node);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400403}
404
405/*
406 * remove a backref node from the backref cache
407 */
408static void remove_backref_node(struct backref_cache *cache,
409 struct backref_node *node)
410{
411 struct backref_node *upper;
412 struct backref_edge *edge;
413
414 if (!node)
415 return;
416
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400417 BUG_ON(!node->lowest && !node->detached);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400418 while (!list_empty(&node->upper)) {
419 edge = list_entry(node->upper.next, struct backref_edge,
420 list[LOWER]);
421 upper = edge->node[UPPER];
422 list_del(&edge->list[LOWER]);
423 list_del(&edge->list[UPPER]);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400424 free_backref_edge(cache, edge);
425
426 if (RB_EMPTY_NODE(&upper->rb_node)) {
427 BUG_ON(!list_empty(&node->upper));
428 drop_backref_node(cache, node);
429 node = upper;
430 node->lowest = 1;
431 continue;
432 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400433 /*
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400434 * add the node to leaf node list if no other
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400435 * child block cached.
436 */
437 if (list_empty(&upper->lower)) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400438 list_add_tail(&upper->lower, &cache->leaves);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400439 upper->lowest = 1;
440 }
441 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400442
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400443 drop_backref_node(cache, node);
444}
445
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400446static void update_backref_node(struct backref_cache *cache,
447 struct backref_node *node, u64 bytenr)
448{
449 struct rb_node *rb_node;
450 rb_erase(&node->rb_node, &cache->rb_root);
451 node->bytenr = bytenr;
452 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
453 BUG_ON(rb_node);
454}
455
456/*
457 * update backref cache after a transaction commit
458 */
459static int update_backref_cache(struct btrfs_trans_handle *trans,
460 struct backref_cache *cache)
461{
462 struct backref_node *node;
463 int level = 0;
464
465 if (cache->last_trans == 0) {
466 cache->last_trans = trans->transid;
467 return 0;
468 }
469
470 if (cache->last_trans == trans->transid)
471 return 0;
472
473 /*
474 * detached nodes are used to avoid unnecessary backref
475 * lookup. transaction commit changes the extent tree.
476 * so the detached nodes are no longer useful.
477 */
478 while (!list_empty(&cache->detached)) {
479 node = list_entry(cache->detached.next,
480 struct backref_node, list);
481 remove_backref_node(cache, node);
482 }
483
484 while (!list_empty(&cache->changed)) {
485 node = list_entry(cache->changed.next,
486 struct backref_node, list);
487 list_del_init(&node->list);
488 BUG_ON(node->pending);
489 update_backref_node(cache, node, node->new_bytenr);
490 }
491
492 /*
493 * some nodes can be left in the pending list if there were
494 * errors during processing the pending nodes.
495 */
496 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
497 list_for_each_entry(node, &cache->pending[level], list) {
498 BUG_ON(!node->pending);
499 if (node->bytenr == node->new_bytenr)
500 continue;
501 update_backref_node(cache, node, node->new_bytenr);
502 }
503 }
504
505 cache->last_trans = 0;
506 return 1;
507}
508
509static int should_ignore_root(struct btrfs_root *root)
510{
511 struct btrfs_root *reloc_root;
512
513 if (!root->ref_cows)
514 return 0;
515
516 reloc_root = root->reloc_root;
517 if (!reloc_root)
518 return 0;
519
520 if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
521 root->fs_info->running_transaction->transid - 1)
522 return 0;
523 /*
524 * if there is reloc tree and it was created in previous
525 * transaction backref lookup can find the reloc tree,
526 * so backref node for the fs tree root is useless for
527 * relocation.
528 */
529 return 1;
530}
531
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400532/*
533 * find reloc tree by address of tree root
534 */
535static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
536 u64 bytenr)
537{
538 struct rb_node *rb_node;
539 struct mapping_node *node;
540 struct btrfs_root *root = NULL;
541
542 spin_lock(&rc->reloc_root_tree.lock);
543 rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
544 if (rb_node) {
545 node = rb_entry(rb_node, struct mapping_node, rb_node);
546 root = (struct btrfs_root *)node->data;
547 }
548 spin_unlock(&rc->reloc_root_tree.lock);
549 return root;
550}
551
552static int is_cowonly_root(u64 root_objectid)
553{
554 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
555 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
556 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
557 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
558 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
559 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
560 return 1;
561 return 0;
562}
563
564static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
565 u64 root_objectid)
566{
567 struct btrfs_key key;
568
569 key.objectid = root_objectid;
570 key.type = BTRFS_ROOT_ITEM_KEY;
571 if (is_cowonly_root(root_objectid))
572 key.offset = 0;
573 else
574 key.offset = (u64)-1;
575
576 return btrfs_read_fs_root_no_name(fs_info, &key);
577}
578
579#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
580static noinline_for_stack
581struct btrfs_root *find_tree_root(struct reloc_control *rc,
582 struct extent_buffer *leaf,
583 struct btrfs_extent_ref_v0 *ref0)
584{
585 struct btrfs_root *root;
586 u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
587 u64 generation = btrfs_ref_generation_v0(leaf, ref0);
588
589 BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
590
591 root = read_fs_root(rc->extent_root->fs_info, root_objectid);
592 BUG_ON(IS_ERR(root));
593
594 if (root->ref_cows &&
595 generation != btrfs_root_generation(&root->root_item))
596 return NULL;
597
598 return root;
599}
600#endif
601
602static noinline_for_stack
603int find_inline_backref(struct extent_buffer *leaf, int slot,
604 unsigned long *ptr, unsigned long *end)
605{
606 struct btrfs_extent_item *ei;
607 struct btrfs_tree_block_info *bi;
608 u32 item_size;
609
610 item_size = btrfs_item_size_nr(leaf, slot);
611#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
612 if (item_size < sizeof(*ei)) {
613 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
614 return 1;
615 }
616#endif
617 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
618 WARN_ON(!(btrfs_extent_flags(leaf, ei) &
619 BTRFS_EXTENT_FLAG_TREE_BLOCK));
620
621 if (item_size <= sizeof(*ei) + sizeof(*bi)) {
622 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
623 return 1;
624 }
625
626 bi = (struct btrfs_tree_block_info *)(ei + 1);
627 *ptr = (unsigned long)(bi + 1);
628 *end = (unsigned long)ei + item_size;
629 return 0;
630}
631
632/*
633 * build backref tree for a given tree block. root of the backref tree
634 * corresponds the tree block, leaves of the backref tree correspond
635 * roots of b-trees that reference the tree block.
636 *
637 * the basic idea of this function is check backrefs of a given block
638 * to find upper level blocks that refernece the block, and then check
639 * bakcrefs of these upper level blocks recursively. the recursion stop
640 * when tree root is reached or backrefs for the block is cached.
641 *
642 * NOTE: if we find backrefs for a block are cached, we know backrefs
643 * for all upper level blocks that directly/indirectly reference the
644 * block are also cached.
645 */
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400646static noinline_for_stack
647struct backref_node *build_backref_tree(struct reloc_control *rc,
648 struct btrfs_key *node_key,
649 int level, u64 bytenr)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400650{
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400651 struct backref_cache *cache = &rc->backref_cache;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400652 struct btrfs_path *path1;
653 struct btrfs_path *path2;
654 struct extent_buffer *eb;
655 struct btrfs_root *root;
656 struct backref_node *cur;
657 struct backref_node *upper;
658 struct backref_node *lower;
659 struct backref_node *node = NULL;
660 struct backref_node *exist = NULL;
661 struct backref_edge *edge;
662 struct rb_node *rb_node;
663 struct btrfs_key key;
664 unsigned long end;
665 unsigned long ptr;
666 LIST_HEAD(list);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400667 LIST_HEAD(useless);
668 int cowonly;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400669 int ret;
670 int err = 0;
671
672 path1 = btrfs_alloc_path();
673 path2 = btrfs_alloc_path();
674 if (!path1 || !path2) {
675 err = -ENOMEM;
676 goto out;
677 }
678
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400679 node = alloc_backref_node(cache);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400680 if (!node) {
681 err = -ENOMEM;
682 goto out;
683 }
684
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400685 node->bytenr = bytenr;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400686 node->level = level;
687 node->lowest = 1;
688 cur = node;
689again:
690 end = 0;
691 ptr = 0;
692 key.objectid = cur->bytenr;
693 key.type = BTRFS_EXTENT_ITEM_KEY;
694 key.offset = (u64)-1;
695
696 path1->search_commit_root = 1;
697 path1->skip_locking = 1;
698 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
699 0, 0);
700 if (ret < 0) {
701 err = ret;
702 goto out;
703 }
704 BUG_ON(!ret || !path1->slots[0]);
705
706 path1->slots[0]--;
707
708 WARN_ON(cur->checked);
709 if (!list_empty(&cur->upper)) {
710 /*
711 * the backref was added previously when processsing
712 * backref of type BTRFS_TREE_BLOCK_REF_KEY
713 */
714 BUG_ON(!list_is_singular(&cur->upper));
715 edge = list_entry(cur->upper.next, struct backref_edge,
716 list[LOWER]);
717 BUG_ON(!list_empty(&edge->list[UPPER]));
718 exist = edge->node[UPPER];
719 /*
720 * add the upper level block to pending list if we need
721 * check its backrefs
722 */
723 if (!exist->checked)
724 list_add_tail(&edge->list[UPPER], &list);
725 } else {
726 exist = NULL;
727 }
728
729 while (1) {
730 cond_resched();
731 eb = path1->nodes[0];
732
733 if (ptr >= end) {
734 if (path1->slots[0] >= btrfs_header_nritems(eb)) {
735 ret = btrfs_next_leaf(rc->extent_root, path1);
736 if (ret < 0) {
737 err = ret;
738 goto out;
739 }
740 if (ret > 0)
741 break;
742 eb = path1->nodes[0];
743 }
744
745 btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
746 if (key.objectid != cur->bytenr) {
747 WARN_ON(exist);
748 break;
749 }
750
751 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
752 ret = find_inline_backref(eb, path1->slots[0],
753 &ptr, &end);
754 if (ret)
755 goto next;
756 }
757 }
758
759 if (ptr < end) {
760 /* update key for inline back ref */
761 struct btrfs_extent_inline_ref *iref;
762 iref = (struct btrfs_extent_inline_ref *)ptr;
763 key.type = btrfs_extent_inline_ref_type(eb, iref);
764 key.offset = btrfs_extent_inline_ref_offset(eb, iref);
765 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
766 key.type != BTRFS_SHARED_BLOCK_REF_KEY);
767 }
768
769 if (exist &&
770 ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
771 exist->owner == key.offset) ||
772 (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
773 exist->bytenr == key.offset))) {
774 exist = NULL;
775 goto next;
776 }
777
778#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
779 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
780 key.type == BTRFS_EXTENT_REF_V0_KEY) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400781 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400782 struct btrfs_extent_ref_v0 *ref0;
783 ref0 = btrfs_item_ptr(eb, path1->slots[0],
784 struct btrfs_extent_ref_v0);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400785 if (key.objectid == key.offset) {
Yan, Zheng046f2642010-05-31 08:58:47 +0000786 root = find_tree_root(rc, eb, ref0);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400787 if (root && !should_ignore_root(root))
788 cur->root = root;
789 else
790 list_add(&cur->list, &useless);
791 break;
792 }
Yan, Zheng046f2642010-05-31 08:58:47 +0000793 if (is_cowonly_root(btrfs_ref_root_v0(eb,
794 ref0)))
795 cur->cowonly = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400796 }
797#else
798 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
799 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
800#endif
801 if (key.objectid == key.offset) {
802 /*
803 * only root blocks of reloc trees use
804 * backref of this type.
805 */
806 root = find_reloc_root(rc, cur->bytenr);
807 BUG_ON(!root);
808 cur->root = root;
809 break;
810 }
811
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400812 edge = alloc_backref_edge(cache);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400813 if (!edge) {
814 err = -ENOMEM;
815 goto out;
816 }
817 rb_node = tree_search(&cache->rb_root, key.offset);
818 if (!rb_node) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400819 upper = alloc_backref_node(cache);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400820 if (!upper) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400821 free_backref_edge(cache, edge);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400822 err = -ENOMEM;
823 goto out;
824 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400825 upper->bytenr = key.offset;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400826 upper->level = cur->level + 1;
827 /*
828 * backrefs for the upper level block isn't
829 * cached, add the block to pending list
830 */
831 list_add_tail(&edge->list[UPPER], &list);
832 } else {
833 upper = rb_entry(rb_node, struct backref_node,
834 rb_node);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400835 BUG_ON(!upper->checked);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400836 INIT_LIST_HEAD(&edge->list[UPPER]);
837 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400838 list_add_tail(&edge->list[LOWER], &cur->upper);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400839 edge->node[LOWER] = cur;
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400840 edge->node[UPPER] = upper;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400841
842 goto next;
843 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
844 goto next;
845 }
846
847 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
848 root = read_fs_root(rc->extent_root->fs_info, key.offset);
849 if (IS_ERR(root)) {
850 err = PTR_ERR(root);
851 goto out;
852 }
853
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400854 if (!root->ref_cows)
855 cur->cowonly = 1;
856
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400857 if (btrfs_root_level(&root->root_item) == cur->level) {
858 /* tree root */
859 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
860 cur->bytenr);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400861 if (should_ignore_root(root))
862 list_add(&cur->list, &useless);
863 else
864 cur->root = root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400865 break;
866 }
867
868 level = cur->level + 1;
869
870 /*
871 * searching the tree to find upper level blocks
872 * reference the block.
873 */
874 path2->search_commit_root = 1;
875 path2->skip_locking = 1;
876 path2->lowest_level = level;
877 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
878 path2->lowest_level = 0;
879 if (ret < 0) {
880 err = ret;
881 goto out;
882 }
Yan Zheng33c66f42009-07-22 09:59:00 -0400883 if (ret > 0 && path2->slots[level] > 0)
884 path2->slots[level]--;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400885
886 eb = path2->nodes[level];
887 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
888 cur->bytenr);
889
890 lower = cur;
891 for (; level < BTRFS_MAX_LEVEL; level++) {
892 if (!path2->nodes[level]) {
893 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
894 lower->bytenr);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400895 if (should_ignore_root(root))
896 list_add(&lower->list, &useless);
897 else
898 lower->root = root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400899 break;
900 }
901
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400902 edge = alloc_backref_edge(cache);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400903 if (!edge) {
904 err = -ENOMEM;
905 goto out;
906 }
907
908 eb = path2->nodes[level];
909 rb_node = tree_search(&cache->rb_root, eb->start);
910 if (!rb_node) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400911 upper = alloc_backref_node(cache);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400912 if (!upper) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400913 free_backref_edge(cache, edge);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400914 err = -ENOMEM;
915 goto out;
916 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400917 upper->bytenr = eb->start;
918 upper->owner = btrfs_header_owner(eb);
919 upper->level = lower->level + 1;
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400920 if (!root->ref_cows)
921 upper->cowonly = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400922
923 /*
924 * if we know the block isn't shared
925 * we can void checking its backrefs.
926 */
927 if (btrfs_block_can_be_shared(root, eb))
928 upper->checked = 0;
929 else
930 upper->checked = 1;
931
932 /*
933 * add the block to pending list if we
934 * need check its backrefs. only block
935 * at 'cur->level + 1' is added to the
936 * tail of pending list. this guarantees
937 * we check backrefs from lower level
938 * blocks to upper level blocks.
939 */
940 if (!upper->checked &&
941 level == cur->level + 1) {
942 list_add_tail(&edge->list[UPPER],
943 &list);
944 } else
945 INIT_LIST_HEAD(&edge->list[UPPER]);
946 } else {
947 upper = rb_entry(rb_node, struct backref_node,
948 rb_node);
949 BUG_ON(!upper->checked);
950 INIT_LIST_HEAD(&edge->list[UPPER]);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400951 if (!upper->owner)
952 upper->owner = btrfs_header_owner(eb);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400953 }
954 list_add_tail(&edge->list[LOWER], &lower->upper);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400955 edge->node[LOWER] = lower;
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400956 edge->node[UPPER] = upper;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400957
958 if (rb_node)
959 break;
960 lower = upper;
961 upper = NULL;
962 }
963 btrfs_release_path(root, path2);
964next:
965 if (ptr < end) {
966 ptr += btrfs_extent_inline_ref_size(key.type);
967 if (ptr >= end) {
968 WARN_ON(ptr > end);
969 ptr = 0;
970 end = 0;
971 }
972 }
973 if (ptr >= end)
974 path1->slots[0]++;
975 }
976 btrfs_release_path(rc->extent_root, path1);
977
978 cur->checked = 1;
979 WARN_ON(exist);
980
981 /* the pending list isn't empty, take the first block to process */
982 if (!list_empty(&list)) {
983 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
984 list_del_init(&edge->list[UPPER]);
985 cur = edge->node[UPPER];
986 goto again;
987 }
988
989 /*
990 * everything goes well, connect backref nodes and insert backref nodes
991 * into the cache.
992 */
993 BUG_ON(!node->checked);
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400994 cowonly = node->cowonly;
995 if (!cowonly) {
996 rb_node = tree_insert(&cache->rb_root, node->bytenr,
997 &node->rb_node);
998 BUG_ON(rb_node);
999 list_add_tail(&node->lower, &cache->leaves);
1000 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001001
1002 list_for_each_entry(edge, &node->upper, list[LOWER])
1003 list_add_tail(&edge->list[UPPER], &list);
1004
1005 while (!list_empty(&list)) {
1006 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
1007 list_del_init(&edge->list[UPPER]);
1008 upper = edge->node[UPPER];
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001009 if (upper->detached) {
1010 list_del(&edge->list[LOWER]);
1011 lower = edge->node[LOWER];
1012 free_backref_edge(cache, edge);
1013 if (list_empty(&lower->upper))
1014 list_add(&lower->list, &useless);
1015 continue;
1016 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001017
1018 if (!RB_EMPTY_NODE(&upper->rb_node)) {
1019 if (upper->lowest) {
1020 list_del_init(&upper->lower);
1021 upper->lowest = 0;
1022 }
1023
1024 list_add_tail(&edge->list[UPPER], &upper->lower);
1025 continue;
1026 }
1027
1028 BUG_ON(!upper->checked);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001029 BUG_ON(cowonly != upper->cowonly);
1030 if (!cowonly) {
1031 rb_node = tree_insert(&cache->rb_root, upper->bytenr,
1032 &upper->rb_node);
1033 BUG_ON(rb_node);
1034 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001035
1036 list_add_tail(&edge->list[UPPER], &upper->lower);
1037
1038 list_for_each_entry(edge, &upper->upper, list[LOWER])
1039 list_add_tail(&edge->list[UPPER], &list);
1040 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001041 /*
1042 * process useless backref nodes. backref nodes for tree leaves
1043 * are deleted from the cache. backref nodes for upper level
1044 * tree blocks are left in the cache to avoid unnecessary backref
1045 * lookup.
1046 */
1047 while (!list_empty(&useless)) {
1048 upper = list_entry(useless.next, struct backref_node, list);
1049 list_del_init(&upper->list);
1050 BUG_ON(!list_empty(&upper->upper));
1051 if (upper == node)
1052 node = NULL;
1053 if (upper->lowest) {
1054 list_del_init(&upper->lower);
1055 upper->lowest = 0;
1056 }
1057 while (!list_empty(&upper->lower)) {
1058 edge = list_entry(upper->lower.next,
1059 struct backref_edge, list[UPPER]);
1060 list_del(&edge->list[UPPER]);
1061 list_del(&edge->list[LOWER]);
1062 lower = edge->node[LOWER];
1063 free_backref_edge(cache, edge);
1064
1065 if (list_empty(&lower->upper))
1066 list_add(&lower->list, &useless);
1067 }
1068 __mark_block_processed(rc, upper);
1069 if (upper->level > 0) {
1070 list_add(&upper->list, &cache->detached);
1071 upper->detached = 1;
1072 } else {
1073 rb_erase(&upper->rb_node, &cache->rb_root);
1074 free_backref_node(cache, upper);
1075 }
1076 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001077out:
1078 btrfs_free_path(path1);
1079 btrfs_free_path(path2);
1080 if (err) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001081 while (!list_empty(&useless)) {
1082 lower = list_entry(useless.next,
1083 struct backref_node, upper);
1084 list_del_init(&lower->upper);
1085 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001086 upper = node;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001087 INIT_LIST_HEAD(&list);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001088 while (upper) {
1089 if (RB_EMPTY_NODE(&upper->rb_node)) {
1090 list_splice_tail(&upper->upper, &list);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001091 free_backref_node(cache, upper);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001092 }
1093
1094 if (list_empty(&list))
1095 break;
1096
1097 edge = list_entry(list.next, struct backref_edge,
1098 list[LOWER]);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001099 list_del(&edge->list[LOWER]);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001100 upper = edge->node[UPPER];
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001101 free_backref_edge(cache, edge);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001102 }
1103 return ERR_PTR(err);
1104 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001105 BUG_ON(node && node->detached);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001106 return node;
1107}
1108
1109/*
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001110 * helper to add backref node for the newly created snapshot.
1111 * the backref node is created by cloning backref node that
1112 * corresponds to root of source tree
1113 */
1114static int clone_backref_node(struct btrfs_trans_handle *trans,
1115 struct reloc_control *rc,
1116 struct btrfs_root *src,
1117 struct btrfs_root *dest)
1118{
1119 struct btrfs_root *reloc_root = src->reloc_root;
1120 struct backref_cache *cache = &rc->backref_cache;
1121 struct backref_node *node = NULL;
1122 struct backref_node *new_node;
1123 struct backref_edge *edge;
1124 struct backref_edge *new_edge;
1125 struct rb_node *rb_node;
1126
1127 if (cache->last_trans > 0)
1128 update_backref_cache(trans, cache);
1129
1130 rb_node = tree_search(&cache->rb_root, src->commit_root->start);
1131 if (rb_node) {
1132 node = rb_entry(rb_node, struct backref_node, rb_node);
1133 if (node->detached)
1134 node = NULL;
1135 else
1136 BUG_ON(node->new_bytenr != reloc_root->node->start);
1137 }
1138
1139 if (!node) {
1140 rb_node = tree_search(&cache->rb_root,
1141 reloc_root->commit_root->start);
1142 if (rb_node) {
1143 node = rb_entry(rb_node, struct backref_node,
1144 rb_node);
1145 BUG_ON(node->detached);
1146 }
1147 }
1148
1149 if (!node)
1150 return 0;
1151
1152 new_node = alloc_backref_node(cache);
1153 if (!new_node)
1154 return -ENOMEM;
1155
1156 new_node->bytenr = dest->node->start;
1157 new_node->level = node->level;
1158 new_node->lowest = node->lowest;
1159 new_node->root = dest;
1160
1161 if (!node->lowest) {
1162 list_for_each_entry(edge, &node->lower, list[UPPER]) {
1163 new_edge = alloc_backref_edge(cache);
1164 if (!new_edge)
1165 goto fail;
1166
1167 new_edge->node[UPPER] = new_node;
1168 new_edge->node[LOWER] = edge->node[LOWER];
1169 list_add_tail(&new_edge->list[UPPER],
1170 &new_node->lower);
1171 }
1172 }
1173
1174 rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
1175 &new_node->rb_node);
1176 BUG_ON(rb_node);
1177
1178 if (!new_node->lowest) {
1179 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
1180 list_add_tail(&new_edge->list[LOWER],
1181 &new_edge->node[LOWER]->upper);
1182 }
1183 }
1184 return 0;
1185fail:
1186 while (!list_empty(&new_node->lower)) {
1187 new_edge = list_entry(new_node->lower.next,
1188 struct backref_edge, list[UPPER]);
1189 list_del(&new_edge->list[UPPER]);
1190 free_backref_edge(cache, new_edge);
1191 }
1192 free_backref_node(cache, new_node);
1193 return -ENOMEM;
1194}
1195
1196/*
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001197 * helper to add 'address of tree root -> reloc tree' mapping
1198 */
1199static int __add_reloc_root(struct btrfs_root *root)
1200{
1201 struct rb_node *rb_node;
1202 struct mapping_node *node;
1203 struct reloc_control *rc = root->fs_info->reloc_ctl;
1204
1205 node = kmalloc(sizeof(*node), GFP_NOFS);
1206 BUG_ON(!node);
1207
1208 node->bytenr = root->node->start;
1209 node->data = root;
1210
1211 spin_lock(&rc->reloc_root_tree.lock);
1212 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1213 node->bytenr, &node->rb_node);
1214 spin_unlock(&rc->reloc_root_tree.lock);
1215 BUG_ON(rb_node);
1216
1217 list_add_tail(&root->root_list, &rc->reloc_roots);
1218 return 0;
1219}
1220
1221/*
1222 * helper to update/delete the 'address of tree root -> reloc tree'
1223 * mapping
1224 */
1225static int __update_reloc_root(struct btrfs_root *root, int del)
1226{
1227 struct rb_node *rb_node;
1228 struct mapping_node *node = NULL;
1229 struct reloc_control *rc = root->fs_info->reloc_ctl;
1230
1231 spin_lock(&rc->reloc_root_tree.lock);
1232 rb_node = tree_search(&rc->reloc_root_tree.rb_root,
1233 root->commit_root->start);
1234 if (rb_node) {
1235 node = rb_entry(rb_node, struct mapping_node, rb_node);
1236 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
1237 }
1238 spin_unlock(&rc->reloc_root_tree.lock);
1239
1240 BUG_ON((struct btrfs_root *)node->data != root);
1241
1242 if (!del) {
1243 spin_lock(&rc->reloc_root_tree.lock);
1244 node->bytenr = root->node->start;
1245 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1246 node->bytenr, &node->rb_node);
1247 spin_unlock(&rc->reloc_root_tree.lock);
1248 BUG_ON(rb_node);
1249 } else {
1250 list_del_init(&root->root_list);
1251 kfree(node);
1252 }
1253 return 0;
1254}
1255
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001256static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
1257 struct btrfs_root *root, u64 objectid)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001258{
1259 struct btrfs_root *reloc_root;
1260 struct extent_buffer *eb;
1261 struct btrfs_root_item *root_item;
1262 struct btrfs_key root_key;
1263 int ret;
1264
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001265 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
1266 BUG_ON(!root_item);
1267
1268 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
1269 root_key.type = BTRFS_ROOT_ITEM_KEY;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001270 root_key.offset = objectid;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001271
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001272 if (root->root_key.objectid == objectid) {
1273 /* called by btrfs_init_reloc_root */
1274 ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
1275 BTRFS_TREE_RELOC_OBJECTID);
1276 BUG_ON(ret);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001277
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001278 btrfs_set_root_last_snapshot(&root->root_item,
1279 trans->transid - 1);
1280 } else {
1281 /*
1282 * called by btrfs_reloc_post_snapshot_hook.
1283 * the source tree is a reloc tree, all tree blocks
1284 * modified after it was created have RELOC flag
1285 * set in their headers. so it's OK to not update
1286 * the 'last_snapshot'.
1287 */
1288 ret = btrfs_copy_root(trans, root, root->node, &eb,
1289 BTRFS_TREE_RELOC_OBJECTID);
1290 BUG_ON(ret);
1291 }
1292
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001293 memcpy(root_item, &root->root_item, sizeof(*root_item));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001294 btrfs_set_root_bytenr(root_item, eb->start);
1295 btrfs_set_root_level(root_item, btrfs_header_level(eb));
1296 btrfs_set_root_generation(root_item, trans->transid);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001297
1298 if (root->root_key.objectid == objectid) {
1299 btrfs_set_root_refs(root_item, 0);
1300 memset(&root_item->drop_progress, 0,
1301 sizeof(struct btrfs_disk_key));
1302 root_item->drop_level = 0;
1303 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001304
1305 btrfs_tree_unlock(eb);
1306 free_extent_buffer(eb);
1307
1308 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
1309 &root_key, root_item);
1310 BUG_ON(ret);
1311 kfree(root_item);
1312
1313 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
1314 &root_key);
1315 BUG_ON(IS_ERR(reloc_root));
1316 reloc_root->last_trans = trans->transid;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001317 return reloc_root;
1318}
1319
1320/*
1321 * create reloc tree for a given fs tree. reloc tree is just a
1322 * snapshot of the fs tree with special root objectid.
1323 */
1324int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1325 struct btrfs_root *root)
1326{
1327 struct btrfs_root *reloc_root;
1328 struct reloc_control *rc = root->fs_info->reloc_ctl;
1329 int clear_rsv = 0;
1330
1331 if (root->reloc_root) {
1332 reloc_root = root->reloc_root;
1333 reloc_root->last_trans = trans->transid;
1334 return 0;
1335 }
1336
1337 if (!rc || !rc->create_reloc_tree ||
1338 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1339 return 0;
1340
1341 if (!trans->block_rsv) {
1342 trans->block_rsv = rc->block_rsv;
1343 clear_rsv = 1;
1344 }
1345 reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
1346 if (clear_rsv)
1347 trans->block_rsv = NULL;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001348
1349 __add_reloc_root(reloc_root);
1350 root->reloc_root = reloc_root;
1351 return 0;
1352}
1353
1354/*
1355 * update root item of reloc tree
1356 */
1357int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1358 struct btrfs_root *root)
1359{
1360 struct btrfs_root *reloc_root;
1361 struct btrfs_root_item *root_item;
1362 int del = 0;
1363 int ret;
1364
1365 if (!root->reloc_root)
1366 return 0;
1367
1368 reloc_root = root->reloc_root;
1369 root_item = &reloc_root->root_item;
1370
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001371 if (root->fs_info->reloc_ctl->merge_reloc_tree &&
1372 btrfs_root_refs(root_item) == 0) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001373 root->reloc_root = NULL;
1374 del = 1;
1375 }
1376
1377 __update_reloc_root(reloc_root, del);
1378
1379 if (reloc_root->commit_root != reloc_root->node) {
1380 btrfs_set_root_node(root_item, reloc_root->node);
1381 free_extent_buffer(reloc_root->commit_root);
1382 reloc_root->commit_root = btrfs_root_node(reloc_root);
1383 }
1384
1385 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1386 &reloc_root->root_key, root_item);
1387 BUG_ON(ret);
1388 return 0;
1389}
1390
1391/*
1392 * helper to find first cached inode with inode number >= objectid
1393 * in a subvolume
1394 */
1395static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
1396{
1397 struct rb_node *node;
1398 struct rb_node *prev;
1399 struct btrfs_inode *entry;
1400 struct inode *inode;
1401
1402 spin_lock(&root->inode_lock);
1403again:
1404 node = root->inode_tree.rb_node;
1405 prev = NULL;
1406 while (node) {
1407 prev = node;
1408 entry = rb_entry(node, struct btrfs_inode, rb_node);
1409
1410 if (objectid < entry->vfs_inode.i_ino)
1411 node = node->rb_left;
1412 else if (objectid > entry->vfs_inode.i_ino)
1413 node = node->rb_right;
1414 else
1415 break;
1416 }
1417 if (!node) {
1418 while (prev) {
1419 entry = rb_entry(prev, struct btrfs_inode, rb_node);
1420 if (objectid <= entry->vfs_inode.i_ino) {
1421 node = prev;
1422 break;
1423 }
1424 prev = rb_next(prev);
1425 }
1426 }
1427 while (node) {
1428 entry = rb_entry(node, struct btrfs_inode, rb_node);
1429 inode = igrab(&entry->vfs_inode);
1430 if (inode) {
1431 spin_unlock(&root->inode_lock);
1432 return inode;
1433 }
1434
1435 objectid = entry->vfs_inode.i_ino + 1;
1436 if (cond_resched_lock(&root->inode_lock))
1437 goto again;
1438
1439 node = rb_next(node);
1440 }
1441 spin_unlock(&root->inode_lock);
1442 return NULL;
1443}
1444
1445static int in_block_group(u64 bytenr,
1446 struct btrfs_block_group_cache *block_group)
1447{
1448 if (bytenr >= block_group->key.objectid &&
1449 bytenr < block_group->key.objectid + block_group->key.offset)
1450 return 1;
1451 return 0;
1452}
1453
1454/*
1455 * get new location of data
1456 */
1457static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1458 u64 bytenr, u64 num_bytes)
1459{
1460 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1461 struct btrfs_path *path;
1462 struct btrfs_file_extent_item *fi;
1463 struct extent_buffer *leaf;
1464 int ret;
1465
1466 path = btrfs_alloc_path();
1467 if (!path)
1468 return -ENOMEM;
1469
1470 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1471 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
1472 bytenr, 0);
1473 if (ret < 0)
1474 goto out;
1475 if (ret > 0) {
1476 ret = -ENOENT;
1477 goto out;
1478 }
1479
1480 leaf = path->nodes[0];
1481 fi = btrfs_item_ptr(leaf, path->slots[0],
1482 struct btrfs_file_extent_item);
1483
1484 BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1485 btrfs_file_extent_compression(leaf, fi) ||
1486 btrfs_file_extent_encryption(leaf, fi) ||
1487 btrfs_file_extent_other_encoding(leaf, fi));
1488
1489 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1490 ret = 1;
1491 goto out;
1492 }
1493
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001494 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001495 ret = 0;
1496out:
1497 btrfs_free_path(path);
1498 return ret;
1499}
1500
1501/*
1502 * update file extent items in the tree leaf to point to
1503 * the new locations.
1504 */
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001505static noinline_for_stack
1506int replace_file_extents(struct btrfs_trans_handle *trans,
1507 struct reloc_control *rc,
1508 struct btrfs_root *root,
1509 struct extent_buffer *leaf)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001510{
1511 struct btrfs_key key;
1512 struct btrfs_file_extent_item *fi;
1513 struct inode *inode = NULL;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001514 u64 parent;
1515 u64 bytenr;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001516 u64 new_bytenr = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001517 u64 num_bytes;
1518 u64 end;
1519 u32 nritems;
1520 u32 i;
1521 int ret;
1522 int first = 1;
1523 int dirty = 0;
1524
1525 if (rc->stage != UPDATE_DATA_PTRS)
1526 return 0;
1527
1528 /* reloc trees always use full backref */
1529 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1530 parent = leaf->start;
1531 else
1532 parent = 0;
1533
1534 nritems = btrfs_header_nritems(leaf);
1535 for (i = 0; i < nritems; i++) {
1536 cond_resched();
1537 btrfs_item_key_to_cpu(leaf, &key, i);
1538 if (key.type != BTRFS_EXTENT_DATA_KEY)
1539 continue;
1540 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1541 if (btrfs_file_extent_type(leaf, fi) ==
1542 BTRFS_FILE_EXTENT_INLINE)
1543 continue;
1544 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1545 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1546 if (bytenr == 0)
1547 continue;
1548 if (!in_block_group(bytenr, rc->block_group))
1549 continue;
1550
1551 /*
1552 * if we are modifying block in fs tree, wait for readpage
1553 * to complete and drop the extent cache
1554 */
1555 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001556 if (first) {
1557 inode = find_next_inode(root, key.objectid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001558 first = 0;
1559 } else if (inode && inode->i_ino < key.objectid) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001560 btrfs_add_delayed_iput(inode);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001561 inode = find_next_inode(root, key.objectid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001562 }
1563 if (inode && inode->i_ino == key.objectid) {
1564 end = key.offset +
1565 btrfs_file_extent_num_bytes(leaf, fi);
1566 WARN_ON(!IS_ALIGNED(key.offset,
1567 root->sectorsize));
1568 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1569 end--;
1570 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1571 key.offset, end,
1572 GFP_NOFS);
1573 if (!ret)
1574 continue;
1575
1576 btrfs_drop_extent_cache(inode, key.offset, end,
1577 1);
1578 unlock_extent(&BTRFS_I(inode)->io_tree,
1579 key.offset, end, GFP_NOFS);
1580 }
1581 }
1582
1583 ret = get_new_location(rc->data_inode, &new_bytenr,
1584 bytenr, num_bytes);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001585 if (ret > 0) {
1586 WARN_ON(1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001587 continue;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001588 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001589 BUG_ON(ret < 0);
1590
1591 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1592 dirty = 1;
1593
1594 key.offset -= btrfs_file_extent_offset(leaf, fi);
1595 ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1596 num_bytes, parent,
1597 btrfs_header_owner(leaf),
1598 key.objectid, key.offset);
1599 BUG_ON(ret);
1600
1601 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1602 parent, btrfs_header_owner(leaf),
1603 key.objectid, key.offset);
1604 BUG_ON(ret);
1605 }
1606 if (dirty)
1607 btrfs_mark_buffer_dirty(leaf);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001608 if (inode)
1609 btrfs_add_delayed_iput(inode);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001610 return 0;
1611}
1612
1613static noinline_for_stack
1614int memcmp_node_keys(struct extent_buffer *eb, int slot,
1615 struct btrfs_path *path, int level)
1616{
1617 struct btrfs_disk_key key1;
1618 struct btrfs_disk_key key2;
1619 btrfs_node_key(eb, &key1, slot);
1620 btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1621 return memcmp(&key1, &key2, sizeof(key1));
1622}
1623
1624/*
1625 * try to replace tree blocks in fs tree with the new blocks
1626 * in reloc tree. tree blocks haven't been modified since the
1627 * reloc tree was create can be replaced.
1628 *
1629 * if a block was replaced, level of the block + 1 is returned.
1630 * if no block got replaced, 0 is returned. if there are other
1631 * errors, a negative error number is returned.
1632 */
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001633static noinline_for_stack
1634int replace_path(struct btrfs_trans_handle *trans,
1635 struct btrfs_root *dest, struct btrfs_root *src,
1636 struct btrfs_path *path, struct btrfs_key *next_key,
1637 int lowest_level, int max_level)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001638{
1639 struct extent_buffer *eb;
1640 struct extent_buffer *parent;
1641 struct btrfs_key key;
1642 u64 old_bytenr;
1643 u64 new_bytenr;
1644 u64 old_ptr_gen;
1645 u64 new_ptr_gen;
1646 u64 last_snapshot;
1647 u32 blocksize;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001648 int cow = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001649 int level;
1650 int ret;
1651 int slot;
1652
1653 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1654 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001655
1656 last_snapshot = btrfs_root_last_snapshot(&src->root_item);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001657again:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001658 slot = path->slots[lowest_level];
1659 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1660
1661 eb = btrfs_lock_root_node(dest);
1662 btrfs_set_lock_blocking(eb);
1663 level = btrfs_header_level(eb);
1664
1665 if (level < lowest_level) {
1666 btrfs_tree_unlock(eb);
1667 free_extent_buffer(eb);
1668 return 0;
1669 }
1670
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001671 if (cow) {
1672 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1673 BUG_ON(ret);
1674 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001675 btrfs_set_lock_blocking(eb);
1676
1677 if (next_key) {
1678 next_key->objectid = (u64)-1;
1679 next_key->type = (u8)-1;
1680 next_key->offset = (u64)-1;
1681 }
1682
1683 parent = eb;
1684 while (1) {
1685 level = btrfs_header_level(parent);
1686 BUG_ON(level < lowest_level);
1687
1688 ret = btrfs_bin_search(parent, &key, level, &slot);
1689 if (ret && slot > 0)
1690 slot--;
1691
1692 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1693 btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1694
1695 old_bytenr = btrfs_node_blockptr(parent, slot);
1696 blocksize = btrfs_level_size(dest, level - 1);
1697 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1698
1699 if (level <= max_level) {
1700 eb = path->nodes[level];
1701 new_bytenr = btrfs_node_blockptr(eb,
1702 path->slots[level]);
1703 new_ptr_gen = btrfs_node_ptr_generation(eb,
1704 path->slots[level]);
1705 } else {
1706 new_bytenr = 0;
1707 new_ptr_gen = 0;
1708 }
1709
1710 if (new_bytenr > 0 && new_bytenr == old_bytenr) {
1711 WARN_ON(1);
1712 ret = level;
1713 break;
1714 }
1715
1716 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1717 memcmp_node_keys(parent, slot, path, level)) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001718 if (level <= lowest_level) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001719 ret = 0;
1720 break;
1721 }
1722
1723 eb = read_tree_block(dest, old_bytenr, blocksize,
1724 old_ptr_gen);
1725 btrfs_tree_lock(eb);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001726 if (cow) {
1727 ret = btrfs_cow_block(trans, dest, eb, parent,
1728 slot, &eb);
1729 BUG_ON(ret);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001730 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001731 btrfs_set_lock_blocking(eb);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001732
1733 btrfs_tree_unlock(parent);
1734 free_extent_buffer(parent);
1735
1736 parent = eb;
1737 continue;
1738 }
1739
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001740 if (!cow) {
1741 btrfs_tree_unlock(parent);
1742 free_extent_buffer(parent);
1743 cow = 1;
1744 goto again;
1745 }
1746
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001747 btrfs_node_key_to_cpu(path->nodes[level], &key,
1748 path->slots[level]);
1749 btrfs_release_path(src, path);
1750
1751 path->lowest_level = level;
1752 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1753 path->lowest_level = 0;
1754 BUG_ON(ret);
1755
1756 /*
1757 * swap blocks in fs tree and reloc tree.
1758 */
1759 btrfs_set_node_blockptr(parent, slot, new_bytenr);
1760 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1761 btrfs_mark_buffer_dirty(parent);
1762
1763 btrfs_set_node_blockptr(path->nodes[level],
1764 path->slots[level], old_bytenr);
1765 btrfs_set_node_ptr_generation(path->nodes[level],
1766 path->slots[level], old_ptr_gen);
1767 btrfs_mark_buffer_dirty(path->nodes[level]);
1768
1769 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
1770 path->nodes[level]->start,
1771 src->root_key.objectid, level - 1, 0);
1772 BUG_ON(ret);
1773 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
1774 0, dest->root_key.objectid, level - 1,
1775 0);
1776 BUG_ON(ret);
1777
1778 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1779 path->nodes[level]->start,
1780 src->root_key.objectid, level - 1, 0);
1781 BUG_ON(ret);
1782
1783 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1784 0, dest->root_key.objectid, level - 1,
1785 0);
1786 BUG_ON(ret);
1787
1788 btrfs_unlock_up_safe(path, 0);
1789
1790 ret = level;
1791 break;
1792 }
1793 btrfs_tree_unlock(parent);
1794 free_extent_buffer(parent);
1795 return ret;
1796}
1797
1798/*
1799 * helper to find next relocated block in reloc tree
1800 */
1801static noinline_for_stack
1802int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1803 int *level)
1804{
1805 struct extent_buffer *eb;
1806 int i;
1807 u64 last_snapshot;
1808 u32 nritems;
1809
1810 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1811
1812 for (i = 0; i < *level; i++) {
1813 free_extent_buffer(path->nodes[i]);
1814 path->nodes[i] = NULL;
1815 }
1816
1817 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1818 eb = path->nodes[i];
1819 nritems = btrfs_header_nritems(eb);
1820 while (path->slots[i] + 1 < nritems) {
1821 path->slots[i]++;
1822 if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1823 last_snapshot)
1824 continue;
1825
1826 *level = i;
1827 return 0;
1828 }
1829 free_extent_buffer(path->nodes[i]);
1830 path->nodes[i] = NULL;
1831 }
1832 return 1;
1833}
1834
1835/*
1836 * walk down reloc tree to find relocated block of lowest level
1837 */
1838static noinline_for_stack
1839int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1840 int *level)
1841{
1842 struct extent_buffer *eb = NULL;
1843 int i;
1844 u64 bytenr;
1845 u64 ptr_gen = 0;
1846 u64 last_snapshot;
1847 u32 blocksize;
1848 u32 nritems;
1849
1850 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1851
1852 for (i = *level; i > 0; i--) {
1853 eb = path->nodes[i];
1854 nritems = btrfs_header_nritems(eb);
1855 while (path->slots[i] < nritems) {
1856 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1857 if (ptr_gen > last_snapshot)
1858 break;
1859 path->slots[i]++;
1860 }
1861 if (path->slots[i] >= nritems) {
1862 if (i == *level)
1863 break;
1864 *level = i + 1;
1865 return 0;
1866 }
1867 if (i == 1) {
1868 *level = i;
1869 return 0;
1870 }
1871
1872 bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1873 blocksize = btrfs_level_size(root, i - 1);
1874 eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1875 BUG_ON(btrfs_header_level(eb) != i - 1);
1876 path->nodes[i - 1] = eb;
1877 path->slots[i - 1] = 0;
1878 }
1879 return 1;
1880}
1881
1882/*
1883 * invalidate extent cache for file extents whose key in range of
1884 * [min_key, max_key)
1885 */
1886static int invalidate_extent_cache(struct btrfs_root *root,
1887 struct btrfs_key *min_key,
1888 struct btrfs_key *max_key)
1889{
1890 struct inode *inode = NULL;
1891 u64 objectid;
1892 u64 start, end;
1893
1894 objectid = min_key->objectid;
1895 while (1) {
1896 cond_resched();
1897 iput(inode);
1898
1899 if (objectid > max_key->objectid)
1900 break;
1901
1902 inode = find_next_inode(root, objectid);
1903 if (!inode)
1904 break;
1905
1906 if (inode->i_ino > max_key->objectid) {
1907 iput(inode);
1908 break;
1909 }
1910
1911 objectid = inode->i_ino + 1;
1912 if (!S_ISREG(inode->i_mode))
1913 continue;
1914
1915 if (unlikely(min_key->objectid == inode->i_ino)) {
1916 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1917 continue;
1918 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1919 start = 0;
1920 else {
1921 start = min_key->offset;
1922 WARN_ON(!IS_ALIGNED(start, root->sectorsize));
1923 }
1924 } else {
1925 start = 0;
1926 }
1927
1928 if (unlikely(max_key->objectid == inode->i_ino)) {
1929 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1930 continue;
1931 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1932 end = (u64)-1;
1933 } else {
1934 if (max_key->offset == 0)
1935 continue;
1936 end = max_key->offset;
1937 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1938 end--;
1939 }
1940 } else {
1941 end = (u64)-1;
1942 }
1943
1944 /* the lock_extent waits for readpage to complete */
1945 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1946 btrfs_drop_extent_cache(inode, start, end, 1);
1947 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1948 }
1949 return 0;
1950}
1951
1952static int find_next_key(struct btrfs_path *path, int level,
1953 struct btrfs_key *key)
1954
1955{
1956 while (level < BTRFS_MAX_LEVEL) {
1957 if (!path->nodes[level])
1958 break;
1959 if (path->slots[level] + 1 <
1960 btrfs_header_nritems(path->nodes[level])) {
1961 btrfs_node_key_to_cpu(path->nodes[level], key,
1962 path->slots[level] + 1);
1963 return 0;
1964 }
1965 level++;
1966 }
1967 return 1;
1968}
1969
1970/*
1971 * merge the relocated tree blocks in reloc tree with corresponding
1972 * fs tree.
1973 */
1974static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1975 struct btrfs_root *root)
1976{
1977 LIST_HEAD(inode_list);
1978 struct btrfs_key key;
1979 struct btrfs_key next_key;
1980 struct btrfs_trans_handle *trans;
1981 struct btrfs_root *reloc_root;
1982 struct btrfs_root_item *root_item;
1983 struct btrfs_path *path;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001984 struct extent_buffer *leaf;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001985 unsigned long nr;
1986 int level;
1987 int max_level;
1988 int replaced = 0;
1989 int ret;
1990 int err = 0;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001991 u32 min_reserved;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001992
1993 path = btrfs_alloc_path();
1994 if (!path)
1995 return -ENOMEM;
1996
1997 reloc_root = root->reloc_root;
1998 root_item = &reloc_root->root_item;
1999
2000 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2001 level = btrfs_root_level(root_item);
2002 extent_buffer_get(reloc_root->node);
2003 path->nodes[level] = reloc_root->node;
2004 path->slots[level] = 0;
2005 } else {
2006 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2007
2008 level = root_item->drop_level;
2009 BUG_ON(level == 0);
2010 path->lowest_level = level;
2011 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
Yan Zheng33c66f42009-07-22 09:59:00 -04002012 path->lowest_level = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002013 if (ret < 0) {
2014 btrfs_free_path(path);
2015 return ret;
2016 }
2017
2018 btrfs_node_key_to_cpu(path->nodes[level], &next_key,
2019 path->slots[level]);
2020 WARN_ON(memcmp(&key, &next_key, sizeof(key)));
2021
2022 btrfs_unlock_up_safe(path, 0);
2023 }
2024
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002025 min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002026 memset(&next_key, 0, sizeof(next_key));
2027
2028 while (1) {
Yan, Zhenga22285a2010-05-16 10:48:46 -04002029 trans = btrfs_start_transaction(root, 0);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002030 trans->block_rsv = rc->block_rsv;
2031
2032 ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
2033 min_reserved, 0);
2034 if (ret) {
2035 BUG_ON(ret != -EAGAIN);
2036 ret = btrfs_commit_transaction(trans, root);
2037 BUG_ON(ret);
2038 continue;
2039 }
2040
2041 replaced = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002042 max_level = level;
2043
2044 ret = walk_down_reloc_tree(reloc_root, path, &level);
2045 if (ret < 0) {
2046 err = ret;
2047 goto out;
2048 }
2049 if (ret > 0)
2050 break;
2051
2052 if (!find_next_key(path, level, &key) &&
2053 btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
2054 ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002055 } else {
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002056 ret = replace_path(trans, root, reloc_root, path,
2057 &next_key, level, max_level);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002058 }
2059 if (ret < 0) {
2060 err = ret;
2061 goto out;
2062 }
2063
2064 if (ret > 0) {
2065 level = ret;
2066 btrfs_node_key_to_cpu(path->nodes[level], &key,
2067 path->slots[level]);
2068 replaced = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002069 }
2070
2071 ret = walk_up_reloc_tree(reloc_root, path, &level);
2072 if (ret > 0)
2073 break;
2074
2075 BUG_ON(level == 0);
2076 /*
2077 * save the merging progress in the drop_progress.
2078 * this is OK since root refs == 1 in this case.
2079 */
2080 btrfs_node_key(path->nodes[level], &root_item->drop_progress,
2081 path->slots[level]);
2082 root_item->drop_level = level;
2083
2084 nr = trans->blocks_used;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002085 btrfs_end_transaction_throttle(trans, root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002086
2087 btrfs_btree_balance_dirty(root, nr);
2088
2089 if (replaced && rc->stage == UPDATE_DATA_PTRS)
2090 invalidate_extent_cache(root, &key, &next_key);
2091 }
2092
2093 /*
2094 * handle the case only one block in the fs tree need to be
2095 * relocated and the block is tree root.
2096 */
2097 leaf = btrfs_lock_root_node(root);
2098 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
2099 btrfs_tree_unlock(leaf);
2100 free_extent_buffer(leaf);
2101 if (ret < 0)
2102 err = ret;
2103out:
2104 btrfs_free_path(path);
2105
2106 if (err == 0) {
2107 memset(&root_item->drop_progress, 0,
2108 sizeof(root_item->drop_progress));
2109 root_item->drop_level = 0;
2110 btrfs_set_root_refs(root_item, 0);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002111 btrfs_update_reloc_root(trans, root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002112 }
2113
2114 nr = trans->blocks_used;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002115 btrfs_end_transaction_throttle(trans, root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002116
2117 btrfs_btree_balance_dirty(root, nr);
2118
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002119 if (replaced && rc->stage == UPDATE_DATA_PTRS)
2120 invalidate_extent_cache(root, &key, &next_key);
2121
2122 return err;
2123}
2124
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002125static noinline_for_stack
2126int prepare_to_merge(struct reloc_control *rc, int err)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002127{
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002128 struct btrfs_root *root = rc->extent_root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002129 struct btrfs_root *reloc_root;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002130 struct btrfs_trans_handle *trans;
2131 LIST_HEAD(reloc_roots);
2132 u64 num_bytes = 0;
2133 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002134
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002135 mutex_lock(&root->fs_info->trans_mutex);
2136 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2137 rc->merging_rsv_size += rc->nodes_relocated * 2;
2138 mutex_unlock(&root->fs_info->trans_mutex);
2139again:
2140 if (!err) {
2141 num_bytes = rc->merging_rsv_size;
2142 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
Josef Bacik8bb8ab22010-10-15 16:52:49 -04002143 num_bytes);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002144 if (ret)
2145 err = ret;
2146 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002147
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002148 trans = btrfs_join_transaction(rc->extent_root, 1);
2149
2150 if (!err) {
2151 if (num_bytes != rc->merging_rsv_size) {
2152 btrfs_end_transaction(trans, rc->extent_root);
2153 btrfs_block_rsv_release(rc->extent_root,
2154 rc->block_rsv, num_bytes);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002155 goto again;
2156 }
2157 }
2158
2159 rc->merge_reloc_tree = 1;
2160
2161 while (!list_empty(&rc->reloc_roots)) {
2162 reloc_root = list_entry(rc->reloc_roots.next,
2163 struct btrfs_root, root_list);
2164 list_del_init(&reloc_root->root_list);
2165
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002166 root = read_fs_root(reloc_root->fs_info,
2167 reloc_root->root_key.offset);
2168 BUG_ON(IS_ERR(root));
2169 BUG_ON(root->reloc_root != reloc_root);
2170
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002171 /*
2172 * set reference count to 1, so btrfs_recover_relocation
2173 * knows it should resumes merging
2174 */
2175 if (!err)
2176 btrfs_set_root_refs(&reloc_root->root_item, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002177 btrfs_update_reloc_root(trans, root);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002178
2179 list_add(&reloc_root->root_list, &reloc_roots);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002180 }
2181
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002182 list_splice(&reloc_roots, &rc->reloc_roots);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002183
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002184 if (!err)
2185 btrfs_commit_transaction(trans, rc->extent_root);
2186 else
2187 btrfs_end_transaction(trans, rc->extent_root);
2188 return err;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002189}
2190
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002191static noinline_for_stack
2192int merge_reloc_roots(struct reloc_control *rc)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002193{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002194 struct btrfs_root *root;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002195 struct btrfs_root *reloc_root;
2196 LIST_HEAD(reloc_roots);
2197 int found = 0;
2198 int ret;
2199again:
2200 root = rc->extent_root;
2201 mutex_lock(&root->fs_info->trans_mutex);
2202 list_splice_init(&rc->reloc_roots, &reloc_roots);
2203 mutex_unlock(&root->fs_info->trans_mutex);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002204
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002205 while (!list_empty(&reloc_roots)) {
2206 found = 1;
2207 reloc_root = list_entry(reloc_roots.next,
2208 struct btrfs_root, root_list);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002209
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002210 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
2211 root = read_fs_root(reloc_root->fs_info,
2212 reloc_root->root_key.offset);
2213 BUG_ON(IS_ERR(root));
2214 BUG_ON(root->reloc_root != reloc_root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002215
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002216 ret = merge_reloc_root(rc, root);
2217 BUG_ON(ret);
2218 } else {
2219 list_del_init(&reloc_root->root_list);
2220 }
2221 btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002222 }
2223
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002224 if (found) {
2225 found = 0;
2226 goto again;
2227 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002228 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
2229 return 0;
2230}
2231
2232static void free_block_list(struct rb_root *blocks)
2233{
2234 struct tree_block *block;
2235 struct rb_node *rb_node;
2236 while ((rb_node = rb_first(blocks))) {
2237 block = rb_entry(rb_node, struct tree_block, rb_node);
2238 rb_erase(rb_node, blocks);
2239 kfree(block);
2240 }
2241}
2242
2243static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
2244 struct btrfs_root *reloc_root)
2245{
2246 struct btrfs_root *root;
2247
2248 if (reloc_root->last_trans == trans->transid)
2249 return 0;
2250
2251 root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
2252 BUG_ON(IS_ERR(root));
2253 BUG_ON(root->reloc_root != reloc_root);
2254
2255 return btrfs_record_root_in_trans(trans, root);
2256}
2257
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002258static noinline_for_stack
2259struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2260 struct reloc_control *rc,
2261 struct backref_node *node,
2262 struct backref_edge *edges[], int *nr)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002263{
2264 struct backref_node *next;
2265 struct btrfs_root *root;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002266 int index = 0;
2267
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002268 next = node;
2269 while (1) {
2270 cond_resched();
2271 next = walk_up_backref(next, edges, &index);
2272 root = next->root;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002273 BUG_ON(!root);
2274 BUG_ON(!root->ref_cows);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002275
2276 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2277 record_reloc_root_in_trans(trans, root);
2278 break;
2279 }
2280
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002281 btrfs_record_root_in_trans(trans, root);
2282 root = root->reloc_root;
2283
2284 if (next->new_bytenr != root->node->start) {
2285 BUG_ON(next->new_bytenr);
2286 BUG_ON(!list_empty(&next->list));
2287 next->new_bytenr = root->node->start;
2288 next->root = root;
2289 list_add_tail(&next->list,
2290 &rc->backref_cache.changed);
2291 __mark_block_processed(rc, next);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002292 break;
2293 }
2294
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002295 WARN_ON(1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002296 root = NULL;
2297 next = walk_down_backref(edges, &index);
2298 if (!next || next->level <= node->level)
2299 break;
2300 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002301 if (!root)
2302 return NULL;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002303
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002304 *nr = index;
2305 next = node;
2306 /* setup backref node path for btrfs_reloc_cow_block */
2307 while (1) {
2308 rc->backref_cache.path[next->level] = next;
2309 if (--index < 0)
2310 break;
2311 next = edges[index]->node[UPPER];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002312 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002313 return root;
2314}
2315
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002316/*
2317 * select a tree root for relocation. return NULL if the block
2318 * is reference counted. we should use do_relocation() in this
2319 * case. return a tree root pointer if the block isn't reference
2320 * counted. return -ENOENT if the block is root of reloc tree.
2321 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002322static noinline_for_stack
2323struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
2324 struct backref_node *node)
2325{
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002326 struct backref_node *next;
2327 struct btrfs_root *root;
2328 struct btrfs_root *fs_root = NULL;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002329 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002330 int index = 0;
2331
2332 next = node;
2333 while (1) {
2334 cond_resched();
2335 next = walk_up_backref(next, edges, &index);
2336 root = next->root;
2337 BUG_ON(!root);
2338
2339 /* no other choice for non-refernce counted tree */
2340 if (!root->ref_cows)
2341 return root;
2342
2343 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
2344 fs_root = root;
2345
2346 if (next != node)
2347 return NULL;
2348
2349 next = walk_down_backref(edges, &index);
2350 if (!next || next->level <= node->level)
2351 break;
2352 }
2353
2354 if (!fs_root)
2355 return ERR_PTR(-ENOENT);
2356 return fs_root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002357}
2358
2359static noinline_for_stack
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002360u64 calcu_metadata_size(struct reloc_control *rc,
2361 struct backref_node *node, int reserve)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002362{
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002363 struct backref_node *next = node;
2364 struct backref_edge *edge;
2365 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2366 u64 num_bytes = 0;
2367 int index = 0;
2368
2369 BUG_ON(reserve && node->processed);
2370
2371 while (next) {
2372 cond_resched();
2373 while (1) {
2374 if (next->processed && (reserve || next != node))
2375 break;
2376
2377 num_bytes += btrfs_level_size(rc->extent_root,
2378 next->level);
2379
2380 if (list_empty(&next->upper))
2381 break;
2382
2383 edge = list_entry(next->upper.next,
2384 struct backref_edge, list[LOWER]);
2385 edges[index++] = edge;
2386 next = edge->node[UPPER];
2387 }
2388 next = walk_down_backref(edges, &index);
2389 }
2390 return num_bytes;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002391}
2392
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002393static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2394 struct reloc_control *rc,
2395 struct backref_node *node)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002396{
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002397 struct btrfs_root *root = rc->extent_root;
2398 u64 num_bytes;
2399 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002400
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002401 num_bytes = calcu_metadata_size(rc, node, 1) * 2;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002402
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002403 trans->block_rsv = rc->block_rsv;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04002404 ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002405 if (ret) {
2406 if (ret == -EAGAIN)
2407 rc->commit_transaction = 1;
2408 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002409 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002410
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002411 return 0;
2412}
2413
2414static void release_metadata_space(struct reloc_control *rc,
2415 struct backref_node *node)
2416{
2417 u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
2418 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002419}
2420
2421/*
2422 * relocate a block tree, and then update pointers in upper level
2423 * blocks that reference the block to point to the new location.
2424 *
2425 * if called by link_to_upper, the block has already been relocated.
2426 * in that case this function just updates pointers.
2427 */
2428static int do_relocation(struct btrfs_trans_handle *trans,
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002429 struct reloc_control *rc,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002430 struct backref_node *node,
2431 struct btrfs_key *key,
2432 struct btrfs_path *path, int lowest)
2433{
2434 struct backref_node *upper;
2435 struct backref_edge *edge;
2436 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2437 struct btrfs_root *root;
2438 struct extent_buffer *eb;
2439 u32 blocksize;
2440 u64 bytenr;
2441 u64 generation;
2442 int nr;
2443 int slot;
2444 int ret;
2445 int err = 0;
2446
2447 BUG_ON(lowest && node->eb);
2448
2449 path->lowest_level = node->level + 1;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002450 rc->backref_cache.path[node->level] = node;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002451 list_for_each_entry(edge, &node->upper, list[LOWER]) {
2452 cond_resched();
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002453
2454 upper = edge->node[UPPER];
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002455 root = select_reloc_root(trans, rc, upper, edges, &nr);
2456 BUG_ON(!root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002457
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002458 if (upper->eb && !upper->locked) {
2459 if (!lowest) {
2460 ret = btrfs_bin_search(upper->eb, key,
2461 upper->level, &slot);
2462 BUG_ON(ret);
2463 bytenr = btrfs_node_blockptr(upper->eb, slot);
2464 if (node->eb->start == bytenr)
2465 goto next;
2466 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002467 drop_node_buffer(upper);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002468 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002469
2470 if (!upper->eb) {
2471 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2472 if (ret < 0) {
2473 err = ret;
2474 break;
2475 }
2476 BUG_ON(ret > 0);
2477
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002478 if (!upper->eb) {
2479 upper->eb = path->nodes[upper->level];
2480 path->nodes[upper->level] = NULL;
2481 } else {
2482 BUG_ON(upper->eb != path->nodes[upper->level]);
2483 }
2484
2485 upper->locked = 1;
2486 path->locks[upper->level] = 0;
2487
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002488 slot = path->slots[upper->level];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002489 btrfs_release_path(NULL, path);
2490 } else {
2491 ret = btrfs_bin_search(upper->eb, key, upper->level,
2492 &slot);
2493 BUG_ON(ret);
2494 }
2495
2496 bytenr = btrfs_node_blockptr(upper->eb, slot);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002497 if (lowest) {
2498 BUG_ON(bytenr != node->bytenr);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002499 } else {
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002500 if (node->eb->start == bytenr)
2501 goto next;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002502 }
2503
2504 blocksize = btrfs_level_size(root, node->level);
2505 generation = btrfs_node_ptr_generation(upper->eb, slot);
2506 eb = read_tree_block(root, bytenr, blocksize, generation);
2507 btrfs_tree_lock(eb);
2508 btrfs_set_lock_blocking(eb);
2509
2510 if (!node->eb) {
2511 ret = btrfs_cow_block(trans, root, eb, upper->eb,
2512 slot, &eb);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002513 btrfs_tree_unlock(eb);
2514 free_extent_buffer(eb);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002515 if (ret < 0) {
2516 err = ret;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002517 goto next;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002518 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002519 BUG_ON(node->eb != eb);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002520 } else {
2521 btrfs_set_node_blockptr(upper->eb, slot,
2522 node->eb->start);
2523 btrfs_set_node_ptr_generation(upper->eb, slot,
2524 trans->transid);
2525 btrfs_mark_buffer_dirty(upper->eb);
2526
2527 ret = btrfs_inc_extent_ref(trans, root,
2528 node->eb->start, blocksize,
2529 upper->eb->start,
2530 btrfs_header_owner(upper->eb),
2531 node->level, 0);
2532 BUG_ON(ret);
2533
2534 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2535 BUG_ON(ret);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002536 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002537next:
2538 if (!upper->pending)
2539 drop_node_buffer(upper);
2540 else
2541 unlock_node_buffer(upper);
2542 if (err)
2543 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002544 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002545
2546 if (!err && node->pending) {
2547 drop_node_buffer(node);
2548 list_move_tail(&node->list, &rc->backref_cache.changed);
2549 node->pending = 0;
2550 }
2551
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002552 path->lowest_level = 0;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002553 BUG_ON(err == -ENOSPC);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002554 return err;
2555}
2556
2557static int link_to_upper(struct btrfs_trans_handle *trans,
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002558 struct reloc_control *rc,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002559 struct backref_node *node,
2560 struct btrfs_path *path)
2561{
2562 struct btrfs_key key;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002563
2564 btrfs_node_key_to_cpu(node->eb, &key, 0);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002565 return do_relocation(trans, rc, node, &key, path, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002566}
2567
2568static int finish_pending_nodes(struct btrfs_trans_handle *trans,
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002569 struct reloc_control *rc,
2570 struct btrfs_path *path, int err)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002571{
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002572 LIST_HEAD(list);
2573 struct backref_cache *cache = &rc->backref_cache;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002574 struct backref_node *node;
2575 int level;
2576 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002577
2578 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2579 while (!list_empty(&cache->pending[level])) {
2580 node = list_entry(cache->pending[level].next,
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002581 struct backref_node, list);
2582 list_move_tail(&node->list, &list);
2583 BUG_ON(!node->pending);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002584
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002585 if (!err) {
2586 ret = link_to_upper(trans, rc, node, path);
2587 if (ret < 0)
2588 err = ret;
2589 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002590 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002591 list_splice_init(&list, &cache->pending[level]);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002592 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002593 return err;
2594}
2595
2596static void mark_block_processed(struct reloc_control *rc,
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002597 u64 bytenr, u32 blocksize)
2598{
2599 set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
2600 EXTENT_DIRTY, GFP_NOFS);
2601}
2602
2603static void __mark_block_processed(struct reloc_control *rc,
2604 struct backref_node *node)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002605{
2606 u32 blocksize;
2607 if (node->level == 0 ||
2608 in_block_group(node->bytenr, rc->block_group)) {
2609 blocksize = btrfs_level_size(rc->extent_root, node->level);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002610 mark_block_processed(rc, node->bytenr, blocksize);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002611 }
2612 node->processed = 1;
2613}
2614
2615/*
2616 * mark a block and all blocks directly/indirectly reference the block
2617 * as processed.
2618 */
2619static void update_processed_blocks(struct reloc_control *rc,
2620 struct backref_node *node)
2621{
2622 struct backref_node *next = node;
2623 struct backref_edge *edge;
2624 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2625 int index = 0;
2626
2627 while (next) {
2628 cond_resched();
2629 while (1) {
2630 if (next->processed)
2631 break;
2632
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002633 __mark_block_processed(rc, next);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002634
2635 if (list_empty(&next->upper))
2636 break;
2637
2638 edge = list_entry(next->upper.next,
2639 struct backref_edge, list[LOWER]);
2640 edges[index++] = edge;
2641 next = edge->node[UPPER];
2642 }
2643 next = walk_down_backref(edges, &index);
2644 }
2645}
2646
2647static int tree_block_processed(u64 bytenr, u32 blocksize,
2648 struct reloc_control *rc)
2649{
2650 if (test_range_bit(&rc->processed_blocks, bytenr,
Chris Mason9655d292009-09-02 15:22:30 -04002651 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002652 return 1;
2653 return 0;
2654}
2655
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002656static int get_tree_block_key(struct reloc_control *rc,
2657 struct tree_block *block)
2658{
2659 struct extent_buffer *eb;
2660
2661 BUG_ON(block->key_ready);
2662 eb = read_tree_block(rc->extent_root, block->bytenr,
2663 block->key.objectid, block->key.offset);
2664 WARN_ON(btrfs_header_level(eb) != block->level);
2665 if (block->level == 0)
2666 btrfs_item_key_to_cpu(eb, &block->key, 0);
2667 else
2668 btrfs_node_key_to_cpu(eb, &block->key, 0);
2669 free_extent_buffer(eb);
2670 block->key_ready = 1;
2671 return 0;
2672}
2673
2674static int reada_tree_block(struct reloc_control *rc,
2675 struct tree_block *block)
2676{
2677 BUG_ON(block->key_ready);
2678 readahead_tree_block(rc->extent_root, block->bytenr,
2679 block->key.objectid, block->key.offset);
2680 return 0;
2681}
2682
2683/*
2684 * helper function to relocate a tree block
2685 */
2686static int relocate_tree_block(struct btrfs_trans_handle *trans,
2687 struct reloc_control *rc,
2688 struct backref_node *node,
2689 struct btrfs_key *key,
2690 struct btrfs_path *path)
2691{
2692 struct btrfs_root *root;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002693 int release = 0;
2694 int ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002695
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002696 if (!node)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002697 return 0;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002698
2699 BUG_ON(node->processed);
2700 root = select_one_root(trans, node);
2701 if (root == ERR_PTR(-ENOENT)) {
2702 update_processed_blocks(rc, node);
2703 goto out;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002704 }
2705
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002706 if (!root || root->ref_cows) {
2707 ret = reserve_metadata_space(trans, rc, node);
2708 if (ret)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002709 goto out;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002710 release = 1;
2711 }
2712
2713 if (root) {
2714 if (root->ref_cows) {
2715 BUG_ON(node->new_bytenr);
2716 BUG_ON(!list_empty(&node->list));
2717 btrfs_record_root_in_trans(trans, root);
2718 root = root->reloc_root;
2719 node->new_bytenr = root->node->start;
2720 node->root = root;
2721 list_add_tail(&node->list, &rc->backref_cache.changed);
2722 } else {
2723 path->lowest_level = node->level;
2724 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2725 btrfs_release_path(root, path);
2726 if (ret > 0)
2727 ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002728 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002729 if (!ret)
2730 update_processed_blocks(rc, node);
2731 } else {
2732 ret = do_relocation(trans, rc, node, key, path, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002733 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002734out:
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002735 if (ret || node->level == 0 || node->cowonly) {
2736 if (release)
2737 release_metadata_space(rc, node);
2738 remove_backref_node(&rc->backref_cache, node);
2739 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002740 return ret;
2741}
2742
2743/*
2744 * relocate a list of blocks
2745 */
2746static noinline_for_stack
2747int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2748 struct reloc_control *rc, struct rb_root *blocks)
2749{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002750 struct backref_node *node;
2751 struct btrfs_path *path;
2752 struct tree_block *block;
2753 struct rb_node *rb_node;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002754 int ret;
2755 int err = 0;
2756
2757 path = btrfs_alloc_path();
2758 if (!path)
2759 return -ENOMEM;
2760
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002761 rb_node = rb_first(blocks);
2762 while (rb_node) {
2763 block = rb_entry(rb_node, struct tree_block, rb_node);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002764 if (!block->key_ready)
2765 reada_tree_block(rc, block);
2766 rb_node = rb_next(rb_node);
2767 }
2768
2769 rb_node = rb_first(blocks);
2770 while (rb_node) {
2771 block = rb_entry(rb_node, struct tree_block, rb_node);
2772 if (!block->key_ready)
2773 get_tree_block_key(rc, block);
2774 rb_node = rb_next(rb_node);
2775 }
2776
2777 rb_node = rb_first(blocks);
2778 while (rb_node) {
2779 block = rb_entry(rb_node, struct tree_block, rb_node);
2780
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002781 node = build_backref_tree(rc, &block->key,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002782 block->level, block->bytenr);
2783 if (IS_ERR(node)) {
2784 err = PTR_ERR(node);
2785 goto out;
2786 }
2787
2788 ret = relocate_tree_block(trans, rc, node, &block->key,
2789 path);
2790 if (ret < 0) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002791 if (ret != -EAGAIN || rb_node == rb_first(blocks))
2792 err = ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002793 goto out;
2794 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002795 rb_node = rb_next(rb_node);
2796 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002797out:
2798 free_block_list(blocks);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04002799 err = finish_pending_nodes(trans, rc, path, err);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002800
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002801 btrfs_free_path(path);
2802 return err;
2803}
2804
2805static noinline_for_stack
Yan, Zhengefa56462010-05-16 10:49:59 -04002806int prealloc_file_extent_cluster(struct inode *inode,
2807 struct file_extent_cluster *cluster)
2808{
2809 u64 alloc_hint = 0;
2810 u64 start;
2811 u64 end;
2812 u64 offset = BTRFS_I(inode)->index_cnt;
2813 u64 num_bytes;
2814 int nr = 0;
2815 int ret = 0;
2816
2817 BUG_ON(cluster->start != cluster->boundary[0]);
2818 mutex_lock(&inode->i_mutex);
2819
2820 ret = btrfs_check_data_free_space(inode, cluster->end +
2821 1 - cluster->start);
2822 if (ret)
2823 goto out;
2824
2825 while (nr < cluster->nr) {
2826 start = cluster->boundary[nr] - offset;
2827 if (nr + 1 < cluster->nr)
2828 end = cluster->boundary[nr + 1] - 1 - offset;
2829 else
2830 end = cluster->end - offset;
2831
2832 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2833 num_bytes = end + 1 - start;
2834 ret = btrfs_prealloc_file_range(inode, 0, start,
2835 num_bytes, num_bytes,
2836 end + 1, &alloc_hint);
2837 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2838 if (ret)
2839 break;
2840 nr++;
2841 }
2842 btrfs_free_reserved_data_space(inode, cluster->end +
2843 1 - cluster->start);
2844out:
2845 mutex_unlock(&inode->i_mutex);
2846 return ret;
2847}
2848
2849static noinline_for_stack
Yan, Zheng0257bb82009-09-24 09:17:31 -04002850int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2851 u64 block_start)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002852{
2853 struct btrfs_root *root = BTRFS_I(inode)->root;
2854 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2855 struct extent_map *em;
Yan, Zheng0257bb82009-09-24 09:17:31 -04002856 int ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002857
2858 em = alloc_extent_map(GFP_NOFS);
Yan, Zheng0257bb82009-09-24 09:17:31 -04002859 if (!em)
2860 return -ENOMEM;
2861
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002862 em->start = start;
Yan, Zheng0257bb82009-09-24 09:17:31 -04002863 em->len = end + 1 - start;
2864 em->block_len = em->len;
2865 em->block_start = block_start;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002866 em->bdev = root->fs_info->fs_devices->latest_bdev;
2867 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2868
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002869 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2870 while (1) {
Chris Mason890871b2009-09-02 16:24:52 -04002871 write_lock(&em_tree->lock);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002872 ret = add_extent_mapping(em_tree, em);
Chris Mason890871b2009-09-02 16:24:52 -04002873 write_unlock(&em_tree->lock);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002874 if (ret != -EEXIST) {
2875 free_extent_map(em);
2876 break;
2877 }
2878 btrfs_drop_extent_cache(inode, start, end, 0);
2879 }
2880 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
Yan, Zheng0257bb82009-09-24 09:17:31 -04002881 return ret;
2882}
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002883
Yan, Zheng0257bb82009-09-24 09:17:31 -04002884static int relocate_file_extent_cluster(struct inode *inode,
2885 struct file_extent_cluster *cluster)
2886{
2887 u64 page_start;
2888 u64 page_end;
2889 u64 offset = BTRFS_I(inode)->index_cnt;
2890 unsigned long index;
2891 unsigned long last_index;
Yan, Zheng0257bb82009-09-24 09:17:31 -04002892 struct page *page;
2893 struct file_ra_state *ra;
2894 int nr = 0;
2895 int ret = 0;
2896
2897 if (!cluster->nr)
2898 return 0;
2899
2900 ra = kzalloc(sizeof(*ra), GFP_NOFS);
2901 if (!ra)
2902 return -ENOMEM;
2903
Yan, Zhengefa56462010-05-16 10:49:59 -04002904 ret = prealloc_file_extent_cluster(inode, cluster);
Yan, Zheng0257bb82009-09-24 09:17:31 -04002905 if (ret)
Yan, Zhengefa56462010-05-16 10:49:59 -04002906 goto out;
Yan, Zheng0257bb82009-09-24 09:17:31 -04002907
2908 file_ra_state_init(ra, inode->i_mapping);
2909
Yan, Zhengefa56462010-05-16 10:49:59 -04002910 ret = setup_extent_mapping(inode, cluster->start - offset,
2911 cluster->end - offset, cluster->start);
2912 if (ret)
2913 goto out;
2914
2915 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2916 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
Yan, Zheng0257bb82009-09-24 09:17:31 -04002917 while (index <= last_index) {
Yan, Zhengefa56462010-05-16 10:49:59 -04002918 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
2919 if (ret)
2920 goto out;
2921
Yan, Zheng0257bb82009-09-24 09:17:31 -04002922 page = find_lock_page(inode->i_mapping, index);
2923 if (!page) {
2924 page_cache_sync_readahead(inode->i_mapping,
2925 ra, NULL, index,
2926 last_index + 1 - index);
2927 page = grab_cache_page(inode->i_mapping, index);
2928 if (!page) {
Yan, Zhengefa56462010-05-16 10:49:59 -04002929 btrfs_delalloc_release_metadata(inode,
2930 PAGE_CACHE_SIZE);
Yan, Zheng0257bb82009-09-24 09:17:31 -04002931 ret = -ENOMEM;
Yan, Zhengefa56462010-05-16 10:49:59 -04002932 goto out;
Yan, Zheng0257bb82009-09-24 09:17:31 -04002933 }
2934 }
2935
2936 if (PageReadahead(page)) {
2937 page_cache_async_readahead(inode->i_mapping,
2938 ra, NULL, page, index,
2939 last_index + 1 - index);
2940 }
2941
2942 if (!PageUptodate(page)) {
2943 btrfs_readpage(NULL, page);
2944 lock_page(page);
2945 if (!PageUptodate(page)) {
2946 unlock_page(page);
2947 page_cache_release(page);
Yan, Zhengefa56462010-05-16 10:49:59 -04002948 btrfs_delalloc_release_metadata(inode,
2949 PAGE_CACHE_SIZE);
Yan, Zheng0257bb82009-09-24 09:17:31 -04002950 ret = -EIO;
Yan, Zhengefa56462010-05-16 10:49:59 -04002951 goto out;
Yan, Zheng0257bb82009-09-24 09:17:31 -04002952 }
2953 }
2954
2955 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2956 page_end = page_start + PAGE_CACHE_SIZE - 1;
2957
2958 lock_extent(&BTRFS_I(inode)->io_tree,
2959 page_start, page_end, GFP_NOFS);
2960
2961 set_page_extent_mapped(page);
2962
2963 if (nr < cluster->nr &&
2964 page_start + offset == cluster->boundary[nr]) {
2965 set_extent_bits(&BTRFS_I(inode)->io_tree,
2966 page_start, page_end,
2967 EXTENT_BOUNDARY, GFP_NOFS);
2968 nr++;
2969 }
Yan, Zheng0257bb82009-09-24 09:17:31 -04002970
Yan, Zhengefa56462010-05-16 10:49:59 -04002971 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
Yan, Zheng0257bb82009-09-24 09:17:31 -04002972 set_page_dirty(page);
Yan, Zheng0257bb82009-09-24 09:17:31 -04002973
2974 unlock_extent(&BTRFS_I(inode)->io_tree,
2975 page_start, page_end, GFP_NOFS);
2976 unlock_page(page);
2977 page_cache_release(page);
2978
2979 index++;
Yan, Zhengefa56462010-05-16 10:49:59 -04002980 balance_dirty_pages_ratelimited(inode->i_mapping);
2981 btrfs_throttle(BTRFS_I(inode)->root);
Yan, Zheng0257bb82009-09-24 09:17:31 -04002982 }
2983 WARN_ON(nr != cluster->nr);
Yan, Zhengefa56462010-05-16 10:49:59 -04002984out:
Yan, Zheng0257bb82009-09-24 09:17:31 -04002985 kfree(ra);
2986 return ret;
2987}
2988
2989static noinline_for_stack
2990int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
2991 struct file_extent_cluster *cluster)
2992{
2993 int ret;
2994
2995 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
2996 ret = relocate_file_extent_cluster(inode, cluster);
2997 if (ret)
2998 return ret;
2999 cluster->nr = 0;
3000 }
3001
3002 if (!cluster->nr)
3003 cluster->start = extent_key->objectid;
3004 else
3005 BUG_ON(cluster->nr >= MAX_EXTENTS);
3006 cluster->end = extent_key->objectid + extent_key->offset - 1;
3007 cluster->boundary[cluster->nr] = extent_key->objectid;
3008 cluster->nr++;
3009
3010 if (cluster->nr >= MAX_EXTENTS) {
3011 ret = relocate_file_extent_cluster(inode, cluster);
3012 if (ret)
3013 return ret;
3014 cluster->nr = 0;
3015 }
3016 return 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003017}
3018
3019#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3020static int get_ref_objectid_v0(struct reloc_control *rc,
3021 struct btrfs_path *path,
3022 struct btrfs_key *extent_key,
3023 u64 *ref_objectid, int *path_change)
3024{
3025 struct btrfs_key key;
3026 struct extent_buffer *leaf;
3027 struct btrfs_extent_ref_v0 *ref0;
3028 int ret;
3029 int slot;
3030
3031 leaf = path->nodes[0];
3032 slot = path->slots[0];
3033 while (1) {
3034 if (slot >= btrfs_header_nritems(leaf)) {
3035 ret = btrfs_next_leaf(rc->extent_root, path);
3036 if (ret < 0)
3037 return ret;
3038 BUG_ON(ret > 0);
3039 leaf = path->nodes[0];
3040 slot = path->slots[0];
3041 if (path_change)
3042 *path_change = 1;
3043 }
3044 btrfs_item_key_to_cpu(leaf, &key, slot);
3045 if (key.objectid != extent_key->objectid)
3046 return -ENOENT;
3047
3048 if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
3049 slot++;
3050 continue;
3051 }
3052 ref0 = btrfs_item_ptr(leaf, slot,
3053 struct btrfs_extent_ref_v0);
3054 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
3055 break;
3056 }
3057 return 0;
3058}
3059#endif
3060
3061/*
3062 * helper to add a tree block to the list.
3063 * the major work is getting the generation and level of the block
3064 */
3065static int add_tree_block(struct reloc_control *rc,
3066 struct btrfs_key *extent_key,
3067 struct btrfs_path *path,
3068 struct rb_root *blocks)
3069{
3070 struct extent_buffer *eb;
3071 struct btrfs_extent_item *ei;
3072 struct btrfs_tree_block_info *bi;
3073 struct tree_block *block;
3074 struct rb_node *rb_node;
3075 u32 item_size;
3076 int level = -1;
3077 int generation;
3078
3079 eb = path->nodes[0];
3080 item_size = btrfs_item_size_nr(eb, path->slots[0]);
3081
3082 if (item_size >= sizeof(*ei) + sizeof(*bi)) {
3083 ei = btrfs_item_ptr(eb, path->slots[0],
3084 struct btrfs_extent_item);
3085 bi = (struct btrfs_tree_block_info *)(ei + 1);
3086 generation = btrfs_extent_generation(eb, ei);
3087 level = btrfs_tree_block_level(eb, bi);
3088 } else {
3089#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3090 u64 ref_owner;
3091 int ret;
3092
3093 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3094 ret = get_ref_objectid_v0(rc, path, extent_key,
3095 &ref_owner, NULL);
3096 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
3097 level = (int)ref_owner;
3098 /* FIXME: get real generation */
3099 generation = 0;
3100#else
3101 BUG();
3102#endif
3103 }
3104
3105 btrfs_release_path(rc->extent_root, path);
3106
3107 BUG_ON(level == -1);
3108
3109 block = kmalloc(sizeof(*block), GFP_NOFS);
3110 if (!block)
3111 return -ENOMEM;
3112
3113 block->bytenr = extent_key->objectid;
3114 block->key.objectid = extent_key->offset;
3115 block->key.offset = generation;
3116 block->level = level;
3117 block->key_ready = 0;
3118
3119 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
3120 BUG_ON(rb_node);
3121
3122 return 0;
3123}
3124
3125/*
3126 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
3127 */
3128static int __add_tree_block(struct reloc_control *rc,
3129 u64 bytenr, u32 blocksize,
3130 struct rb_root *blocks)
3131{
3132 struct btrfs_path *path;
3133 struct btrfs_key key;
3134 int ret;
3135
3136 if (tree_block_processed(bytenr, blocksize, rc))
3137 return 0;
3138
3139 if (tree_search(blocks, bytenr))
3140 return 0;
3141
3142 path = btrfs_alloc_path();
3143 if (!path)
3144 return -ENOMEM;
3145
3146 key.objectid = bytenr;
3147 key.type = BTRFS_EXTENT_ITEM_KEY;
3148 key.offset = blocksize;
3149
3150 path->search_commit_root = 1;
3151 path->skip_locking = 1;
3152 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3153 if (ret < 0)
3154 goto out;
3155 BUG_ON(ret);
3156
3157 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3158 ret = add_tree_block(rc, &key, path, blocks);
3159out:
3160 btrfs_free_path(path);
3161 return ret;
3162}
3163
3164/*
3165 * helper to check if the block use full backrefs for pointers in it
3166 */
3167static int block_use_full_backref(struct reloc_control *rc,
3168 struct extent_buffer *eb)
3169{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003170 u64 flags;
3171 int ret;
3172
3173 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
3174 btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
3175 return 1;
3176
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003177 ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
3178 eb->start, eb->len, NULL, &flags);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003179 BUG_ON(ret);
3180
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003181 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
3182 ret = 1;
3183 else
3184 ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003185 return ret;
3186}
3187
3188/*
3189 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3190 * this function scans fs tree to find blocks reference the data extent
3191 */
3192static int find_data_references(struct reloc_control *rc,
3193 struct btrfs_key *extent_key,
3194 struct extent_buffer *leaf,
3195 struct btrfs_extent_data_ref *ref,
3196 struct rb_root *blocks)
3197{
3198 struct btrfs_path *path;
3199 struct tree_block *block;
3200 struct btrfs_root *root;
3201 struct btrfs_file_extent_item *fi;
3202 struct rb_node *rb_node;
3203 struct btrfs_key key;
3204 u64 ref_root;
3205 u64 ref_objectid;
3206 u64 ref_offset;
3207 u32 ref_count;
3208 u32 nritems;
3209 int err = 0;
3210 int added = 0;
3211 int counted;
3212 int ret;
3213
3214 path = btrfs_alloc_path();
3215 if (!path)
3216 return -ENOMEM;
3217
3218 ref_root = btrfs_extent_data_ref_root(leaf, ref);
3219 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3220 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3221 ref_count = btrfs_extent_data_ref_count(leaf, ref);
3222
3223 root = read_fs_root(rc->extent_root->fs_info, ref_root);
3224 if (IS_ERR(root)) {
3225 err = PTR_ERR(root);
3226 goto out;
3227 }
3228
3229 key.objectid = ref_objectid;
3230 key.offset = ref_offset;
3231 key.type = BTRFS_EXTENT_DATA_KEY;
3232
3233 path->search_commit_root = 1;
3234 path->skip_locking = 1;
3235 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3236 if (ret < 0) {
3237 err = ret;
3238 goto out;
3239 }
3240
3241 leaf = path->nodes[0];
3242 nritems = btrfs_header_nritems(leaf);
3243 /*
3244 * the references in tree blocks that use full backrefs
3245 * are not counted in
3246 */
3247 if (block_use_full_backref(rc, leaf))
3248 counted = 0;
3249 else
3250 counted = 1;
3251 rb_node = tree_search(blocks, leaf->start);
3252 if (rb_node) {
3253 if (counted)
3254 added = 1;
3255 else
3256 path->slots[0] = nritems;
3257 }
3258
3259 while (ref_count > 0) {
3260 while (path->slots[0] >= nritems) {
3261 ret = btrfs_next_leaf(root, path);
3262 if (ret < 0) {
3263 err = ret;
3264 goto out;
3265 }
3266 if (ret > 0) {
3267 WARN_ON(1);
3268 goto out;
3269 }
3270
3271 leaf = path->nodes[0];
3272 nritems = btrfs_header_nritems(leaf);
3273 added = 0;
3274
3275 if (block_use_full_backref(rc, leaf))
3276 counted = 0;
3277 else
3278 counted = 1;
3279 rb_node = tree_search(blocks, leaf->start);
3280 if (rb_node) {
3281 if (counted)
3282 added = 1;
3283 else
3284 path->slots[0] = nritems;
3285 }
3286 }
3287
3288 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3289 if (key.objectid != ref_objectid ||
3290 key.type != BTRFS_EXTENT_DATA_KEY) {
3291 WARN_ON(1);
3292 break;
3293 }
3294
3295 fi = btrfs_item_ptr(leaf, path->slots[0],
3296 struct btrfs_file_extent_item);
3297
3298 if (btrfs_file_extent_type(leaf, fi) ==
3299 BTRFS_FILE_EXTENT_INLINE)
3300 goto next;
3301
3302 if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3303 extent_key->objectid)
3304 goto next;
3305
3306 key.offset -= btrfs_file_extent_offset(leaf, fi);
3307 if (key.offset != ref_offset)
3308 goto next;
3309
3310 if (counted)
3311 ref_count--;
3312 if (added)
3313 goto next;
3314
3315 if (!tree_block_processed(leaf->start, leaf->len, rc)) {
3316 block = kmalloc(sizeof(*block), GFP_NOFS);
3317 if (!block) {
3318 err = -ENOMEM;
3319 break;
3320 }
3321 block->bytenr = leaf->start;
3322 btrfs_item_key_to_cpu(leaf, &block->key, 0);
3323 block->level = 0;
3324 block->key_ready = 1;
3325 rb_node = tree_insert(blocks, block->bytenr,
3326 &block->rb_node);
3327 BUG_ON(rb_node);
3328 }
3329 if (counted)
3330 added = 1;
3331 else
3332 path->slots[0] = nritems;
3333next:
3334 path->slots[0]++;
3335
3336 }
3337out:
3338 btrfs_free_path(path);
3339 return err;
3340}
3341
3342/*
3343 * hepler to find all tree blocks that reference a given data extent
3344 */
3345static noinline_for_stack
3346int add_data_references(struct reloc_control *rc,
3347 struct btrfs_key *extent_key,
3348 struct btrfs_path *path,
3349 struct rb_root *blocks)
3350{
3351 struct btrfs_key key;
3352 struct extent_buffer *eb;
3353 struct btrfs_extent_data_ref *dref;
3354 struct btrfs_extent_inline_ref *iref;
3355 unsigned long ptr;
3356 unsigned long end;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003357 u32 blocksize = btrfs_level_size(rc->extent_root, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003358 int ret;
3359 int err = 0;
3360
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003361 eb = path->nodes[0];
3362 ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3363 end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3364#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3365 if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
3366 ptr = end;
3367 else
3368#endif
3369 ptr += sizeof(struct btrfs_extent_item);
3370
3371 while (ptr < end) {
3372 iref = (struct btrfs_extent_inline_ref *)ptr;
3373 key.type = btrfs_extent_inline_ref_type(eb, iref);
3374 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3375 key.offset = btrfs_extent_inline_ref_offset(eb, iref);
3376 ret = __add_tree_block(rc, key.offset, blocksize,
3377 blocks);
3378 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3379 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3380 ret = find_data_references(rc, extent_key,
3381 eb, dref, blocks);
3382 } else {
3383 BUG();
3384 }
3385 ptr += btrfs_extent_inline_ref_size(key.type);
3386 }
3387 WARN_ON(ptr > end);
3388
3389 while (1) {
3390 cond_resched();
3391 eb = path->nodes[0];
3392 if (path->slots[0] >= btrfs_header_nritems(eb)) {
3393 ret = btrfs_next_leaf(rc->extent_root, path);
3394 if (ret < 0) {
3395 err = ret;
3396 break;
3397 }
3398 if (ret > 0)
3399 break;
3400 eb = path->nodes[0];
3401 }
3402
3403 btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
3404 if (key.objectid != extent_key->objectid)
3405 break;
3406
3407#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3408 if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
3409 key.type == BTRFS_EXTENT_REF_V0_KEY) {
3410#else
3411 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
3412 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3413#endif
3414 ret = __add_tree_block(rc, key.offset, blocksize,
3415 blocks);
3416 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3417 dref = btrfs_item_ptr(eb, path->slots[0],
3418 struct btrfs_extent_data_ref);
3419 ret = find_data_references(rc, extent_key,
3420 eb, dref, blocks);
3421 } else {
3422 ret = 0;
3423 }
3424 if (ret) {
3425 err = ret;
3426 break;
3427 }
3428 path->slots[0]++;
3429 }
3430 btrfs_release_path(rc->extent_root, path);
3431 if (err)
3432 free_block_list(blocks);
3433 return err;
3434}
3435
3436/*
3437 * hepler to find next unprocessed extent
3438 */
3439static noinline_for_stack
3440int find_next_extent(struct btrfs_trans_handle *trans,
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003441 struct reloc_control *rc, struct btrfs_path *path,
3442 struct btrfs_key *extent_key)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003443{
3444 struct btrfs_key key;
3445 struct extent_buffer *leaf;
3446 u64 start, end, last;
3447 int ret;
3448
3449 last = rc->block_group->key.objectid + rc->block_group->key.offset;
3450 while (1) {
3451 cond_resched();
3452 if (rc->search_start >= last) {
3453 ret = 1;
3454 break;
3455 }
3456
3457 key.objectid = rc->search_start;
3458 key.type = BTRFS_EXTENT_ITEM_KEY;
3459 key.offset = 0;
3460
3461 path->search_commit_root = 1;
3462 path->skip_locking = 1;
3463 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3464 0, 0);
3465 if (ret < 0)
3466 break;
3467next:
3468 leaf = path->nodes[0];
3469 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3470 ret = btrfs_next_leaf(rc->extent_root, path);
3471 if (ret != 0)
3472 break;
3473 leaf = path->nodes[0];
3474 }
3475
3476 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3477 if (key.objectid >= last) {
3478 ret = 1;
3479 break;
3480 }
3481
3482 if (key.type != BTRFS_EXTENT_ITEM_KEY ||
3483 key.objectid + key.offset <= rc->search_start) {
3484 path->slots[0]++;
3485 goto next;
3486 }
3487
3488 ret = find_first_extent_bit(&rc->processed_blocks,
3489 key.objectid, &start, &end,
3490 EXTENT_DIRTY);
3491
3492 if (ret == 0 && start <= key.objectid) {
3493 btrfs_release_path(rc->extent_root, path);
3494 rc->search_start = end + 1;
3495 } else {
3496 rc->search_start = key.objectid + key.offset;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003497 memcpy(extent_key, &key, sizeof(key));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003498 return 0;
3499 }
3500 }
3501 btrfs_release_path(rc->extent_root, path);
3502 return ret;
3503}
3504
3505static void set_reloc_control(struct reloc_control *rc)
3506{
3507 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3508 mutex_lock(&fs_info->trans_mutex);
3509 fs_info->reloc_ctl = rc;
3510 mutex_unlock(&fs_info->trans_mutex);
3511}
3512
3513static void unset_reloc_control(struct reloc_control *rc)
3514{
3515 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3516 mutex_lock(&fs_info->trans_mutex);
3517 fs_info->reloc_ctl = NULL;
3518 mutex_unlock(&fs_info->trans_mutex);
3519}
3520
3521static int check_extent_flags(u64 flags)
3522{
3523 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3524 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3525 return 1;
3526 if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
3527 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3528 return 1;
3529 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3530 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
3531 return 1;
3532 return 0;
3533}
3534
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003535static noinline_for_stack
3536int prepare_to_relocate(struct reloc_control *rc)
3537{
3538 struct btrfs_trans_handle *trans;
3539 int ret;
3540
3541 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root);
3542 if (!rc->block_rsv)
3543 return -ENOMEM;
3544
3545 /*
3546 * reserve some space for creating reloc trees.
3547 * btrfs_init_reloc_root will use them when there
3548 * is no reservation in transaction handle.
3549 */
3550 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
Josef Bacik8bb8ab22010-10-15 16:52:49 -04003551 rc->extent_root->nodesize * 256);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003552 if (ret)
3553 return ret;
3554
3555 rc->block_rsv->refill_used = 1;
3556 btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv);
3557
3558 memset(&rc->cluster, 0, sizeof(rc->cluster));
3559 rc->search_start = rc->block_group->key.objectid;
3560 rc->extents_found = 0;
3561 rc->nodes_relocated = 0;
3562 rc->merging_rsv_size = 0;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003563
3564 rc->create_reloc_tree = 1;
3565 set_reloc_control(rc);
3566
3567 trans = btrfs_join_transaction(rc->extent_root, 1);
3568 btrfs_commit_transaction(trans, rc->extent_root);
3569 return 0;
3570}
Yan, Zheng76dda932009-09-21 16:00:26 -04003571
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003572static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3573{
3574 struct rb_root blocks = RB_ROOT;
3575 struct btrfs_key key;
3576 struct btrfs_trans_handle *trans = NULL;
3577 struct btrfs_path *path;
3578 struct btrfs_extent_item *ei;
3579 unsigned long nr;
3580 u64 flags;
3581 u32 item_size;
3582 int ret;
3583 int err = 0;
3584
3585 path = btrfs_alloc_path();
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003586 if (!path)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003587 return -ENOMEM;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003588
3589 ret = prepare_to_relocate(rc);
3590 if (ret) {
3591 err = ret;
3592 goto out_free;
Jiri Slaby2423fdf2010-01-06 16:57:22 +00003593 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003594
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003595 while (1) {
Yan, Zhenga22285a2010-05-16 10:48:46 -04003596 trans = btrfs_start_transaction(rc->extent_root, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003597
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003598 if (update_backref_cache(trans, &rc->backref_cache)) {
3599 btrfs_end_transaction(trans, rc->extent_root);
3600 continue;
3601 }
3602
3603 ret = find_next_extent(trans, rc, path, &key);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003604 if (ret < 0)
3605 err = ret;
3606 if (ret != 0)
3607 break;
3608
3609 rc->extents_found++;
3610
3611 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3612 struct btrfs_extent_item);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003613 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003614 if (item_size >= sizeof(*ei)) {
3615 flags = btrfs_extent_flags(path->nodes[0], ei);
3616 ret = check_extent_flags(flags);
3617 BUG_ON(ret);
3618
3619 } else {
3620#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3621 u64 ref_owner;
3622 int path_change = 0;
3623
3624 BUG_ON(item_size !=
3625 sizeof(struct btrfs_extent_item_v0));
3626 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
3627 &path_change);
3628 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
3629 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
3630 else
3631 flags = BTRFS_EXTENT_FLAG_DATA;
3632
3633 if (path_change) {
3634 btrfs_release_path(rc->extent_root, path);
3635
3636 path->search_commit_root = 1;
3637 path->skip_locking = 1;
3638 ret = btrfs_search_slot(NULL, rc->extent_root,
3639 &key, path, 0, 0);
3640 if (ret < 0) {
3641 err = ret;
3642 break;
3643 }
3644 BUG_ON(ret > 0);
3645 }
3646#else
3647 BUG();
3648#endif
3649 }
3650
3651 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3652 ret = add_tree_block(rc, &key, path, &blocks);
3653 } else if (rc->stage == UPDATE_DATA_PTRS &&
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003654 (flags & BTRFS_EXTENT_FLAG_DATA)) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003655 ret = add_data_references(rc, &key, path, &blocks);
3656 } else {
3657 btrfs_release_path(rc->extent_root, path);
3658 ret = 0;
3659 }
3660 if (ret < 0) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003661 err = ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003662 break;
3663 }
3664
3665 if (!RB_EMPTY_ROOT(&blocks)) {
3666 ret = relocate_tree_blocks(trans, rc, &blocks);
3667 if (ret < 0) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003668 if (ret != -EAGAIN) {
3669 err = ret;
3670 break;
3671 }
3672 rc->extents_found--;
3673 rc->search_start = key.objectid;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003674 }
3675 }
3676
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003677 ret = btrfs_block_rsv_check(trans, rc->extent_root,
3678 rc->block_rsv, 0, 5);
3679 if (ret < 0) {
3680 if (ret != -EAGAIN) {
3681 err = ret;
3682 WARN_ON(1);
3683 break;
3684 }
3685 rc->commit_transaction = 1;
3686 }
3687
3688 if (rc->commit_transaction) {
3689 rc->commit_transaction = 0;
3690 ret = btrfs_commit_transaction(trans, rc->extent_root);
3691 BUG_ON(ret);
3692 } else {
3693 nr = trans->blocks_used;
3694 btrfs_end_transaction_throttle(trans, rc->extent_root);
3695 btrfs_btree_balance_dirty(rc->extent_root, nr);
3696 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003697 trans = NULL;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003698
3699 if (rc->stage == MOVE_DATA_EXTENTS &&
3700 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3701 rc->found_file_extent = 1;
Yan, Zheng0257bb82009-09-24 09:17:31 -04003702 ret = relocate_data_extent(rc->data_inode,
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003703 &key, &rc->cluster);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003704 if (ret < 0) {
3705 err = ret;
3706 break;
3707 }
3708 }
3709 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003710
3711 btrfs_release_path(rc->extent_root, path);
3712 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3713 GFP_NOFS);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003714
3715 if (trans) {
3716 nr = trans->blocks_used;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003717 btrfs_end_transaction_throttle(trans, rc->extent_root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003718 btrfs_btree_balance_dirty(rc->extent_root, nr);
3719 }
3720
Yan, Zheng0257bb82009-09-24 09:17:31 -04003721 if (!err) {
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003722 ret = relocate_file_extent_cluster(rc->data_inode,
3723 &rc->cluster);
Yan, Zheng0257bb82009-09-24 09:17:31 -04003724 if (ret < 0)
3725 err = ret;
3726 }
3727
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003728 rc->create_reloc_tree = 0;
3729 set_reloc_control(rc);
Yan, Zheng0257bb82009-09-24 09:17:31 -04003730
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003731 backref_cache_cleanup(&rc->backref_cache);
3732 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003733
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003734 err = prepare_to_merge(rc, err);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003735
3736 merge_reloc_roots(rc);
3737
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003738 rc->merge_reloc_tree = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003739 unset_reloc_control(rc);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003740 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003741
3742 /* get rid of pinned extents */
Yan, Zhenga22285a2010-05-16 10:48:46 -04003743 trans = btrfs_join_transaction(rc->extent_root, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003744 btrfs_commit_transaction(trans, rc->extent_root);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003745out_free:
3746 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
3747 btrfs_free_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003748 return err;
3749}
3750
3751static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
Yan, Zheng0257bb82009-09-24 09:17:31 -04003752 struct btrfs_root *root, u64 objectid)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003753{
3754 struct btrfs_path *path;
3755 struct btrfs_inode_item *item;
3756 struct extent_buffer *leaf;
3757 int ret;
3758
3759 path = btrfs_alloc_path();
3760 if (!path)
3761 return -ENOMEM;
3762
3763 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3764 if (ret)
3765 goto out;
3766
3767 leaf = path->nodes[0];
3768 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3769 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3770 btrfs_set_inode_generation(leaf, item, 1);
Yan, Zheng0257bb82009-09-24 09:17:31 -04003771 btrfs_set_inode_size(leaf, item, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003772 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003773 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
3774 BTRFS_INODE_PREALLOC);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003775 btrfs_mark_buffer_dirty(leaf);
3776 btrfs_release_path(root, path);
3777out:
3778 btrfs_free_path(path);
3779 return ret;
3780}
3781
3782/*
3783 * helper to create inode for data relocation.
3784 * the inode is in data relocation tree and its link count is 0
3785 */
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003786static noinline_for_stack
3787struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3788 struct btrfs_block_group_cache *group)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003789{
3790 struct inode *inode = NULL;
3791 struct btrfs_trans_handle *trans;
3792 struct btrfs_root *root;
3793 struct btrfs_key key;
3794 unsigned long nr;
3795 u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
3796 int err = 0;
3797
3798 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
3799 if (IS_ERR(root))
3800 return ERR_CAST(root);
3801
Yan, Zhenga22285a2010-05-16 10:48:46 -04003802 trans = btrfs_start_transaction(root, 6);
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003803 if (IS_ERR(trans))
3804 return ERR_CAST(trans);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003805
3806 err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
3807 if (err)
3808 goto out;
3809
Yan, Zheng0257bb82009-09-24 09:17:31 -04003810 err = __insert_orphan_inode(trans, root, objectid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003811 BUG_ON(err);
3812
3813 key.objectid = objectid;
3814 key.type = BTRFS_INODE_ITEM_KEY;
3815 key.offset = 0;
Josef Bacik73f73412009-12-04 17:38:27 +00003816 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003817 BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3818 BTRFS_I(inode)->index_cnt = group->key.objectid;
3819
3820 err = btrfs_orphan_add(trans, inode);
3821out:
3822 nr = trans->blocks_used;
3823 btrfs_end_transaction(trans, root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003824 btrfs_btree_balance_dirty(root, nr);
3825 if (err) {
3826 if (inode)
3827 iput(inode);
3828 inode = ERR_PTR(err);
3829 }
3830 return inode;
3831}
3832
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003833static struct reloc_control *alloc_reloc_control(void)
3834{
3835 struct reloc_control *rc;
3836
3837 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3838 if (!rc)
3839 return NULL;
3840
3841 INIT_LIST_HEAD(&rc->reloc_roots);
3842 backref_cache_init(&rc->backref_cache);
3843 mapping_tree_init(&rc->reloc_root_tree);
3844 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
3845 return rc;
3846}
3847
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003848/*
3849 * function to relocate all extents in a block group.
3850 */
3851int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3852{
3853 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3854 struct reloc_control *rc;
3855 int ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -04003856 int rw = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003857 int err = 0;
3858
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003859 rc = alloc_reloc_control();
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003860 if (!rc)
3861 return -ENOMEM;
3862
Yan, Zhengf0486c62010-05-16 10:46:25 -04003863 rc->extent_root = extent_root;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003864
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003865 rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
3866 BUG_ON(!rc->block_group);
3867
Yan, Zhengf0486c62010-05-16 10:46:25 -04003868 if (!rc->block_group->ro) {
3869 ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
3870 if (ret) {
3871 err = ret;
3872 goto out;
3873 }
3874 rw = 1;
3875 }
3876
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003877 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3878 if (IS_ERR(rc->data_inode)) {
3879 err = PTR_ERR(rc->data_inode);
3880 rc->data_inode = NULL;
3881 goto out;
3882 }
3883
3884 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
3885 (unsigned long long)rc->block_group->key.objectid,
3886 (unsigned long long)rc->block_group->flags);
3887
Yan, Zheng24bbcf02009-11-12 09:36:34 +00003888 btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3889 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003890
3891 while (1) {
Yan, Zheng76dda932009-09-21 16:00:26 -04003892 mutex_lock(&fs_info->cleaner_mutex);
3893
3894 btrfs_clean_old_snapshots(fs_info->tree_root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003895 ret = relocate_block_group(rc);
Yan, Zheng76dda932009-09-21 16:00:26 -04003896
3897 mutex_unlock(&fs_info->cleaner_mutex);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003898 if (ret < 0) {
3899 err = ret;
Yan, Zheng3fd0a552010-05-16 10:49:59 -04003900 goto out;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003901 }
3902
3903 if (rc->extents_found == 0)
3904 break;
3905
3906 printk(KERN_INFO "btrfs: found %llu extents\n",
3907 (unsigned long long)rc->extents_found);
3908
3909 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
3910 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
3911 invalidate_mapping_pages(rc->data_inode->i_mapping,
3912 0, -1);
3913 rc->stage = UPDATE_DATA_PTRS;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003914 }
3915 }
3916
Yan, Zheng0257bb82009-09-24 09:17:31 -04003917 filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
3918 rc->block_group->key.objectid,
3919 rc->block_group->key.objectid +
3920 rc->block_group->key.offset - 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003921
3922 WARN_ON(rc->block_group->pinned > 0);
3923 WARN_ON(rc->block_group->reserved > 0);
3924 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
3925out:
Yan, Zhengf0486c62010-05-16 10:46:25 -04003926 if (err && rw)
3927 btrfs_set_block_group_rw(extent_root, rc->block_group);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003928 iput(rc->data_inode);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003929 btrfs_put_block_group(rc->block_group);
3930 kfree(rc);
3931 return err;
3932}
3933
Yan, Zheng76dda932009-09-21 16:00:26 -04003934static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
3935{
3936 struct btrfs_trans_handle *trans;
3937 int ret;
3938
Yan, Zhenga22285a2010-05-16 10:48:46 -04003939 trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
Yan, Zheng76dda932009-09-21 16:00:26 -04003940
3941 memset(&root->root_item.drop_progress, 0,
3942 sizeof(root->root_item.drop_progress));
3943 root->root_item.drop_level = 0;
3944 btrfs_set_root_refs(&root->root_item, 0);
3945 ret = btrfs_update_root(trans, root->fs_info->tree_root,
3946 &root->root_key, &root->root_item);
3947 BUG_ON(ret);
3948
3949 ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
3950 BUG_ON(ret);
3951 return 0;
3952}
3953
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003954/*
3955 * recover relocation interrupted by system crash.
3956 *
3957 * this function resumes merging reloc trees with corresponding fs trees.
3958 * this is important for keeping the sharing of tree blocks
3959 */
3960int btrfs_recover_relocation(struct btrfs_root *root)
3961{
3962 LIST_HEAD(reloc_roots);
3963 struct btrfs_key key;
3964 struct btrfs_root *fs_root;
3965 struct btrfs_root *reloc_root;
3966 struct btrfs_path *path;
3967 struct extent_buffer *leaf;
3968 struct reloc_control *rc = NULL;
3969 struct btrfs_trans_handle *trans;
3970 int ret;
3971 int err = 0;
3972
3973 path = btrfs_alloc_path();
3974 if (!path)
3975 return -ENOMEM;
3976
3977 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
3978 key.type = BTRFS_ROOT_ITEM_KEY;
3979 key.offset = (u64)-1;
3980
3981 while (1) {
3982 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
3983 path, 0, 0);
3984 if (ret < 0) {
3985 err = ret;
3986 goto out;
3987 }
3988 if (ret > 0) {
3989 if (path->slots[0] == 0)
3990 break;
3991 path->slots[0]--;
3992 }
3993 leaf = path->nodes[0];
3994 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3995 btrfs_release_path(root->fs_info->tree_root, path);
3996
3997 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
3998 key.type != BTRFS_ROOT_ITEM_KEY)
3999 break;
4000
4001 reloc_root = btrfs_read_fs_root_no_radix(root, &key);
4002 if (IS_ERR(reloc_root)) {
4003 err = PTR_ERR(reloc_root);
4004 goto out;
4005 }
4006
4007 list_add(&reloc_root->root_list, &reloc_roots);
4008
4009 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4010 fs_root = read_fs_root(root->fs_info,
4011 reloc_root->root_key.offset);
4012 if (IS_ERR(fs_root)) {
Yan, Zheng76dda932009-09-21 16:00:26 -04004013 ret = PTR_ERR(fs_root);
4014 if (ret != -ENOENT) {
4015 err = ret;
4016 goto out;
4017 }
4018 mark_garbage_root(reloc_root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004019 }
4020 }
4021
4022 if (key.offset == 0)
4023 break;
4024
4025 key.offset--;
4026 }
4027 btrfs_release_path(root->fs_info->tree_root, path);
4028
4029 if (list_empty(&reloc_roots))
4030 goto out;
4031
Yan, Zheng3fd0a552010-05-16 10:49:59 -04004032 rc = alloc_reloc_control();
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004033 if (!rc) {
4034 err = -ENOMEM;
4035 goto out;
4036 }
4037
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004038 rc->extent_root = root->fs_info->extent_root;
4039
4040 set_reloc_control(rc);
4041
Yan, Zheng3fd0a552010-05-16 10:49:59 -04004042 trans = btrfs_join_transaction(rc->extent_root, 1);
4043
4044 rc->merge_reloc_tree = 1;
4045
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004046 while (!list_empty(&reloc_roots)) {
4047 reloc_root = list_entry(reloc_roots.next,
4048 struct btrfs_root, root_list);
4049 list_del(&reloc_root->root_list);
4050
4051 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
4052 list_add_tail(&reloc_root->root_list,
4053 &rc->reloc_roots);
4054 continue;
4055 }
4056
4057 fs_root = read_fs_root(root->fs_info,
4058 reloc_root->root_key.offset);
4059 BUG_ON(IS_ERR(fs_root));
4060
4061 __add_reloc_root(reloc_root);
4062 fs_root->reloc_root = reloc_root;
4063 }
4064
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004065 btrfs_commit_transaction(trans, rc->extent_root);
4066
4067 merge_reloc_roots(rc);
4068
4069 unset_reloc_control(rc);
4070
Yan, Zheng3fd0a552010-05-16 10:49:59 -04004071 trans = btrfs_join_transaction(rc->extent_root, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004072 btrfs_commit_transaction(trans, rc->extent_root);
4073out:
Yan, Zheng3fd0a552010-05-16 10:49:59 -04004074 kfree(rc);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004075 while (!list_empty(&reloc_roots)) {
4076 reloc_root = list_entry(reloc_roots.next,
4077 struct btrfs_root, root_list);
4078 list_del(&reloc_root->root_list);
4079 free_extent_buffer(reloc_root->node);
4080 free_extent_buffer(reloc_root->commit_root);
4081 kfree(reloc_root);
4082 }
4083 btrfs_free_path(path);
4084
4085 if (err == 0) {
4086 /* cleanup orphan inode in data relocation tree */
4087 fs_root = read_fs_root(root->fs_info,
4088 BTRFS_DATA_RELOC_TREE_OBJECTID);
4089 if (IS_ERR(fs_root))
4090 err = PTR_ERR(fs_root);
Miao Xied7ce5842010-02-02 08:46:44 +00004091 else
4092 btrfs_orphan_cleanup(fs_root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004093 }
4094 return err;
4095}
4096
4097/*
4098 * helper to add ordered checksum for data relocation.
4099 *
4100 * cloning checksum properly handles the nodatasum extents.
4101 * it also saves CPU time to re-calculate the checksum.
4102 */
4103int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4104{
4105 struct btrfs_ordered_sum *sums;
4106 struct btrfs_sector_sum *sector_sum;
4107 struct btrfs_ordered_extent *ordered;
4108 struct btrfs_root *root = BTRFS_I(inode)->root;
4109 size_t offset;
4110 int ret;
4111 u64 disk_bytenr;
4112 LIST_HEAD(list);
4113
4114 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4115 BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
4116
4117 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
4118 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
4119 disk_bytenr + len - 1, &list);
4120
4121 while (!list_empty(&list)) {
4122 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4123 list_del_init(&sums->list);
4124
4125 sector_sum = sums->sums;
4126 sums->bytenr = ordered->start;
4127
4128 offset = 0;
4129 while (offset < sums->len) {
4130 sector_sum->bytenr += ordered->start - disk_bytenr;
4131 sector_sum++;
4132 offset += root->sectorsize;
4133 }
4134
4135 btrfs_add_ordered_sum(inode, ordered, sums);
4136 }
4137 btrfs_put_ordered_extent(ordered);
4138 return 0;
4139}
Yan, Zheng3fd0a552010-05-16 10:49:59 -04004140
4141void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4142 struct btrfs_root *root, struct extent_buffer *buf,
4143 struct extent_buffer *cow)
4144{
4145 struct reloc_control *rc;
4146 struct backref_node *node;
4147 int first_cow = 0;
4148 int level;
4149 int ret;
4150
4151 rc = root->fs_info->reloc_ctl;
4152 if (!rc)
4153 return;
4154
4155 BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
4156 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
4157
4158 level = btrfs_header_level(buf);
4159 if (btrfs_header_generation(buf) <=
4160 btrfs_root_last_snapshot(&root->root_item))
4161 first_cow = 1;
4162
4163 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
4164 rc->create_reloc_tree) {
4165 WARN_ON(!first_cow && level == 0);
4166
4167 node = rc->backref_cache.path[level];
4168 BUG_ON(node->bytenr != buf->start &&
4169 node->new_bytenr != buf->start);
4170
4171 drop_node_buffer(node);
4172 extent_buffer_get(cow);
4173 node->eb = cow;
4174 node->new_bytenr = cow->start;
4175
4176 if (!node->pending) {
4177 list_move_tail(&node->list,
4178 &rc->backref_cache.pending[level]);
4179 node->pending = 1;
4180 }
4181
4182 if (first_cow)
4183 __mark_block_processed(rc, node);
4184
4185 if (first_cow && level > 0)
4186 rc->nodes_relocated += buf->len;
4187 }
4188
4189 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
4190 ret = replace_file_extents(trans, rc, root, cow);
4191 BUG_ON(ret);
4192 }
4193}
4194
4195/*
4196 * called before creating snapshot. it calculates metadata reservation
4197 * requried for relocating tree blocks in the snapshot
4198 */
4199void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
4200 struct btrfs_pending_snapshot *pending,
4201 u64 *bytes_to_reserve)
4202{
4203 struct btrfs_root *root;
4204 struct reloc_control *rc;
4205
4206 root = pending->root;
4207 if (!root->reloc_root)
4208 return;
4209
4210 rc = root->fs_info->reloc_ctl;
4211 if (!rc->merge_reloc_tree)
4212 return;
4213
4214 root = root->reloc_root;
4215 BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4216 /*
4217 * relocation is in the stage of merging trees. the space
4218 * used by merging a reloc tree is twice the size of
4219 * relocated tree nodes in the worst case. half for cowing
4220 * the reloc tree, half for cowing the fs tree. the space
4221 * used by cowing the reloc tree will be freed after the
4222 * tree is dropped. if we create snapshot, cowing the fs
4223 * tree may use more space than it frees. so we need
4224 * reserve extra space.
4225 */
4226 *bytes_to_reserve += rc->nodes_relocated;
4227}
4228
4229/*
4230 * called after snapshot is created. migrate block reservation
4231 * and create reloc root for the newly created snapshot
4232 */
4233void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4234 struct btrfs_pending_snapshot *pending)
4235{
4236 struct btrfs_root *root = pending->root;
4237 struct btrfs_root *reloc_root;
4238 struct btrfs_root *new_root;
4239 struct reloc_control *rc;
4240 int ret;
4241
4242 if (!root->reloc_root)
4243 return;
4244
4245 rc = root->fs_info->reloc_ctl;
4246 rc->merging_rsv_size += rc->nodes_relocated;
4247
4248 if (rc->merge_reloc_tree) {
4249 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4250 rc->block_rsv,
4251 rc->nodes_relocated);
4252 BUG_ON(ret);
4253 }
4254
4255 new_root = pending->snap;
4256 reloc_root = create_reloc_root(trans, root->reloc_root,
4257 new_root->root_key.objectid);
4258
4259 __add_reloc_root(reloc_root);
4260 new_root->reloc_root = reloc_root;
4261
4262 if (rc->create_reloc_tree) {
4263 ret = clone_backref_node(trans, rc, root, reloc_root);
4264 BUG_ON(ret);
4265 }
4266}