blob: c91309dea8daf3f8daec0952c837a0a922bb26c7 [file] [log] [blame]
Chris Masone02119d2008-09-05 16:13:11 -04001/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090020#include <linux/slab.h>
Miao Xiec6adc9c2013-05-28 10:05:39 +000021#include <linux/blkdev.h>
Josef Bacik5dc562c2012-08-17 13:14:17 -040022#include <linux/list_sort.h>
Chris Masone02119d2008-09-05 16:13:11 -040023#include "ctree.h"
24#include "transaction.h"
25#include "disk-io.h"
26#include "locking.h"
27#include "print-tree.h"
Mark Fashehf1863732012-08-08 11:32:27 -070028#include "backref.h"
Chris Masone02119d2008-09-05 16:13:11 -040029#include "compat.h"
Christoph Hellwigb2950862008-12-02 09:54:17 -050030#include "tree-log.h"
Mark Fashehf1863732012-08-08 11:32:27 -070031#include "hash.h"
Chris Masone02119d2008-09-05 16:13:11 -040032
/*
 * Magic values for the inode_only argument of btrfs_log_inode():
 *
 * LOG_INODE_ALL    - log everything
 * LOG_INODE_EXISTS - log just enough to recreate the inode during
 *                    log replay
 */
#define LOG_INODE_ALL 0
#define LOG_INODE_EXISTS 1
41
/*
 * directory trouble cases
 *
 * 1) on rename or unlink, if the inode being unlinked isn't in the fsync
 * log, we must force a full commit before doing an fsync of the directory
 * where the unlink was done.
 * ---> record transid of last unlink/rename per directory
 *
 * mkdir foo/some_dir
 * normal commit
 * rename foo/some_dir foo2/some_dir
 * mkdir foo/some_dir
 * fsync foo/some_dir/some_file
 *
 * The fsync above will unlink the original some_dir without recording
 * it in its new location (foo2).  After a crash, some_dir will be gone
 * unless the fsync of some_file forces a full commit
 *
 * 2) we must log any new names for any file or dir that is in the fsync
 * log. ---> check inode while renaming/linking.
 *
 * 2a) we must log any new names for any file or dir during rename
 * when the directory they are being removed from was logged.
 * ---> check inode and old parent dir during rename
 *
 * 2a is actually the more important variant.  Without the extra logging
 * a crash might unlink the old name without recreating the new one
 *
 * 3) after a crash, we must go through any directories with a link count
 * of zero and redo the rm -rf
 *
 * mkdir f1/foo
 * normal commit
 * rm -rf f1/foo
 * fsync(f1)
 *
 * The directory f1/foo was fully removed from the FS, but fsync was never
 * called on f1/foo, only its parent dir (f1).  After a crash the rm -rf
 * must be replayed.  This must be able to recurse down the entire
 * directory tree.  The inode link count fixup code takes care of the
 * ugly details.
 */
84
/*
 * Stages for the tree walking, in the order they are numbered:
 *
 * LOG_WALK_PIN_ONLY         - only pin down the blocks we find
 * LOG_WALK_REPLAY_INODES    - make sure that all the inodes we find in
 *                             the log are created in the subvolume
 * LOG_WALK_REPLAY_DIR_INDEX - NOTE(review): presumably replays directory
 *                             index items before the final stage; confirm
 *                             against the replay code
 * LOG_WALK_REPLAY_ALL       - the last stage: deal with directories and
 *                             links and extents and all the other fun
 *                             semantics
 */
#define LOG_WALK_PIN_ONLY 0
#define LOG_WALK_REPLAY_INODES 1
#define LOG_WALK_REPLAY_DIR_INDEX 2
#define LOG_WALK_REPLAY_ALL 3
Chris Masone02119d2008-09-05 16:13:11 -040098
Chris Mason12fcfd22009-03-24 10:24:20 -040099static int btrfs_log_inode(struct btrfs_trans_handle *trans,
Chris Masone02119d2008-09-05 16:13:11 -0400100 struct btrfs_root *root, struct inode *inode,
101 int inode_only);
Yan Zhengec051c02009-01-05 15:43:42 -0500102static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root,
104 struct btrfs_path *path, u64 objectid);
Chris Mason12fcfd22009-03-24 10:24:20 -0400105static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
106 struct btrfs_root *root,
107 struct btrfs_root *log,
108 struct btrfs_path *path,
109 u64 dirid, int del_all);
Chris Masone02119d2008-09-05 16:13:11 -0400110
/*
 * tree logging is a special write ahead log used to make sure that
 * fsyncs and O_SYNCs can happen without doing full tree commits.
 *
 * Full tree commits are expensive because they require commonly
 * modified blocks to be recowed, creating many dirty pages in the
 * extent tree and a 4x-6x higher write load than ext3.
 *
 * Instead of doing a tree commit on every fsync, we use the
 * key ranges and transaction ids to find items for a given file or directory
 * that have changed in this transaction.  Those items are copied into
 * a special tree (one per subvolume root), that tree is written to disk
 * and then the fsync is considered complete.
 *
 * After a crash, items are copied out of the log-tree back into the
 * subvolume tree.  Any file data extents found are recorded in the extent
 * allocation tree, and the log-tree freed.
 *
 * The log tree is read three times: once to pin down all the extents it
 * is using in ram, once to create all the inodes logged in the tree,
 * and once to do all the other items.
 */
133
134/*
Chris Masone02119d2008-09-05 16:13:11 -0400135 * start a sub transaction and setup the log tree
136 * this increments the log tree writer count to make the people
137 * syncing the tree wait for us to finish
138 */
139static int start_log_trans(struct btrfs_trans_handle *trans,
140 struct btrfs_root *root)
141{
142 int ret;
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400143 int err = 0;
Yan Zheng7237f182009-01-21 12:54:03 -0500144
145 mutex_lock(&root->log_mutex);
146 if (root->log_root) {
Josef Bacikff782e02009-10-08 15:30:04 -0400147 if (!root->log_start_pid) {
148 root->log_start_pid = current->pid;
149 root->log_multiple_pids = false;
150 } else if (root->log_start_pid != current->pid) {
151 root->log_multiple_pids = true;
152 }
153
Miao Xie2ecb7922012-09-06 04:04:27 -0600154 atomic_inc(&root->log_batch);
Yan Zheng7237f182009-01-21 12:54:03 -0500155 atomic_inc(&root->log_writers);
156 mutex_unlock(&root->log_mutex);
157 return 0;
158 }
Josef Bacikff782e02009-10-08 15:30:04 -0400159 root->log_multiple_pids = false;
160 root->log_start_pid = current->pid;
Chris Masone02119d2008-09-05 16:13:11 -0400161 mutex_lock(&root->fs_info->tree_log_mutex);
162 if (!root->fs_info->log_root_tree) {
163 ret = btrfs_init_log_root_tree(trans, root->fs_info);
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400164 if (ret)
165 err = ret;
Chris Masone02119d2008-09-05 16:13:11 -0400166 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400167 if (err == 0 && !root->log_root) {
Chris Masone02119d2008-09-05 16:13:11 -0400168 ret = btrfs_add_log_tree(trans, root);
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400169 if (ret)
170 err = ret;
Chris Masone02119d2008-09-05 16:13:11 -0400171 }
Chris Masone02119d2008-09-05 16:13:11 -0400172 mutex_unlock(&root->fs_info->tree_log_mutex);
Miao Xie2ecb7922012-09-06 04:04:27 -0600173 atomic_inc(&root->log_batch);
Yan Zheng7237f182009-01-21 12:54:03 -0500174 atomic_inc(&root->log_writers);
175 mutex_unlock(&root->log_mutex);
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400176 return err;
Chris Masone02119d2008-09-05 16:13:11 -0400177}
178
179/*
180 * returns 0 if there was a log transaction running and we were able
181 * to join, or returns -ENOENT if there were not transactions
182 * in progress
183 */
184static int join_running_log_trans(struct btrfs_root *root)
185{
186 int ret = -ENOENT;
187
188 smp_mb();
189 if (!root->log_root)
190 return -ENOENT;
191
Yan Zheng7237f182009-01-21 12:54:03 -0500192 mutex_lock(&root->log_mutex);
Chris Masone02119d2008-09-05 16:13:11 -0400193 if (root->log_root) {
194 ret = 0;
Yan Zheng7237f182009-01-21 12:54:03 -0500195 atomic_inc(&root->log_writers);
Chris Masone02119d2008-09-05 16:13:11 -0400196 }
Yan Zheng7237f182009-01-21 12:54:03 -0500197 mutex_unlock(&root->log_mutex);
Chris Masone02119d2008-09-05 16:13:11 -0400198 return ret;
199}
200
201/*
Chris Mason12fcfd22009-03-24 10:24:20 -0400202 * This either makes the current running log transaction wait
203 * until you call btrfs_end_log_trans() or it makes any future
204 * log transactions wait until you call btrfs_end_log_trans()
205 */
206int btrfs_pin_log_trans(struct btrfs_root *root)
207{
208 int ret = -ENOENT;
209
210 mutex_lock(&root->log_mutex);
211 atomic_inc(&root->log_writers);
212 mutex_unlock(&root->log_mutex);
213 return ret;
214}
215
216/*
Chris Masone02119d2008-09-05 16:13:11 -0400217 * indicate we're done making changes to the log tree
218 * and wake up anyone waiting to do a sync
219 */
Jeff Mahoney143bede2012-03-01 14:56:26 +0100220void btrfs_end_log_trans(struct btrfs_root *root)
Chris Masone02119d2008-09-05 16:13:11 -0400221{
Yan Zheng7237f182009-01-21 12:54:03 -0500222 if (atomic_dec_and_test(&root->log_writers)) {
223 smp_mb();
224 if (waitqueue_active(&root->log_writer_wait))
225 wake_up(&root->log_writer_wait);
226 }
Chris Masone02119d2008-09-05 16:13:11 -0400227}
228
229
230/*
231 * the walk control struct is used to pass state down the chain when
232 * processing the log tree. The stage field tells us which part
233 * of the log tree processing we are currently doing. The others
234 * are state fields used for that specific part
235 */
236struct walk_control {
237 /* should we free the extent on disk when done? This is used
238 * at transaction commit time while freeing a log tree
239 */
240 int free;
241
242 /* should we write out the extent buffer? This is used
243 * while flushing the log tree to disk during a sync
244 */
245 int write;
246
247 /* should we wait for the extent buffer io to finish? Also used
248 * while flushing the log tree to disk for a sync
249 */
250 int wait;
251
252 /* pin only walk, we record which extents on disk belong to the
253 * log trees
254 */
255 int pin;
256
257 /* what stage of the replay code we're currently in */
258 int stage;
259
260 /* the root we are currently replaying */
261 struct btrfs_root *replay_dest;
262
263 /* the trans handle for the current replay */
264 struct btrfs_trans_handle *trans;
265
266 /* the function that gets used to process blocks we find in the
267 * tree. Note the extent_buffer might not be up to date when it is
268 * passed in, and it must be checked or read if you need the data
269 * inside it
270 */
271 int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
272 struct walk_control *wc, u64 gen);
273};
274
275/*
276 * process_func used to pin down extents, write them or wait on them
277 */
278static int process_one_buffer(struct btrfs_root *log,
279 struct extent_buffer *eb,
280 struct walk_control *wc, u64 gen)
281{
Josef Bacikb50c6e22013-04-25 15:55:30 -0400282 int ret = 0;
Chris Masone02119d2008-09-05 16:13:11 -0400283
Josef Bacik8c2a1a32013-06-06 13:19:32 -0400284 /*
285 * If this fs is mixed then we need to be able to process the leaves to
286 * pin down any logged extents, so we have to read the block.
287 */
288 if (btrfs_fs_incompat(log->fs_info, MIXED_GROUPS)) {
289 ret = btrfs_read_buffer(eb, gen);
290 if (ret)
291 return ret;
292 }
293
Josef Bacikb50c6e22013-04-25 15:55:30 -0400294 if (wc->pin)
295 ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
296 eb->start, eb->len);
297
298 if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
Josef Bacik8c2a1a32013-06-06 13:19:32 -0400299 if (wc->pin && btrfs_header_level(eb) == 0)
300 ret = btrfs_exclude_logged_extents(log, eb);
Chris Masone02119d2008-09-05 16:13:11 -0400301 if (wc->write)
302 btrfs_write_tree_block(eb);
303 if (wc->wait)
304 btrfs_wait_tree_block_writeback(eb);
305 }
Josef Bacikb50c6e22013-04-25 15:55:30 -0400306 return ret;
Chris Masone02119d2008-09-05 16:13:11 -0400307}
308
309/*
310 * Item overwrite used by replay and tree logging. eb, slot and key all refer
311 * to the src data we are copying out.
312 *
313 * root is the tree we are copying into, and path is a scratch
314 * path for use in this function (it should be released on entry and
315 * will be released on exit).
316 *
317 * If the key is already in the destination tree the existing item is
318 * overwritten. If the existing item isn't big enough, it is extended.
319 * If it is too large, it is truncated.
320 *
321 * If the key isn't in the destination yet, a new item is inserted.
322 */
323static noinline int overwrite_item(struct btrfs_trans_handle *trans,
324 struct btrfs_root *root,
325 struct btrfs_path *path,
326 struct extent_buffer *eb, int slot,
327 struct btrfs_key *key)
328{
329 int ret;
330 u32 item_size;
331 u64 saved_i_size = 0;
332 int save_old_i_size = 0;
333 unsigned long src_ptr;
334 unsigned long dst_ptr;
335 int overwrite_root = 0;
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000336 bool inode_item = key->type == BTRFS_INODE_ITEM_KEY;
Chris Masone02119d2008-09-05 16:13:11 -0400337
338 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
339 overwrite_root = 1;
340
341 item_size = btrfs_item_size_nr(eb, slot);
342 src_ptr = btrfs_item_ptr_offset(eb, slot);
343
344 /* look for the key in the destination tree */
345 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000346 if (ret < 0)
347 return ret;
348
Chris Masone02119d2008-09-05 16:13:11 -0400349 if (ret == 0) {
350 char *src_copy;
351 char *dst_copy;
352 u32 dst_size = btrfs_item_size_nr(path->nodes[0],
353 path->slots[0]);
354 if (dst_size != item_size)
355 goto insert;
356
357 if (item_size == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +0200358 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400359 return 0;
360 }
361 dst_copy = kmalloc(item_size, GFP_NOFS);
362 src_copy = kmalloc(item_size, GFP_NOFS);
liubo2a29edc2011-01-26 06:22:08 +0000363 if (!dst_copy || !src_copy) {
David Sterbab3b4aa72011-04-21 01:20:15 +0200364 btrfs_release_path(path);
liubo2a29edc2011-01-26 06:22:08 +0000365 kfree(dst_copy);
366 kfree(src_copy);
367 return -ENOMEM;
368 }
Chris Masone02119d2008-09-05 16:13:11 -0400369
370 read_extent_buffer(eb, src_copy, src_ptr, item_size);
371
372 dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
373 read_extent_buffer(path->nodes[0], dst_copy, dst_ptr,
374 item_size);
375 ret = memcmp(dst_copy, src_copy, item_size);
376
377 kfree(dst_copy);
378 kfree(src_copy);
379 /*
380 * they have the same contents, just return, this saves
381 * us from cowing blocks in the destination tree and doing
382 * extra writes that may not have been done by a previous
383 * sync
384 */
385 if (ret == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +0200386 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400387 return 0;
388 }
389
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000390 /*
391 * We need to load the old nbytes into the inode so when we
392 * replay the extents we've logged we get the right nbytes.
393 */
394 if (inode_item) {
395 struct btrfs_inode_item *item;
396 u64 nbytes;
397
398 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
399 struct btrfs_inode_item);
400 nbytes = btrfs_inode_nbytes(path->nodes[0], item);
401 item = btrfs_item_ptr(eb, slot,
402 struct btrfs_inode_item);
403 btrfs_set_inode_nbytes(eb, item, nbytes);
404 }
405 } else if (inode_item) {
406 struct btrfs_inode_item *item;
407
408 /*
409 * New inode, set nbytes to 0 so that the nbytes comes out
410 * properly when we replay the extents.
411 */
412 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
413 btrfs_set_inode_nbytes(eb, item, 0);
Chris Masone02119d2008-09-05 16:13:11 -0400414 }
415insert:
David Sterbab3b4aa72011-04-21 01:20:15 +0200416 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400417 /* try to insert the key into the destination tree */
418 ret = btrfs_insert_empty_item(trans, root, path,
419 key, item_size);
420
421 /* make sure any existing item is the correct size */
422 if (ret == -EEXIST) {
423 u32 found_size;
424 found_size = btrfs_item_size_nr(path->nodes[0],
425 path->slots[0]);
Jeff Mahoney143bede2012-03-01 14:56:26 +0100426 if (found_size > item_size)
Tsutomu Itohafe5fea2013-04-16 05:18:22 +0000427 btrfs_truncate_item(root, path, item_size, 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +0100428 else if (found_size < item_size)
Tsutomu Itoh4b90c682013-04-16 05:18:49 +0000429 btrfs_extend_item(root, path,
Jeff Mahoney143bede2012-03-01 14:56:26 +0100430 item_size - found_size);
Chris Masone02119d2008-09-05 16:13:11 -0400431 } else if (ret) {
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400432 return ret;
Chris Masone02119d2008-09-05 16:13:11 -0400433 }
434 dst_ptr = btrfs_item_ptr_offset(path->nodes[0],
435 path->slots[0]);
436
437 /* don't overwrite an existing inode if the generation number
438 * was logged as zero. This is done when the tree logging code
439 * is just logging an inode to make sure it exists after recovery.
440 *
441 * Also, don't overwrite i_size on directories during replay.
442 * log replay inserts and removes directory items based on the
443 * state of the tree found in the subvolume, and i_size is modified
444 * as it goes
445 */
446 if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) {
447 struct btrfs_inode_item *src_item;
448 struct btrfs_inode_item *dst_item;
449
450 src_item = (struct btrfs_inode_item *)src_ptr;
451 dst_item = (struct btrfs_inode_item *)dst_ptr;
452
453 if (btrfs_inode_generation(eb, src_item) == 0)
454 goto no_copy;
455
456 if (overwrite_root &&
457 S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
458 S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) {
459 save_old_i_size = 1;
460 saved_i_size = btrfs_inode_size(path->nodes[0],
461 dst_item);
462 }
463 }
464
465 copy_extent_buffer(path->nodes[0], eb, dst_ptr,
466 src_ptr, item_size);
467
468 if (save_old_i_size) {
469 struct btrfs_inode_item *dst_item;
470 dst_item = (struct btrfs_inode_item *)dst_ptr;
471 btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size);
472 }
473
474 /* make sure the generation is filled in */
475 if (key->type == BTRFS_INODE_ITEM_KEY) {
476 struct btrfs_inode_item *dst_item;
477 dst_item = (struct btrfs_inode_item *)dst_ptr;
478 if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) {
479 btrfs_set_inode_generation(path->nodes[0], dst_item,
480 trans->transid);
481 }
482 }
483no_copy:
484 btrfs_mark_buffer_dirty(path->nodes[0]);
David Sterbab3b4aa72011-04-21 01:20:15 +0200485 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400486 return 0;
487}
488
489/*
490 * simple helper to read an inode off the disk from a given root
491 * This can only be called for subvolume roots and not for the log
492 */
493static noinline struct inode *read_one_inode(struct btrfs_root *root,
494 u64 objectid)
495{
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400496 struct btrfs_key key;
Chris Masone02119d2008-09-05 16:13:11 -0400497 struct inode *inode;
Chris Masone02119d2008-09-05 16:13:11 -0400498
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400499 key.objectid = objectid;
500 key.type = BTRFS_INODE_ITEM_KEY;
501 key.offset = 0;
Josef Bacik73f73412009-12-04 17:38:27 +0000502 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400503 if (IS_ERR(inode)) {
504 inode = NULL;
505 } else if (is_bad_inode(inode)) {
Chris Masone02119d2008-09-05 16:13:11 -0400506 iput(inode);
507 inode = NULL;
508 }
509 return inode;
510}
511
512/* replays a single extent in 'eb' at 'slot' with 'key' into the
513 * subvolume 'root'. path is released on entry and should be released
514 * on exit.
515 *
516 * extents in the log tree have not been allocated out of the extent
517 * tree yet. So, this completes the allocation, taking a reference
518 * as required if the extent already exists or creating a new extent
519 * if it isn't in the extent allocation tree yet.
520 *
521 * The extent is inserted into the file, dropping any existing extents
522 * from the file that overlap the new one.
523 */
524static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
525 struct btrfs_root *root,
526 struct btrfs_path *path,
527 struct extent_buffer *eb, int slot,
528 struct btrfs_key *key)
529{
530 int found_type;
Chris Masone02119d2008-09-05 16:13:11 -0400531 u64 extent_end;
Chris Masone02119d2008-09-05 16:13:11 -0400532 u64 start = key->offset;
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000533 u64 nbytes = 0;
Chris Masone02119d2008-09-05 16:13:11 -0400534 struct btrfs_file_extent_item *item;
535 struct inode *inode = NULL;
536 unsigned long size;
537 int ret = 0;
538
539 item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
540 found_type = btrfs_file_extent_type(eb, item);
541
Yan Zhengd899e052008-10-30 14:25:28 -0400542 if (found_type == BTRFS_FILE_EXTENT_REG ||
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000543 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
544 nbytes = btrfs_file_extent_num_bytes(eb, item);
545 extent_end = start + nbytes;
546
547 /*
548 * We don't add to the inodes nbytes if we are prealloc or a
549 * hole.
550 */
551 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
552 nbytes = 0;
553 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
Chris Masonc8b97812008-10-29 14:49:59 -0400554 size = btrfs_file_extent_inline_len(eb, item);
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000555 nbytes = btrfs_file_extent_ram_bytes(eb, item);
Qu Wenruofda28322013-02-26 08:10:22 +0000556 extent_end = ALIGN(start + size, root->sectorsize);
Chris Masone02119d2008-09-05 16:13:11 -0400557 } else {
558 ret = 0;
559 goto out;
560 }
561
562 inode = read_one_inode(root, key->objectid);
563 if (!inode) {
564 ret = -EIO;
565 goto out;
566 }
567
568 /*
569 * first check to see if we already have this extent in the
570 * file. This must be done before the btrfs_drop_extents run
571 * so we don't try to drop this extent.
572 */
Li Zefan33345d012011-04-20 10:31:50 +0800573 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
Chris Masone02119d2008-09-05 16:13:11 -0400574 start, 0);
575
Yan Zhengd899e052008-10-30 14:25:28 -0400576 if (ret == 0 &&
577 (found_type == BTRFS_FILE_EXTENT_REG ||
578 found_type == BTRFS_FILE_EXTENT_PREALLOC)) {
Chris Masone02119d2008-09-05 16:13:11 -0400579 struct btrfs_file_extent_item cmp1;
580 struct btrfs_file_extent_item cmp2;
581 struct btrfs_file_extent_item *existing;
582 struct extent_buffer *leaf;
583
584 leaf = path->nodes[0];
585 existing = btrfs_item_ptr(leaf, path->slots[0],
586 struct btrfs_file_extent_item);
587
588 read_extent_buffer(eb, &cmp1, (unsigned long)item,
589 sizeof(cmp1));
590 read_extent_buffer(leaf, &cmp2, (unsigned long)existing,
591 sizeof(cmp2));
592
593 /*
594 * we already have a pointer to this exact extent,
595 * we don't have to do anything
596 */
597 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +0200598 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400599 goto out;
600 }
601 }
David Sterbab3b4aa72011-04-21 01:20:15 +0200602 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400603
604 /* drop any overlapping extents */
Josef Bacik26714852012-08-29 12:24:27 -0400605 ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
Josef Bacik36508602013-04-25 16:23:32 -0400606 if (ret)
607 goto out;
Chris Masone02119d2008-09-05 16:13:11 -0400608
Yan Zheng07d400a2009-01-06 11:42:00 -0500609 if (found_type == BTRFS_FILE_EXTENT_REG ||
610 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400611 u64 offset;
Yan Zheng07d400a2009-01-06 11:42:00 -0500612 unsigned long dest_offset;
613 struct btrfs_key ins;
Chris Masone02119d2008-09-05 16:13:11 -0400614
Yan Zheng07d400a2009-01-06 11:42:00 -0500615 ret = btrfs_insert_empty_item(trans, root, path, key,
616 sizeof(*item));
Josef Bacik36508602013-04-25 16:23:32 -0400617 if (ret)
618 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500619 dest_offset = btrfs_item_ptr_offset(path->nodes[0],
620 path->slots[0]);
621 copy_extent_buffer(path->nodes[0], eb, dest_offset,
622 (unsigned long)item, sizeof(*item));
623
624 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
625 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
626 ins.type = BTRFS_EXTENT_ITEM_KEY;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400627 offset = key->offset - btrfs_file_extent_offset(eb, item);
Yan Zheng07d400a2009-01-06 11:42:00 -0500628
629 if (ins.objectid > 0) {
630 u64 csum_start;
631 u64 csum_end;
632 LIST_HEAD(ordered_sums);
633 /*
634 * is this extent already allocated in the extent
635 * allocation tree? If so, just add a reference
636 */
637 ret = btrfs_lookup_extent(root, ins.objectid,
638 ins.offset);
639 if (ret == 0) {
640 ret = btrfs_inc_extent_ref(trans, root,
641 ins.objectid, ins.offset,
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400642 0, root->root_key.objectid,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200643 key->objectid, offset, 0);
Josef Bacikb50c6e22013-04-25 15:55:30 -0400644 if (ret)
645 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500646 } else {
647 /*
648 * insert the extent pointer in the extent
649 * allocation tree
650 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400651 ret = btrfs_alloc_logged_file_extent(trans,
652 root, root->root_key.objectid,
653 key->objectid, offset, &ins);
Josef Bacikb50c6e22013-04-25 15:55:30 -0400654 if (ret)
655 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500656 }
David Sterbab3b4aa72011-04-21 01:20:15 +0200657 btrfs_release_path(path);
Yan Zheng07d400a2009-01-06 11:42:00 -0500658
659 if (btrfs_file_extent_compression(eb, item)) {
660 csum_start = ins.objectid;
661 csum_end = csum_start + ins.offset;
662 } else {
663 csum_start = ins.objectid +
664 btrfs_file_extent_offset(eb, item);
665 csum_end = csum_start +
666 btrfs_file_extent_num_bytes(eb, item);
667 }
668
669 ret = btrfs_lookup_csums_range(root->log_root,
670 csum_start, csum_end - 1,
Arne Jansena2de7332011-03-08 14:14:00 +0100671 &ordered_sums, 0);
Josef Bacik36508602013-04-25 16:23:32 -0400672 if (ret)
673 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500674 while (!list_empty(&ordered_sums)) {
675 struct btrfs_ordered_sum *sums;
676 sums = list_entry(ordered_sums.next,
677 struct btrfs_ordered_sum,
678 list);
Josef Bacik36508602013-04-25 16:23:32 -0400679 if (!ret)
680 ret = btrfs_csum_file_blocks(trans,
Yan Zheng07d400a2009-01-06 11:42:00 -0500681 root->fs_info->csum_root,
682 sums);
Yan Zheng07d400a2009-01-06 11:42:00 -0500683 list_del(&sums->list);
684 kfree(sums);
685 }
Josef Bacik36508602013-04-25 16:23:32 -0400686 if (ret)
687 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500688 } else {
David Sterbab3b4aa72011-04-21 01:20:15 +0200689 btrfs_release_path(path);
Yan Zheng07d400a2009-01-06 11:42:00 -0500690 }
691 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
692 /* inline extents are easy, we just overwrite them */
693 ret = overwrite_item(trans, root, path, eb, slot, key);
Josef Bacik36508602013-04-25 16:23:32 -0400694 if (ret)
695 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500696 }
697
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000698 inode_add_bytes(inode, nbytes);
Tsutomu Itohb9959292012-06-25 21:25:22 -0600699 ret = btrfs_update_inode(trans, root, inode);
Chris Masone02119d2008-09-05 16:13:11 -0400700out:
701 if (inode)
702 iput(inode);
703 return ret;
704}
705
706/*
707 * when cleaning up conflicts between the directory names in the
708 * subvolume, directory names in the log and directory names in the
709 * inode back references, we may have to unlink inodes from directories.
710 *
711 * This is a helper function to do the unlink of a specific directory
712 * item
713 */
714static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
715 struct btrfs_root *root,
716 struct btrfs_path *path,
717 struct inode *dir,
718 struct btrfs_dir_item *di)
719{
720 struct inode *inode;
721 char *name;
722 int name_len;
723 struct extent_buffer *leaf;
724 struct btrfs_key location;
725 int ret;
726
727 leaf = path->nodes[0];
728
729 btrfs_dir_item_key_to_cpu(leaf, di, &location);
730 name_len = btrfs_dir_name_len(leaf, di);
731 name = kmalloc(name_len, GFP_NOFS);
liubo2a29edc2011-01-26 06:22:08 +0000732 if (!name)
733 return -ENOMEM;
734
Chris Masone02119d2008-09-05 16:13:11 -0400735 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
David Sterbab3b4aa72011-04-21 01:20:15 +0200736 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400737
738 inode = read_one_inode(root, location.objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +0000739 if (!inode) {
Josef Bacik36508602013-04-25 16:23:32 -0400740 ret = -EIO;
741 goto out;
Tsutomu Itohc00e9492011-04-28 09:10:23 +0000742 }
Chris Masone02119d2008-09-05 16:13:11 -0400743
Yan Zhengec051c02009-01-05 15:43:42 -0500744 ret = link_to_fixup_dir(trans, root, path, location.objectid);
Josef Bacik36508602013-04-25 16:23:32 -0400745 if (ret)
746 goto out;
Chris Mason12fcfd22009-03-24 10:24:20 -0400747
Chris Masone02119d2008-09-05 16:13:11 -0400748 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
Josef Bacik36508602013-04-25 16:23:32 -0400749 if (ret)
750 goto out;
Filipe David Borba Mananaada9af22013-08-05 09:25:47 +0100751 else
752 ret = btrfs_run_delayed_items(trans, root);
Josef Bacik36508602013-04-25 16:23:32 -0400753out:
754 kfree(name);
755 iput(inode);
Chris Masone02119d2008-09-05 16:13:11 -0400756 return ret;
757}
758
759/*
760 * helper function to see if a given name and sequence number found
761 * in an inode back reference are already in a directory and correctly
762 * point to this inode
763 */
764static noinline int inode_in_dir(struct btrfs_root *root,
765 struct btrfs_path *path,
766 u64 dirid, u64 objectid, u64 index,
767 const char *name, int name_len)
768{
769 struct btrfs_dir_item *di;
770 struct btrfs_key location;
771 int match = 0;
772
773 di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
774 index, name, name_len, 0);
775 if (di && !IS_ERR(di)) {
776 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
777 if (location.objectid != objectid)
778 goto out;
779 } else
780 goto out;
David Sterbab3b4aa72011-04-21 01:20:15 +0200781 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400782
783 di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
784 if (di && !IS_ERR(di)) {
785 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
786 if (location.objectid != objectid)
787 goto out;
788 } else
789 goto out;
790 match = 1;
791out:
David Sterbab3b4aa72011-04-21 01:20:15 +0200792 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400793 return match;
794}
795
796/*
797 * helper function to check a log tree for a named back reference in
798 * an inode. This is used to decide if a back reference that is
799 * found in the subvolume conflicts with what we find in the log.
800 *
801 * inode backreferences may have multiple refs in a single item,
802 * during replay we process one reference at a time, and we don't
803 * want to delete valid links to a file from the subvolume if that
804 * link is also in the log.
805 */
806static noinline int backref_in_log(struct btrfs_root *log,
807 struct btrfs_key *key,
Mark Fashehf1863732012-08-08 11:32:27 -0700808 u64 ref_objectid,
Chris Masone02119d2008-09-05 16:13:11 -0400809 char *name, int namelen)
810{
811 struct btrfs_path *path;
812 struct btrfs_inode_ref *ref;
813 unsigned long ptr;
814 unsigned long ptr_end;
815 unsigned long name_ptr;
816 int found_name_len;
817 int item_size;
818 int ret;
819 int match = 0;
820
821 path = btrfs_alloc_path();
liubo2a29edc2011-01-26 06:22:08 +0000822 if (!path)
823 return -ENOMEM;
824
Chris Masone02119d2008-09-05 16:13:11 -0400825 ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
826 if (ret != 0)
827 goto out;
828
Chris Masone02119d2008-09-05 16:13:11 -0400829 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
Mark Fashehf1863732012-08-08 11:32:27 -0700830
831 if (key->type == BTRFS_INODE_EXTREF_KEY) {
832 if (btrfs_find_name_in_ext_backref(path, ref_objectid,
833 name, namelen, NULL))
834 match = 1;
835
836 goto out;
837 }
838
839 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
Chris Masone02119d2008-09-05 16:13:11 -0400840 ptr_end = ptr + item_size;
841 while (ptr < ptr_end) {
842 ref = (struct btrfs_inode_ref *)ptr;
843 found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref);
844 if (found_name_len == namelen) {
845 name_ptr = (unsigned long)(ref + 1);
846 ret = memcmp_extent_buffer(path->nodes[0], name,
847 name_ptr, namelen);
848 if (ret == 0) {
849 match = 1;
850 goto out;
851 }
852 }
853 ptr = (unsigned long)(ref + 1) + found_name_len;
854 }
855out:
856 btrfs_free_path(path);
857 return match;
858}
859
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700860static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
861 struct btrfs_root *root,
862 struct btrfs_path *path,
863 struct btrfs_root *log_root,
864 struct inode *dir, struct inode *inode,
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700865 struct extent_buffer *eb,
Mark Fashehf1863732012-08-08 11:32:27 -0700866 u64 inode_objectid, u64 parent_objectid,
867 u64 ref_index, char *name, int namelen,
868 int *search_done)
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700869{
870 int ret;
Mark Fashehf1863732012-08-08 11:32:27 -0700871 char *victim_name;
872 int victim_name_len;
873 struct extent_buffer *leaf;
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700874 struct btrfs_dir_item *di;
Mark Fashehf1863732012-08-08 11:32:27 -0700875 struct btrfs_key search_key;
876 struct btrfs_inode_extref *extref;
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700877
Mark Fashehf1863732012-08-08 11:32:27 -0700878again:
879 /* Search old style refs */
880 search_key.objectid = inode_objectid;
881 search_key.type = BTRFS_INODE_REF_KEY;
882 search_key.offset = parent_objectid;
883 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700884 if (ret == 0) {
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700885 struct btrfs_inode_ref *victim_ref;
886 unsigned long ptr;
887 unsigned long ptr_end;
Mark Fashehf1863732012-08-08 11:32:27 -0700888
889 leaf = path->nodes[0];
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700890
891 /* are we trying to overwrite a back ref for the root directory
892 * if so, just jump out, we're done
893 */
Mark Fashehf1863732012-08-08 11:32:27 -0700894 if (search_key.objectid == search_key.offset)
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700895 return 1;
896
897 /* check all the names in this back reference to see
898 * if they are in the log. if so, we allow them to stay
899 * otherwise they must be unlinked as a conflict
900 */
901 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
902 ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]);
903 while (ptr < ptr_end) {
904 victim_ref = (struct btrfs_inode_ref *)ptr;
905 victim_name_len = btrfs_inode_ref_name_len(leaf,
906 victim_ref);
907 victim_name = kmalloc(victim_name_len, GFP_NOFS);
Josef Bacik36508602013-04-25 16:23:32 -0400908 if (!victim_name)
909 return -ENOMEM;
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700910
911 read_extent_buffer(leaf, victim_name,
912 (unsigned long)(victim_ref + 1),
913 victim_name_len);
914
Mark Fashehf1863732012-08-08 11:32:27 -0700915 if (!backref_in_log(log_root, &search_key,
916 parent_objectid,
917 victim_name,
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700918 victim_name_len)) {
919 btrfs_inc_nlink(inode);
920 btrfs_release_path(path);
921
922 ret = btrfs_unlink_inode(trans, root, dir,
923 inode, victim_name,
924 victim_name_len);
Mark Fashehf1863732012-08-08 11:32:27 -0700925 kfree(victim_name);
Josef Bacik36508602013-04-25 16:23:32 -0400926 if (ret)
927 return ret;
Filipe David Borba Mananaada9af22013-08-05 09:25:47 +0100928 ret = btrfs_run_delayed_items(trans, root);
929 if (ret)
930 return ret;
Mark Fashehf1863732012-08-08 11:32:27 -0700931 *search_done = 1;
932 goto again;
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700933 }
934 kfree(victim_name);
Mark Fashehf1863732012-08-08 11:32:27 -0700935
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700936 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
937 }
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700938
939 /*
940 * NOTE: we have searched root tree and checked the
941 * coresponding ref, it does not need to check again.
942 */
943 *search_done = 1;
944 }
945 btrfs_release_path(path);
946
Mark Fashehf1863732012-08-08 11:32:27 -0700947 /* Same search but for extended refs */
948 extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
949 inode_objectid, parent_objectid, 0,
950 0);
951 if (!IS_ERR_OR_NULL(extref)) {
952 u32 item_size;
953 u32 cur_offset = 0;
954 unsigned long base;
955 struct inode *victim_parent;
956
957 leaf = path->nodes[0];
958
959 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
960 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
961
962 while (cur_offset < item_size) {
963 extref = (struct btrfs_inode_extref *)base + cur_offset;
964
965 victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
966
967 if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
968 goto next;
969
970 victim_name = kmalloc(victim_name_len, GFP_NOFS);
Josef Bacik36508602013-04-25 16:23:32 -0400971 if (!victim_name)
972 return -ENOMEM;
Mark Fashehf1863732012-08-08 11:32:27 -0700973 read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
974 victim_name_len);
975
976 search_key.objectid = inode_objectid;
977 search_key.type = BTRFS_INODE_EXTREF_KEY;
978 search_key.offset = btrfs_extref_hash(parent_objectid,
979 victim_name,
980 victim_name_len);
981 ret = 0;
982 if (!backref_in_log(log_root, &search_key,
983 parent_objectid, victim_name,
984 victim_name_len)) {
985 ret = -ENOENT;
986 victim_parent = read_one_inode(root,
987 parent_objectid);
988 if (victim_parent) {
989 btrfs_inc_nlink(inode);
990 btrfs_release_path(path);
991
992 ret = btrfs_unlink_inode(trans, root,
993 victim_parent,
994 inode,
995 victim_name,
996 victim_name_len);
Filipe David Borba Mananaada9af22013-08-05 09:25:47 +0100997 if (!ret)
998 ret = btrfs_run_delayed_items(
999 trans, root);
Mark Fashehf1863732012-08-08 11:32:27 -07001000 }
Mark Fashehf1863732012-08-08 11:32:27 -07001001 iput(victim_parent);
1002 kfree(victim_name);
Josef Bacik36508602013-04-25 16:23:32 -04001003 if (ret)
1004 return ret;
Mark Fashehf1863732012-08-08 11:32:27 -07001005 *search_done = 1;
1006 goto again;
1007 }
1008 kfree(victim_name);
Josef Bacik36508602013-04-25 16:23:32 -04001009 if (ret)
1010 return ret;
Mark Fashehf1863732012-08-08 11:32:27 -07001011next:
1012 cur_offset += victim_name_len + sizeof(*extref);
1013 }
1014 *search_done = 1;
1015 }
1016 btrfs_release_path(path);
1017
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001018 /* look for a conflicting sequence number */
1019 di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
Mark Fashehf1863732012-08-08 11:32:27 -07001020 ref_index, name, namelen, 0);
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001021 if (di && !IS_ERR(di)) {
1022 ret = drop_one_dir_item(trans, root, path, dir, di);
Josef Bacik36508602013-04-25 16:23:32 -04001023 if (ret)
1024 return ret;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001025 }
1026 btrfs_release_path(path);
1027
1028 /* look for a conflicing name */
1029 di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
1030 name, namelen, 0);
1031 if (di && !IS_ERR(di)) {
1032 ret = drop_one_dir_item(trans, root, path, dir, di);
Josef Bacik36508602013-04-25 16:23:32 -04001033 if (ret)
1034 return ret;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001035 }
1036 btrfs_release_path(path);
1037
1038 return 0;
1039}
Chris Masone02119d2008-09-05 16:13:11 -04001040
Mark Fashehf1863732012-08-08 11:32:27 -07001041static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
1042 u32 *namelen, char **name, u64 *index,
1043 u64 *parent_objectid)
1044{
1045 struct btrfs_inode_extref *extref;
1046
1047 extref = (struct btrfs_inode_extref *)ref_ptr;
1048
1049 *namelen = btrfs_inode_extref_name_len(eb, extref);
1050 *name = kmalloc(*namelen, GFP_NOFS);
1051 if (*name == NULL)
1052 return -ENOMEM;
1053
1054 read_extent_buffer(eb, *name, (unsigned long)&extref->name,
1055 *namelen);
1056
1057 *index = btrfs_inode_extref_index(eb, extref);
1058 if (parent_objectid)
1059 *parent_objectid = btrfs_inode_extref_parent(eb, extref);
1060
1061 return 0;
1062}
1063
1064static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
1065 u32 *namelen, char **name, u64 *index)
1066{
1067 struct btrfs_inode_ref *ref;
1068
1069 ref = (struct btrfs_inode_ref *)ref_ptr;
1070
1071 *namelen = btrfs_inode_ref_name_len(eb, ref);
1072 *name = kmalloc(*namelen, GFP_NOFS);
1073 if (*name == NULL)
1074 return -ENOMEM;
1075
1076 read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
1077
1078 *index = btrfs_inode_ref_index(eb, ref);
1079
1080 return 0;
1081}
1082
Chris Masone02119d2008-09-05 16:13:11 -04001083/*
1084 * replay one inode back reference item found in the log tree.
1085 * eb, slot and key refer to the buffer and key found in the log tree.
1086 * root is the destination we are replaying into, and path is for temp
1087 * use by this function. (it should be released on return).
1088 */
1089static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1090 struct btrfs_root *root,
1091 struct btrfs_root *log,
1092 struct btrfs_path *path,
1093 struct extent_buffer *eb, int slot,
1094 struct btrfs_key *key)
1095{
liubo34f3e4f2011-08-06 08:35:23 +00001096 struct inode *dir;
Chris Masone02119d2008-09-05 16:13:11 -04001097 struct inode *inode;
Chris Masone02119d2008-09-05 16:13:11 -04001098 unsigned long ref_ptr;
1099 unsigned long ref_end;
liubo34f3e4f2011-08-06 08:35:23 +00001100 char *name;
1101 int namelen;
1102 int ret;
liuboc622ae62011-03-26 08:01:12 -04001103 int search_done = 0;
Mark Fashehf1863732012-08-08 11:32:27 -07001104 int log_ref_ver = 0;
1105 u64 parent_objectid;
1106 u64 inode_objectid;
Chris Masonf46dbe3de2012-10-09 11:17:20 -04001107 u64 ref_index = 0;
Mark Fashehf1863732012-08-08 11:32:27 -07001108 int ref_struct_size;
1109
1110 ref_ptr = btrfs_item_ptr_offset(eb, slot);
1111 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
1112
1113 if (key->type == BTRFS_INODE_EXTREF_KEY) {
1114 struct btrfs_inode_extref *r;
1115
1116 ref_struct_size = sizeof(struct btrfs_inode_extref);
1117 log_ref_ver = 1;
1118 r = (struct btrfs_inode_extref *)ref_ptr;
1119 parent_objectid = btrfs_inode_extref_parent(eb, r);
1120 } else {
1121 ref_struct_size = sizeof(struct btrfs_inode_ref);
1122 parent_objectid = key->offset;
1123 }
1124 inode_objectid = key->objectid;
Chris Masone02119d2008-09-05 16:13:11 -04001125
Chris Masone02119d2008-09-05 16:13:11 -04001126 /*
1127 * it is possible that we didn't log all the parent directories
1128 * for a given inode. If we don't find the dir, just don't
1129 * copy the back ref in. The link count fixup code will take
1130 * care of the rest
1131 */
Mark Fashehf1863732012-08-08 11:32:27 -07001132 dir = read_one_inode(root, parent_objectid);
Chris Masone02119d2008-09-05 16:13:11 -04001133 if (!dir)
1134 return -ENOENT;
1135
Mark Fashehf1863732012-08-08 11:32:27 -07001136 inode = read_one_inode(root, inode_objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001137 if (!inode) {
1138 iput(dir);
1139 return -EIO;
1140 }
Chris Masone02119d2008-09-05 16:13:11 -04001141
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001142 while (ref_ptr < ref_end) {
Mark Fashehf1863732012-08-08 11:32:27 -07001143 if (log_ref_ver) {
1144 ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
1145 &ref_index, &parent_objectid);
1146 /*
1147 * parent object can change from one array
1148 * item to another.
1149 */
1150 if (!dir)
1151 dir = read_one_inode(root, parent_objectid);
1152 if (!dir)
1153 return -ENOENT;
1154 } else {
1155 ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
1156 &ref_index);
1157 }
1158 if (ret)
1159 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001160
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001161 /* if we already have a perfect match, we're done */
1162 if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
Mark Fashehf1863732012-08-08 11:32:27 -07001163 ref_index, name, namelen)) {
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001164 /*
1165 * look for a conflicting back reference in the
1166 * metadata. if we find one we have to unlink that name
1167 * of the file before we add our new link. Later on, we
1168 * overwrite any existing back reference, and we don't
1169 * want to create dangling pointers in the directory.
1170 */
Chris Masone02119d2008-09-05 16:13:11 -04001171
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001172 if (!search_done) {
1173 ret = __add_inode_ref(trans, root, path, log,
Mark Fashehf1863732012-08-08 11:32:27 -07001174 dir, inode, eb,
1175 inode_objectid,
1176 parent_objectid,
1177 ref_index, name, namelen,
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001178 &search_done);
Josef Bacik36508602013-04-25 16:23:32 -04001179 if (ret == 1) {
1180 ret = 0;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001181 goto out;
Josef Bacik36508602013-04-25 16:23:32 -04001182 }
1183 if (ret)
1184 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001185 }
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001186
1187 /* insert our name */
1188 ret = btrfs_add_link(trans, dir, inode, name, namelen,
Mark Fashehf1863732012-08-08 11:32:27 -07001189 0, ref_index);
Josef Bacik36508602013-04-25 16:23:32 -04001190 if (ret)
1191 goto out;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001192
1193 btrfs_update_inode(trans, root, inode);
Chris Masone02119d2008-09-05 16:13:11 -04001194 }
liuboc622ae62011-03-26 08:01:12 -04001195
Mark Fashehf1863732012-08-08 11:32:27 -07001196 ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001197 kfree(name);
Mark Fashehf1863732012-08-08 11:32:27 -07001198 if (log_ref_ver) {
1199 iput(dir);
1200 dir = NULL;
1201 }
Chris Masone02119d2008-09-05 16:13:11 -04001202 }
Chris Masone02119d2008-09-05 16:13:11 -04001203
1204 /* finally write the back reference in the inode */
1205 ret = overwrite_item(trans, root, path, eb, slot, key);
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001206out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001207 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001208 iput(dir);
1209 iput(inode);
Josef Bacik36508602013-04-25 16:23:32 -04001210 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001211}
1212
Yan, Zhengc71bf092009-11-12 09:34:40 +00001213static int insert_orphan_item(struct btrfs_trans_handle *trans,
1214 struct btrfs_root *root, u64 offset)
1215{
1216 int ret;
1217 ret = btrfs_find_orphan_item(root, offset);
1218 if (ret > 0)
1219 ret = btrfs_insert_orphan_item(trans, root, offset);
1220 return ret;
1221}
1222
Mark Fashehf1863732012-08-08 11:32:27 -07001223static int count_inode_extrefs(struct btrfs_root *root,
1224 struct inode *inode, struct btrfs_path *path)
Chris Masone02119d2008-09-05 16:13:11 -04001225{
Mark Fashehf1863732012-08-08 11:32:27 -07001226 int ret = 0;
1227 int name_len;
1228 unsigned int nlink = 0;
1229 u32 item_size;
1230 u32 cur_offset = 0;
1231 u64 inode_objectid = btrfs_ino(inode);
1232 u64 offset = 0;
1233 unsigned long ptr;
1234 struct btrfs_inode_extref *extref;
1235 struct extent_buffer *leaf;
1236
1237 while (1) {
1238 ret = btrfs_find_one_extref(root, inode_objectid, offset, path,
1239 &extref, &offset);
1240 if (ret)
1241 break;
1242
1243 leaf = path->nodes[0];
1244 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1245 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
1246
1247 while (cur_offset < item_size) {
1248 extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
1249 name_len = btrfs_inode_extref_name_len(leaf, extref);
1250
1251 nlink++;
1252
1253 cur_offset += name_len + sizeof(*extref);
1254 }
1255
1256 offset++;
1257 btrfs_release_path(path);
1258 }
1259 btrfs_release_path(path);
1260
1261 if (ret < 0)
1262 return ret;
1263 return nlink;
1264}
1265
1266static int count_inode_refs(struct btrfs_root *root,
1267 struct inode *inode, struct btrfs_path *path)
1268{
Chris Masone02119d2008-09-05 16:13:11 -04001269 int ret;
1270 struct btrfs_key key;
Mark Fashehf1863732012-08-08 11:32:27 -07001271 unsigned int nlink = 0;
Chris Masone02119d2008-09-05 16:13:11 -04001272 unsigned long ptr;
1273 unsigned long ptr_end;
1274 int name_len;
Li Zefan33345d012011-04-20 10:31:50 +08001275 u64 ino = btrfs_ino(inode);
Chris Masone02119d2008-09-05 16:13:11 -04001276
Li Zefan33345d012011-04-20 10:31:50 +08001277 key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04001278 key.type = BTRFS_INODE_REF_KEY;
1279 key.offset = (u64)-1;
1280
Chris Masond3977122009-01-05 21:25:51 -05001281 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04001282 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1283 if (ret < 0)
1284 break;
1285 if (ret > 0) {
1286 if (path->slots[0] == 0)
1287 break;
1288 path->slots[0]--;
1289 }
1290 btrfs_item_key_to_cpu(path->nodes[0], &key,
1291 path->slots[0]);
Li Zefan33345d012011-04-20 10:31:50 +08001292 if (key.objectid != ino ||
Chris Masone02119d2008-09-05 16:13:11 -04001293 key.type != BTRFS_INODE_REF_KEY)
1294 break;
1295 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
1296 ptr_end = ptr + btrfs_item_size_nr(path->nodes[0],
1297 path->slots[0]);
Chris Masond3977122009-01-05 21:25:51 -05001298 while (ptr < ptr_end) {
Chris Masone02119d2008-09-05 16:13:11 -04001299 struct btrfs_inode_ref *ref;
1300
1301 ref = (struct btrfs_inode_ref *)ptr;
1302 name_len = btrfs_inode_ref_name_len(path->nodes[0],
1303 ref);
1304 ptr = (unsigned long)(ref + 1) + name_len;
1305 nlink++;
1306 }
1307
1308 if (key.offset == 0)
1309 break;
1310 key.offset--;
David Sterbab3b4aa72011-04-21 01:20:15 +02001311 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001312 }
David Sterbab3b4aa72011-04-21 01:20:15 +02001313 btrfs_release_path(path);
Mark Fashehf1863732012-08-08 11:32:27 -07001314
1315 return nlink;
1316}
1317
1318/*
1319 * There are a few corners where the link count of the file can't
1320 * be properly maintained during replay. So, instead of adding
1321 * lots of complexity to the log code, we just scan the backrefs
1322 * for any file that has been through replay.
1323 *
1324 * The scan will update the link count on the inode to reflect the
1325 * number of back refs found. If it goes down to zero, the iput
1326 * will free the inode.
1327 */
1328static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1329 struct btrfs_root *root,
1330 struct inode *inode)
1331{
1332 struct btrfs_path *path;
1333 int ret;
1334 u64 nlink = 0;
1335 u64 ino = btrfs_ino(inode);
1336
1337 path = btrfs_alloc_path();
1338 if (!path)
1339 return -ENOMEM;
1340
1341 ret = count_inode_refs(root, inode, path);
1342 if (ret < 0)
1343 goto out;
1344
1345 nlink = ret;
1346
1347 ret = count_inode_extrefs(root, inode, path);
1348 if (ret == -ENOENT)
1349 ret = 0;
1350
1351 if (ret < 0)
1352 goto out;
1353
1354 nlink += ret;
1355
1356 ret = 0;
1357
Chris Masone02119d2008-09-05 16:13:11 -04001358 if (nlink != inode->i_nlink) {
Miklos Szeredibfe86842011-10-28 14:13:29 +02001359 set_nlink(inode, nlink);
Chris Masone02119d2008-09-05 16:13:11 -04001360 btrfs_update_inode(trans, root, inode);
1361 }
Chris Mason8d5bf1c2008-09-11 15:51:21 -04001362 BTRFS_I(inode)->index_cnt = (u64)-1;
Chris Masone02119d2008-09-05 16:13:11 -04001363
Yan, Zhengc71bf092009-11-12 09:34:40 +00001364 if (inode->i_nlink == 0) {
1365 if (S_ISDIR(inode->i_mode)) {
1366 ret = replay_dir_deletes(trans, root, NULL, path,
Li Zefan33345d012011-04-20 10:31:50 +08001367 ino, 1);
Josef Bacik36508602013-04-25 16:23:32 -04001368 if (ret)
1369 goto out;
Yan, Zhengc71bf092009-11-12 09:34:40 +00001370 }
Li Zefan33345d012011-04-20 10:31:50 +08001371 ret = insert_orphan_item(trans, root, ino);
Chris Mason12fcfd22009-03-24 10:24:20 -04001372 }
Chris Mason12fcfd22009-03-24 10:24:20 -04001373
Mark Fashehf1863732012-08-08 11:32:27 -07001374out:
1375 btrfs_free_path(path);
1376 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001377}
1378
1379static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1380 struct btrfs_root *root,
1381 struct btrfs_path *path)
1382{
1383 int ret;
1384 struct btrfs_key key;
1385 struct inode *inode;
1386
1387 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
1388 key.type = BTRFS_ORPHAN_ITEM_KEY;
1389 key.offset = (u64)-1;
Chris Masond3977122009-01-05 21:25:51 -05001390 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04001391 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1392 if (ret < 0)
1393 break;
1394
1395 if (ret == 1) {
1396 if (path->slots[0] == 0)
1397 break;
1398 path->slots[0]--;
1399 }
1400
1401 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1402 if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID ||
1403 key.type != BTRFS_ORPHAN_ITEM_KEY)
1404 break;
1405
1406 ret = btrfs_del_item(trans, root, path);
Tsutomu Itoh65a246c2011-05-19 04:37:44 +00001407 if (ret)
1408 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001409
David Sterbab3b4aa72011-04-21 01:20:15 +02001410 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001411 inode = read_one_inode(root, key.offset);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001412 if (!inode)
1413 return -EIO;
Chris Masone02119d2008-09-05 16:13:11 -04001414
1415 ret = fixup_inode_link_count(trans, root, inode);
Chris Masone02119d2008-09-05 16:13:11 -04001416 iput(inode);
Josef Bacik36508602013-04-25 16:23:32 -04001417 if (ret)
1418 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001419
Chris Mason12fcfd22009-03-24 10:24:20 -04001420 /*
1421 * fixup on a directory may create new entries,
1422 * make sure we always look for the highset possible
1423 * offset
1424 */
1425 key.offset = (u64)-1;
Chris Masone02119d2008-09-05 16:13:11 -04001426 }
Tsutomu Itoh65a246c2011-05-19 04:37:44 +00001427 ret = 0;
1428out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001429 btrfs_release_path(path);
Tsutomu Itoh65a246c2011-05-19 04:37:44 +00001430 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001431}
1432
1433
1434/*
1435 * record a given inode in the fixup dir so we can check its link
1436 * count when replay is done. The link count is incremented here
1437 * so the inode won't go away until we check it
1438 */
1439static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1440 struct btrfs_root *root,
1441 struct btrfs_path *path,
1442 u64 objectid)
1443{
1444 struct btrfs_key key;
1445 int ret = 0;
1446 struct inode *inode;
1447
1448 inode = read_one_inode(root, objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001449 if (!inode)
1450 return -EIO;
Chris Masone02119d2008-09-05 16:13:11 -04001451
1452 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
1453 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
1454 key.offset = objectid;
1455
1456 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1457
David Sterbab3b4aa72011-04-21 01:20:15 +02001458 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001459 if (ret == 0) {
Josef Bacik9bf7a482013-03-01 13:35:47 -05001460 if (!inode->i_nlink)
1461 set_nlink(inode, 1);
1462 else
1463 btrfs_inc_nlink(inode);
Tsutomu Itohb9959292012-06-25 21:25:22 -06001464 ret = btrfs_update_inode(trans, root, inode);
Chris Masone02119d2008-09-05 16:13:11 -04001465 } else if (ret == -EEXIST) {
1466 ret = 0;
1467 } else {
Josef Bacik36508602013-04-25 16:23:32 -04001468 BUG(); /* Logic Error */
Chris Masone02119d2008-09-05 16:13:11 -04001469 }
1470 iput(inode);
1471
1472 return ret;
1473}
1474
1475/*
1476 * when replaying the log for a directory, we only insert names
1477 * for inodes that actually exist. This means an fsync on a directory
1478 * does not implicitly fsync all the new files in it
1479 */
1480static noinline int insert_one_name(struct btrfs_trans_handle *trans,
1481 struct btrfs_root *root,
1482 struct btrfs_path *path,
1483 u64 dirid, u64 index,
1484 char *name, int name_len, u8 type,
1485 struct btrfs_key *location)
1486{
1487 struct inode *inode;
1488 struct inode *dir;
1489 int ret;
1490
1491 inode = read_one_inode(root, location->objectid);
1492 if (!inode)
1493 return -ENOENT;
1494
1495 dir = read_one_inode(root, dirid);
1496 if (!dir) {
1497 iput(inode);
1498 return -EIO;
1499 }
1500 ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
1501
1502 /* FIXME, put inode into FIXUP list */
1503
1504 iput(inode);
1505 iput(dir);
1506 return ret;
1507}
1508
1509/*
1510 * take a single entry in a log directory item and replay it into
1511 * the subvolume.
1512 *
1513 * if a conflicting item exists in the subdirectory already,
1514 * the inode it points to is unlinked and put into the link count
1515 * fix up tree.
1516 *
1517 * If a name from the log points to a file or directory that does
1518 * not exist in the FS, it is skipped. fsyncs on directories
1519 * do not force down inodes inside that directory, just changes to the
1520 * names or unlinks in a directory.
1521 */
1522static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1523 struct btrfs_root *root,
1524 struct btrfs_path *path,
1525 struct extent_buffer *eb,
1526 struct btrfs_dir_item *di,
1527 struct btrfs_key *key)
1528{
1529 char *name;
1530 int name_len;
1531 struct btrfs_dir_item *dst_di;
1532 struct btrfs_key found_key;
1533 struct btrfs_key log_key;
1534 struct inode *dir;
Chris Masone02119d2008-09-05 16:13:11 -04001535 u8 log_type;
Chris Mason4bef0842008-09-08 11:18:08 -04001536 int exists;
Josef Bacik36508602013-04-25 16:23:32 -04001537 int ret = 0;
Chris Masone02119d2008-09-05 16:13:11 -04001538
1539 dir = read_one_inode(root, key->objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001540 if (!dir)
1541 return -EIO;
Chris Masone02119d2008-09-05 16:13:11 -04001542
1543 name_len = btrfs_dir_name_len(eb, di);
1544 name = kmalloc(name_len, GFP_NOFS);
Filipe David Borba Manana2bac3252013-08-04 19:58:57 +01001545 if (!name) {
1546 ret = -ENOMEM;
1547 goto out;
1548 }
liubo2a29edc2011-01-26 06:22:08 +00001549
Chris Masone02119d2008-09-05 16:13:11 -04001550 log_type = btrfs_dir_type(eb, di);
1551 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1552 name_len);
1553
1554 btrfs_dir_item_key_to_cpu(eb, di, &log_key);
Chris Mason4bef0842008-09-08 11:18:08 -04001555 exists = btrfs_lookup_inode(trans, root, path, &log_key, 0);
1556 if (exists == 0)
1557 exists = 1;
1558 else
1559 exists = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02001560 btrfs_release_path(path);
Chris Mason4bef0842008-09-08 11:18:08 -04001561
Chris Masone02119d2008-09-05 16:13:11 -04001562 if (key->type == BTRFS_DIR_ITEM_KEY) {
1563 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
1564 name, name_len, 1);
Chris Masond3977122009-01-05 21:25:51 -05001565 } else if (key->type == BTRFS_DIR_INDEX_KEY) {
Chris Masone02119d2008-09-05 16:13:11 -04001566 dst_di = btrfs_lookup_dir_index_item(trans, root, path,
1567 key->objectid,
1568 key->offset, name,
1569 name_len, 1);
1570 } else {
Josef Bacik36508602013-04-25 16:23:32 -04001571 /* Corruption */
1572 ret = -EINVAL;
1573 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001574 }
David Sterbac7040052011-04-19 18:00:01 +02001575 if (IS_ERR_OR_NULL(dst_di)) {
Chris Masone02119d2008-09-05 16:13:11 -04001576 /* we need a sequence number to insert, so we only
1577 * do inserts for the BTRFS_DIR_INDEX_KEY types
1578 */
1579 if (key->type != BTRFS_DIR_INDEX_KEY)
1580 goto out;
1581 goto insert;
1582 }
1583
1584 btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key);
1585 /* the existing item matches the logged item */
1586 if (found_key.objectid == log_key.objectid &&
1587 found_key.type == log_key.type &&
1588 found_key.offset == log_key.offset &&
1589 btrfs_dir_type(path->nodes[0], dst_di) == log_type) {
1590 goto out;
1591 }
1592
1593 /*
1594 * don't drop the conflicting directory entry if the inode
1595 * for the new entry doesn't exist
1596 */
Chris Mason4bef0842008-09-08 11:18:08 -04001597 if (!exists)
Chris Masone02119d2008-09-05 16:13:11 -04001598 goto out;
1599
Chris Masone02119d2008-09-05 16:13:11 -04001600 ret = drop_one_dir_item(trans, root, path, dir, dst_di);
Josef Bacik36508602013-04-25 16:23:32 -04001601 if (ret)
1602 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001603
1604 if (key->type == BTRFS_DIR_INDEX_KEY)
1605 goto insert;
1606out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001607 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001608 kfree(name);
1609 iput(dir);
Josef Bacik36508602013-04-25 16:23:32 -04001610 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001611
1612insert:
David Sterbab3b4aa72011-04-21 01:20:15 +02001613 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001614 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1615 name, name_len, log_type, &log_key);
Josef Bacik36508602013-04-25 16:23:32 -04001616 if (ret && ret != -ENOENT)
1617 goto out;
1618 ret = 0;
Chris Masone02119d2008-09-05 16:13:11 -04001619 goto out;
1620}
1621
1622/*
1623 * find all the names in a directory item and reconcile them into
1624 * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than
1625 * one name in a directory item, but the same code gets used for
1626 * both directory index types
1627 */
1628static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
1629 struct btrfs_root *root,
1630 struct btrfs_path *path,
1631 struct extent_buffer *eb, int slot,
1632 struct btrfs_key *key)
1633{
1634 int ret;
1635 u32 item_size = btrfs_item_size_nr(eb, slot);
1636 struct btrfs_dir_item *di;
1637 int name_len;
1638 unsigned long ptr;
1639 unsigned long ptr_end;
1640
1641 ptr = btrfs_item_ptr_offset(eb, slot);
1642 ptr_end = ptr + item_size;
Chris Masond3977122009-01-05 21:25:51 -05001643 while (ptr < ptr_end) {
Chris Masone02119d2008-09-05 16:13:11 -04001644 di = (struct btrfs_dir_item *)ptr;
Josef Bacik22a94d42011-03-16 16:47:17 -04001645 if (verify_dir_item(root, eb, di))
1646 return -EIO;
Chris Masone02119d2008-09-05 16:13:11 -04001647 name_len = btrfs_dir_name_len(eb, di);
1648 ret = replay_one_name(trans, root, path, eb, di, key);
Josef Bacik36508602013-04-25 16:23:32 -04001649 if (ret)
1650 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001651 ptr = (unsigned long)(di + 1);
1652 ptr += name_len;
1653 }
1654 return 0;
1655}
1656
1657/*
1658 * directory replay has two parts. There are the standard directory
1659 * items in the log copied from the subvolume, and range items
1660 * created in the log while the subvolume was logged.
1661 *
1662 * The range items tell us which parts of the key space the log
1663 * is authoritative for. During replay, if a key in the subvolume
1664 * directory is in a logged range item, but not actually in the log
1665 * that means it was deleted from the directory before the fsync
1666 * and should be removed.
1667 */
1668static noinline int find_dir_range(struct btrfs_root *root,
1669 struct btrfs_path *path,
1670 u64 dirid, int key_type,
1671 u64 *start_ret, u64 *end_ret)
1672{
1673 struct btrfs_key key;
1674 u64 found_end;
1675 struct btrfs_dir_log_item *item;
1676 int ret;
1677 int nritems;
1678
1679 if (*start_ret == (u64)-1)
1680 return 1;
1681
1682 key.objectid = dirid;
1683 key.type = key_type;
1684 key.offset = *start_ret;
1685
1686 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1687 if (ret < 0)
1688 goto out;
1689 if (ret > 0) {
1690 if (path->slots[0] == 0)
1691 goto out;
1692 path->slots[0]--;
1693 }
1694 if (ret != 0)
1695 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1696
1697 if (key.type != key_type || key.objectid != dirid) {
1698 ret = 1;
1699 goto next;
1700 }
1701 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1702 struct btrfs_dir_log_item);
1703 found_end = btrfs_dir_log_end(path->nodes[0], item);
1704
1705 if (*start_ret >= key.offset && *start_ret <= found_end) {
1706 ret = 0;
1707 *start_ret = key.offset;
1708 *end_ret = found_end;
1709 goto out;
1710 }
1711 ret = 1;
1712next:
1713 /* check the next slot in the tree to see if it is a valid item */
1714 nritems = btrfs_header_nritems(path->nodes[0]);
1715 if (path->slots[0] >= nritems) {
1716 ret = btrfs_next_leaf(root, path);
1717 if (ret)
1718 goto out;
1719 } else {
1720 path->slots[0]++;
1721 }
1722
1723 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1724
1725 if (key.type != key_type || key.objectid != dirid) {
1726 ret = 1;
1727 goto out;
1728 }
1729 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1730 struct btrfs_dir_log_item);
1731 found_end = btrfs_dir_log_end(path->nodes[0], item);
1732 *start_ret = key.offset;
1733 *end_ret = found_end;
1734 ret = 0;
1735out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001736 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001737 return ret;
1738}
1739
1740/*
1741 * this looks for a given directory item in the log. If the directory
1742 * item is not in the log, the item is removed and the inode it points
1743 * to is unlinked
1744 */
1745static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
1746 struct btrfs_root *root,
1747 struct btrfs_root *log,
1748 struct btrfs_path *path,
1749 struct btrfs_path *log_path,
1750 struct inode *dir,
1751 struct btrfs_key *dir_key)
1752{
1753 int ret;
1754 struct extent_buffer *eb;
1755 int slot;
1756 u32 item_size;
1757 struct btrfs_dir_item *di;
1758 struct btrfs_dir_item *log_di;
1759 int name_len;
1760 unsigned long ptr;
1761 unsigned long ptr_end;
1762 char *name;
1763 struct inode *inode;
1764 struct btrfs_key location;
1765
1766again:
1767 eb = path->nodes[0];
1768 slot = path->slots[0];
1769 item_size = btrfs_item_size_nr(eb, slot);
1770 ptr = btrfs_item_ptr_offset(eb, slot);
1771 ptr_end = ptr + item_size;
Chris Masond3977122009-01-05 21:25:51 -05001772 while (ptr < ptr_end) {
Chris Masone02119d2008-09-05 16:13:11 -04001773 di = (struct btrfs_dir_item *)ptr;
Josef Bacik22a94d42011-03-16 16:47:17 -04001774 if (verify_dir_item(root, eb, di)) {
1775 ret = -EIO;
1776 goto out;
1777 }
1778
Chris Masone02119d2008-09-05 16:13:11 -04001779 name_len = btrfs_dir_name_len(eb, di);
1780 name = kmalloc(name_len, GFP_NOFS);
1781 if (!name) {
1782 ret = -ENOMEM;
1783 goto out;
1784 }
1785 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1786 name_len);
1787 log_di = NULL;
Chris Mason12fcfd22009-03-24 10:24:20 -04001788 if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) {
Chris Masone02119d2008-09-05 16:13:11 -04001789 log_di = btrfs_lookup_dir_item(trans, log, log_path,
1790 dir_key->objectid,
1791 name, name_len, 0);
Chris Mason12fcfd22009-03-24 10:24:20 -04001792 } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) {
Chris Masone02119d2008-09-05 16:13:11 -04001793 log_di = btrfs_lookup_dir_index_item(trans, log,
1794 log_path,
1795 dir_key->objectid,
1796 dir_key->offset,
1797 name, name_len, 0);
1798 }
David Sterbac7040052011-04-19 18:00:01 +02001799 if (IS_ERR_OR_NULL(log_di)) {
Chris Masone02119d2008-09-05 16:13:11 -04001800 btrfs_dir_item_key_to_cpu(eb, di, &location);
David Sterbab3b4aa72011-04-21 01:20:15 +02001801 btrfs_release_path(path);
1802 btrfs_release_path(log_path);
Chris Masone02119d2008-09-05 16:13:11 -04001803 inode = read_one_inode(root, location.objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001804 if (!inode) {
1805 kfree(name);
1806 return -EIO;
1807 }
Chris Masone02119d2008-09-05 16:13:11 -04001808
1809 ret = link_to_fixup_dir(trans, root,
1810 path, location.objectid);
Josef Bacik36508602013-04-25 16:23:32 -04001811 if (ret) {
1812 kfree(name);
1813 iput(inode);
1814 goto out;
1815 }
1816
Chris Masone02119d2008-09-05 16:13:11 -04001817 btrfs_inc_nlink(inode);
1818 ret = btrfs_unlink_inode(trans, root, dir, inode,
1819 name, name_len);
Josef Bacik36508602013-04-25 16:23:32 -04001820 if (!ret)
Filipe David Borba Mananaada9af22013-08-05 09:25:47 +01001821 ret = btrfs_run_delayed_items(trans, root);
Chris Masone02119d2008-09-05 16:13:11 -04001822 kfree(name);
1823 iput(inode);
Josef Bacik36508602013-04-25 16:23:32 -04001824 if (ret)
1825 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001826
1827 /* there might still be more names under this key
1828 * check and repeat if required
1829 */
1830 ret = btrfs_search_slot(NULL, root, dir_key, path,
1831 0, 0);
1832 if (ret == 0)
1833 goto again;
1834 ret = 0;
1835 goto out;
1836 }
David Sterbab3b4aa72011-04-21 01:20:15 +02001837 btrfs_release_path(log_path);
Chris Masone02119d2008-09-05 16:13:11 -04001838 kfree(name);
1839
1840 ptr = (unsigned long)(di + 1);
1841 ptr += name_len;
1842 }
1843 ret = 0;
1844out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001845 btrfs_release_path(path);
1846 btrfs_release_path(log_path);
Chris Masone02119d2008-09-05 16:13:11 -04001847 return ret;
1848}
1849
1850/*
1851 * deletion replay happens before we copy any new directory items
1852 * out of the log or out of backreferences from inodes. It
1853 * scans the log to find ranges of keys that log is authoritative for,
1854 * and then scans the directory to find items in those ranges that are
1855 * not present in the log.
1856 *
1857 * Anything we don't find in the log is unlinked and removed from the
1858 * directory.
1859 */
1860static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
1861 struct btrfs_root *root,
1862 struct btrfs_root *log,
1863 struct btrfs_path *path,
Chris Mason12fcfd22009-03-24 10:24:20 -04001864 u64 dirid, int del_all)
Chris Masone02119d2008-09-05 16:13:11 -04001865{
1866 u64 range_start;
1867 u64 range_end;
1868 int key_type = BTRFS_DIR_LOG_ITEM_KEY;
1869 int ret = 0;
1870 struct btrfs_key dir_key;
1871 struct btrfs_key found_key;
1872 struct btrfs_path *log_path;
1873 struct inode *dir;
1874
1875 dir_key.objectid = dirid;
1876 dir_key.type = BTRFS_DIR_ITEM_KEY;
1877 log_path = btrfs_alloc_path();
1878 if (!log_path)
1879 return -ENOMEM;
1880
1881 dir = read_one_inode(root, dirid);
1882 /* it isn't an error if the inode isn't there, that can happen
1883 * because we replay the deletes before we copy in the inode item
1884 * from the log
1885 */
1886 if (!dir) {
1887 btrfs_free_path(log_path);
1888 return 0;
1889 }
1890again:
1891 range_start = 0;
1892 range_end = 0;
Chris Masond3977122009-01-05 21:25:51 -05001893 while (1) {
Chris Mason12fcfd22009-03-24 10:24:20 -04001894 if (del_all)
1895 range_end = (u64)-1;
1896 else {
1897 ret = find_dir_range(log, path, dirid, key_type,
1898 &range_start, &range_end);
1899 if (ret != 0)
1900 break;
1901 }
Chris Masone02119d2008-09-05 16:13:11 -04001902
1903 dir_key.offset = range_start;
Chris Masond3977122009-01-05 21:25:51 -05001904 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04001905 int nritems;
1906 ret = btrfs_search_slot(NULL, root, &dir_key, path,
1907 0, 0);
1908 if (ret < 0)
1909 goto out;
1910
1911 nritems = btrfs_header_nritems(path->nodes[0]);
1912 if (path->slots[0] >= nritems) {
1913 ret = btrfs_next_leaf(root, path);
1914 if (ret)
1915 break;
1916 }
1917 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1918 path->slots[0]);
1919 if (found_key.objectid != dirid ||
1920 found_key.type != dir_key.type)
1921 goto next_type;
1922
1923 if (found_key.offset > range_end)
1924 break;
1925
1926 ret = check_item_in_log(trans, root, log, path,
Chris Mason12fcfd22009-03-24 10:24:20 -04001927 log_path, dir,
1928 &found_key);
Josef Bacik36508602013-04-25 16:23:32 -04001929 if (ret)
1930 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001931 if (found_key.offset == (u64)-1)
1932 break;
1933 dir_key.offset = found_key.offset + 1;
1934 }
David Sterbab3b4aa72011-04-21 01:20:15 +02001935 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001936 if (range_end == (u64)-1)
1937 break;
1938 range_start = range_end + 1;
1939 }
1940
1941next_type:
1942 ret = 0;
1943 if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
1944 key_type = BTRFS_DIR_LOG_INDEX_KEY;
1945 dir_key.type = BTRFS_DIR_INDEX_KEY;
David Sterbab3b4aa72011-04-21 01:20:15 +02001946 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001947 goto again;
1948 }
1949out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001950 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001951 btrfs_free_path(log_path);
1952 iput(dir);
1953 return ret;
1954}
1955
1956/*
1957 * the process_func used to replay items from the log tree. This
1958 * gets called in two different stages. The first stage just looks
1959 * for inodes and makes sure they are all copied into the subvolume.
1960 *
1961 * The second stage copies all the other item types from the log into
1962 * the subvolume. The two stage approach is slower, but gets rid of
1963 * lots of complexity around inodes referencing other inodes that exist
1964 * only in the log (references come from either directory items or inode
1965 * back refs).
1966 */
1967static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1968 struct walk_control *wc, u64 gen)
1969{
1970 int nritems;
1971 struct btrfs_path *path;
1972 struct btrfs_root *root = wc->replay_dest;
1973 struct btrfs_key key;
Chris Masone02119d2008-09-05 16:13:11 -04001974 int level;
1975 int i;
1976 int ret;
1977
Tsutomu Itoh018642a2012-05-29 18:10:13 +09001978 ret = btrfs_read_buffer(eb, gen);
1979 if (ret)
1980 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001981
1982 level = btrfs_header_level(eb);
1983
1984 if (level != 0)
1985 return 0;
1986
1987 path = btrfs_alloc_path();
Mark Fasheh1e5063d2011-07-12 10:46:06 -07001988 if (!path)
1989 return -ENOMEM;
Chris Masone02119d2008-09-05 16:13:11 -04001990
1991 nritems = btrfs_header_nritems(eb);
1992 for (i = 0; i < nritems; i++) {
1993 btrfs_item_key_to_cpu(eb, &key, i);
Chris Masone02119d2008-09-05 16:13:11 -04001994
1995 /* inode keys are done during the first stage */
1996 if (key.type == BTRFS_INODE_ITEM_KEY &&
1997 wc->stage == LOG_WALK_REPLAY_INODES) {
Chris Masone02119d2008-09-05 16:13:11 -04001998 struct btrfs_inode_item *inode_item;
1999 u32 mode;
2000
2001 inode_item = btrfs_item_ptr(eb, i,
2002 struct btrfs_inode_item);
2003 mode = btrfs_inode_mode(eb, inode_item);
2004 if (S_ISDIR(mode)) {
2005 ret = replay_dir_deletes(wc->trans,
Chris Mason12fcfd22009-03-24 10:24:20 -04002006 root, log, path, key.objectid, 0);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002007 if (ret)
2008 break;
Chris Masone02119d2008-09-05 16:13:11 -04002009 }
2010 ret = overwrite_item(wc->trans, root, path,
2011 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002012 if (ret)
2013 break;
Chris Masone02119d2008-09-05 16:13:11 -04002014
Yan, Zhengc71bf092009-11-12 09:34:40 +00002015 /* for regular files, make sure corresponding
2016 * orhpan item exist. extents past the new EOF
2017 * will be truncated later by orphan cleanup.
Chris Masone02119d2008-09-05 16:13:11 -04002018 */
2019 if (S_ISREG(mode)) {
Yan, Zhengc71bf092009-11-12 09:34:40 +00002020 ret = insert_orphan_item(wc->trans, root,
2021 key.objectid);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002022 if (ret)
2023 break;
Chris Masone02119d2008-09-05 16:13:11 -04002024 }
Yan, Zhengc71bf092009-11-12 09:34:40 +00002025
Chris Masone02119d2008-09-05 16:13:11 -04002026 ret = link_to_fixup_dir(wc->trans, root,
2027 path, key.objectid);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002028 if (ret)
2029 break;
Chris Masone02119d2008-09-05 16:13:11 -04002030 }
Josef Bacikdd8e7212013-09-11 11:57:23 -04002031
2032 if (key.type == BTRFS_DIR_INDEX_KEY &&
2033 wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
2034 ret = replay_one_dir_item(wc->trans, root, path,
2035 eb, i, &key);
2036 if (ret)
2037 break;
2038 }
2039
Chris Masone02119d2008-09-05 16:13:11 -04002040 if (wc->stage < LOG_WALK_REPLAY_ALL)
2041 continue;
2042
2043 /* these keys are simply copied */
2044 if (key.type == BTRFS_XATTR_ITEM_KEY) {
2045 ret = overwrite_item(wc->trans, root, path,
2046 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002047 if (ret)
2048 break;
Liu Bo2da1c662013-05-26 13:50:29 +00002049 } else if (key.type == BTRFS_INODE_REF_KEY ||
2050 key.type == BTRFS_INODE_EXTREF_KEY) {
Mark Fashehf1863732012-08-08 11:32:27 -07002051 ret = add_inode_ref(wc->trans, root, log, path,
2052 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002053 if (ret && ret != -ENOENT)
2054 break;
2055 ret = 0;
Chris Masone02119d2008-09-05 16:13:11 -04002056 } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
2057 ret = replay_one_extent(wc->trans, root, path,
2058 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002059 if (ret)
2060 break;
Josef Bacikdd8e7212013-09-11 11:57:23 -04002061 } else if (key.type == BTRFS_DIR_ITEM_KEY) {
Chris Masone02119d2008-09-05 16:13:11 -04002062 ret = replay_one_dir_item(wc->trans, root, path,
2063 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002064 if (ret)
2065 break;
Chris Masone02119d2008-09-05 16:13:11 -04002066 }
2067 }
2068 btrfs_free_path(path);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002069 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04002070}
2071
Chris Masond3977122009-01-05 21:25:51 -05002072static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
Chris Masone02119d2008-09-05 16:13:11 -04002073 struct btrfs_root *root,
2074 struct btrfs_path *path, int *level,
2075 struct walk_control *wc)
2076{
2077 u64 root_owner;
Chris Masone02119d2008-09-05 16:13:11 -04002078 u64 bytenr;
2079 u64 ptr_gen;
2080 struct extent_buffer *next;
2081 struct extent_buffer *cur;
2082 struct extent_buffer *parent;
2083 u32 blocksize;
2084 int ret = 0;
2085
2086 WARN_ON(*level < 0);
2087 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2088
Chris Masond3977122009-01-05 21:25:51 -05002089 while (*level > 0) {
Chris Masone02119d2008-09-05 16:13:11 -04002090 WARN_ON(*level < 0);
2091 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2092 cur = path->nodes[*level];
2093
2094 if (btrfs_header_level(cur) != *level)
2095 WARN_ON(1);
2096
2097 if (path->slots[*level] >=
2098 btrfs_header_nritems(cur))
2099 break;
2100
2101 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2102 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2103 blocksize = btrfs_level_size(root, *level - 1);
2104
2105 parent = path->nodes[*level];
2106 root_owner = btrfs_header_owner(parent);
Chris Masone02119d2008-09-05 16:13:11 -04002107
2108 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
liubo2a29edc2011-01-26 06:22:08 +00002109 if (!next)
2110 return -ENOMEM;
Chris Masone02119d2008-09-05 16:13:11 -04002111
Chris Masone02119d2008-09-05 16:13:11 -04002112 if (*level == 1) {
Mark Fasheh1e5063d2011-07-12 10:46:06 -07002113 ret = wc->process_func(root, next, wc, ptr_gen);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002114 if (ret) {
2115 free_extent_buffer(next);
Mark Fasheh1e5063d2011-07-12 10:46:06 -07002116 return ret;
Josef Bacikb50c6e22013-04-25 15:55:30 -04002117 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002118
Chris Masone02119d2008-09-05 16:13:11 -04002119 path->slots[*level]++;
2120 if (wc->free) {
Tsutomu Itoh018642a2012-05-29 18:10:13 +09002121 ret = btrfs_read_buffer(next, ptr_gen);
2122 if (ret) {
2123 free_extent_buffer(next);
2124 return ret;
2125 }
Chris Masone02119d2008-09-05 16:13:11 -04002126
2127 btrfs_tree_lock(next);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002128 btrfs_set_lock_blocking(next);
Chris Masonbd681512011-07-16 15:23:14 -04002129 clean_tree_block(trans, root, next);
Chris Masone02119d2008-09-05 16:13:11 -04002130 btrfs_wait_tree_block_writeback(next);
2131 btrfs_tree_unlock(next);
2132
Chris Masone02119d2008-09-05 16:13:11 -04002133 WARN_ON(root_owner !=
2134 BTRFS_TREE_LOG_OBJECTID);
Chris Masone688b7252011-10-31 20:52:39 -04002135 ret = btrfs_free_and_pin_reserved_extent(root,
Chris Masond00aff02008-09-11 15:54:42 -04002136 bytenr, blocksize);
Josef Bacik36508602013-04-25 16:23:32 -04002137 if (ret) {
2138 free_extent_buffer(next);
2139 return ret;
2140 }
Chris Masone02119d2008-09-05 16:13:11 -04002141 }
2142 free_extent_buffer(next);
2143 continue;
2144 }
Tsutomu Itoh018642a2012-05-29 18:10:13 +09002145 ret = btrfs_read_buffer(next, ptr_gen);
2146 if (ret) {
2147 free_extent_buffer(next);
2148 return ret;
2149 }
Chris Masone02119d2008-09-05 16:13:11 -04002150
2151 WARN_ON(*level <= 0);
2152 if (path->nodes[*level-1])
2153 free_extent_buffer(path->nodes[*level-1]);
2154 path->nodes[*level-1] = next;
2155 *level = btrfs_header_level(next);
2156 path->slots[*level] = 0;
2157 cond_resched();
2158 }
2159 WARN_ON(*level < 0);
2160 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2161
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002162 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
Chris Masone02119d2008-09-05 16:13:11 -04002163
2164 cond_resched();
2165 return 0;
2166}
2167
Chris Masond3977122009-01-05 21:25:51 -05002168static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
Chris Masone02119d2008-09-05 16:13:11 -04002169 struct btrfs_root *root,
2170 struct btrfs_path *path, int *level,
2171 struct walk_control *wc)
2172{
2173 u64 root_owner;
Chris Masone02119d2008-09-05 16:13:11 -04002174 int i;
2175 int slot;
2176 int ret;
2177
Chris Masond3977122009-01-05 21:25:51 -05002178 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
Chris Masone02119d2008-09-05 16:13:11 -04002179 slot = path->slots[i];
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002180 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
Chris Masone02119d2008-09-05 16:13:11 -04002181 path->slots[i]++;
2182 *level = i;
2183 WARN_ON(*level == 0);
2184 return 0;
2185 } else {
Zheng Yan31840ae2008-09-23 13:14:14 -04002186 struct extent_buffer *parent;
2187 if (path->nodes[*level] == root->node)
2188 parent = path->nodes[*level];
2189 else
2190 parent = path->nodes[*level + 1];
2191
2192 root_owner = btrfs_header_owner(parent);
Mark Fasheh1e5063d2011-07-12 10:46:06 -07002193 ret = wc->process_func(root, path->nodes[*level], wc,
Chris Masone02119d2008-09-05 16:13:11 -04002194 btrfs_header_generation(path->nodes[*level]));
Mark Fasheh1e5063d2011-07-12 10:46:06 -07002195 if (ret)
2196 return ret;
2197
Chris Masone02119d2008-09-05 16:13:11 -04002198 if (wc->free) {
2199 struct extent_buffer *next;
2200
2201 next = path->nodes[*level];
2202
2203 btrfs_tree_lock(next);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002204 btrfs_set_lock_blocking(next);
Chris Masonbd681512011-07-16 15:23:14 -04002205 clean_tree_block(trans, root, next);
Chris Masone02119d2008-09-05 16:13:11 -04002206 btrfs_wait_tree_block_writeback(next);
2207 btrfs_tree_unlock(next);
2208
Chris Masone02119d2008-09-05 16:13:11 -04002209 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
Chris Masone688b7252011-10-31 20:52:39 -04002210 ret = btrfs_free_and_pin_reserved_extent(root,
Chris Masone02119d2008-09-05 16:13:11 -04002211 path->nodes[*level]->start,
Chris Masond00aff02008-09-11 15:54:42 -04002212 path->nodes[*level]->len);
Josef Bacik36508602013-04-25 16:23:32 -04002213 if (ret)
2214 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04002215 }
2216 free_extent_buffer(path->nodes[*level]);
2217 path->nodes[*level] = NULL;
2218 *level = i + 1;
2219 }
2220 }
2221 return 1;
2222}
2223
2224/*
2225 * drop the reference count on the tree rooted at 'snap'. This traverses
2226 * the tree freeing any blocks that have a ref count of zero after being
2227 * decremented.
2228 */
2229static int walk_log_tree(struct btrfs_trans_handle *trans,
2230 struct btrfs_root *log, struct walk_control *wc)
2231{
2232 int ret = 0;
2233 int wret;
2234 int level;
2235 struct btrfs_path *path;
Chris Masone02119d2008-09-05 16:13:11 -04002236 int orig_level;
2237
2238 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00002239 if (!path)
2240 return -ENOMEM;
Chris Masone02119d2008-09-05 16:13:11 -04002241
2242 level = btrfs_header_level(log->node);
2243 orig_level = level;
2244 path->nodes[level] = log->node;
2245 extent_buffer_get(log->node);
2246 path->slots[level] = 0;
2247
Chris Masond3977122009-01-05 21:25:51 -05002248 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04002249 wret = walk_down_log_tree(trans, log, path, &level, wc);
2250 if (wret > 0)
2251 break;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002252 if (wret < 0) {
Chris Masone02119d2008-09-05 16:13:11 -04002253 ret = wret;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002254 goto out;
2255 }
Chris Masone02119d2008-09-05 16:13:11 -04002256
2257 wret = walk_up_log_tree(trans, log, path, &level, wc);
2258 if (wret > 0)
2259 break;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002260 if (wret < 0) {
Chris Masone02119d2008-09-05 16:13:11 -04002261 ret = wret;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002262 goto out;
2263 }
Chris Masone02119d2008-09-05 16:13:11 -04002264 }
2265
2266 /* was the root node processed? if not, catch it here */
2267 if (path->nodes[orig_level]) {
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002268 ret = wc->process_func(log, path->nodes[orig_level], wc,
Chris Masone02119d2008-09-05 16:13:11 -04002269 btrfs_header_generation(path->nodes[orig_level]));
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002270 if (ret)
2271 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04002272 if (wc->free) {
2273 struct extent_buffer *next;
2274
2275 next = path->nodes[orig_level];
2276
2277 btrfs_tree_lock(next);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002278 btrfs_set_lock_blocking(next);
Chris Masonbd681512011-07-16 15:23:14 -04002279 clean_tree_block(trans, log, next);
Chris Masone02119d2008-09-05 16:13:11 -04002280 btrfs_wait_tree_block_writeback(next);
2281 btrfs_tree_unlock(next);
2282
Chris Masone02119d2008-09-05 16:13:11 -04002283 WARN_ON(log->root_key.objectid !=
2284 BTRFS_TREE_LOG_OBJECTID);
Chris Masone688b7252011-10-31 20:52:39 -04002285 ret = btrfs_free_and_pin_reserved_extent(log, next->start,
Chris Masond00aff02008-09-11 15:54:42 -04002286 next->len);
Josef Bacik36508602013-04-25 16:23:32 -04002287 if (ret)
2288 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04002289 }
2290 }
2291
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002292out:
Chris Masone02119d2008-09-05 16:13:11 -04002293 btrfs_free_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002294 return ret;
2295}
2296
Yan Zheng7237f182009-01-21 12:54:03 -05002297/*
2298 * helper function to update the item for a given subvolumes log root
2299 * in the tree of log roots
2300 */
2301static int update_log_root(struct btrfs_trans_handle *trans,
2302 struct btrfs_root *log)
2303{
2304 int ret;
2305
2306 if (log->log_transid == 1) {
2307 /* insert root item on the first sync */
2308 ret = btrfs_insert_root(trans, log->fs_info->log_root_tree,
2309 &log->root_key, &log->root_item);
2310 } else {
2311 ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
2312 &log->root_key, &log->root_item);
2313 }
2314 return ret;
2315}
2316
Chris Mason12fcfd22009-03-24 10:24:20 -04002317static int wait_log_commit(struct btrfs_trans_handle *trans,
2318 struct btrfs_root *root, unsigned long transid)
Chris Masone02119d2008-09-05 16:13:11 -04002319{
2320 DEFINE_WAIT(wait);
Yan Zheng7237f182009-01-21 12:54:03 -05002321 int index = transid % 2;
Chris Masone02119d2008-09-05 16:13:11 -04002322
Yan Zheng7237f182009-01-21 12:54:03 -05002323 /*
2324 * we only allow two pending log transactions at a time,
2325 * so we know that if ours is more than 2 older than the
2326 * current transaction, we're done
2327 */
Chris Masone02119d2008-09-05 16:13:11 -04002328 do {
Yan Zheng7237f182009-01-21 12:54:03 -05002329 prepare_to_wait(&root->log_commit_wait[index],
2330 &wait, TASK_UNINTERRUPTIBLE);
2331 mutex_unlock(&root->log_mutex);
Chris Mason12fcfd22009-03-24 10:24:20 -04002332
2333 if (root->fs_info->last_trans_log_full_commit !=
2334 trans->transid && root->log_transid < transid + 2 &&
Yan Zheng7237f182009-01-21 12:54:03 -05002335 atomic_read(&root->log_commit[index]))
Chris Masone02119d2008-09-05 16:13:11 -04002336 schedule();
Chris Mason12fcfd22009-03-24 10:24:20 -04002337
Yan Zheng7237f182009-01-21 12:54:03 -05002338 finish_wait(&root->log_commit_wait[index], &wait);
2339 mutex_lock(&root->log_mutex);
Jan Kara6dd70ce2012-01-26 15:01:11 -05002340 } while (root->fs_info->last_trans_log_full_commit !=
2341 trans->transid && root->log_transid < transid + 2 &&
Yan Zheng7237f182009-01-21 12:54:03 -05002342 atomic_read(&root->log_commit[index]));
2343 return 0;
2344}
2345
Jeff Mahoney143bede2012-03-01 14:56:26 +01002346static void wait_for_writer(struct btrfs_trans_handle *trans,
2347 struct btrfs_root *root)
Yan Zheng7237f182009-01-21 12:54:03 -05002348{
2349 DEFINE_WAIT(wait);
Jan Kara6dd70ce2012-01-26 15:01:11 -05002350 while (root->fs_info->last_trans_log_full_commit !=
2351 trans->transid && atomic_read(&root->log_writers)) {
Yan Zheng7237f182009-01-21 12:54:03 -05002352 prepare_to_wait(&root->log_writer_wait,
2353 &wait, TASK_UNINTERRUPTIBLE);
2354 mutex_unlock(&root->log_mutex);
Chris Mason12fcfd22009-03-24 10:24:20 -04002355 if (root->fs_info->last_trans_log_full_commit !=
2356 trans->transid && atomic_read(&root->log_writers))
Yan Zheng7237f182009-01-21 12:54:03 -05002357 schedule();
2358 mutex_lock(&root->log_mutex);
2359 finish_wait(&root->log_writer_wait, &wait);
2360 }
Chris Masone02119d2008-09-05 16:13:11 -04002361}
2362
/*
 * btrfs_sync_log sends a given tree log down to the disk and
 * updates the super blocks to record it.  When this call is done,
 * you know that any inodes previously logged are safely on disk only
 * if it returns 0.
 *
 * Any other return value means you need to call btrfs_commit_transaction.
 * Some of the edge cases for fsyncing directories that have had unlinks
 * or renames done in the past mean that sometimes the only safe
 * fsync is to commit the whole FS.  When btrfs_sync_log returns -EAGAIN,
 * that has happened.
 */
2375int btrfs_sync_log(struct btrfs_trans_handle *trans,
2376 struct btrfs_root *root)
2377{
Yan Zheng7237f182009-01-21 12:54:03 -05002378 int index1;
2379 int index2;
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002380 int mark;
Chris Masone02119d2008-09-05 16:13:11 -04002381 int ret;
Chris Masone02119d2008-09-05 16:13:11 -04002382 struct btrfs_root *log = root->log_root;
Yan Zheng7237f182009-01-21 12:54:03 -05002383 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002384 unsigned long log_transid = 0;
Miao Xiec6adc9c2013-05-28 10:05:39 +00002385 struct blk_plug plug;
Chris Masone02119d2008-09-05 16:13:11 -04002386
Yan Zheng7237f182009-01-21 12:54:03 -05002387 mutex_lock(&root->log_mutex);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002388 log_transid = root->log_transid;
Yan Zheng7237f182009-01-21 12:54:03 -05002389 index1 = root->log_transid % 2;
2390 if (atomic_read(&root->log_commit[index1])) {
Chris Mason12fcfd22009-03-24 10:24:20 -04002391 wait_log_commit(trans, root, root->log_transid);
Yan Zheng7237f182009-01-21 12:54:03 -05002392 mutex_unlock(&root->log_mutex);
2393 return 0;
Chris Masone02119d2008-09-05 16:13:11 -04002394 }
Yan Zheng7237f182009-01-21 12:54:03 -05002395 atomic_set(&root->log_commit[index1], 1);
2396
2397 /* wait for previous tree log sync to complete */
2398 if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
Chris Mason12fcfd22009-03-24 10:24:20 -04002399 wait_log_commit(trans, root, root->log_transid - 1);
Yan, Zheng86df7eb2009-10-14 09:24:59 -04002400 while (1) {
Miao Xie2ecb7922012-09-06 04:04:27 -06002401 int batch = atomic_read(&root->log_batch);
Chris Masoncd354ad2011-10-20 15:45:37 -04002402 /* when we're on an ssd, just kick the log commit out */
2403 if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) {
Yan, Zheng86df7eb2009-10-14 09:24:59 -04002404 mutex_unlock(&root->log_mutex);
2405 schedule_timeout_uninterruptible(1);
2406 mutex_lock(&root->log_mutex);
2407 }
Chris Mason12fcfd22009-03-24 10:24:20 -04002408 wait_for_writer(trans, root);
Miao Xie2ecb7922012-09-06 04:04:27 -06002409 if (batch == atomic_read(&root->log_batch))
Chris Masone02119d2008-09-05 16:13:11 -04002410 break;
2411 }
Chris Masond0c803c2008-09-11 16:17:57 -04002412
Chris Mason12fcfd22009-03-24 10:24:20 -04002413 /* bail out if we need to do a full commit */
2414 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2415 ret = -EAGAIN;
Josef Bacik2ab28f32012-10-12 15:27:49 -04002416 btrfs_free_logged_extents(log, log_transid);
Chris Mason12fcfd22009-03-24 10:24:20 -04002417 mutex_unlock(&root->log_mutex);
2418 goto out;
2419 }
2420
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002421 if (log_transid % 2 == 0)
2422 mark = EXTENT_DIRTY;
2423 else
2424 mark = EXTENT_NEW;
2425
Chris Mason690587d2009-10-13 13:29:19 -04002426 /* we start IO on all the marked extents here, but we don't actually
2427 * wait for them until later.
2428 */
Miao Xiec6adc9c2013-05-28 10:05:39 +00002429 blk_start_plug(&plug);
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002430 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002431 if (ret) {
Miao Xiec6adc9c2013-05-28 10:05:39 +00002432 blk_finish_plug(&plug);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002433 btrfs_abort_transaction(trans, root, ret);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002434 btrfs_free_logged_extents(log, log_transid);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002435 mutex_unlock(&root->log_mutex);
2436 goto out;
2437 }
Yan Zheng7237f182009-01-21 12:54:03 -05002438
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002439 btrfs_set_root_node(&log->root_item, log->node);
Yan Zheng7237f182009-01-21 12:54:03 -05002440
Yan Zheng7237f182009-01-21 12:54:03 -05002441 root->log_transid++;
2442 log->log_transid = root->log_transid;
Josef Bacikff782e02009-10-08 15:30:04 -04002443 root->log_start_pid = 0;
Yan Zheng7237f182009-01-21 12:54:03 -05002444 smp_mb();
2445 /*
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002446 * IO has been started, blocks of the log tree have WRITTEN flag set
2447 * in their headers. new modifications of the log will be written to
2448 * new positions. so it's safe to allow log writers to go in.
Yan Zheng7237f182009-01-21 12:54:03 -05002449 */
2450 mutex_unlock(&root->log_mutex);
2451
2452 mutex_lock(&log_root_tree->log_mutex);
Miao Xie2ecb7922012-09-06 04:04:27 -06002453 atomic_inc(&log_root_tree->log_batch);
Yan Zheng7237f182009-01-21 12:54:03 -05002454 atomic_inc(&log_root_tree->log_writers);
2455 mutex_unlock(&log_root_tree->log_mutex);
2456
2457 ret = update_log_root(trans, log);
Yan Zheng7237f182009-01-21 12:54:03 -05002458
2459 mutex_lock(&log_root_tree->log_mutex);
2460 if (atomic_dec_and_test(&log_root_tree->log_writers)) {
2461 smp_mb();
2462 if (waitqueue_active(&log_root_tree->log_writer_wait))
2463 wake_up(&log_root_tree->log_writer_wait);
2464 }
2465
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002466 if (ret) {
Miao Xiec6adc9c2013-05-28 10:05:39 +00002467 blk_finish_plug(&plug);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002468 if (ret != -ENOSPC) {
2469 btrfs_abort_transaction(trans, root, ret);
2470 mutex_unlock(&log_root_tree->log_mutex);
2471 goto out;
2472 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002473 root->fs_info->last_trans_log_full_commit = trans->transid;
2474 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002475 btrfs_free_logged_extents(log, log_transid);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002476 mutex_unlock(&log_root_tree->log_mutex);
2477 ret = -EAGAIN;
2478 goto out;
2479 }
2480
Yan Zheng7237f182009-01-21 12:54:03 -05002481 index2 = log_root_tree->log_transid % 2;
2482 if (atomic_read(&log_root_tree->log_commit[index2])) {
Miao Xiec6adc9c2013-05-28 10:05:39 +00002483 blk_finish_plug(&plug);
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002484 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
Chris Mason12fcfd22009-03-24 10:24:20 -04002485 wait_log_commit(trans, log_root_tree,
2486 log_root_tree->log_transid);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002487 btrfs_free_logged_extents(log, log_transid);
Yan Zheng7237f182009-01-21 12:54:03 -05002488 mutex_unlock(&log_root_tree->log_mutex);
Chris Masonb31eabd2011-01-31 16:48:24 -05002489 ret = 0;
Yan Zheng7237f182009-01-21 12:54:03 -05002490 goto out;
2491 }
2492 atomic_set(&log_root_tree->log_commit[index2], 1);
2493
Chris Mason12fcfd22009-03-24 10:24:20 -04002494 if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
2495 wait_log_commit(trans, log_root_tree,
2496 log_root_tree->log_transid - 1);
2497 }
Yan Zheng7237f182009-01-21 12:54:03 -05002498
Chris Mason12fcfd22009-03-24 10:24:20 -04002499 wait_for_writer(trans, log_root_tree);
2500
2501 /*
2502 * now that we've moved on to the tree of log tree roots,
2503 * check the full commit flag again
2504 */
2505 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
Miao Xiec6adc9c2013-05-28 10:05:39 +00002506 blk_finish_plug(&plug);
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002507 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002508 btrfs_free_logged_extents(log, log_transid);
Chris Mason12fcfd22009-03-24 10:24:20 -04002509 mutex_unlock(&log_root_tree->log_mutex);
2510 ret = -EAGAIN;
2511 goto out_wake_log_root;
2512 }
Yan Zheng7237f182009-01-21 12:54:03 -05002513
Miao Xiec6adc9c2013-05-28 10:05:39 +00002514 ret = btrfs_write_marked_extents(log_root_tree,
2515 &log_root_tree->dirty_log_pages,
2516 EXTENT_DIRTY | EXTENT_NEW);
2517 blk_finish_plug(&plug);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002518 if (ret) {
2519 btrfs_abort_transaction(trans, root, ret);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002520 btrfs_free_logged_extents(log, log_transid);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002521 mutex_unlock(&log_root_tree->log_mutex);
2522 goto out_wake_log_root;
2523 }
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002524 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
Miao Xiec6adc9c2013-05-28 10:05:39 +00002525 btrfs_wait_marked_extents(log_root_tree,
2526 &log_root_tree->dirty_log_pages,
2527 EXTENT_NEW | EXTENT_DIRTY);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002528 btrfs_wait_logged_extents(log, log_transid);
Chris Masone02119d2008-09-05 16:13:11 -04002529
David Sterba6c417612011-04-13 15:41:04 +02002530 btrfs_set_super_log_root(root->fs_info->super_for_commit,
Yan Zheng7237f182009-01-21 12:54:03 -05002531 log_root_tree->node->start);
David Sterba6c417612011-04-13 15:41:04 +02002532 btrfs_set_super_log_root_level(root->fs_info->super_for_commit,
Yan Zheng7237f182009-01-21 12:54:03 -05002533 btrfs_header_level(log_root_tree->node));
Chris Masone02119d2008-09-05 16:13:11 -04002534
Yan Zheng7237f182009-01-21 12:54:03 -05002535 log_root_tree->log_transid++;
Chris Masone02119d2008-09-05 16:13:11 -04002536 smp_mb();
Yan Zheng7237f182009-01-21 12:54:03 -05002537
2538 mutex_unlock(&log_root_tree->log_mutex);
2539
2540 /*
2541 * nobody else is going to jump in and write the the ctree
2542 * super here because the log_commit atomic below is protecting
2543 * us. We must be called with a transaction handle pinning
2544 * the running transaction open, so a full commit can't hop
2545 * in and cause problems either.
2546 */
Arne Jansena2de7332011-03-08 14:14:00 +01002547 btrfs_scrub_pause_super(root);
Stefan Behrens5af3e8c2012-08-01 18:56:49 +02002548 ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
Arne Jansena2de7332011-03-08 14:14:00 +01002549 btrfs_scrub_continue_super(root);
Stefan Behrens5af3e8c2012-08-01 18:56:49 +02002550 if (ret) {
2551 btrfs_abort_transaction(trans, root, ret);
2552 goto out_wake_log_root;
2553 }
Yan Zheng7237f182009-01-21 12:54:03 -05002554
Chris Mason257c62e2009-10-13 13:21:08 -04002555 mutex_lock(&root->log_mutex);
2556 if (root->last_log_commit < log_transid)
2557 root->last_log_commit = log_transid;
2558 mutex_unlock(&root->log_mutex);
2559
Chris Mason12fcfd22009-03-24 10:24:20 -04002560out_wake_log_root:
Yan Zheng7237f182009-01-21 12:54:03 -05002561 atomic_set(&log_root_tree->log_commit[index2], 0);
2562 smp_mb();
2563 if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
2564 wake_up(&log_root_tree->log_commit_wait[index2]);
Chris Masone02119d2008-09-05 16:13:11 -04002565out:
Yan Zheng7237f182009-01-21 12:54:03 -05002566 atomic_set(&root->log_commit[index1], 0);
2567 smp_mb();
2568 if (waitqueue_active(&root->log_commit_wait[index1]))
2569 wake_up(&root->log_commit_wait[index1]);
Chris Masonb31eabd2011-01-31 16:48:24 -05002570 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04002571}
2572
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002573static void free_log_tree(struct btrfs_trans_handle *trans,
2574 struct btrfs_root *log)
Chris Masone02119d2008-09-05 16:13:11 -04002575{
2576 int ret;
Chris Masond0c803c2008-09-11 16:17:57 -04002577 u64 start;
2578 u64 end;
Chris Masone02119d2008-09-05 16:13:11 -04002579 struct walk_control wc = {
2580 .free = 1,
2581 .process_func = process_one_buffer
2582 };
2583
Liu Bo33217192013-02-27 13:28:24 +00002584 if (trans) {
2585 ret = walk_log_tree(trans, log, &wc);
Josef Bacik36508602013-04-25 16:23:32 -04002586
2587 /* I don't think this can happen but just in case */
2588 if (ret)
2589 btrfs_abort_transaction(trans, log, ret);
Liu Bo33217192013-02-27 13:28:24 +00002590 }
Chris Masone02119d2008-09-05 16:13:11 -04002591
Chris Masond3977122009-01-05 21:25:51 -05002592 while (1) {
Chris Masond0c803c2008-09-11 16:17:57 -04002593 ret = find_first_extent_bit(&log->dirty_log_pages,
Josef Bacike6138872012-09-27 17:07:30 -04002594 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
2595 NULL);
Chris Masond0c803c2008-09-11 16:17:57 -04002596 if (ret)
2597 break;
2598
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002599 clear_extent_bits(&log->dirty_log_pages, start, end,
2600 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
Chris Masond0c803c2008-09-11 16:17:57 -04002601 }
2602
Josef Bacik2ab28f32012-10-12 15:27:49 -04002603 /*
2604 * We may have short-circuited the log tree with the full commit logic
2605 * and left ordered extents on our list, so clear these out to keep us
2606 * from leaking inodes and memory.
2607 */
2608 btrfs_free_logged_extents(log, 0);
2609 btrfs_free_logged_extents(log, 1);
2610
Yan Zheng7237f182009-01-21 12:54:03 -05002611 free_extent_buffer(log->node);
2612 kfree(log);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002613}
2614
2615/*
2616 * free all the extents used by the tree log. This should be called
2617 * at commit time of the full transaction
2618 */
2619int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2620{
2621 if (root->log_root) {
2622 free_log_tree(trans, root->log_root);
2623 root->log_root = NULL;
2624 }
2625 return 0;
2626}
2627
2628int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
2629 struct btrfs_fs_info *fs_info)
2630{
2631 if (fs_info->log_root_tree) {
2632 free_log_tree(trans, fs_info->log_root_tree);
2633 fs_info->log_root_tree = NULL;
2634 }
Chris Masone02119d2008-09-05 16:13:11 -04002635 return 0;
2636}
2637
2638/*
Chris Masone02119d2008-09-05 16:13:11 -04002639 * If both a file and directory are logged, and unlinks or renames are
2640 * mixed in, we have a few interesting corners:
2641 *
2642 * create file X in dir Y
2643 * link file X to X.link in dir Y
2644 * fsync file X
2645 * unlink file X but leave X.link
2646 * fsync dir Y
2647 *
2648 * After a crash we would expect only X.link to exist. But file X
2649 * didn't get fsync'd again so the log has back refs for X and X.link.
2650 *
2651 * We solve this by removing directory entries and inode backrefs from the
2652 * log when a file that was logged in the current transaction is
2653 * unlinked. Any later fsync will include the updated log entries, and
2654 * we'll be able to reconstruct the proper directory items from backrefs.
2655 *
2656 * This optimizations allows us to avoid relogging the entire inode
2657 * or the entire directory.
2658 */
2659int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2660 struct btrfs_root *root,
2661 const char *name, int name_len,
2662 struct inode *dir, u64 index)
2663{
2664 struct btrfs_root *log;
2665 struct btrfs_dir_item *di;
2666 struct btrfs_path *path;
2667 int ret;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002668 int err = 0;
Chris Masone02119d2008-09-05 16:13:11 -04002669 int bytes_del = 0;
Li Zefan33345d012011-04-20 10:31:50 +08002670 u64 dir_ino = btrfs_ino(dir);
Chris Masone02119d2008-09-05 16:13:11 -04002671
Chris Mason3a5f1d42008-09-11 15:53:37 -04002672 if (BTRFS_I(dir)->logged_trans < trans->transid)
2673 return 0;
2674
Chris Masone02119d2008-09-05 16:13:11 -04002675 ret = join_running_log_trans(root);
2676 if (ret)
2677 return 0;
2678
2679 mutex_lock(&BTRFS_I(dir)->log_mutex);
2680
2681 log = root->log_root;
2682 path = btrfs_alloc_path();
Tsutomu Itoha62f44a2011-04-25 19:43:51 -04002683 if (!path) {
2684 err = -ENOMEM;
2685 goto out_unlock;
2686 }
liubo2a29edc2011-01-26 06:22:08 +00002687
Li Zefan33345d012011-04-20 10:31:50 +08002688 di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
Chris Masone02119d2008-09-05 16:13:11 -04002689 name, name_len, -1);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002690 if (IS_ERR(di)) {
2691 err = PTR_ERR(di);
2692 goto fail;
2693 }
2694 if (di) {
Chris Masone02119d2008-09-05 16:13:11 -04002695 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2696 bytes_del += name_len;
Josef Bacik36508602013-04-25 16:23:32 -04002697 if (ret) {
2698 err = ret;
2699 goto fail;
2700 }
Chris Masone02119d2008-09-05 16:13:11 -04002701 }
David Sterbab3b4aa72011-04-21 01:20:15 +02002702 btrfs_release_path(path);
Li Zefan33345d012011-04-20 10:31:50 +08002703 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
Chris Masone02119d2008-09-05 16:13:11 -04002704 index, name, name_len, -1);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002705 if (IS_ERR(di)) {
2706 err = PTR_ERR(di);
2707 goto fail;
2708 }
2709 if (di) {
Chris Masone02119d2008-09-05 16:13:11 -04002710 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2711 bytes_del += name_len;
Josef Bacik36508602013-04-25 16:23:32 -04002712 if (ret) {
2713 err = ret;
2714 goto fail;
2715 }
Chris Masone02119d2008-09-05 16:13:11 -04002716 }
2717
2718 /* update the directory size in the log to reflect the names
2719 * we have removed
2720 */
2721 if (bytes_del) {
2722 struct btrfs_key key;
2723
Li Zefan33345d012011-04-20 10:31:50 +08002724 key.objectid = dir_ino;
Chris Masone02119d2008-09-05 16:13:11 -04002725 key.offset = 0;
2726 key.type = BTRFS_INODE_ITEM_KEY;
David Sterbab3b4aa72011-04-21 01:20:15 +02002727 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002728
2729 ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002730 if (ret < 0) {
2731 err = ret;
2732 goto fail;
2733 }
Chris Masone02119d2008-09-05 16:13:11 -04002734 if (ret == 0) {
2735 struct btrfs_inode_item *item;
2736 u64 i_size;
2737
2738 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2739 struct btrfs_inode_item);
2740 i_size = btrfs_inode_size(path->nodes[0], item);
2741 if (i_size > bytes_del)
2742 i_size -= bytes_del;
2743 else
2744 i_size = 0;
2745 btrfs_set_inode_size(path->nodes[0], item, i_size);
2746 btrfs_mark_buffer_dirty(path->nodes[0]);
2747 } else
2748 ret = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02002749 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002750 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002751fail:
Chris Masone02119d2008-09-05 16:13:11 -04002752 btrfs_free_path(path);
Tsutomu Itoha62f44a2011-04-25 19:43:51 -04002753out_unlock:
Chris Masone02119d2008-09-05 16:13:11 -04002754 mutex_unlock(&BTRFS_I(dir)->log_mutex);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002755 if (ret == -ENOSPC) {
2756 root->fs_info->last_trans_log_full_commit = trans->transid;
2757 ret = 0;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002758 } else if (ret < 0)
2759 btrfs_abort_transaction(trans, root, ret);
2760
Chris Mason12fcfd22009-03-24 10:24:20 -04002761 btrfs_end_log_trans(root);
Chris Masone02119d2008-09-05 16:13:11 -04002762
Andi Kleen411fc6b2010-10-29 15:14:31 -04002763 return err;
Chris Masone02119d2008-09-05 16:13:11 -04002764}
2765
2766/* see comments for btrfs_del_dir_entries_in_log */
2767int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2768 struct btrfs_root *root,
2769 const char *name, int name_len,
2770 struct inode *inode, u64 dirid)
2771{
2772 struct btrfs_root *log;
2773 u64 index;
2774 int ret;
2775
Chris Mason3a5f1d42008-09-11 15:53:37 -04002776 if (BTRFS_I(inode)->logged_trans < trans->transid)
2777 return 0;
2778
Chris Masone02119d2008-09-05 16:13:11 -04002779 ret = join_running_log_trans(root);
2780 if (ret)
2781 return 0;
2782 log = root->log_root;
2783 mutex_lock(&BTRFS_I(inode)->log_mutex);
2784
Li Zefan33345d012011-04-20 10:31:50 +08002785 ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
Chris Masone02119d2008-09-05 16:13:11 -04002786 dirid, &index);
2787 mutex_unlock(&BTRFS_I(inode)->log_mutex);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002788 if (ret == -ENOSPC) {
2789 root->fs_info->last_trans_log_full_commit = trans->transid;
2790 ret = 0;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002791 } else if (ret < 0 && ret != -ENOENT)
2792 btrfs_abort_transaction(trans, root, ret);
Chris Mason12fcfd22009-03-24 10:24:20 -04002793 btrfs_end_log_trans(root);
Chris Masone02119d2008-09-05 16:13:11 -04002794
Chris Masone02119d2008-09-05 16:13:11 -04002795 return ret;
2796}
2797
2798/*
2799 * creates a range item in the log for 'dirid'. first_offset and
2800 * last_offset tell us which parts of the key space the log should
2801 * be considered authoritative for.
2802 */
2803static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
2804 struct btrfs_root *log,
2805 struct btrfs_path *path,
2806 int key_type, u64 dirid,
2807 u64 first_offset, u64 last_offset)
2808{
2809 int ret;
2810 struct btrfs_key key;
2811 struct btrfs_dir_log_item *item;
2812
2813 key.objectid = dirid;
2814 key.offset = first_offset;
2815 if (key_type == BTRFS_DIR_ITEM_KEY)
2816 key.type = BTRFS_DIR_LOG_ITEM_KEY;
2817 else
2818 key.type = BTRFS_DIR_LOG_INDEX_KEY;
2819 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002820 if (ret)
2821 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04002822
2823 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2824 struct btrfs_dir_log_item);
2825 btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
2826 btrfs_mark_buffer_dirty(path->nodes[0]);
David Sterbab3b4aa72011-04-21 01:20:15 +02002827 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002828 return 0;
2829}
2830
2831/*
2832 * log all the items included in the current transaction for a given
2833 * directory. This also creates the range items in the log tree required
2834 * to replay anything deleted before the fsync
2835 */
2836static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2837 struct btrfs_root *root, struct inode *inode,
2838 struct btrfs_path *path,
2839 struct btrfs_path *dst_path, int key_type,
2840 u64 min_offset, u64 *last_offset_ret)
2841{
2842 struct btrfs_key min_key;
2843 struct btrfs_key max_key;
2844 struct btrfs_root *log = root->log_root;
2845 struct extent_buffer *src;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002846 int err = 0;
Chris Masone02119d2008-09-05 16:13:11 -04002847 int ret;
2848 int i;
2849 int nritems;
2850 u64 first_offset = min_offset;
2851 u64 last_offset = (u64)-1;
Li Zefan33345d012011-04-20 10:31:50 +08002852 u64 ino = btrfs_ino(inode);
Chris Masone02119d2008-09-05 16:13:11 -04002853
2854 log = root->log_root;
Li Zefan33345d012011-04-20 10:31:50 +08002855 max_key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04002856 max_key.offset = (u64)-1;
2857 max_key.type = key_type;
2858
Li Zefan33345d012011-04-20 10:31:50 +08002859 min_key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04002860 min_key.type = key_type;
2861 min_key.offset = min_offset;
2862
2863 path->keep_locks = 1;
2864
2865 ret = btrfs_search_forward(root, &min_key, &max_key,
Eric Sandeende78b512013-01-31 18:21:12 +00002866 path, trans->transid);
Chris Masone02119d2008-09-05 16:13:11 -04002867
2868 /*
2869 * we didn't find anything from this transaction, see if there
2870 * is anything at all
2871 */
Li Zefan33345d012011-04-20 10:31:50 +08002872 if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) {
2873 min_key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04002874 min_key.type = key_type;
2875 min_key.offset = (u64)-1;
David Sterbab3b4aa72011-04-21 01:20:15 +02002876 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002877 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2878 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02002879 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002880 return ret;
2881 }
Li Zefan33345d012011-04-20 10:31:50 +08002882 ret = btrfs_previous_item(root, path, ino, key_type);
Chris Masone02119d2008-09-05 16:13:11 -04002883
2884 /* if ret == 0 there are items for this type,
2885 * create a range to tell us the last key of this type.
2886 * otherwise, there are no items in this directory after
2887 * *min_offset, and we create a range to indicate that.
2888 */
2889 if (ret == 0) {
2890 struct btrfs_key tmp;
2891 btrfs_item_key_to_cpu(path->nodes[0], &tmp,
2892 path->slots[0]);
Chris Masond3977122009-01-05 21:25:51 -05002893 if (key_type == tmp.type)
Chris Masone02119d2008-09-05 16:13:11 -04002894 first_offset = max(min_offset, tmp.offset) + 1;
Chris Masone02119d2008-09-05 16:13:11 -04002895 }
2896 goto done;
2897 }
2898
2899 /* go backward to find any previous key */
Li Zefan33345d012011-04-20 10:31:50 +08002900 ret = btrfs_previous_item(root, path, ino, key_type);
Chris Masone02119d2008-09-05 16:13:11 -04002901 if (ret == 0) {
2902 struct btrfs_key tmp;
2903 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
2904 if (key_type == tmp.type) {
2905 first_offset = tmp.offset;
2906 ret = overwrite_item(trans, log, dst_path,
2907 path->nodes[0], path->slots[0],
2908 &tmp);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002909 if (ret) {
2910 err = ret;
2911 goto done;
2912 }
Chris Masone02119d2008-09-05 16:13:11 -04002913 }
2914 }
David Sterbab3b4aa72011-04-21 01:20:15 +02002915 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002916
2917 /* find the first key from this transaction again */
2918 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2919 if (ret != 0) {
2920 WARN_ON(1);
2921 goto done;
2922 }
2923
2924 /*
2925 * we have a block from this transaction, log every item in it
2926 * from our directory
2927 */
Chris Masond3977122009-01-05 21:25:51 -05002928 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04002929 struct btrfs_key tmp;
2930 src = path->nodes[0];
2931 nritems = btrfs_header_nritems(src);
2932 for (i = path->slots[0]; i < nritems; i++) {
2933 btrfs_item_key_to_cpu(src, &min_key, i);
2934
Li Zefan33345d012011-04-20 10:31:50 +08002935 if (min_key.objectid != ino || min_key.type != key_type)
Chris Masone02119d2008-09-05 16:13:11 -04002936 goto done;
2937 ret = overwrite_item(trans, log, dst_path, src, i,
2938 &min_key);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002939 if (ret) {
2940 err = ret;
2941 goto done;
2942 }
Chris Masone02119d2008-09-05 16:13:11 -04002943 }
2944 path->slots[0] = nritems;
2945
2946 /*
2947 * look ahead to the next item and see if it is also
2948 * from this directory and from this transaction
2949 */
2950 ret = btrfs_next_leaf(root, path);
2951 if (ret == 1) {
2952 last_offset = (u64)-1;
2953 goto done;
2954 }
2955 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
Li Zefan33345d012011-04-20 10:31:50 +08002956 if (tmp.objectid != ino || tmp.type != key_type) {
Chris Masone02119d2008-09-05 16:13:11 -04002957 last_offset = (u64)-1;
2958 goto done;
2959 }
2960 if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
2961 ret = overwrite_item(trans, log, dst_path,
2962 path->nodes[0], path->slots[0],
2963 &tmp);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002964 if (ret)
2965 err = ret;
2966 else
2967 last_offset = tmp.offset;
Chris Masone02119d2008-09-05 16:13:11 -04002968 goto done;
2969 }
2970 }
2971done:
David Sterbab3b4aa72011-04-21 01:20:15 +02002972 btrfs_release_path(path);
2973 btrfs_release_path(dst_path);
Chris Masone02119d2008-09-05 16:13:11 -04002974
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002975 if (err == 0) {
2976 *last_offset_ret = last_offset;
2977 /*
2978 * insert the log range keys to indicate where the log
2979 * is valid
2980 */
2981 ret = insert_dir_log_key(trans, log, path, key_type,
Li Zefan33345d012011-04-20 10:31:50 +08002982 ino, first_offset, last_offset);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002983 if (ret)
2984 err = ret;
2985 }
2986 return err;
Chris Masone02119d2008-09-05 16:13:11 -04002987}
2988
2989/*
2990 * logging directories is very similar to logging inodes, We find all the items
2991 * from the current transaction and write them to the log.
2992 *
2993 * The recovery code scans the directory in the subvolume, and if it finds a
2994 * key in the range logged that is not present in the log tree, then it means
2995 * that dir entry was unlinked during the transaction.
2996 *
2997 * In order for that scan to work, we must include one key smaller than
2998 * the smallest logged by this transaction and one key larger than the largest
2999 * key logged by this transaction.
3000 */
3001static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
3002 struct btrfs_root *root, struct inode *inode,
3003 struct btrfs_path *path,
3004 struct btrfs_path *dst_path)
3005{
3006 u64 min_key;
3007 u64 max_key;
3008 int ret;
3009 int key_type = BTRFS_DIR_ITEM_KEY;
3010
3011again:
3012 min_key = 0;
3013 max_key = 0;
Chris Masond3977122009-01-05 21:25:51 -05003014 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04003015 ret = log_dir_items(trans, root, inode, path,
3016 dst_path, key_type, min_key,
3017 &max_key);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003018 if (ret)
3019 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04003020 if (max_key == (u64)-1)
3021 break;
3022 min_key = max_key + 1;
3023 }
3024
3025 if (key_type == BTRFS_DIR_ITEM_KEY) {
3026 key_type = BTRFS_DIR_INDEX_KEY;
3027 goto again;
3028 }
3029 return 0;
3030}
3031
3032/*
3033 * a helper function to drop items from the log before we relog an
3034 * inode. max_key_type indicates the highest item type to remove.
3035 * This cannot be run for file data extents because it does not
3036 * free the extents they point to.
3037 */
3038static int drop_objectid_items(struct btrfs_trans_handle *trans,
3039 struct btrfs_root *log,
3040 struct btrfs_path *path,
3041 u64 objectid, int max_key_type)
3042{
3043 int ret;
3044 struct btrfs_key key;
3045 struct btrfs_key found_key;
Josef Bacik18ec90d2012-09-28 11:56:28 -04003046 int start_slot;
Chris Masone02119d2008-09-05 16:13:11 -04003047
3048 key.objectid = objectid;
3049 key.type = max_key_type;
3050 key.offset = (u64)-1;
3051
Chris Masond3977122009-01-05 21:25:51 -05003052 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04003053 ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
Josef Bacik36508602013-04-25 16:23:32 -04003054 BUG_ON(ret == 0); /* Logic error */
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003055 if (ret < 0)
Chris Masone02119d2008-09-05 16:13:11 -04003056 break;
3057
3058 if (path->slots[0] == 0)
3059 break;
3060
3061 path->slots[0]--;
3062 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3063 path->slots[0]);
3064
3065 if (found_key.objectid != objectid)
3066 break;
3067
Josef Bacik18ec90d2012-09-28 11:56:28 -04003068 found_key.offset = 0;
3069 found_key.type = 0;
3070 ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
3071 &start_slot);
3072
3073 ret = btrfs_del_items(trans, log, path, start_slot,
3074 path->slots[0] - start_slot + 1);
3075 /*
3076 * If start slot isn't 0 then we don't need to re-search, we've
3077 * found the last guy with the objectid in this tree.
3078 */
3079 if (ret || start_slot != 0)
Tsutomu Itoh65a246c2011-05-19 04:37:44 +00003080 break;
David Sterbab3b4aa72011-04-21 01:20:15 +02003081 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04003082 }
David Sterbab3b4aa72011-04-21 01:20:15 +02003083 btrfs_release_path(path);
Josef Bacik5bdbeb22012-05-29 16:59:49 -04003084 if (ret > 0)
3085 ret = 0;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003086 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04003087}
3088
Josef Bacik94edf4a2012-09-25 14:56:25 -04003089static void fill_inode_item(struct btrfs_trans_handle *trans,
3090 struct extent_buffer *leaf,
3091 struct btrfs_inode_item *item,
3092 struct inode *inode, int log_inode_only)
3093{
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003094 struct btrfs_map_token token;
Josef Bacik94edf4a2012-09-25 14:56:25 -04003095
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003096 btrfs_init_map_token(&token);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003097
3098 if (log_inode_only) {
3099 /* set the generation to zero so the recover code
3100 * can tell the difference between an logging
3101 * just to say 'this inode exists' and a logging
3102 * to say 'update this inode with these values'
3103 */
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003104 btrfs_set_token_inode_generation(leaf, item, 0, &token);
3105 btrfs_set_token_inode_size(leaf, item, 0, &token);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003106 } else {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003107 btrfs_set_token_inode_generation(leaf, item,
3108 BTRFS_I(inode)->generation,
3109 &token);
3110 btrfs_set_token_inode_size(leaf, item, inode->i_size, &token);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003111 }
3112
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003113 btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
3114 btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
3115 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3116 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3117
3118 btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
3119 inode->i_atime.tv_sec, &token);
3120 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
3121 inode->i_atime.tv_nsec, &token);
3122
3123 btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
3124 inode->i_mtime.tv_sec, &token);
3125 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
3126 inode->i_mtime.tv_nsec, &token);
3127
3128 btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
3129 inode->i_ctime.tv_sec, &token);
3130 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
3131 inode->i_ctime.tv_nsec, &token);
3132
3133 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
3134 &token);
3135
3136 btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
3137 btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
3138 btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
3139 btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
3140 btrfs_set_token_inode_block_group(leaf, item, 0, &token);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003141}
3142
Josef Bacika95249b2012-10-11 16:17:34 -04003143static int log_inode_item(struct btrfs_trans_handle *trans,
3144 struct btrfs_root *log, struct btrfs_path *path,
3145 struct inode *inode)
3146{
3147 struct btrfs_inode_item *inode_item;
3148 struct btrfs_key key;
3149 int ret;
3150
3151 memcpy(&key, &BTRFS_I(inode)->location, sizeof(key));
3152 ret = btrfs_insert_empty_item(trans, log, path, &key,
3153 sizeof(*inode_item));
3154 if (ret && ret != -EEXIST)
3155 return ret;
3156 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3157 struct btrfs_inode_item);
3158 fill_inode_item(trans, path->nodes[0], inode_item, inode, 0);
3159 btrfs_release_path(path);
3160 return 0;
3161}
3162
Chris Mason31ff1cd2008-09-11 16:17:57 -04003163static noinline int copy_items(struct btrfs_trans_handle *trans,
Liu Bod2794402012-08-29 01:07:56 -06003164 struct inode *inode,
Chris Mason31ff1cd2008-09-11 16:17:57 -04003165 struct btrfs_path *dst_path,
3166 struct extent_buffer *src,
3167 int start_slot, int nr, int inode_only)
3168{
3169 unsigned long src_offset;
3170 unsigned long dst_offset;
Liu Bod2794402012-08-29 01:07:56 -06003171 struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003172 struct btrfs_file_extent_item *extent;
3173 struct btrfs_inode_item *inode_item;
3174 int ret;
3175 struct btrfs_key *ins_keys;
3176 u32 *ins_sizes;
3177 char *ins_data;
3178 int i;
Chris Masond20f7042008-12-08 16:58:54 -05003179 struct list_head ordered_sums;
Liu Bod2794402012-08-29 01:07:56 -06003180 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
Chris Masond20f7042008-12-08 16:58:54 -05003181
3182 INIT_LIST_HEAD(&ordered_sums);
Chris Mason31ff1cd2008-09-11 16:17:57 -04003183
3184 ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
3185 nr * sizeof(u32), GFP_NOFS);
liubo2a29edc2011-01-26 06:22:08 +00003186 if (!ins_data)
3187 return -ENOMEM;
3188
Chris Mason31ff1cd2008-09-11 16:17:57 -04003189 ins_sizes = (u32 *)ins_data;
3190 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
3191
3192 for (i = 0; i < nr; i++) {
3193 ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot);
3194 btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
3195 }
3196 ret = btrfs_insert_empty_items(trans, log, dst_path,
3197 ins_keys, ins_sizes, nr);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003198 if (ret) {
3199 kfree(ins_data);
3200 return ret;
3201 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003202
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003203 for (i = 0; i < nr; i++, dst_path->slots[0]++) {
Chris Mason31ff1cd2008-09-11 16:17:57 -04003204 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
3205 dst_path->slots[0]);
3206
3207 src_offset = btrfs_item_ptr_offset(src, start_slot + i);
3208
Josef Bacik94edf4a2012-09-25 14:56:25 -04003209 if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
Chris Mason31ff1cd2008-09-11 16:17:57 -04003210 inode_item = btrfs_item_ptr(dst_path->nodes[0],
3211 dst_path->slots[0],
3212 struct btrfs_inode_item);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003213 fill_inode_item(trans, dst_path->nodes[0], inode_item,
3214 inode, inode_only == LOG_INODE_EXISTS);
3215 } else {
3216 copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
3217 src_offset, ins_sizes[i]);
Chris Mason31ff1cd2008-09-11 16:17:57 -04003218 }
Josef Bacik94edf4a2012-09-25 14:56:25 -04003219
Chris Mason31ff1cd2008-09-11 16:17:57 -04003220 /* take a reference on file data extents so that truncates
3221 * or deletes of this inode don't have to relog the inode
3222 * again
3223 */
Liu Bod2794402012-08-29 01:07:56 -06003224 if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY &&
3225 !skip_csum) {
Chris Mason31ff1cd2008-09-11 16:17:57 -04003226 int found_type;
3227 extent = btrfs_item_ptr(src, start_slot + i,
3228 struct btrfs_file_extent_item);
3229
liubo8e531cd2011-05-06 10:36:09 +08003230 if (btrfs_file_extent_generation(src, extent) < trans->transid)
3231 continue;
3232
Chris Mason31ff1cd2008-09-11 16:17:57 -04003233 found_type = btrfs_file_extent_type(src, extent);
Josef Bacik6f1fed72012-09-26 11:07:06 -04003234 if (found_type == BTRFS_FILE_EXTENT_REG) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003235 u64 ds, dl, cs, cl;
3236 ds = btrfs_file_extent_disk_bytenr(src,
3237 extent);
3238 /* ds == 0 is a hole */
3239 if (ds == 0)
3240 continue;
3241
3242 dl = btrfs_file_extent_disk_num_bytes(src,
3243 extent);
3244 cs = btrfs_file_extent_offset(src, extent);
3245 cl = btrfs_file_extent_num_bytes(src,
Joe Perchesa419aef2009-08-18 11:18:35 -07003246 extent);
Chris Mason580afd72008-12-08 19:15:39 -05003247 if (btrfs_file_extent_compression(src,
3248 extent)) {
3249 cs = 0;
3250 cl = dl;
3251 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003252
3253 ret = btrfs_lookup_csums_range(
3254 log->fs_info->csum_root,
3255 ds + cs, ds + cs + cl - 1,
Arne Jansena2de7332011-03-08 14:14:00 +01003256 &ordered_sums, 0);
Josef Bacik36508602013-04-25 16:23:32 -04003257 if (ret) {
3258 btrfs_release_path(dst_path);
3259 kfree(ins_data);
3260 return ret;
3261 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003262 }
3263 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003264 }
3265
3266 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
David Sterbab3b4aa72011-04-21 01:20:15 +02003267 btrfs_release_path(dst_path);
Chris Mason31ff1cd2008-09-11 16:17:57 -04003268 kfree(ins_data);
Chris Masond20f7042008-12-08 16:58:54 -05003269
3270 /*
3271 * we have to do this after the loop above to avoid changing the
3272 * log tree while trying to change the log tree.
3273 */
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003274 ret = 0;
Chris Masond3977122009-01-05 21:25:51 -05003275 while (!list_empty(&ordered_sums)) {
Chris Masond20f7042008-12-08 16:58:54 -05003276 struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
3277 struct btrfs_ordered_sum,
3278 list);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003279 if (!ret)
3280 ret = btrfs_csum_file_blocks(trans, log, sums);
Chris Masond20f7042008-12-08 16:58:54 -05003281 list_del(&sums->list);
3282 kfree(sums);
3283 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003284 return ret;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003285}
3286
Josef Bacik5dc562c2012-08-17 13:14:17 -04003287static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
3288{
3289 struct extent_map *em1, *em2;
3290
3291 em1 = list_entry(a, struct extent_map, list);
3292 em2 = list_entry(b, struct extent_map, list);
3293
3294 if (em1->start < em2->start)
3295 return -1;
3296 else if (em1->start > em2->start)
3297 return 1;
3298 return 0;
3299}
3300
Josef Bacik5dc562c2012-08-17 13:14:17 -04003301static int log_one_extent(struct btrfs_trans_handle *trans,
3302 struct inode *inode, struct btrfs_root *root,
Josef Bacik70c8a912012-10-11 16:54:30 -04003303 struct extent_map *em, struct btrfs_path *path)
Josef Bacik5dc562c2012-08-17 13:14:17 -04003304{
3305 struct btrfs_root *log = root->log_root;
Josef Bacik70c8a912012-10-11 16:54:30 -04003306 struct btrfs_file_extent_item *fi;
3307 struct extent_buffer *leaf;
Josef Bacik2ab28f32012-10-12 15:27:49 -04003308 struct btrfs_ordered_extent *ordered;
Josef Bacik70c8a912012-10-11 16:54:30 -04003309 struct list_head ordered_sums;
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003310 struct btrfs_map_token token;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003311 struct btrfs_key key;
Josef Bacik2ab28f32012-10-12 15:27:49 -04003312 u64 mod_start = em->mod_start;
3313 u64 mod_len = em->mod_len;
3314 u64 csum_offset;
3315 u64 csum_len;
Josef Bacik70c8a912012-10-11 16:54:30 -04003316 u64 extent_offset = em->start - em->orig_start;
3317 u64 block_len;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003318 int ret;
Josef Bacik2ab28f32012-10-12 15:27:49 -04003319 int index = log->log_transid % 2;
Josef Bacik70c8a912012-10-11 16:54:30 -04003320 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003321
Josef Bacik09a2a8f92013-04-05 16:51:15 -04003322 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3323 em->start + em->len, NULL, 0);
3324 if (ret)
3325 return ret;
3326
Josef Bacik70c8a912012-10-11 16:54:30 -04003327 INIT_LIST_HEAD(&ordered_sums);
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003328 btrfs_init_map_token(&token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003329 key.objectid = btrfs_ino(inode);
3330 key.type = BTRFS_EXTENT_DATA_KEY;
3331 key.offset = em->start;
Josef Bacik70c8a912012-10-11 16:54:30 -04003332
3333 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
Josef Bacik09a2a8f92013-04-05 16:51:15 -04003334 if (ret)
Josef Bacik70c8a912012-10-11 16:54:30 -04003335 return ret;
Josef Bacik70c8a912012-10-11 16:54:30 -04003336 leaf = path->nodes[0];
3337 fi = btrfs_item_ptr(leaf, path->slots[0],
3338 struct btrfs_file_extent_item);
Josef Bacik124fe662013-03-01 11:47:21 -05003339
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003340 btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
3341 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003342 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3343 skip_csum = true;
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003344 btrfs_set_token_file_extent_type(leaf, fi,
3345 BTRFS_FILE_EXTENT_PREALLOC,
3346 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003347 } else {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003348 btrfs_set_token_file_extent_type(leaf, fi,
3349 BTRFS_FILE_EXTENT_REG,
3350 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003351 if (em->block_start == 0)
3352 skip_csum = true;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003353 }
3354
Josef Bacik70c8a912012-10-11 16:54:30 -04003355 block_len = max(em->block_len, em->orig_block_len);
3356 if (em->compress_type != BTRFS_COMPRESS_NONE) {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003357 btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
3358 em->block_start,
3359 &token);
3360 btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
3361 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003362 } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003363 btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
3364 em->block_start -
3365 extent_offset, &token);
3366 btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
3367 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003368 } else {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003369 btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token);
3370 btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0,
3371 &token);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003372 }
3373
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003374 btrfs_set_token_file_extent_offset(leaf, fi,
3375 em->start - em->orig_start,
3376 &token);
3377 btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
Josef Bacikcc95bef2013-04-04 14:31:27 -04003378 btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003379 btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
3380 &token);
3381 btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
3382 btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003383 btrfs_mark_buffer_dirty(leaf);
3384
Josef Bacik70c8a912012-10-11 16:54:30 -04003385 btrfs_release_path(path);
Josef Bacik70c8a912012-10-11 16:54:30 -04003386 if (ret) {
3387 return ret;
3388 }
3389
3390 if (skip_csum)
3391 return 0;
3392
Liu Bo192000d2013-01-06 03:38:22 +00003393 if (em->compress_type) {
3394 csum_offset = 0;
3395 csum_len = block_len;
3396 }
3397
Josef Bacik2ab28f32012-10-12 15:27:49 -04003398 /*
3399 * First check and see if our csums are on our outstanding ordered
3400 * extents.
3401 */
3402again:
3403 spin_lock_irq(&log->log_extents_lock[index]);
3404 list_for_each_entry(ordered, &log->logged_list[index], log_list) {
3405 struct btrfs_ordered_sum *sum;
3406
3407 if (!mod_len)
3408 break;
3409
3410 if (ordered->inode != inode)
3411 continue;
3412
3413 if (ordered->file_offset + ordered->len <= mod_start ||
3414 mod_start + mod_len <= ordered->file_offset)
3415 continue;
3416
3417 /*
3418 * We are going to copy all the csums on this ordered extent, so
3419 * go ahead and adjust mod_start and mod_len in case this
3420 * ordered extent has already been logged.
3421 */
3422 if (ordered->file_offset > mod_start) {
3423 if (ordered->file_offset + ordered->len >=
3424 mod_start + mod_len)
3425 mod_len = ordered->file_offset - mod_start;
3426 /*
3427 * If we have this case
3428 *
3429 * |--------- logged extent ---------|
3430 * |----- ordered extent ----|
3431 *
3432 * Just don't mess with mod_start and mod_len, we'll
3433 * just end up logging more csums than we need and it
3434 * will be ok.
3435 */
3436 } else {
3437 if (ordered->file_offset + ordered->len <
3438 mod_start + mod_len) {
3439 mod_len = (mod_start + mod_len) -
3440 (ordered->file_offset + ordered->len);
3441 mod_start = ordered->file_offset +
3442 ordered->len;
3443 } else {
3444 mod_len = 0;
3445 }
3446 }
3447
3448 /*
3449 * To keep us from looping for the above case of an ordered
3450 * extent that falls inside of the logged extent.
3451 */
3452 if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
3453 &ordered->flags))
3454 continue;
3455 atomic_inc(&ordered->refs);
3456 spin_unlock_irq(&log->log_extents_lock[index]);
3457 /*
3458 * we've dropped the lock, we must either break or
3459 * start over after this.
3460 */
3461
3462 wait_event(ordered->wait, ordered->csum_bytes_left == 0);
3463
3464 list_for_each_entry(sum, &ordered->list, list) {
3465 ret = btrfs_csum_file_blocks(trans, log, sum);
3466 if (ret) {
3467 btrfs_put_ordered_extent(ordered);
3468 goto unlocked;
3469 }
3470 }
3471 btrfs_put_ordered_extent(ordered);
3472 goto again;
3473
3474 }
3475 spin_unlock_irq(&log->log_extents_lock[index]);
3476unlocked:
3477
3478 if (!mod_len || ret)
3479 return ret;
3480
3481 csum_offset = mod_start - em->start;
3482 csum_len = mod_len;
3483
Josef Bacik70c8a912012-10-11 16:54:30 -04003484 /* block start is already adjusted for the file extent offset. */
3485 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3486 em->block_start + csum_offset,
3487 em->block_start + csum_offset +
3488 csum_len - 1, &ordered_sums, 0);
3489 if (ret)
3490 return ret;
3491
3492 while (!list_empty(&ordered_sums)) {
3493 struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
3494 struct btrfs_ordered_sum,
3495 list);
3496 if (!ret)
3497 ret = btrfs_csum_file_blocks(trans, log, sums);
3498 list_del(&sums->list);
3499 kfree(sums);
3500 }
3501
3502 return ret;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003503}
3504
3505static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3506 struct btrfs_root *root,
3507 struct inode *inode,
Josef Bacik70c8a912012-10-11 16:54:30 -04003508 struct btrfs_path *path)
Josef Bacik5dc562c2012-08-17 13:14:17 -04003509{
Josef Bacik5dc562c2012-08-17 13:14:17 -04003510 struct extent_map *em, *n;
3511 struct list_head extents;
3512 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3513 u64 test_gen;
3514 int ret = 0;
Josef Bacik2ab28f32012-10-12 15:27:49 -04003515 int num = 0;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003516
3517 INIT_LIST_HEAD(&extents);
3518
Josef Bacik5dc562c2012-08-17 13:14:17 -04003519 write_lock(&tree->lock);
3520 test_gen = root->fs_info->last_trans_committed;
3521
3522 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
3523 list_del_init(&em->list);
Josef Bacik2ab28f32012-10-12 15:27:49 -04003524
3525 /*
3526 * Just an arbitrary number, this can be really CPU intensive
3527 * once we start getting a lot of extents, and really once we
3528 * have a bunch of extents we just want to commit since it will
3529 * be faster.
3530 */
3531 if (++num > 32768) {
3532 list_del_init(&tree->modified_extents);
3533 ret = -EFBIG;
3534 goto process;
3535 }
3536
Josef Bacik5dc562c2012-08-17 13:14:17 -04003537 if (em->generation <= test_gen)
3538 continue;
Josef Bacikff44c6e2012-09-14 12:59:20 -04003539 /* Need a ref to keep it from getting evicted from cache */
3540 atomic_inc(&em->refs);
3541 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003542 list_add_tail(&em->list, &extents);
Josef Bacik2ab28f32012-10-12 15:27:49 -04003543 num++;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003544 }
3545
3546 list_sort(NULL, &extents, extent_cmp);
3547
Josef Bacik2ab28f32012-10-12 15:27:49 -04003548process:
Josef Bacik5dc562c2012-08-17 13:14:17 -04003549 while (!list_empty(&extents)) {
3550 em = list_entry(extents.next, struct extent_map, list);
3551
3552 list_del_init(&em->list);
3553
3554 /*
3555 * If we had an error we just need to delete everybody from our
3556 * private list.
3557 */
Josef Bacikff44c6e2012-09-14 12:59:20 -04003558 if (ret) {
Josef Bacik201a9032013-01-24 12:02:07 -05003559 clear_em_logging(tree, em);
Josef Bacikff44c6e2012-09-14 12:59:20 -04003560 free_extent_map(em);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003561 continue;
Josef Bacikff44c6e2012-09-14 12:59:20 -04003562 }
3563
3564 write_unlock(&tree->lock);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003565
Josef Bacik70c8a912012-10-11 16:54:30 -04003566 ret = log_one_extent(trans, inode, root, em, path);
Josef Bacikff44c6e2012-09-14 12:59:20 -04003567 write_lock(&tree->lock);
Josef Bacik201a9032013-01-24 12:02:07 -05003568 clear_em_logging(tree, em);
3569 free_extent_map(em);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003570 }
Josef Bacikff44c6e2012-09-14 12:59:20 -04003571 WARN_ON(!list_empty(&extents));
3572 write_unlock(&tree->lock);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003573
Josef Bacik5dc562c2012-08-17 13:14:17 -04003574 btrfs_release_path(path);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003575 return ret;
3576}
3577
Chris Masone02119d2008-09-05 16:13:11 -04003578/* log a single inode in the tree log.
3579 * At least one parent directory for this inode must exist in the tree
3580 * or be logged already.
3581 *
3582 * Any items from this inode changed by the current transaction are copied
3583 * to the log tree. An extra reference is taken on any extents in this
3584 * file, allowing us to avoid a whole pile of corner cases around logging
3585 * blocks that have been removed from the tree.
3586 *
3587 * See LOG_INODE_ALL and related defines for a description of what inode_only
3588 * does.
3589 *
3590 * This handles both files and directories.
3591 */
Chris Mason12fcfd22009-03-24 10:24:20 -04003592static int btrfs_log_inode(struct btrfs_trans_handle *trans,
Chris Masone02119d2008-09-05 16:13:11 -04003593 struct btrfs_root *root, struct inode *inode,
3594 int inode_only)
3595{
3596 struct btrfs_path *path;
3597 struct btrfs_path *dst_path;
3598 struct btrfs_key min_key;
3599 struct btrfs_key max_key;
3600 struct btrfs_root *log = root->log_root;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003601 struct extent_buffer *src = NULL;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003602 int err = 0;
Chris Masone02119d2008-09-05 16:13:11 -04003603 int ret;
Chris Mason3a5f1d42008-09-11 15:53:37 -04003604 int nritems;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003605 int ins_start_slot = 0;
3606 int ins_nr;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003607 bool fast_search = false;
Li Zefan33345d012011-04-20 10:31:50 +08003608 u64 ino = btrfs_ino(inode);
Chris Masone02119d2008-09-05 16:13:11 -04003609
Chris Masone02119d2008-09-05 16:13:11 -04003610 path = btrfs_alloc_path();
Tsutomu Itoh5df67082011-02-01 09:17:35 +00003611 if (!path)
3612 return -ENOMEM;
Chris Masone02119d2008-09-05 16:13:11 -04003613 dst_path = btrfs_alloc_path();
Tsutomu Itoh5df67082011-02-01 09:17:35 +00003614 if (!dst_path) {
3615 btrfs_free_path(path);
3616 return -ENOMEM;
3617 }
Chris Masone02119d2008-09-05 16:13:11 -04003618
Li Zefan33345d012011-04-20 10:31:50 +08003619 min_key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04003620 min_key.type = BTRFS_INODE_ITEM_KEY;
3621 min_key.offset = 0;
3622
Li Zefan33345d012011-04-20 10:31:50 +08003623 max_key.objectid = ino;
Chris Mason12fcfd22009-03-24 10:24:20 -04003624
Chris Mason12fcfd22009-03-24 10:24:20 -04003625
Josef Bacik5dc562c2012-08-17 13:14:17 -04003626 /* today the code can only do partial logging of directories */
Miao Xie5269b672012-11-01 07:35:23 +00003627 if (S_ISDIR(inode->i_mode) ||
3628 (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3629 &BTRFS_I(inode)->runtime_flags) &&
3630 inode_only == LOG_INODE_EXISTS))
Chris Masone02119d2008-09-05 16:13:11 -04003631 max_key.type = BTRFS_XATTR_ITEM_KEY;
3632 else
3633 max_key.type = (u8)-1;
3634 max_key.offset = (u64)-1;
3635
Josef Bacik94edf4a2012-09-25 14:56:25 -04003636 /* Only run delayed items if we are a dir or a new file */
3637 if (S_ISDIR(inode->i_mode) ||
3638 BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) {
3639 ret = btrfs_commit_inode_delayed_items(trans, inode);
3640 if (ret) {
3641 btrfs_free_path(path);
3642 btrfs_free_path(dst_path);
3643 return ret;
3644 }
Miao Xie16cdcec2011-04-22 18:12:22 +08003645 }
3646
Chris Masone02119d2008-09-05 16:13:11 -04003647 mutex_lock(&BTRFS_I(inode)->log_mutex);
3648
Josef Bacik2ab28f32012-10-12 15:27:49 -04003649 btrfs_get_logged_extents(log, inode);
3650
Chris Masone02119d2008-09-05 16:13:11 -04003651 /*
3652 * a brute force approach to making sure we get the most uptodate
3653 * copies of everything.
3654 */
3655 if (S_ISDIR(inode->i_mode)) {
3656 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
3657
3658 if (inode_only == LOG_INODE_EXISTS)
3659 max_key_type = BTRFS_XATTR_ITEM_KEY;
Li Zefan33345d012011-04-20 10:31:50 +08003660 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
Chris Masone02119d2008-09-05 16:13:11 -04003661 } else {
Josef Bacik5dc562c2012-08-17 13:14:17 -04003662 if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3663 &BTRFS_I(inode)->runtime_flags)) {
Josef Bacike9976152012-10-11 15:53:56 -04003664 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
3665 &BTRFS_I(inode)->runtime_flags);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003666 ret = btrfs_truncate_inode_items(trans, log,
3667 inode, 0, 0);
Josef Bacika95249b2012-10-11 16:17:34 -04003668 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
3669 &BTRFS_I(inode)->runtime_flags)) {
3670 if (inode_only == LOG_INODE_ALL)
3671 fast_search = true;
3672 max_key.type = BTRFS_XATTR_ITEM_KEY;
3673 ret = drop_objectid_items(trans, log, path, ino,
3674 max_key.type);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003675 } else {
Liu Bo183f37f2012-11-01 06:38:47 +00003676 if (inode_only == LOG_INODE_ALL)
3677 fast_search = true;
Josef Bacika95249b2012-10-11 16:17:34 -04003678 ret = log_inode_item(trans, log, dst_path, inode);
3679 if (ret) {
3680 err = ret;
3681 goto out_unlock;
3682 }
3683 goto log_extents;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003684 }
Josef Bacika95249b2012-10-11 16:17:34 -04003685
Chris Masone02119d2008-09-05 16:13:11 -04003686 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003687 if (ret) {
3688 err = ret;
3689 goto out_unlock;
3690 }
Chris Masone02119d2008-09-05 16:13:11 -04003691 path->keep_locks = 1;
3692
Chris Masond3977122009-01-05 21:25:51 -05003693 while (1) {
Chris Mason31ff1cd2008-09-11 16:17:57 -04003694 ins_nr = 0;
Chris Masone02119d2008-09-05 16:13:11 -04003695 ret = btrfs_search_forward(root, &min_key, &max_key,
Eric Sandeende78b512013-01-31 18:21:12 +00003696 path, trans->transid);
Chris Masone02119d2008-09-05 16:13:11 -04003697 if (ret != 0)
3698 break;
Chris Mason3a5f1d42008-09-11 15:53:37 -04003699again:
Chris Mason31ff1cd2008-09-11 16:17:57 -04003700 /* note, ins_nr might be > 0 here, cleanup outside the loop */
Li Zefan33345d012011-04-20 10:31:50 +08003701 if (min_key.objectid != ino)
Chris Masone02119d2008-09-05 16:13:11 -04003702 break;
3703 if (min_key.type > max_key.type)
3704 break;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003705
Chris Masone02119d2008-09-05 16:13:11 -04003706 src = path->nodes[0];
Chris Mason31ff1cd2008-09-11 16:17:57 -04003707 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
3708 ins_nr++;
3709 goto next_slot;
3710 } else if (!ins_nr) {
3711 ins_start_slot = path->slots[0];
3712 ins_nr = 1;
3713 goto next_slot;
Chris Masone02119d2008-09-05 16:13:11 -04003714 }
3715
Liu Bod2794402012-08-29 01:07:56 -06003716 ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
Chris Mason31ff1cd2008-09-11 16:17:57 -04003717 ins_nr, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003718 if (ret) {
3719 err = ret;
3720 goto out_unlock;
3721 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003722 ins_nr = 1;
3723 ins_start_slot = path->slots[0];
3724next_slot:
Chris Masone02119d2008-09-05 16:13:11 -04003725
Chris Mason3a5f1d42008-09-11 15:53:37 -04003726 nritems = btrfs_header_nritems(path->nodes[0]);
3727 path->slots[0]++;
3728 if (path->slots[0] < nritems) {
3729 btrfs_item_key_to_cpu(path->nodes[0], &min_key,
3730 path->slots[0]);
3731 goto again;
3732 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003733 if (ins_nr) {
Liu Bod2794402012-08-29 01:07:56 -06003734 ret = copy_items(trans, inode, dst_path, src,
Chris Mason31ff1cd2008-09-11 16:17:57 -04003735 ins_start_slot,
3736 ins_nr, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003737 if (ret) {
3738 err = ret;
3739 goto out_unlock;
3740 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003741 ins_nr = 0;
3742 }
David Sterbab3b4aa72011-04-21 01:20:15 +02003743 btrfs_release_path(path);
Chris Mason3a5f1d42008-09-11 15:53:37 -04003744
Chris Masone02119d2008-09-05 16:13:11 -04003745 if (min_key.offset < (u64)-1)
3746 min_key.offset++;
3747 else if (min_key.type < (u8)-1)
3748 min_key.type++;
3749 else if (min_key.objectid < (u64)-1)
3750 min_key.objectid++;
3751 else
3752 break;
3753 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003754 if (ins_nr) {
Liu Bod2794402012-08-29 01:07:56 -06003755 ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
Chris Mason31ff1cd2008-09-11 16:17:57 -04003756 ins_nr, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003757 if (ret) {
3758 err = ret;
3759 goto out_unlock;
3760 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003761 ins_nr = 0;
3762 }
Josef Bacik5dc562c2012-08-17 13:14:17 -04003763
Josef Bacika95249b2012-10-11 16:17:34 -04003764log_extents:
Josef Bacikf3b15cc2013-07-22 12:54:30 -04003765 btrfs_release_path(path);
3766 btrfs_release_path(dst_path);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003767 if (fast_search) {
Josef Bacik70c8a912012-10-11 16:54:30 -04003768 ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003769 if (ret) {
3770 err = ret;
3771 goto out_unlock;
3772 }
Liu Bo06d3d222012-08-27 10:52:19 -06003773 } else {
3774 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3775 struct extent_map *em, *n;
3776
Miao Xiebbe14262012-11-01 07:34:54 +00003777 write_lock(&tree->lock);
Liu Bo06d3d222012-08-27 10:52:19 -06003778 list_for_each_entry_safe(em, n, &tree->modified_extents, list)
3779 list_del_init(&em->list);
Miao Xiebbe14262012-11-01 07:34:54 +00003780 write_unlock(&tree->lock);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003781 }
3782
Chris Mason9623f9a2008-09-11 17:42:42 -04003783 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
Chris Masone02119d2008-09-05 16:13:11 -04003784 ret = log_directory_changes(trans, root, inode, path, dst_path);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003785 if (ret) {
3786 err = ret;
3787 goto out_unlock;
3788 }
Chris Masone02119d2008-09-05 16:13:11 -04003789 }
Chris Mason3a5f1d42008-09-11 15:53:37 -04003790 BTRFS_I(inode)->logged_trans = trans->transid;
Liu Bo46d8bc32012-08-29 01:07:55 -06003791 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003792out_unlock:
Josef Bacik2ab28f32012-10-12 15:27:49 -04003793 if (err)
3794 btrfs_free_logged_extents(log, log->log_transid);
Chris Masone02119d2008-09-05 16:13:11 -04003795 mutex_unlock(&BTRFS_I(inode)->log_mutex);
3796
3797 btrfs_free_path(path);
3798 btrfs_free_path(dst_path);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003799 return err;
Chris Masone02119d2008-09-05 16:13:11 -04003800}
3801
Chris Mason12fcfd22009-03-24 10:24:20 -04003802/*
3803 * follow the dentry parent pointers up the chain and see if any
3804 * of the directories in it require a full commit before they can
3805 * be logged. Returns zero if nothing special needs to be done or 1 if
3806 * a full commit is required.
3807 */
3808static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
3809 struct inode *inode,
3810 struct dentry *parent,
3811 struct super_block *sb,
3812 u64 last_committed)
Chris Masone02119d2008-09-05 16:13:11 -04003813{
Chris Mason12fcfd22009-03-24 10:24:20 -04003814 int ret = 0;
3815 struct btrfs_root *root;
Josef Bacik6a912212010-11-20 09:48:00 +00003816 struct dentry *old_parent = NULL;
Josef Bacikde2b5302013-09-11 09:36:30 -04003817 struct inode *orig_inode = inode;
Chris Masone02119d2008-09-05 16:13:11 -04003818
Chris Masonaf4176b2009-03-24 10:24:31 -04003819 /*
3820 * for regular files, if its inode is already on disk, we don't
3821 * have to worry about the parents at all. This is because
3822 * we can use the last_unlink_trans field to record renames
3823 * and other fun in this file.
3824 */
3825 if (S_ISREG(inode->i_mode) &&
3826 BTRFS_I(inode)->generation <= last_committed &&
3827 BTRFS_I(inode)->last_unlink_trans <= last_committed)
3828 goto out;
3829
Chris Mason12fcfd22009-03-24 10:24:20 -04003830 if (!S_ISDIR(inode->i_mode)) {
3831 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
3832 goto out;
3833 inode = parent->d_inode;
3834 }
3835
3836 while (1) {
Josef Bacikde2b5302013-09-11 09:36:30 -04003837 /*
3838 * If we are logging a directory then we start with our inode,
3839 * not our parents inode, so we need to skipp setting the
3840 * logged_trans so that further down in the log code we don't
3841 * think this inode has already been logged.
3842 */
3843 if (inode != orig_inode)
3844 BTRFS_I(inode)->logged_trans = trans->transid;
Chris Mason12fcfd22009-03-24 10:24:20 -04003845 smp_mb();
3846
3847 if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
3848 root = BTRFS_I(inode)->root;
3849
3850 /*
3851 * make sure any commits to the log are forced
3852 * to be full commits
3853 */
3854 root->fs_info->last_trans_log_full_commit =
3855 trans->transid;
3856 ret = 1;
3857 break;
3858 }
3859
3860 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
3861 break;
3862
Yan, Zheng76dda932009-09-21 16:00:26 -04003863 if (IS_ROOT(parent))
Chris Mason12fcfd22009-03-24 10:24:20 -04003864 break;
3865
Josef Bacik6a912212010-11-20 09:48:00 +00003866 parent = dget_parent(parent);
3867 dput(old_parent);
3868 old_parent = parent;
Chris Mason12fcfd22009-03-24 10:24:20 -04003869 inode = parent->d_inode;
3870
3871 }
Josef Bacik6a912212010-11-20 09:48:00 +00003872 dput(old_parent);
Chris Mason12fcfd22009-03-24 10:24:20 -04003873out:
Chris Masone02119d2008-09-05 16:13:11 -04003874 return ret;
3875}
3876
3877/*
3878 * helper function around btrfs_log_inode to make sure newly created
3879 * parent directories also end up in the log. A minimal inode and backref
3880 * only logging is done of any parent directories that are older than
3881 * the last committed transaction
3882 */
Eric Sandeen48a3b632013-04-25 20:41:01 +00003883static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3884 struct btrfs_root *root, struct inode *inode,
3885 struct dentry *parent, int exists_only)
Chris Masone02119d2008-09-05 16:13:11 -04003886{
Chris Mason12fcfd22009-03-24 10:24:20 -04003887 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
Chris Masone02119d2008-09-05 16:13:11 -04003888 struct super_block *sb;
Josef Bacik6a912212010-11-20 09:48:00 +00003889 struct dentry *old_parent = NULL;
Chris Mason12fcfd22009-03-24 10:24:20 -04003890 int ret = 0;
3891 u64 last_committed = root->fs_info->last_trans_committed;
3892
3893 sb = inode->i_sb;
3894
Sage Weil3a5e1402009-04-02 16:49:40 -04003895 if (btrfs_test_opt(root, NOTREELOG)) {
3896 ret = 1;
3897 goto end_no_trans;
3898 }
3899
Chris Mason12fcfd22009-03-24 10:24:20 -04003900 if (root->fs_info->last_trans_log_full_commit >
3901 root->fs_info->last_trans_committed) {
3902 ret = 1;
3903 goto end_no_trans;
3904 }
3905
Yan, Zheng76dda932009-09-21 16:00:26 -04003906 if (root != BTRFS_I(inode)->root ||
3907 btrfs_root_refs(&root->root_item) == 0) {
3908 ret = 1;
3909 goto end_no_trans;
3910 }
3911
Chris Mason12fcfd22009-03-24 10:24:20 -04003912 ret = check_parent_dirs_for_sync(trans, inode, parent,
3913 sb, last_committed);
3914 if (ret)
3915 goto end_no_trans;
Chris Masone02119d2008-09-05 16:13:11 -04003916
Josef Bacik22ee6982012-05-29 16:57:49 -04003917 if (btrfs_inode_in_log(inode, trans->transid)) {
Chris Mason257c62e2009-10-13 13:21:08 -04003918 ret = BTRFS_NO_LOG_SYNC;
3919 goto end_no_trans;
3920 }
3921
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003922 ret = start_log_trans(trans, root);
3923 if (ret)
3924 goto end_trans;
Chris Mason12fcfd22009-03-24 10:24:20 -04003925
3926 ret = btrfs_log_inode(trans, root, inode, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003927 if (ret)
3928 goto end_trans;
Chris Mason12fcfd22009-03-24 10:24:20 -04003929
Chris Masonaf4176b2009-03-24 10:24:31 -04003930 /*
3931 * for regular files, if its inode is already on disk, we don't
3932 * have to worry about the parents at all. This is because
3933 * we can use the last_unlink_trans field to record renames
3934 * and other fun in this file.
3935 */
3936 if (S_ISREG(inode->i_mode) &&
3937 BTRFS_I(inode)->generation <= last_committed &&
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003938 BTRFS_I(inode)->last_unlink_trans <= last_committed) {
3939 ret = 0;
3940 goto end_trans;
3941 }
Chris Masonaf4176b2009-03-24 10:24:31 -04003942
3943 inode_only = LOG_INODE_EXISTS;
Chris Masond3977122009-01-05 21:25:51 -05003944 while (1) {
Chris Mason12fcfd22009-03-24 10:24:20 -04003945 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
Chris Masone02119d2008-09-05 16:13:11 -04003946 break;
3947
Chris Mason12fcfd22009-03-24 10:24:20 -04003948 inode = parent->d_inode;
Yan, Zheng76dda932009-09-21 16:00:26 -04003949 if (root != BTRFS_I(inode)->root)
3950 break;
3951
Chris Mason12fcfd22009-03-24 10:24:20 -04003952 if (BTRFS_I(inode)->generation >
3953 root->fs_info->last_trans_committed) {
3954 ret = btrfs_log_inode(trans, root, inode, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003955 if (ret)
3956 goto end_trans;
Chris Mason12fcfd22009-03-24 10:24:20 -04003957 }
Yan, Zheng76dda932009-09-21 16:00:26 -04003958 if (IS_ROOT(parent))
Chris Masone02119d2008-09-05 16:13:11 -04003959 break;
Chris Mason12fcfd22009-03-24 10:24:20 -04003960
Josef Bacik6a912212010-11-20 09:48:00 +00003961 parent = dget_parent(parent);
3962 dput(old_parent);
3963 old_parent = parent;
Chris Masone02119d2008-09-05 16:13:11 -04003964 }
Chris Mason12fcfd22009-03-24 10:24:20 -04003965 ret = 0;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003966end_trans:
Josef Bacik6a912212010-11-20 09:48:00 +00003967 dput(old_parent);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003968 if (ret < 0) {
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003969 root->fs_info->last_trans_log_full_commit = trans->transid;
3970 ret = 1;
3971 }
Chris Mason12fcfd22009-03-24 10:24:20 -04003972 btrfs_end_log_trans(root);
3973end_no_trans:
3974 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04003975}
3976
3977/*
3978 * it is not safe to log dentry if the chunk root has added new
3979 * chunks. This returns 0 if the dentry was logged, and 1 otherwise.
3980 * If this returns 1, you must commit the transaction to safely get your
3981 * data on disk.
3982 */
3983int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
3984 struct btrfs_root *root, struct dentry *dentry)
3985{
Josef Bacik6a912212010-11-20 09:48:00 +00003986 struct dentry *parent = dget_parent(dentry);
3987 int ret;
3988
3989 ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
3990 dput(parent);
3991
3992 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04003993}
3994
3995/*
3996 * should be called during mount to recover and replay any log trees
3997 * from the FS
3998 */
3999int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
4000{
4001 int ret;
4002 struct btrfs_path *path;
4003 struct btrfs_trans_handle *trans;
4004 struct btrfs_key key;
4005 struct btrfs_key found_key;
4006 struct btrfs_key tmp_key;
4007 struct btrfs_root *log;
4008 struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
4009 struct walk_control wc = {
4010 .process_func = process_one_buffer,
4011 .stage = 0,
4012 };
4013
Chris Masone02119d2008-09-05 16:13:11 -04004014 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00004015 if (!path)
4016 return -ENOMEM;
4017
4018 fs_info->log_root_recovering = 1;
Chris Masone02119d2008-09-05 16:13:11 -04004019
Yan, Zheng4a500fd2010-05-16 10:49:59 -04004020 trans = btrfs_start_transaction(fs_info->tree_root, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004021 if (IS_ERR(trans)) {
4022 ret = PTR_ERR(trans);
4023 goto error;
4024 }
Chris Masone02119d2008-09-05 16:13:11 -04004025
4026 wc.trans = trans;
4027 wc.pin = 1;
4028
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00004029 ret = walk_log_tree(trans, log_root_tree, &wc);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004030 if (ret) {
4031 btrfs_error(fs_info, ret, "Failed to pin buffers while "
4032 "recovering log root tree.");
4033 goto error;
4034 }
Chris Masone02119d2008-09-05 16:13:11 -04004035
4036again:
4037 key.objectid = BTRFS_TREE_LOG_OBJECTID;
4038 key.offset = (u64)-1;
4039 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
4040
Chris Masond3977122009-01-05 21:25:51 -05004041 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04004042 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004043
4044 if (ret < 0) {
4045 btrfs_error(fs_info, ret,
4046 "Couldn't find tree log root.");
4047 goto error;
4048 }
Chris Masone02119d2008-09-05 16:13:11 -04004049 if (ret > 0) {
4050 if (path->slots[0] == 0)
4051 break;
4052 path->slots[0]--;
4053 }
4054 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
4055 path->slots[0]);
David Sterbab3b4aa72011-04-21 01:20:15 +02004056 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04004057 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
4058 break;
4059
Miao Xiecb517ea2013-05-15 07:48:19 +00004060 log = btrfs_read_fs_root(log_root_tree, &found_key);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004061 if (IS_ERR(log)) {
4062 ret = PTR_ERR(log);
4063 btrfs_error(fs_info, ret,
4064 "Couldn't read tree log root.");
4065 goto error;
4066 }
Chris Masone02119d2008-09-05 16:13:11 -04004067
4068 tmp_key.objectid = found_key.offset;
4069 tmp_key.type = BTRFS_ROOT_ITEM_KEY;
4070 tmp_key.offset = (u64)-1;
4071
4072 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004073 if (IS_ERR(wc.replay_dest)) {
4074 ret = PTR_ERR(wc.replay_dest);
Josef Bacikb50c6e22013-04-25 15:55:30 -04004075 free_extent_buffer(log->node);
4076 free_extent_buffer(log->commit_root);
4077 kfree(log);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004078 btrfs_error(fs_info, ret, "Couldn't read target root "
4079 "for tree log recovery.");
4080 goto error;
4081 }
Chris Masone02119d2008-09-05 16:13:11 -04004082
Yan Zheng07d400a2009-01-06 11:42:00 -05004083 wc.replay_dest->log_root = log;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004084 btrfs_record_root_in_trans(trans, wc.replay_dest);
Chris Masone02119d2008-09-05 16:13:11 -04004085 ret = walk_log_tree(trans, log, &wc);
Chris Masone02119d2008-09-05 16:13:11 -04004086
Josef Bacikb50c6e22013-04-25 15:55:30 -04004087 if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
Chris Masone02119d2008-09-05 16:13:11 -04004088 ret = fixup_inode_link_counts(trans, wc.replay_dest,
4089 path);
Chris Masone02119d2008-09-05 16:13:11 -04004090 }
Chris Masone02119d2008-09-05 16:13:11 -04004091
4092 key.offset = found_key.offset - 1;
Yan Zheng07d400a2009-01-06 11:42:00 -05004093 wc.replay_dest->log_root = NULL;
Chris Masone02119d2008-09-05 16:13:11 -04004094 free_extent_buffer(log->node);
Chris Masonb263c2c2009-06-11 11:24:47 -04004095 free_extent_buffer(log->commit_root);
Chris Masone02119d2008-09-05 16:13:11 -04004096 kfree(log);
4097
Josef Bacikb50c6e22013-04-25 15:55:30 -04004098 if (ret)
4099 goto error;
4100
Chris Masone02119d2008-09-05 16:13:11 -04004101 if (found_key.offset == 0)
4102 break;
4103 }
David Sterbab3b4aa72011-04-21 01:20:15 +02004104 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04004105
4106 /* step one is to pin it all, step two is to replay just inodes */
4107 if (wc.pin) {
4108 wc.pin = 0;
4109 wc.process_func = replay_one_buffer;
4110 wc.stage = LOG_WALK_REPLAY_INODES;
4111 goto again;
4112 }
4113 /* step three is to replay everything */
4114 if (wc.stage < LOG_WALK_REPLAY_ALL) {
4115 wc.stage++;
4116 goto again;
4117 }
4118
4119 btrfs_free_path(path);
4120
Josef Bacikabefa552013-04-24 16:40:05 -04004121 /* step 4: commit the transaction, which also unpins the blocks */
4122 ret = btrfs_commit_transaction(trans, fs_info->tree_root);
4123 if (ret)
4124 return ret;
4125
Chris Masone02119d2008-09-05 16:13:11 -04004126 free_extent_buffer(log_root_tree->node);
4127 log_root_tree->log_root = NULL;
4128 fs_info->log_root_recovering = 0;
Chris Masone02119d2008-09-05 16:13:11 -04004129 kfree(log_root_tree);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004130
Josef Bacikabefa552013-04-24 16:40:05 -04004131 return 0;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004132error:
Josef Bacikb50c6e22013-04-25 15:55:30 -04004133 if (wc.trans)
4134 btrfs_end_transaction(wc.trans, fs_info->tree_root);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004135 btrfs_free_path(path);
4136 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04004137}
Chris Mason12fcfd22009-03-24 10:24:20 -04004138
4139/*
4140 * there are some corner cases where we want to force a full
4141 * commit instead of allowing a directory to be logged.
4142 *
4143 * They revolve around files there were unlinked from the directory, and
4144 * this function updates the parent directory so that a full commit is
4145 * properly done if it is fsync'd later after the unlinks are done.
4146 */
4147void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
4148 struct inode *dir, struct inode *inode,
4149 int for_rename)
4150{
4151 /*
Chris Masonaf4176b2009-03-24 10:24:31 -04004152 * when we're logging a file, if it hasn't been renamed
4153 * or unlinked, and its inode is fully committed on disk,
4154 * we don't have to worry about walking up the directory chain
4155 * to log its parents.
4156 *
4157 * So, we use the last_unlink_trans field to put this transid
4158 * into the file. When the file is logged we check it and
4159 * don't log the parents if the file is fully on disk.
4160 */
4161 if (S_ISREG(inode->i_mode))
4162 BTRFS_I(inode)->last_unlink_trans = trans->transid;
4163
4164 /*
Chris Mason12fcfd22009-03-24 10:24:20 -04004165 * if this directory was already logged any new
4166 * names for this file/dir will get recorded
4167 */
4168 smp_mb();
4169 if (BTRFS_I(dir)->logged_trans == trans->transid)
4170 return;
4171
4172 /*
4173 * if the inode we're about to unlink was logged,
4174 * the log will be properly updated for any new names
4175 */
4176 if (BTRFS_I(inode)->logged_trans == trans->transid)
4177 return;
4178
4179 /*
4180 * when renaming files across directories, if the directory
4181 * there we're unlinking from gets fsync'd later on, there's
4182 * no way to find the destination directory later and fsync it
4183 * properly. So, we have to be conservative and force commits
4184 * so the new name gets discovered.
4185 */
4186 if (for_rename)
4187 goto record;
4188
4189 /* we can safely do the unlink without any special recording */
4190 return;
4191
4192record:
4193 BTRFS_I(dir)->last_unlink_trans = trans->transid;
4194}
4195
4196/*
4197 * Call this after adding a new name for a file and it will properly
4198 * update the log to reflect the new name.
4199 *
4200 * It will return zero if all goes well, and it will return 1 if a
4201 * full transaction commit is required.
4202 */
4203int btrfs_log_new_name(struct btrfs_trans_handle *trans,
4204 struct inode *inode, struct inode *old_dir,
4205 struct dentry *parent)
4206{
4207 struct btrfs_root * root = BTRFS_I(inode)->root;
4208
4209 /*
Chris Masonaf4176b2009-03-24 10:24:31 -04004210 * this will force the logging code to walk the dentry chain
4211 * up for the file
4212 */
4213 if (S_ISREG(inode->i_mode))
4214 BTRFS_I(inode)->last_unlink_trans = trans->transid;
4215
4216 /*
Chris Mason12fcfd22009-03-24 10:24:20 -04004217 * if this inode hasn't been logged and directory we're renaming it
4218 * from hasn't been logged, we don't need to log it
4219 */
4220 if (BTRFS_I(inode)->logged_trans <=
4221 root->fs_info->last_trans_committed &&
4222 (!old_dir || BTRFS_I(old_dir)->logged_trans <=
4223 root->fs_info->last_trans_committed))
4224 return 0;
4225
4226 return btrfs_log_inode_parent(trans, root, inode, parent, 1);
4227}
4228