/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
18
19#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090020#include <linux/slab.h>
Miao Xiec6adc9c2013-05-28 10:05:39 +000021#include <linux/blkdev.h>
Josef Bacik5dc562c2012-08-17 13:14:17 -040022#include <linux/list_sort.h>
Chris Masone02119d2008-09-05 16:13:11 -040023#include "ctree.h"
24#include "transaction.h"
25#include "disk-io.h"
26#include "locking.h"
27#include "print-tree.h"
Mark Fashehf1863732012-08-08 11:32:27 -070028#include "backref.h"
Chris Masone02119d2008-09-05 16:13:11 -040029#include "compat.h"
Christoph Hellwigb2950862008-12-02 09:54:17 -050030#include "tree-log.h"
Mark Fashehf1863732012-08-08 11:32:27 -070031#include "hash.h"
Chris Masone02119d2008-09-05 16:13:11 -040032
/*
 * Magic values for the inode_only argument of btrfs_log_inode():
 *
 * LOG_INODE_ALL means to log everything
 * LOG_INODE_EXISTS means to log just enough to recreate the inode
 * during log replay
 */
#define LOG_INODE_ALL 0
#define LOG_INODE_EXISTS 1
41
/*
 * directory trouble cases
 *
 * 1) on rename or unlink, if the inode being unlinked isn't in the fsync
 * log, we must force a full commit before doing an fsync of the directory
 * where the unlink was done.
 * ---> record transid of last unlink/rename per directory
 *
 * mkdir foo/some_dir
 * normal commit
 * rename foo/some_dir foo2/some_dir
 * mkdir foo/some_dir
 * fsync foo/some_dir/some_file
 *
 * The fsync above will unlink the original some_dir without recording
 * it in its new location (foo2).  After a crash, some_dir will be gone
 * unless the fsync of some_file forces a full commit
 *
 * 2) we must log any new names for any file or dir that is in the fsync
 * log. ---> check inode while renaming/linking.
 *
 * 2a) we must log any new names for any file or dir during rename
 * when the directory they are being removed from was logged.
 * ---> check inode and old parent dir during rename
 *
 *  2a is actually the more important variant.  Without the extra logging
 *  a crash might unlink the old name without recreating the new one
 *
 * 3) after a crash, we must go through any directories with a link count
 * of zero and redo the rm -rf
 *
 * mkdir f1/foo
 * normal commit
 * rm -rf f1/foo
 * fsync(f1)
 *
 * The directory f1/foo was fully removed from the FS, but fsync was never
 * called on f1/foo, only on its parent dir (f1).  After a crash the rm -rf
 * must be replayed.  This must be able to recurse down the entire
 * directory tree.  The inode link count fixup code takes care of the
 * ugly details.
 */
84
/*
 * Stages for the tree walking.
 *
 * The first stage (0) is to only pin down the blocks we find.
 * The second stage (1) is to make sure that all the inodes
 * we find in the log are created in the subvolume.
 *
 * The last stage is to deal with directories and links and extents
 * and all the other fun semantics.
 */
#define LOG_WALK_PIN_ONLY 0
#define LOG_WALK_REPLAY_INODES 1
#define LOG_WALK_REPLAY_ALL 2
97
Chris Mason12fcfd22009-03-24 10:24:20 -040098static int btrfs_log_inode(struct btrfs_trans_handle *trans,
Chris Masone02119d2008-09-05 16:13:11 -040099 struct btrfs_root *root, struct inode *inode,
100 int inode_only);
Yan Zhengec051c02009-01-05 15:43:42 -0500101static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
102 struct btrfs_root *root,
103 struct btrfs_path *path, u64 objectid);
Chris Mason12fcfd22009-03-24 10:24:20 -0400104static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
105 struct btrfs_root *root,
106 struct btrfs_root *log,
107 struct btrfs_path *path,
108 u64 dirid, int del_all);
Chris Masone02119d2008-09-05 16:13:11 -0400109
/*
 * tree logging is a special write ahead log used to make sure that
 * fsyncs and O_SYNCs can happen without doing full tree commits.
 *
 * Full tree commits are expensive because they require commonly
 * modified blocks to be recowed, creating many dirty pages in the
 * extent tree and a 4x-6x higher write load than ext3.
 *
 * Instead of doing a tree commit on every fsync, we use the
 * key ranges and transaction ids to find items for a given file or directory
 * that have changed in this transaction.  Those items are copied into
 * a special tree (one per subvolume root), that tree is written to disk
 * and then the fsync is considered complete.
 *
 * After a crash, items are copied out of the log-tree back into the
 * subvolume tree.  Any file data extents found are recorded in the extent
 * allocation tree, and the log-tree freed.
 *
 * The log tree is read three times: once to pin down all the extents it is
 * using in ram, once to create all the inodes logged in the tree,
 * and once to do all the other items.
 */
132
133/*
Chris Masone02119d2008-09-05 16:13:11 -0400134 * start a sub transaction and setup the log tree
135 * this increments the log tree writer count to make the people
136 * syncing the tree wait for us to finish
137 */
138static int start_log_trans(struct btrfs_trans_handle *trans,
139 struct btrfs_root *root)
140{
141 int ret;
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400142 int err = 0;
Yan Zheng7237f182009-01-21 12:54:03 -0500143
144 mutex_lock(&root->log_mutex);
145 if (root->log_root) {
Josef Bacikff782e02009-10-08 15:30:04 -0400146 if (!root->log_start_pid) {
147 root->log_start_pid = current->pid;
148 root->log_multiple_pids = false;
149 } else if (root->log_start_pid != current->pid) {
150 root->log_multiple_pids = true;
151 }
152
Miao Xie2ecb7922012-09-06 04:04:27 -0600153 atomic_inc(&root->log_batch);
Yan Zheng7237f182009-01-21 12:54:03 -0500154 atomic_inc(&root->log_writers);
155 mutex_unlock(&root->log_mutex);
156 return 0;
157 }
Josef Bacikff782e02009-10-08 15:30:04 -0400158 root->log_multiple_pids = false;
159 root->log_start_pid = current->pid;
Chris Masone02119d2008-09-05 16:13:11 -0400160 mutex_lock(&root->fs_info->tree_log_mutex);
161 if (!root->fs_info->log_root_tree) {
162 ret = btrfs_init_log_root_tree(trans, root->fs_info);
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400163 if (ret)
164 err = ret;
Chris Masone02119d2008-09-05 16:13:11 -0400165 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400166 if (err == 0 && !root->log_root) {
Chris Masone02119d2008-09-05 16:13:11 -0400167 ret = btrfs_add_log_tree(trans, root);
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400168 if (ret)
169 err = ret;
Chris Masone02119d2008-09-05 16:13:11 -0400170 }
Chris Masone02119d2008-09-05 16:13:11 -0400171 mutex_unlock(&root->fs_info->tree_log_mutex);
Miao Xie2ecb7922012-09-06 04:04:27 -0600172 atomic_inc(&root->log_batch);
Yan Zheng7237f182009-01-21 12:54:03 -0500173 atomic_inc(&root->log_writers);
174 mutex_unlock(&root->log_mutex);
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400175 return err;
Chris Masone02119d2008-09-05 16:13:11 -0400176}
177
178/*
179 * returns 0 if there was a log transaction running and we were able
180 * to join, or returns -ENOENT if there were not transactions
181 * in progress
182 */
183static int join_running_log_trans(struct btrfs_root *root)
184{
185 int ret = -ENOENT;
186
187 smp_mb();
188 if (!root->log_root)
189 return -ENOENT;
190
Yan Zheng7237f182009-01-21 12:54:03 -0500191 mutex_lock(&root->log_mutex);
Chris Masone02119d2008-09-05 16:13:11 -0400192 if (root->log_root) {
193 ret = 0;
Yan Zheng7237f182009-01-21 12:54:03 -0500194 atomic_inc(&root->log_writers);
Chris Masone02119d2008-09-05 16:13:11 -0400195 }
Yan Zheng7237f182009-01-21 12:54:03 -0500196 mutex_unlock(&root->log_mutex);
Chris Masone02119d2008-09-05 16:13:11 -0400197 return ret;
198}
199
200/*
Chris Mason12fcfd22009-03-24 10:24:20 -0400201 * This either makes the current running log transaction wait
202 * until you call btrfs_end_log_trans() or it makes any future
203 * log transactions wait until you call btrfs_end_log_trans()
204 */
205int btrfs_pin_log_trans(struct btrfs_root *root)
206{
207 int ret = -ENOENT;
208
209 mutex_lock(&root->log_mutex);
210 atomic_inc(&root->log_writers);
211 mutex_unlock(&root->log_mutex);
212 return ret;
213}
214
215/*
Chris Masone02119d2008-09-05 16:13:11 -0400216 * indicate we're done making changes to the log tree
217 * and wake up anyone waiting to do a sync
218 */
Jeff Mahoney143bede2012-03-01 14:56:26 +0100219void btrfs_end_log_trans(struct btrfs_root *root)
Chris Masone02119d2008-09-05 16:13:11 -0400220{
Yan Zheng7237f182009-01-21 12:54:03 -0500221 if (atomic_dec_and_test(&root->log_writers)) {
222 smp_mb();
223 if (waitqueue_active(&root->log_writer_wait))
224 wake_up(&root->log_writer_wait);
225 }
Chris Masone02119d2008-09-05 16:13:11 -0400226}
227
228
229/*
230 * the walk control struct is used to pass state down the chain when
231 * processing the log tree. The stage field tells us which part
232 * of the log tree processing we are currently doing. The others
233 * are state fields used for that specific part
234 */
235struct walk_control {
236 /* should we free the extent on disk when done? This is used
237 * at transaction commit time while freeing a log tree
238 */
239 int free;
240
241 /* should we write out the extent buffer? This is used
242 * while flushing the log tree to disk during a sync
243 */
244 int write;
245
246 /* should we wait for the extent buffer io to finish? Also used
247 * while flushing the log tree to disk for a sync
248 */
249 int wait;
250
251 /* pin only walk, we record which extents on disk belong to the
252 * log trees
253 */
254 int pin;
255
256 /* what stage of the replay code we're currently in */
257 int stage;
258
259 /* the root we are currently replaying */
260 struct btrfs_root *replay_dest;
261
262 /* the trans handle for the current replay */
263 struct btrfs_trans_handle *trans;
264
265 /* the function that gets used to process blocks we find in the
266 * tree. Note the extent_buffer might not be up to date when it is
267 * passed in, and it must be checked or read if you need the data
268 * inside it
269 */
270 int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
271 struct walk_control *wc, u64 gen);
272};
273
274/*
275 * process_func used to pin down extents, write them or wait on them
276 */
277static int process_one_buffer(struct btrfs_root *log,
278 struct extent_buffer *eb,
279 struct walk_control *wc, u64 gen)
280{
Josef Bacikb50c6e22013-04-25 15:55:30 -0400281 int ret = 0;
Chris Masone02119d2008-09-05 16:13:11 -0400282
Josef Bacikb50c6e22013-04-25 15:55:30 -0400283 if (wc->pin)
284 ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
285 eb->start, eb->len);
286
287 if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
Chris Masone02119d2008-09-05 16:13:11 -0400288 if (wc->write)
289 btrfs_write_tree_block(eb);
290 if (wc->wait)
291 btrfs_wait_tree_block_writeback(eb);
292 }
Josef Bacikb50c6e22013-04-25 15:55:30 -0400293 return ret;
Chris Masone02119d2008-09-05 16:13:11 -0400294}
295
296/*
297 * Item overwrite used by replay and tree logging. eb, slot and key all refer
298 * to the src data we are copying out.
299 *
300 * root is the tree we are copying into, and path is a scratch
301 * path for use in this function (it should be released on entry and
302 * will be released on exit).
303 *
304 * If the key is already in the destination tree the existing item is
305 * overwritten. If the existing item isn't big enough, it is extended.
306 * If it is too large, it is truncated.
307 *
308 * If the key isn't in the destination yet, a new item is inserted.
309 */
310static noinline int overwrite_item(struct btrfs_trans_handle *trans,
311 struct btrfs_root *root,
312 struct btrfs_path *path,
313 struct extent_buffer *eb, int slot,
314 struct btrfs_key *key)
315{
316 int ret;
317 u32 item_size;
318 u64 saved_i_size = 0;
319 int save_old_i_size = 0;
320 unsigned long src_ptr;
321 unsigned long dst_ptr;
322 int overwrite_root = 0;
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000323 bool inode_item = key->type == BTRFS_INODE_ITEM_KEY;
Chris Masone02119d2008-09-05 16:13:11 -0400324
325 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
326 overwrite_root = 1;
327
328 item_size = btrfs_item_size_nr(eb, slot);
329 src_ptr = btrfs_item_ptr_offset(eb, slot);
330
331 /* look for the key in the destination tree */
332 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000333 if (ret < 0)
334 return ret;
335
Chris Masone02119d2008-09-05 16:13:11 -0400336 if (ret == 0) {
337 char *src_copy;
338 char *dst_copy;
339 u32 dst_size = btrfs_item_size_nr(path->nodes[0],
340 path->slots[0]);
341 if (dst_size != item_size)
342 goto insert;
343
344 if (item_size == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +0200345 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400346 return 0;
347 }
348 dst_copy = kmalloc(item_size, GFP_NOFS);
349 src_copy = kmalloc(item_size, GFP_NOFS);
liubo2a29edc2011-01-26 06:22:08 +0000350 if (!dst_copy || !src_copy) {
David Sterbab3b4aa72011-04-21 01:20:15 +0200351 btrfs_release_path(path);
liubo2a29edc2011-01-26 06:22:08 +0000352 kfree(dst_copy);
353 kfree(src_copy);
354 return -ENOMEM;
355 }
Chris Masone02119d2008-09-05 16:13:11 -0400356
357 read_extent_buffer(eb, src_copy, src_ptr, item_size);
358
359 dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
360 read_extent_buffer(path->nodes[0], dst_copy, dst_ptr,
361 item_size);
362 ret = memcmp(dst_copy, src_copy, item_size);
363
364 kfree(dst_copy);
365 kfree(src_copy);
366 /*
367 * they have the same contents, just return, this saves
368 * us from cowing blocks in the destination tree and doing
369 * extra writes that may not have been done by a previous
370 * sync
371 */
372 if (ret == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +0200373 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400374 return 0;
375 }
376
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000377 /*
378 * We need to load the old nbytes into the inode so when we
379 * replay the extents we've logged we get the right nbytes.
380 */
381 if (inode_item) {
382 struct btrfs_inode_item *item;
383 u64 nbytes;
384
385 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
386 struct btrfs_inode_item);
387 nbytes = btrfs_inode_nbytes(path->nodes[0], item);
388 item = btrfs_item_ptr(eb, slot,
389 struct btrfs_inode_item);
390 btrfs_set_inode_nbytes(eb, item, nbytes);
391 }
392 } else if (inode_item) {
393 struct btrfs_inode_item *item;
394
395 /*
396 * New inode, set nbytes to 0 so that the nbytes comes out
397 * properly when we replay the extents.
398 */
399 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
400 btrfs_set_inode_nbytes(eb, item, 0);
Chris Masone02119d2008-09-05 16:13:11 -0400401 }
402insert:
David Sterbab3b4aa72011-04-21 01:20:15 +0200403 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400404 /* try to insert the key into the destination tree */
405 ret = btrfs_insert_empty_item(trans, root, path,
406 key, item_size);
407
408 /* make sure any existing item is the correct size */
409 if (ret == -EEXIST) {
410 u32 found_size;
411 found_size = btrfs_item_size_nr(path->nodes[0],
412 path->slots[0]);
Jeff Mahoney143bede2012-03-01 14:56:26 +0100413 if (found_size > item_size)
Tsutomu Itohafe5fea2013-04-16 05:18:22 +0000414 btrfs_truncate_item(root, path, item_size, 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +0100415 else if (found_size < item_size)
Tsutomu Itoh4b90c682013-04-16 05:18:49 +0000416 btrfs_extend_item(root, path,
Jeff Mahoney143bede2012-03-01 14:56:26 +0100417 item_size - found_size);
Chris Masone02119d2008-09-05 16:13:11 -0400418 } else if (ret) {
Yan, Zheng4a500fd2010-05-16 10:49:59 -0400419 return ret;
Chris Masone02119d2008-09-05 16:13:11 -0400420 }
421 dst_ptr = btrfs_item_ptr_offset(path->nodes[0],
422 path->slots[0]);
423
424 /* don't overwrite an existing inode if the generation number
425 * was logged as zero. This is done when the tree logging code
426 * is just logging an inode to make sure it exists after recovery.
427 *
428 * Also, don't overwrite i_size on directories during replay.
429 * log replay inserts and removes directory items based on the
430 * state of the tree found in the subvolume, and i_size is modified
431 * as it goes
432 */
433 if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) {
434 struct btrfs_inode_item *src_item;
435 struct btrfs_inode_item *dst_item;
436
437 src_item = (struct btrfs_inode_item *)src_ptr;
438 dst_item = (struct btrfs_inode_item *)dst_ptr;
439
440 if (btrfs_inode_generation(eb, src_item) == 0)
441 goto no_copy;
442
443 if (overwrite_root &&
444 S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
445 S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) {
446 save_old_i_size = 1;
447 saved_i_size = btrfs_inode_size(path->nodes[0],
448 dst_item);
449 }
450 }
451
452 copy_extent_buffer(path->nodes[0], eb, dst_ptr,
453 src_ptr, item_size);
454
455 if (save_old_i_size) {
456 struct btrfs_inode_item *dst_item;
457 dst_item = (struct btrfs_inode_item *)dst_ptr;
458 btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size);
459 }
460
461 /* make sure the generation is filled in */
462 if (key->type == BTRFS_INODE_ITEM_KEY) {
463 struct btrfs_inode_item *dst_item;
464 dst_item = (struct btrfs_inode_item *)dst_ptr;
465 if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) {
466 btrfs_set_inode_generation(path->nodes[0], dst_item,
467 trans->transid);
468 }
469 }
470no_copy:
471 btrfs_mark_buffer_dirty(path->nodes[0]);
David Sterbab3b4aa72011-04-21 01:20:15 +0200472 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400473 return 0;
474}
475
476/*
477 * simple helper to read an inode off the disk from a given root
478 * This can only be called for subvolume roots and not for the log
479 */
480static noinline struct inode *read_one_inode(struct btrfs_root *root,
481 u64 objectid)
482{
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400483 struct btrfs_key key;
Chris Masone02119d2008-09-05 16:13:11 -0400484 struct inode *inode;
Chris Masone02119d2008-09-05 16:13:11 -0400485
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400486 key.objectid = objectid;
487 key.type = BTRFS_INODE_ITEM_KEY;
488 key.offset = 0;
Josef Bacik73f73412009-12-04 17:38:27 +0000489 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400490 if (IS_ERR(inode)) {
491 inode = NULL;
492 } else if (is_bad_inode(inode)) {
Chris Masone02119d2008-09-05 16:13:11 -0400493 iput(inode);
494 inode = NULL;
495 }
496 return inode;
497}
498
499/* replays a single extent in 'eb' at 'slot' with 'key' into the
500 * subvolume 'root'. path is released on entry and should be released
501 * on exit.
502 *
503 * extents in the log tree have not been allocated out of the extent
504 * tree yet. So, this completes the allocation, taking a reference
505 * as required if the extent already exists or creating a new extent
506 * if it isn't in the extent allocation tree yet.
507 *
508 * The extent is inserted into the file, dropping any existing extents
509 * from the file that overlap the new one.
510 */
511static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
512 struct btrfs_root *root,
513 struct btrfs_path *path,
514 struct extent_buffer *eb, int slot,
515 struct btrfs_key *key)
516{
517 int found_type;
Chris Masone02119d2008-09-05 16:13:11 -0400518 u64 extent_end;
Chris Masone02119d2008-09-05 16:13:11 -0400519 u64 start = key->offset;
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000520 u64 nbytes = 0;
Chris Masone02119d2008-09-05 16:13:11 -0400521 struct btrfs_file_extent_item *item;
522 struct inode *inode = NULL;
523 unsigned long size;
524 int ret = 0;
525
526 item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
527 found_type = btrfs_file_extent_type(eb, item);
528
Yan Zhengd899e052008-10-30 14:25:28 -0400529 if (found_type == BTRFS_FILE_EXTENT_REG ||
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000530 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
531 nbytes = btrfs_file_extent_num_bytes(eb, item);
532 extent_end = start + nbytes;
533
534 /*
535 * We don't add to the inodes nbytes if we are prealloc or a
536 * hole.
537 */
538 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
539 nbytes = 0;
540 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
Chris Masonc8b97812008-10-29 14:49:59 -0400541 size = btrfs_file_extent_inline_len(eb, item);
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000542 nbytes = btrfs_file_extent_ram_bytes(eb, item);
Qu Wenruofda28322013-02-26 08:10:22 +0000543 extent_end = ALIGN(start + size, root->sectorsize);
Chris Masone02119d2008-09-05 16:13:11 -0400544 } else {
545 ret = 0;
546 goto out;
547 }
548
549 inode = read_one_inode(root, key->objectid);
550 if (!inode) {
551 ret = -EIO;
552 goto out;
553 }
554
555 /*
556 * first check to see if we already have this extent in the
557 * file. This must be done before the btrfs_drop_extents run
558 * so we don't try to drop this extent.
559 */
Li Zefan33345d012011-04-20 10:31:50 +0800560 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
Chris Masone02119d2008-09-05 16:13:11 -0400561 start, 0);
562
Yan Zhengd899e052008-10-30 14:25:28 -0400563 if (ret == 0 &&
564 (found_type == BTRFS_FILE_EXTENT_REG ||
565 found_type == BTRFS_FILE_EXTENT_PREALLOC)) {
Chris Masone02119d2008-09-05 16:13:11 -0400566 struct btrfs_file_extent_item cmp1;
567 struct btrfs_file_extent_item cmp2;
568 struct btrfs_file_extent_item *existing;
569 struct extent_buffer *leaf;
570
571 leaf = path->nodes[0];
572 existing = btrfs_item_ptr(leaf, path->slots[0],
573 struct btrfs_file_extent_item);
574
575 read_extent_buffer(eb, &cmp1, (unsigned long)item,
576 sizeof(cmp1));
577 read_extent_buffer(leaf, &cmp2, (unsigned long)existing,
578 sizeof(cmp2));
579
580 /*
581 * we already have a pointer to this exact extent,
582 * we don't have to do anything
583 */
584 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +0200585 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400586 goto out;
587 }
588 }
David Sterbab3b4aa72011-04-21 01:20:15 +0200589 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400590
591 /* drop any overlapping extents */
Josef Bacik26714852012-08-29 12:24:27 -0400592 ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
Josef Bacik36508602013-04-25 16:23:32 -0400593 if (ret)
594 goto out;
Chris Masone02119d2008-09-05 16:13:11 -0400595
Yan Zheng07d400a2009-01-06 11:42:00 -0500596 if (found_type == BTRFS_FILE_EXTENT_REG ||
597 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400598 u64 offset;
Yan Zheng07d400a2009-01-06 11:42:00 -0500599 unsigned long dest_offset;
600 struct btrfs_key ins;
Chris Masone02119d2008-09-05 16:13:11 -0400601
Yan Zheng07d400a2009-01-06 11:42:00 -0500602 ret = btrfs_insert_empty_item(trans, root, path, key,
603 sizeof(*item));
Josef Bacik36508602013-04-25 16:23:32 -0400604 if (ret)
605 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500606 dest_offset = btrfs_item_ptr_offset(path->nodes[0],
607 path->slots[0]);
608 copy_extent_buffer(path->nodes[0], eb, dest_offset,
609 (unsigned long)item, sizeof(*item));
610
611 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
612 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
613 ins.type = BTRFS_EXTENT_ITEM_KEY;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400614 offset = key->offset - btrfs_file_extent_offset(eb, item);
Yan Zheng07d400a2009-01-06 11:42:00 -0500615
616 if (ins.objectid > 0) {
617 u64 csum_start;
618 u64 csum_end;
619 LIST_HEAD(ordered_sums);
620 /*
621 * is this extent already allocated in the extent
622 * allocation tree? If so, just add a reference
623 */
624 ret = btrfs_lookup_extent(root, ins.objectid,
625 ins.offset);
626 if (ret == 0) {
627 ret = btrfs_inc_extent_ref(trans, root,
628 ins.objectid, ins.offset,
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400629 0, root->root_key.objectid,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200630 key->objectid, offset, 0);
Josef Bacikb50c6e22013-04-25 15:55:30 -0400631 if (ret)
632 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500633 } else {
634 /*
635 * insert the extent pointer in the extent
636 * allocation tree
637 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400638 ret = btrfs_alloc_logged_file_extent(trans,
639 root, root->root_key.objectid,
640 key->objectid, offset, &ins);
Josef Bacikb50c6e22013-04-25 15:55:30 -0400641 if (ret)
642 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500643 }
David Sterbab3b4aa72011-04-21 01:20:15 +0200644 btrfs_release_path(path);
Yan Zheng07d400a2009-01-06 11:42:00 -0500645
646 if (btrfs_file_extent_compression(eb, item)) {
647 csum_start = ins.objectid;
648 csum_end = csum_start + ins.offset;
649 } else {
650 csum_start = ins.objectid +
651 btrfs_file_extent_offset(eb, item);
652 csum_end = csum_start +
653 btrfs_file_extent_num_bytes(eb, item);
654 }
655
656 ret = btrfs_lookup_csums_range(root->log_root,
657 csum_start, csum_end - 1,
Arne Jansena2de7332011-03-08 14:14:00 +0100658 &ordered_sums, 0);
Josef Bacik36508602013-04-25 16:23:32 -0400659 if (ret)
660 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500661 while (!list_empty(&ordered_sums)) {
662 struct btrfs_ordered_sum *sums;
663 sums = list_entry(ordered_sums.next,
664 struct btrfs_ordered_sum,
665 list);
Josef Bacik36508602013-04-25 16:23:32 -0400666 if (!ret)
667 ret = btrfs_csum_file_blocks(trans,
Yan Zheng07d400a2009-01-06 11:42:00 -0500668 root->fs_info->csum_root,
669 sums);
Yan Zheng07d400a2009-01-06 11:42:00 -0500670 list_del(&sums->list);
671 kfree(sums);
672 }
Josef Bacik36508602013-04-25 16:23:32 -0400673 if (ret)
674 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500675 } else {
David Sterbab3b4aa72011-04-21 01:20:15 +0200676 btrfs_release_path(path);
Yan Zheng07d400a2009-01-06 11:42:00 -0500677 }
678 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
679 /* inline extents are easy, we just overwrite them */
680 ret = overwrite_item(trans, root, path, eb, slot, key);
Josef Bacik36508602013-04-25 16:23:32 -0400681 if (ret)
682 goto out;
Yan Zheng07d400a2009-01-06 11:42:00 -0500683 }
684
Josef Bacik4bc4bee2013-04-05 20:50:09 +0000685 inode_add_bytes(inode, nbytes);
Tsutomu Itohb9959292012-06-25 21:25:22 -0600686 ret = btrfs_update_inode(trans, root, inode);
Chris Masone02119d2008-09-05 16:13:11 -0400687out:
688 if (inode)
689 iput(inode);
690 return ret;
691}
692
693/*
694 * when cleaning up conflicts between the directory names in the
695 * subvolume, directory names in the log and directory names in the
696 * inode back references, we may have to unlink inodes from directories.
697 *
698 * This is a helper function to do the unlink of a specific directory
699 * item
700 */
701static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
702 struct btrfs_root *root,
703 struct btrfs_path *path,
704 struct inode *dir,
705 struct btrfs_dir_item *di)
706{
707 struct inode *inode;
708 char *name;
709 int name_len;
710 struct extent_buffer *leaf;
711 struct btrfs_key location;
712 int ret;
713
714 leaf = path->nodes[0];
715
716 btrfs_dir_item_key_to_cpu(leaf, di, &location);
717 name_len = btrfs_dir_name_len(leaf, di);
718 name = kmalloc(name_len, GFP_NOFS);
liubo2a29edc2011-01-26 06:22:08 +0000719 if (!name)
720 return -ENOMEM;
721
Chris Masone02119d2008-09-05 16:13:11 -0400722 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
David Sterbab3b4aa72011-04-21 01:20:15 +0200723 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400724
725 inode = read_one_inode(root, location.objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +0000726 if (!inode) {
Josef Bacik36508602013-04-25 16:23:32 -0400727 ret = -EIO;
728 goto out;
Tsutomu Itohc00e9492011-04-28 09:10:23 +0000729 }
Chris Masone02119d2008-09-05 16:13:11 -0400730
Yan Zhengec051c02009-01-05 15:43:42 -0500731 ret = link_to_fixup_dir(trans, root, path, location.objectid);
Josef Bacik36508602013-04-25 16:23:32 -0400732 if (ret)
733 goto out;
Chris Mason12fcfd22009-03-24 10:24:20 -0400734
Chris Masone02119d2008-09-05 16:13:11 -0400735 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
Josef Bacik36508602013-04-25 16:23:32 -0400736 if (ret)
737 goto out;
Chris Masonb6305562012-07-02 15:29:53 -0400738 btrfs_run_delayed_items(trans, root);
Josef Bacik36508602013-04-25 16:23:32 -0400739out:
740 kfree(name);
741 iput(inode);
Chris Masone02119d2008-09-05 16:13:11 -0400742 return ret;
743}
744
745/*
746 * helper function to see if a given name and sequence number found
747 * in an inode back reference are already in a directory and correctly
748 * point to this inode
749 */
750static noinline int inode_in_dir(struct btrfs_root *root,
751 struct btrfs_path *path,
752 u64 dirid, u64 objectid, u64 index,
753 const char *name, int name_len)
754{
755 struct btrfs_dir_item *di;
756 struct btrfs_key location;
757 int match = 0;
758
759 di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
760 index, name, name_len, 0);
761 if (di && !IS_ERR(di)) {
762 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
763 if (location.objectid != objectid)
764 goto out;
765 } else
766 goto out;
David Sterbab3b4aa72011-04-21 01:20:15 +0200767 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400768
769 di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
770 if (di && !IS_ERR(di)) {
771 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
772 if (location.objectid != objectid)
773 goto out;
774 } else
775 goto out;
776 match = 1;
777out:
David Sterbab3b4aa72011-04-21 01:20:15 +0200778 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -0400779 return match;
780}
781
782/*
783 * helper function to check a log tree for a named back reference in
784 * an inode. This is used to decide if a back reference that is
785 * found in the subvolume conflicts with what we find in the log.
786 *
787 * inode backreferences may have multiple refs in a single item,
788 * during replay we process one reference at a time, and we don't
789 * want to delete valid links to a file from the subvolume if that
790 * link is also in the log.
791 */
792static noinline int backref_in_log(struct btrfs_root *log,
793 struct btrfs_key *key,
Mark Fashehf1863732012-08-08 11:32:27 -0700794 u64 ref_objectid,
Chris Masone02119d2008-09-05 16:13:11 -0400795 char *name, int namelen)
796{
797 struct btrfs_path *path;
798 struct btrfs_inode_ref *ref;
799 unsigned long ptr;
800 unsigned long ptr_end;
801 unsigned long name_ptr;
802 int found_name_len;
803 int item_size;
804 int ret;
805 int match = 0;
806
807 path = btrfs_alloc_path();
liubo2a29edc2011-01-26 06:22:08 +0000808 if (!path)
809 return -ENOMEM;
810
Chris Masone02119d2008-09-05 16:13:11 -0400811 ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
812 if (ret != 0)
813 goto out;
814
Chris Masone02119d2008-09-05 16:13:11 -0400815 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
Mark Fashehf1863732012-08-08 11:32:27 -0700816
817 if (key->type == BTRFS_INODE_EXTREF_KEY) {
818 if (btrfs_find_name_in_ext_backref(path, ref_objectid,
819 name, namelen, NULL))
820 match = 1;
821
822 goto out;
823 }
824
825 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
Chris Masone02119d2008-09-05 16:13:11 -0400826 ptr_end = ptr + item_size;
827 while (ptr < ptr_end) {
828 ref = (struct btrfs_inode_ref *)ptr;
829 found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref);
830 if (found_name_len == namelen) {
831 name_ptr = (unsigned long)(ref + 1);
832 ret = memcmp_extent_buffer(path->nodes[0], name,
833 name_ptr, namelen);
834 if (ret == 0) {
835 match = 1;
836 goto out;
837 }
838 }
839 ptr = (unsigned long)(ref + 1) + found_name_len;
840 }
841out:
842 btrfs_free_path(path);
843 return match;
844}
845
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700846static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
847 struct btrfs_root *root,
848 struct btrfs_path *path,
849 struct btrfs_root *log_root,
850 struct inode *dir, struct inode *inode,
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700851 struct extent_buffer *eb,
Mark Fashehf1863732012-08-08 11:32:27 -0700852 u64 inode_objectid, u64 parent_objectid,
853 u64 ref_index, char *name, int namelen,
854 int *search_done)
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700855{
856 int ret;
Mark Fashehf1863732012-08-08 11:32:27 -0700857 char *victim_name;
858 int victim_name_len;
859 struct extent_buffer *leaf;
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700860 struct btrfs_dir_item *di;
Mark Fashehf1863732012-08-08 11:32:27 -0700861 struct btrfs_key search_key;
862 struct btrfs_inode_extref *extref;
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700863
Mark Fashehf1863732012-08-08 11:32:27 -0700864again:
865 /* Search old style refs */
866 search_key.objectid = inode_objectid;
867 search_key.type = BTRFS_INODE_REF_KEY;
868 search_key.offset = parent_objectid;
869 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700870 if (ret == 0) {
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700871 struct btrfs_inode_ref *victim_ref;
872 unsigned long ptr;
873 unsigned long ptr_end;
Mark Fashehf1863732012-08-08 11:32:27 -0700874
875 leaf = path->nodes[0];
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700876
877 /* are we trying to overwrite a back ref for the root directory
878 * if so, just jump out, we're done
879 */
Mark Fashehf1863732012-08-08 11:32:27 -0700880 if (search_key.objectid == search_key.offset)
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700881 return 1;
882
883 /* check all the names in this back reference to see
884 * if they are in the log. if so, we allow them to stay
885 * otherwise they must be unlinked as a conflict
886 */
887 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
888 ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]);
889 while (ptr < ptr_end) {
890 victim_ref = (struct btrfs_inode_ref *)ptr;
891 victim_name_len = btrfs_inode_ref_name_len(leaf,
892 victim_ref);
893 victim_name = kmalloc(victim_name_len, GFP_NOFS);
Josef Bacik36508602013-04-25 16:23:32 -0400894 if (!victim_name)
895 return -ENOMEM;
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700896
897 read_extent_buffer(leaf, victim_name,
898 (unsigned long)(victim_ref + 1),
899 victim_name_len);
900
Mark Fashehf1863732012-08-08 11:32:27 -0700901 if (!backref_in_log(log_root, &search_key,
902 parent_objectid,
903 victim_name,
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700904 victim_name_len)) {
905 btrfs_inc_nlink(inode);
906 btrfs_release_path(path);
907
908 ret = btrfs_unlink_inode(trans, root, dir,
909 inode, victim_name,
910 victim_name_len);
Mark Fashehf1863732012-08-08 11:32:27 -0700911 kfree(victim_name);
Josef Bacik36508602013-04-25 16:23:32 -0400912 if (ret)
913 return ret;
914 btrfs_run_delayed_items(trans, root);
Mark Fashehf1863732012-08-08 11:32:27 -0700915 *search_done = 1;
916 goto again;
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700917 }
918 kfree(victim_name);
Mark Fashehf1863732012-08-08 11:32:27 -0700919
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700920 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
921 }
Jan Schmidt5a1d7842012-08-17 14:04:41 -0700922
923 /*
924 * NOTE: we have searched root tree and checked the
925 * coresponding ref, it does not need to check again.
926 */
927 *search_done = 1;
928 }
929 btrfs_release_path(path);
930
Mark Fashehf1863732012-08-08 11:32:27 -0700931 /* Same search but for extended refs */
932 extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
933 inode_objectid, parent_objectid, 0,
934 0);
935 if (!IS_ERR_OR_NULL(extref)) {
936 u32 item_size;
937 u32 cur_offset = 0;
938 unsigned long base;
939 struct inode *victim_parent;
940
941 leaf = path->nodes[0];
942
943 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
944 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
945
946 while (cur_offset < item_size) {
947 extref = (struct btrfs_inode_extref *)base + cur_offset;
948
949 victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
950
951 if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
952 goto next;
953
954 victim_name = kmalloc(victim_name_len, GFP_NOFS);
Josef Bacik36508602013-04-25 16:23:32 -0400955 if (!victim_name)
956 return -ENOMEM;
Mark Fashehf1863732012-08-08 11:32:27 -0700957 read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
958 victim_name_len);
959
960 search_key.objectid = inode_objectid;
961 search_key.type = BTRFS_INODE_EXTREF_KEY;
962 search_key.offset = btrfs_extref_hash(parent_objectid,
963 victim_name,
964 victim_name_len);
965 ret = 0;
966 if (!backref_in_log(log_root, &search_key,
967 parent_objectid, victim_name,
968 victim_name_len)) {
969 ret = -ENOENT;
970 victim_parent = read_one_inode(root,
971 parent_objectid);
972 if (victim_parent) {
973 btrfs_inc_nlink(inode);
974 btrfs_release_path(path);
975
976 ret = btrfs_unlink_inode(trans, root,
977 victim_parent,
978 inode,
979 victim_name,
980 victim_name_len);
981 btrfs_run_delayed_items(trans, root);
982 }
Mark Fashehf1863732012-08-08 11:32:27 -0700983 iput(victim_parent);
984 kfree(victim_name);
Josef Bacik36508602013-04-25 16:23:32 -0400985 if (ret)
986 return ret;
Mark Fashehf1863732012-08-08 11:32:27 -0700987 *search_done = 1;
988 goto again;
989 }
990 kfree(victim_name);
Josef Bacik36508602013-04-25 16:23:32 -0400991 if (ret)
992 return ret;
Mark Fashehf1863732012-08-08 11:32:27 -0700993next:
994 cur_offset += victim_name_len + sizeof(*extref);
995 }
996 *search_done = 1;
997 }
998 btrfs_release_path(path);
999
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001000 /* look for a conflicting sequence number */
1001 di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
Mark Fashehf1863732012-08-08 11:32:27 -07001002 ref_index, name, namelen, 0);
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001003 if (di && !IS_ERR(di)) {
1004 ret = drop_one_dir_item(trans, root, path, dir, di);
Josef Bacik36508602013-04-25 16:23:32 -04001005 if (ret)
1006 return ret;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001007 }
1008 btrfs_release_path(path);
1009
1010 /* look for a conflicing name */
1011 di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
1012 name, namelen, 0);
1013 if (di && !IS_ERR(di)) {
1014 ret = drop_one_dir_item(trans, root, path, dir, di);
Josef Bacik36508602013-04-25 16:23:32 -04001015 if (ret)
1016 return ret;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001017 }
1018 btrfs_release_path(path);
1019
1020 return 0;
1021}
Chris Masone02119d2008-09-05 16:13:11 -04001022
Mark Fashehf1863732012-08-08 11:32:27 -07001023static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
1024 u32 *namelen, char **name, u64 *index,
1025 u64 *parent_objectid)
1026{
1027 struct btrfs_inode_extref *extref;
1028
1029 extref = (struct btrfs_inode_extref *)ref_ptr;
1030
1031 *namelen = btrfs_inode_extref_name_len(eb, extref);
1032 *name = kmalloc(*namelen, GFP_NOFS);
1033 if (*name == NULL)
1034 return -ENOMEM;
1035
1036 read_extent_buffer(eb, *name, (unsigned long)&extref->name,
1037 *namelen);
1038
1039 *index = btrfs_inode_extref_index(eb, extref);
1040 if (parent_objectid)
1041 *parent_objectid = btrfs_inode_extref_parent(eb, extref);
1042
1043 return 0;
1044}
1045
1046static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
1047 u32 *namelen, char **name, u64 *index)
1048{
1049 struct btrfs_inode_ref *ref;
1050
1051 ref = (struct btrfs_inode_ref *)ref_ptr;
1052
1053 *namelen = btrfs_inode_ref_name_len(eb, ref);
1054 *name = kmalloc(*namelen, GFP_NOFS);
1055 if (*name == NULL)
1056 return -ENOMEM;
1057
1058 read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
1059
1060 *index = btrfs_inode_ref_index(eb, ref);
1061
1062 return 0;
1063}
1064
Chris Masone02119d2008-09-05 16:13:11 -04001065/*
1066 * replay one inode back reference item found in the log tree.
1067 * eb, slot and key refer to the buffer and key found in the log tree.
1068 * root is the destination we are replaying into, and path is for temp
1069 * use by this function. (it should be released on return).
1070 */
1071static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1072 struct btrfs_root *root,
1073 struct btrfs_root *log,
1074 struct btrfs_path *path,
1075 struct extent_buffer *eb, int slot,
1076 struct btrfs_key *key)
1077{
liubo34f3e4f2011-08-06 08:35:23 +00001078 struct inode *dir;
Chris Masone02119d2008-09-05 16:13:11 -04001079 struct inode *inode;
Chris Masone02119d2008-09-05 16:13:11 -04001080 unsigned long ref_ptr;
1081 unsigned long ref_end;
liubo34f3e4f2011-08-06 08:35:23 +00001082 char *name;
1083 int namelen;
1084 int ret;
liuboc622ae62011-03-26 08:01:12 -04001085 int search_done = 0;
Mark Fashehf1863732012-08-08 11:32:27 -07001086 int log_ref_ver = 0;
1087 u64 parent_objectid;
1088 u64 inode_objectid;
Chris Masonf46dbe3de2012-10-09 11:17:20 -04001089 u64 ref_index = 0;
Mark Fashehf1863732012-08-08 11:32:27 -07001090 int ref_struct_size;
1091
1092 ref_ptr = btrfs_item_ptr_offset(eb, slot);
1093 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
1094
1095 if (key->type == BTRFS_INODE_EXTREF_KEY) {
1096 struct btrfs_inode_extref *r;
1097
1098 ref_struct_size = sizeof(struct btrfs_inode_extref);
1099 log_ref_ver = 1;
1100 r = (struct btrfs_inode_extref *)ref_ptr;
1101 parent_objectid = btrfs_inode_extref_parent(eb, r);
1102 } else {
1103 ref_struct_size = sizeof(struct btrfs_inode_ref);
1104 parent_objectid = key->offset;
1105 }
1106 inode_objectid = key->objectid;
Chris Masone02119d2008-09-05 16:13:11 -04001107
Chris Masone02119d2008-09-05 16:13:11 -04001108 /*
1109 * it is possible that we didn't log all the parent directories
1110 * for a given inode. If we don't find the dir, just don't
1111 * copy the back ref in. The link count fixup code will take
1112 * care of the rest
1113 */
Mark Fashehf1863732012-08-08 11:32:27 -07001114 dir = read_one_inode(root, parent_objectid);
Chris Masone02119d2008-09-05 16:13:11 -04001115 if (!dir)
1116 return -ENOENT;
1117
Mark Fashehf1863732012-08-08 11:32:27 -07001118 inode = read_one_inode(root, inode_objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001119 if (!inode) {
1120 iput(dir);
1121 return -EIO;
1122 }
Chris Masone02119d2008-09-05 16:13:11 -04001123
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001124 while (ref_ptr < ref_end) {
Mark Fashehf1863732012-08-08 11:32:27 -07001125 if (log_ref_ver) {
1126 ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
1127 &ref_index, &parent_objectid);
1128 /*
1129 * parent object can change from one array
1130 * item to another.
1131 */
1132 if (!dir)
1133 dir = read_one_inode(root, parent_objectid);
1134 if (!dir)
1135 return -ENOENT;
1136 } else {
1137 ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
1138 &ref_index);
1139 }
1140 if (ret)
1141 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001142
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001143 /* if we already have a perfect match, we're done */
1144 if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
Mark Fashehf1863732012-08-08 11:32:27 -07001145 ref_index, name, namelen)) {
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001146 /*
1147 * look for a conflicting back reference in the
1148 * metadata. if we find one we have to unlink that name
1149 * of the file before we add our new link. Later on, we
1150 * overwrite any existing back reference, and we don't
1151 * want to create dangling pointers in the directory.
1152 */
Chris Masone02119d2008-09-05 16:13:11 -04001153
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001154 if (!search_done) {
1155 ret = __add_inode_ref(trans, root, path, log,
Mark Fashehf1863732012-08-08 11:32:27 -07001156 dir, inode, eb,
1157 inode_objectid,
1158 parent_objectid,
1159 ref_index, name, namelen,
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001160 &search_done);
Josef Bacik36508602013-04-25 16:23:32 -04001161 if (ret == 1) {
1162 ret = 0;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001163 goto out;
Josef Bacik36508602013-04-25 16:23:32 -04001164 }
1165 if (ret)
1166 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001167 }
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001168
1169 /* insert our name */
1170 ret = btrfs_add_link(trans, dir, inode, name, namelen,
Mark Fashehf1863732012-08-08 11:32:27 -07001171 0, ref_index);
Josef Bacik36508602013-04-25 16:23:32 -04001172 if (ret)
1173 goto out;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001174
1175 btrfs_update_inode(trans, root, inode);
Chris Masone02119d2008-09-05 16:13:11 -04001176 }
liuboc622ae62011-03-26 08:01:12 -04001177
Mark Fashehf1863732012-08-08 11:32:27 -07001178 ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001179 kfree(name);
Mark Fashehf1863732012-08-08 11:32:27 -07001180 if (log_ref_ver) {
1181 iput(dir);
1182 dir = NULL;
1183 }
Chris Masone02119d2008-09-05 16:13:11 -04001184 }
Chris Masone02119d2008-09-05 16:13:11 -04001185
1186 /* finally write the back reference in the inode */
1187 ret = overwrite_item(trans, root, path, eb, slot, key);
Jan Schmidt5a1d7842012-08-17 14:04:41 -07001188out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001189 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001190 iput(dir);
1191 iput(inode);
Josef Bacik36508602013-04-25 16:23:32 -04001192 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001193}
1194
Yan, Zhengc71bf092009-11-12 09:34:40 +00001195static int insert_orphan_item(struct btrfs_trans_handle *trans,
1196 struct btrfs_root *root, u64 offset)
1197{
1198 int ret;
1199 ret = btrfs_find_orphan_item(root, offset);
1200 if (ret > 0)
1201 ret = btrfs_insert_orphan_item(trans, root, offset);
1202 return ret;
1203}
1204
Mark Fashehf1863732012-08-08 11:32:27 -07001205static int count_inode_extrefs(struct btrfs_root *root,
1206 struct inode *inode, struct btrfs_path *path)
Chris Masone02119d2008-09-05 16:13:11 -04001207{
Mark Fashehf1863732012-08-08 11:32:27 -07001208 int ret = 0;
1209 int name_len;
1210 unsigned int nlink = 0;
1211 u32 item_size;
1212 u32 cur_offset = 0;
1213 u64 inode_objectid = btrfs_ino(inode);
1214 u64 offset = 0;
1215 unsigned long ptr;
1216 struct btrfs_inode_extref *extref;
1217 struct extent_buffer *leaf;
1218
1219 while (1) {
1220 ret = btrfs_find_one_extref(root, inode_objectid, offset, path,
1221 &extref, &offset);
1222 if (ret)
1223 break;
1224
1225 leaf = path->nodes[0];
1226 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1227 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
1228
1229 while (cur_offset < item_size) {
1230 extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
1231 name_len = btrfs_inode_extref_name_len(leaf, extref);
1232
1233 nlink++;
1234
1235 cur_offset += name_len + sizeof(*extref);
1236 }
1237
1238 offset++;
1239 btrfs_release_path(path);
1240 }
1241 btrfs_release_path(path);
1242
1243 if (ret < 0)
1244 return ret;
1245 return nlink;
1246}
1247
1248static int count_inode_refs(struct btrfs_root *root,
1249 struct inode *inode, struct btrfs_path *path)
1250{
Chris Masone02119d2008-09-05 16:13:11 -04001251 int ret;
1252 struct btrfs_key key;
Mark Fashehf1863732012-08-08 11:32:27 -07001253 unsigned int nlink = 0;
Chris Masone02119d2008-09-05 16:13:11 -04001254 unsigned long ptr;
1255 unsigned long ptr_end;
1256 int name_len;
Li Zefan33345d012011-04-20 10:31:50 +08001257 u64 ino = btrfs_ino(inode);
Chris Masone02119d2008-09-05 16:13:11 -04001258
Li Zefan33345d012011-04-20 10:31:50 +08001259 key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04001260 key.type = BTRFS_INODE_REF_KEY;
1261 key.offset = (u64)-1;
1262
Chris Masond3977122009-01-05 21:25:51 -05001263 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04001264 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1265 if (ret < 0)
1266 break;
1267 if (ret > 0) {
1268 if (path->slots[0] == 0)
1269 break;
1270 path->slots[0]--;
1271 }
1272 btrfs_item_key_to_cpu(path->nodes[0], &key,
1273 path->slots[0]);
Li Zefan33345d012011-04-20 10:31:50 +08001274 if (key.objectid != ino ||
Chris Masone02119d2008-09-05 16:13:11 -04001275 key.type != BTRFS_INODE_REF_KEY)
1276 break;
1277 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
1278 ptr_end = ptr + btrfs_item_size_nr(path->nodes[0],
1279 path->slots[0]);
Chris Masond3977122009-01-05 21:25:51 -05001280 while (ptr < ptr_end) {
Chris Masone02119d2008-09-05 16:13:11 -04001281 struct btrfs_inode_ref *ref;
1282
1283 ref = (struct btrfs_inode_ref *)ptr;
1284 name_len = btrfs_inode_ref_name_len(path->nodes[0],
1285 ref);
1286 ptr = (unsigned long)(ref + 1) + name_len;
1287 nlink++;
1288 }
1289
1290 if (key.offset == 0)
1291 break;
1292 key.offset--;
David Sterbab3b4aa72011-04-21 01:20:15 +02001293 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001294 }
David Sterbab3b4aa72011-04-21 01:20:15 +02001295 btrfs_release_path(path);
Mark Fashehf1863732012-08-08 11:32:27 -07001296
1297 return nlink;
1298}
1299
1300/*
1301 * There are a few corners where the link count of the file can't
1302 * be properly maintained during replay. So, instead of adding
1303 * lots of complexity to the log code, we just scan the backrefs
1304 * for any file that has been through replay.
1305 *
1306 * The scan will update the link count on the inode to reflect the
1307 * number of back refs found. If it goes down to zero, the iput
1308 * will free the inode.
1309 */
1310static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1311 struct btrfs_root *root,
1312 struct inode *inode)
1313{
1314 struct btrfs_path *path;
1315 int ret;
1316 u64 nlink = 0;
1317 u64 ino = btrfs_ino(inode);
1318
1319 path = btrfs_alloc_path();
1320 if (!path)
1321 return -ENOMEM;
1322
1323 ret = count_inode_refs(root, inode, path);
1324 if (ret < 0)
1325 goto out;
1326
1327 nlink = ret;
1328
1329 ret = count_inode_extrefs(root, inode, path);
1330 if (ret == -ENOENT)
1331 ret = 0;
1332
1333 if (ret < 0)
1334 goto out;
1335
1336 nlink += ret;
1337
1338 ret = 0;
1339
Chris Masone02119d2008-09-05 16:13:11 -04001340 if (nlink != inode->i_nlink) {
Miklos Szeredibfe86842011-10-28 14:13:29 +02001341 set_nlink(inode, nlink);
Chris Masone02119d2008-09-05 16:13:11 -04001342 btrfs_update_inode(trans, root, inode);
1343 }
Chris Mason8d5bf1c2008-09-11 15:51:21 -04001344 BTRFS_I(inode)->index_cnt = (u64)-1;
Chris Masone02119d2008-09-05 16:13:11 -04001345
Yan, Zhengc71bf092009-11-12 09:34:40 +00001346 if (inode->i_nlink == 0) {
1347 if (S_ISDIR(inode->i_mode)) {
1348 ret = replay_dir_deletes(trans, root, NULL, path,
Li Zefan33345d012011-04-20 10:31:50 +08001349 ino, 1);
Josef Bacik36508602013-04-25 16:23:32 -04001350 if (ret)
1351 goto out;
Yan, Zhengc71bf092009-11-12 09:34:40 +00001352 }
Li Zefan33345d012011-04-20 10:31:50 +08001353 ret = insert_orphan_item(trans, root, ino);
Chris Mason12fcfd22009-03-24 10:24:20 -04001354 }
Chris Mason12fcfd22009-03-24 10:24:20 -04001355
Mark Fashehf1863732012-08-08 11:32:27 -07001356out:
1357 btrfs_free_path(path);
1358 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001359}
1360
1361static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1362 struct btrfs_root *root,
1363 struct btrfs_path *path)
1364{
1365 int ret;
1366 struct btrfs_key key;
1367 struct inode *inode;
1368
1369 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
1370 key.type = BTRFS_ORPHAN_ITEM_KEY;
1371 key.offset = (u64)-1;
Chris Masond3977122009-01-05 21:25:51 -05001372 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04001373 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1374 if (ret < 0)
1375 break;
1376
1377 if (ret == 1) {
1378 if (path->slots[0] == 0)
1379 break;
1380 path->slots[0]--;
1381 }
1382
1383 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1384 if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID ||
1385 key.type != BTRFS_ORPHAN_ITEM_KEY)
1386 break;
1387
1388 ret = btrfs_del_item(trans, root, path);
Tsutomu Itoh65a246c2011-05-19 04:37:44 +00001389 if (ret)
1390 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001391
David Sterbab3b4aa72011-04-21 01:20:15 +02001392 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001393 inode = read_one_inode(root, key.offset);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001394 if (!inode)
1395 return -EIO;
Chris Masone02119d2008-09-05 16:13:11 -04001396
1397 ret = fixup_inode_link_count(trans, root, inode);
Chris Masone02119d2008-09-05 16:13:11 -04001398 iput(inode);
Josef Bacik36508602013-04-25 16:23:32 -04001399 if (ret)
1400 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001401
Chris Mason12fcfd22009-03-24 10:24:20 -04001402 /*
1403 * fixup on a directory may create new entries,
1404 * make sure we always look for the highset possible
1405 * offset
1406 */
1407 key.offset = (u64)-1;
Chris Masone02119d2008-09-05 16:13:11 -04001408 }
Tsutomu Itoh65a246c2011-05-19 04:37:44 +00001409 ret = 0;
1410out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001411 btrfs_release_path(path);
Tsutomu Itoh65a246c2011-05-19 04:37:44 +00001412 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001413}
1414
1415
1416/*
1417 * record a given inode in the fixup dir so we can check its link
1418 * count when replay is done. The link count is incremented here
1419 * so the inode won't go away until we check it
1420 */
1421static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1422 struct btrfs_root *root,
1423 struct btrfs_path *path,
1424 u64 objectid)
1425{
1426 struct btrfs_key key;
1427 int ret = 0;
1428 struct inode *inode;
1429
1430 inode = read_one_inode(root, objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001431 if (!inode)
1432 return -EIO;
Chris Masone02119d2008-09-05 16:13:11 -04001433
1434 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
1435 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
1436 key.offset = objectid;
1437
1438 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1439
David Sterbab3b4aa72011-04-21 01:20:15 +02001440 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001441 if (ret == 0) {
Josef Bacik9bf7a482013-03-01 13:35:47 -05001442 if (!inode->i_nlink)
1443 set_nlink(inode, 1);
1444 else
1445 btrfs_inc_nlink(inode);
Tsutomu Itohb9959292012-06-25 21:25:22 -06001446 ret = btrfs_update_inode(trans, root, inode);
Chris Masone02119d2008-09-05 16:13:11 -04001447 } else if (ret == -EEXIST) {
1448 ret = 0;
1449 } else {
Josef Bacik36508602013-04-25 16:23:32 -04001450 BUG(); /* Logic Error */
Chris Masone02119d2008-09-05 16:13:11 -04001451 }
1452 iput(inode);
1453
1454 return ret;
1455}
1456
1457/*
1458 * when replaying the log for a directory, we only insert names
1459 * for inodes that actually exist. This means an fsync on a directory
1460 * does not implicitly fsync all the new files in it
1461 */
1462static noinline int insert_one_name(struct btrfs_trans_handle *trans,
1463 struct btrfs_root *root,
1464 struct btrfs_path *path,
1465 u64 dirid, u64 index,
1466 char *name, int name_len, u8 type,
1467 struct btrfs_key *location)
1468{
1469 struct inode *inode;
1470 struct inode *dir;
1471 int ret;
1472
1473 inode = read_one_inode(root, location->objectid);
1474 if (!inode)
1475 return -ENOENT;
1476
1477 dir = read_one_inode(root, dirid);
1478 if (!dir) {
1479 iput(inode);
1480 return -EIO;
1481 }
1482 ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
1483
1484 /* FIXME, put inode into FIXUP list */
1485
1486 iput(inode);
1487 iput(dir);
1488 return ret;
1489}
1490
1491/*
1492 * take a single entry in a log directory item and replay it into
1493 * the subvolume.
1494 *
1495 * if a conflicting item exists in the subdirectory already,
1496 * the inode it points to is unlinked and put into the link count
1497 * fix up tree.
1498 *
1499 * If a name from the log points to a file or directory that does
1500 * not exist in the FS, it is skipped. fsyncs on directories
1501 * do not force down inodes inside that directory, just changes to the
1502 * names or unlinks in a directory.
1503 */
1504static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1505 struct btrfs_root *root,
1506 struct btrfs_path *path,
1507 struct extent_buffer *eb,
1508 struct btrfs_dir_item *di,
1509 struct btrfs_key *key)
1510{
1511 char *name;
1512 int name_len;
1513 struct btrfs_dir_item *dst_di;
1514 struct btrfs_key found_key;
1515 struct btrfs_key log_key;
1516 struct inode *dir;
Chris Masone02119d2008-09-05 16:13:11 -04001517 u8 log_type;
Chris Mason4bef0842008-09-08 11:18:08 -04001518 int exists;
Josef Bacik36508602013-04-25 16:23:32 -04001519 int ret = 0;
Chris Masone02119d2008-09-05 16:13:11 -04001520
1521 dir = read_one_inode(root, key->objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001522 if (!dir)
1523 return -EIO;
Chris Masone02119d2008-09-05 16:13:11 -04001524
1525 name_len = btrfs_dir_name_len(eb, di);
1526 name = kmalloc(name_len, GFP_NOFS);
liubo2a29edc2011-01-26 06:22:08 +00001527 if (!name)
1528 return -ENOMEM;
1529
Chris Masone02119d2008-09-05 16:13:11 -04001530 log_type = btrfs_dir_type(eb, di);
1531 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1532 name_len);
1533
1534 btrfs_dir_item_key_to_cpu(eb, di, &log_key);
Chris Mason4bef0842008-09-08 11:18:08 -04001535 exists = btrfs_lookup_inode(trans, root, path, &log_key, 0);
1536 if (exists == 0)
1537 exists = 1;
1538 else
1539 exists = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02001540 btrfs_release_path(path);
Chris Mason4bef0842008-09-08 11:18:08 -04001541
Chris Masone02119d2008-09-05 16:13:11 -04001542 if (key->type == BTRFS_DIR_ITEM_KEY) {
1543 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
1544 name, name_len, 1);
Chris Masond3977122009-01-05 21:25:51 -05001545 } else if (key->type == BTRFS_DIR_INDEX_KEY) {
Chris Masone02119d2008-09-05 16:13:11 -04001546 dst_di = btrfs_lookup_dir_index_item(trans, root, path,
1547 key->objectid,
1548 key->offset, name,
1549 name_len, 1);
1550 } else {
Josef Bacik36508602013-04-25 16:23:32 -04001551 /* Corruption */
1552 ret = -EINVAL;
1553 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001554 }
David Sterbac7040052011-04-19 18:00:01 +02001555 if (IS_ERR_OR_NULL(dst_di)) {
Chris Masone02119d2008-09-05 16:13:11 -04001556 /* we need a sequence number to insert, so we only
1557 * do inserts for the BTRFS_DIR_INDEX_KEY types
1558 */
1559 if (key->type != BTRFS_DIR_INDEX_KEY)
1560 goto out;
1561 goto insert;
1562 }
1563
1564 btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key);
1565 /* the existing item matches the logged item */
1566 if (found_key.objectid == log_key.objectid &&
1567 found_key.type == log_key.type &&
1568 found_key.offset == log_key.offset &&
1569 btrfs_dir_type(path->nodes[0], dst_di) == log_type) {
1570 goto out;
1571 }
1572
1573 /*
1574 * don't drop the conflicting directory entry if the inode
1575 * for the new entry doesn't exist
1576 */
Chris Mason4bef0842008-09-08 11:18:08 -04001577 if (!exists)
Chris Masone02119d2008-09-05 16:13:11 -04001578 goto out;
1579
Chris Masone02119d2008-09-05 16:13:11 -04001580 ret = drop_one_dir_item(trans, root, path, dir, dst_di);
Josef Bacik36508602013-04-25 16:23:32 -04001581 if (ret)
1582 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001583
1584 if (key->type == BTRFS_DIR_INDEX_KEY)
1585 goto insert;
1586out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001587 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001588 kfree(name);
1589 iput(dir);
Josef Bacik36508602013-04-25 16:23:32 -04001590 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001591
1592insert:
David Sterbab3b4aa72011-04-21 01:20:15 +02001593 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001594 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1595 name, name_len, log_type, &log_key);
Josef Bacik36508602013-04-25 16:23:32 -04001596 if (ret && ret != -ENOENT)
1597 goto out;
1598 ret = 0;
Chris Masone02119d2008-09-05 16:13:11 -04001599 goto out;
1600}
1601
1602/*
1603 * find all the names in a directory item and reconcile them into
1604 * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than
1605 * one name in a directory item, but the same code gets used for
1606 * both directory index types
1607 */
1608static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
1609 struct btrfs_root *root,
1610 struct btrfs_path *path,
1611 struct extent_buffer *eb, int slot,
1612 struct btrfs_key *key)
1613{
1614 int ret;
1615 u32 item_size = btrfs_item_size_nr(eb, slot);
1616 struct btrfs_dir_item *di;
1617 int name_len;
1618 unsigned long ptr;
1619 unsigned long ptr_end;
1620
1621 ptr = btrfs_item_ptr_offset(eb, slot);
1622 ptr_end = ptr + item_size;
Chris Masond3977122009-01-05 21:25:51 -05001623 while (ptr < ptr_end) {
Chris Masone02119d2008-09-05 16:13:11 -04001624 di = (struct btrfs_dir_item *)ptr;
Josef Bacik22a94d42011-03-16 16:47:17 -04001625 if (verify_dir_item(root, eb, di))
1626 return -EIO;
Chris Masone02119d2008-09-05 16:13:11 -04001627 name_len = btrfs_dir_name_len(eb, di);
1628 ret = replay_one_name(trans, root, path, eb, di, key);
Josef Bacik36508602013-04-25 16:23:32 -04001629 if (ret)
1630 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001631 ptr = (unsigned long)(di + 1);
1632 ptr += name_len;
1633 }
1634 return 0;
1635}
1636
1637/*
1638 * directory replay has two parts. There are the standard directory
1639 * items in the log copied from the subvolume, and range items
1640 * created in the log while the subvolume was logged.
1641 *
1642 * The range items tell us which parts of the key space the log
1643 * is authoritative for. During replay, if a key in the subvolume
1644 * directory is in a logged range item, but not actually in the log
1645 * that means it was deleted from the directory before the fsync
1646 * and should be removed.
1647 */
1648static noinline int find_dir_range(struct btrfs_root *root,
1649 struct btrfs_path *path,
1650 u64 dirid, int key_type,
1651 u64 *start_ret, u64 *end_ret)
1652{
1653 struct btrfs_key key;
1654 u64 found_end;
1655 struct btrfs_dir_log_item *item;
1656 int ret;
1657 int nritems;
1658
1659 if (*start_ret == (u64)-1)
1660 return 1;
1661
1662 key.objectid = dirid;
1663 key.type = key_type;
1664 key.offset = *start_ret;
1665
1666 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1667 if (ret < 0)
1668 goto out;
1669 if (ret > 0) {
1670 if (path->slots[0] == 0)
1671 goto out;
1672 path->slots[0]--;
1673 }
1674 if (ret != 0)
1675 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1676
1677 if (key.type != key_type || key.objectid != dirid) {
1678 ret = 1;
1679 goto next;
1680 }
1681 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1682 struct btrfs_dir_log_item);
1683 found_end = btrfs_dir_log_end(path->nodes[0], item);
1684
1685 if (*start_ret >= key.offset && *start_ret <= found_end) {
1686 ret = 0;
1687 *start_ret = key.offset;
1688 *end_ret = found_end;
1689 goto out;
1690 }
1691 ret = 1;
1692next:
1693 /* check the next slot in the tree to see if it is a valid item */
1694 nritems = btrfs_header_nritems(path->nodes[0]);
1695 if (path->slots[0] >= nritems) {
1696 ret = btrfs_next_leaf(root, path);
1697 if (ret)
1698 goto out;
1699 } else {
1700 path->slots[0]++;
1701 }
1702
1703 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1704
1705 if (key.type != key_type || key.objectid != dirid) {
1706 ret = 1;
1707 goto out;
1708 }
1709 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1710 struct btrfs_dir_log_item);
1711 found_end = btrfs_dir_log_end(path->nodes[0], item);
1712 *start_ret = key.offset;
1713 *end_ret = found_end;
1714 ret = 0;
1715out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001716 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001717 return ret;
1718}
1719
1720/*
1721 * this looks for a given directory item in the log. If the directory
1722 * item is not in the log, the item is removed and the inode it points
1723 * to is unlinked
1724 */
1725static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
1726 struct btrfs_root *root,
1727 struct btrfs_root *log,
1728 struct btrfs_path *path,
1729 struct btrfs_path *log_path,
1730 struct inode *dir,
1731 struct btrfs_key *dir_key)
1732{
1733 int ret;
1734 struct extent_buffer *eb;
1735 int slot;
1736 u32 item_size;
1737 struct btrfs_dir_item *di;
1738 struct btrfs_dir_item *log_di;
1739 int name_len;
1740 unsigned long ptr;
1741 unsigned long ptr_end;
1742 char *name;
1743 struct inode *inode;
1744 struct btrfs_key location;
1745
1746again:
1747 eb = path->nodes[0];
1748 slot = path->slots[0];
1749 item_size = btrfs_item_size_nr(eb, slot);
1750 ptr = btrfs_item_ptr_offset(eb, slot);
1751 ptr_end = ptr + item_size;
Chris Masond3977122009-01-05 21:25:51 -05001752 while (ptr < ptr_end) {
Chris Masone02119d2008-09-05 16:13:11 -04001753 di = (struct btrfs_dir_item *)ptr;
Josef Bacik22a94d42011-03-16 16:47:17 -04001754 if (verify_dir_item(root, eb, di)) {
1755 ret = -EIO;
1756 goto out;
1757 }
1758
Chris Masone02119d2008-09-05 16:13:11 -04001759 name_len = btrfs_dir_name_len(eb, di);
1760 name = kmalloc(name_len, GFP_NOFS);
1761 if (!name) {
1762 ret = -ENOMEM;
1763 goto out;
1764 }
1765 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1766 name_len);
1767 log_di = NULL;
Chris Mason12fcfd22009-03-24 10:24:20 -04001768 if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) {
Chris Masone02119d2008-09-05 16:13:11 -04001769 log_di = btrfs_lookup_dir_item(trans, log, log_path,
1770 dir_key->objectid,
1771 name, name_len, 0);
Chris Mason12fcfd22009-03-24 10:24:20 -04001772 } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) {
Chris Masone02119d2008-09-05 16:13:11 -04001773 log_di = btrfs_lookup_dir_index_item(trans, log,
1774 log_path,
1775 dir_key->objectid,
1776 dir_key->offset,
1777 name, name_len, 0);
1778 }
David Sterbac7040052011-04-19 18:00:01 +02001779 if (IS_ERR_OR_NULL(log_di)) {
Chris Masone02119d2008-09-05 16:13:11 -04001780 btrfs_dir_item_key_to_cpu(eb, di, &location);
David Sterbab3b4aa72011-04-21 01:20:15 +02001781 btrfs_release_path(path);
1782 btrfs_release_path(log_path);
Chris Masone02119d2008-09-05 16:13:11 -04001783 inode = read_one_inode(root, location.objectid);
Tsutomu Itohc00e9492011-04-28 09:10:23 +00001784 if (!inode) {
1785 kfree(name);
1786 return -EIO;
1787 }
Chris Masone02119d2008-09-05 16:13:11 -04001788
1789 ret = link_to_fixup_dir(trans, root,
1790 path, location.objectid);
Josef Bacik36508602013-04-25 16:23:32 -04001791 if (ret) {
1792 kfree(name);
1793 iput(inode);
1794 goto out;
1795 }
1796
Chris Masone02119d2008-09-05 16:13:11 -04001797 btrfs_inc_nlink(inode);
1798 ret = btrfs_unlink_inode(trans, root, dir, inode,
1799 name, name_len);
Josef Bacik36508602013-04-25 16:23:32 -04001800 if (!ret)
1801 btrfs_run_delayed_items(trans, root);
Chris Masone02119d2008-09-05 16:13:11 -04001802 kfree(name);
1803 iput(inode);
Josef Bacik36508602013-04-25 16:23:32 -04001804 if (ret)
1805 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001806
1807 /* there might still be more names under this key
1808 * check and repeat if required
1809 */
1810 ret = btrfs_search_slot(NULL, root, dir_key, path,
1811 0, 0);
1812 if (ret == 0)
1813 goto again;
1814 ret = 0;
1815 goto out;
1816 }
David Sterbab3b4aa72011-04-21 01:20:15 +02001817 btrfs_release_path(log_path);
Chris Masone02119d2008-09-05 16:13:11 -04001818 kfree(name);
1819
1820 ptr = (unsigned long)(di + 1);
1821 ptr += name_len;
1822 }
1823 ret = 0;
1824out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001825 btrfs_release_path(path);
1826 btrfs_release_path(log_path);
Chris Masone02119d2008-09-05 16:13:11 -04001827 return ret;
1828}
1829
1830/*
1831 * deletion replay happens before we copy any new directory items
1832 * out of the log or out of backreferences from inodes. It
1833 * scans the log to find ranges of keys that log is authoritative for,
1834 * and then scans the directory to find items in those ranges that are
1835 * not present in the log.
1836 *
1837 * Anything we don't find in the log is unlinked and removed from the
1838 * directory.
1839 */
1840static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
1841 struct btrfs_root *root,
1842 struct btrfs_root *log,
1843 struct btrfs_path *path,
Chris Mason12fcfd22009-03-24 10:24:20 -04001844 u64 dirid, int del_all)
Chris Masone02119d2008-09-05 16:13:11 -04001845{
1846 u64 range_start;
1847 u64 range_end;
1848 int key_type = BTRFS_DIR_LOG_ITEM_KEY;
1849 int ret = 0;
1850 struct btrfs_key dir_key;
1851 struct btrfs_key found_key;
1852 struct btrfs_path *log_path;
1853 struct inode *dir;
1854
1855 dir_key.objectid = dirid;
1856 dir_key.type = BTRFS_DIR_ITEM_KEY;
1857 log_path = btrfs_alloc_path();
1858 if (!log_path)
1859 return -ENOMEM;
1860
1861 dir = read_one_inode(root, dirid);
1862 /* it isn't an error if the inode isn't there, that can happen
1863 * because we replay the deletes before we copy in the inode item
1864 * from the log
1865 */
1866 if (!dir) {
1867 btrfs_free_path(log_path);
1868 return 0;
1869 }
1870again:
1871 range_start = 0;
1872 range_end = 0;
Chris Masond3977122009-01-05 21:25:51 -05001873 while (1) {
Chris Mason12fcfd22009-03-24 10:24:20 -04001874 if (del_all)
1875 range_end = (u64)-1;
1876 else {
1877 ret = find_dir_range(log, path, dirid, key_type,
1878 &range_start, &range_end);
1879 if (ret != 0)
1880 break;
1881 }
Chris Masone02119d2008-09-05 16:13:11 -04001882
1883 dir_key.offset = range_start;
Chris Masond3977122009-01-05 21:25:51 -05001884 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04001885 int nritems;
1886 ret = btrfs_search_slot(NULL, root, &dir_key, path,
1887 0, 0);
1888 if (ret < 0)
1889 goto out;
1890
1891 nritems = btrfs_header_nritems(path->nodes[0]);
1892 if (path->slots[0] >= nritems) {
1893 ret = btrfs_next_leaf(root, path);
1894 if (ret)
1895 break;
1896 }
1897 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1898 path->slots[0]);
1899 if (found_key.objectid != dirid ||
1900 found_key.type != dir_key.type)
1901 goto next_type;
1902
1903 if (found_key.offset > range_end)
1904 break;
1905
1906 ret = check_item_in_log(trans, root, log, path,
Chris Mason12fcfd22009-03-24 10:24:20 -04001907 log_path, dir,
1908 &found_key);
Josef Bacik36508602013-04-25 16:23:32 -04001909 if (ret)
1910 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04001911 if (found_key.offset == (u64)-1)
1912 break;
1913 dir_key.offset = found_key.offset + 1;
1914 }
David Sterbab3b4aa72011-04-21 01:20:15 +02001915 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001916 if (range_end == (u64)-1)
1917 break;
1918 range_start = range_end + 1;
1919 }
1920
1921next_type:
1922 ret = 0;
1923 if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
1924 key_type = BTRFS_DIR_LOG_INDEX_KEY;
1925 dir_key.type = BTRFS_DIR_INDEX_KEY;
David Sterbab3b4aa72011-04-21 01:20:15 +02001926 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001927 goto again;
1928 }
1929out:
David Sterbab3b4aa72011-04-21 01:20:15 +02001930 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04001931 btrfs_free_path(log_path);
1932 iput(dir);
1933 return ret;
1934}
1935
1936/*
1937 * the process_func used to replay items from the log tree. This
1938 * gets called in two different stages. The first stage just looks
1939 * for inodes and makes sure they are all copied into the subvolume.
1940 *
1941 * The second stage copies all the other item types from the log into
1942 * the subvolume. The two stage approach is slower, but gets rid of
1943 * lots of complexity around inodes referencing other inodes that exist
1944 * only in the log (references come from either directory items or inode
1945 * back refs).
1946 */
1947static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1948 struct walk_control *wc, u64 gen)
1949{
1950 int nritems;
1951 struct btrfs_path *path;
1952 struct btrfs_root *root = wc->replay_dest;
1953 struct btrfs_key key;
Chris Masone02119d2008-09-05 16:13:11 -04001954 int level;
1955 int i;
1956 int ret;
1957
Tsutomu Itoh018642a2012-05-29 18:10:13 +09001958 ret = btrfs_read_buffer(eb, gen);
1959 if (ret)
1960 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04001961
1962 level = btrfs_header_level(eb);
1963
1964 if (level != 0)
1965 return 0;
1966
1967 path = btrfs_alloc_path();
Mark Fasheh1e5063d2011-07-12 10:46:06 -07001968 if (!path)
1969 return -ENOMEM;
Chris Masone02119d2008-09-05 16:13:11 -04001970
1971 nritems = btrfs_header_nritems(eb);
1972 for (i = 0; i < nritems; i++) {
1973 btrfs_item_key_to_cpu(eb, &key, i);
Chris Masone02119d2008-09-05 16:13:11 -04001974
1975 /* inode keys are done during the first stage */
1976 if (key.type == BTRFS_INODE_ITEM_KEY &&
1977 wc->stage == LOG_WALK_REPLAY_INODES) {
Chris Masone02119d2008-09-05 16:13:11 -04001978 struct btrfs_inode_item *inode_item;
1979 u32 mode;
1980
1981 inode_item = btrfs_item_ptr(eb, i,
1982 struct btrfs_inode_item);
1983 mode = btrfs_inode_mode(eb, inode_item);
1984 if (S_ISDIR(mode)) {
1985 ret = replay_dir_deletes(wc->trans,
Chris Mason12fcfd22009-03-24 10:24:20 -04001986 root, log, path, key.objectid, 0);
Josef Bacikb50c6e22013-04-25 15:55:30 -04001987 if (ret)
1988 break;
Chris Masone02119d2008-09-05 16:13:11 -04001989 }
1990 ret = overwrite_item(wc->trans, root, path,
1991 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04001992 if (ret)
1993 break;
Chris Masone02119d2008-09-05 16:13:11 -04001994
Yan, Zhengc71bf092009-11-12 09:34:40 +00001995 /* for regular files, make sure corresponding
1996 * orhpan item exist. extents past the new EOF
1997 * will be truncated later by orphan cleanup.
Chris Masone02119d2008-09-05 16:13:11 -04001998 */
1999 if (S_ISREG(mode)) {
Yan, Zhengc71bf092009-11-12 09:34:40 +00002000 ret = insert_orphan_item(wc->trans, root,
2001 key.objectid);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002002 if (ret)
2003 break;
Chris Masone02119d2008-09-05 16:13:11 -04002004 }
Yan, Zhengc71bf092009-11-12 09:34:40 +00002005
Chris Masone02119d2008-09-05 16:13:11 -04002006 ret = link_to_fixup_dir(wc->trans, root,
2007 path, key.objectid);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002008 if (ret)
2009 break;
Chris Masone02119d2008-09-05 16:13:11 -04002010 }
2011 if (wc->stage < LOG_WALK_REPLAY_ALL)
2012 continue;
2013
2014 /* these keys are simply copied */
2015 if (key.type == BTRFS_XATTR_ITEM_KEY) {
2016 ret = overwrite_item(wc->trans, root, path,
2017 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002018 if (ret)
2019 break;
Liu Bo2da1c662013-05-26 13:50:29 +00002020 } else if (key.type == BTRFS_INODE_REF_KEY ||
2021 key.type == BTRFS_INODE_EXTREF_KEY) {
Mark Fashehf1863732012-08-08 11:32:27 -07002022 ret = add_inode_ref(wc->trans, root, log, path,
2023 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002024 if (ret && ret != -ENOENT)
2025 break;
2026 ret = 0;
Chris Masone02119d2008-09-05 16:13:11 -04002027 } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
2028 ret = replay_one_extent(wc->trans, root, path,
2029 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002030 if (ret)
2031 break;
Chris Masone02119d2008-09-05 16:13:11 -04002032 } else if (key.type == BTRFS_DIR_ITEM_KEY ||
2033 key.type == BTRFS_DIR_INDEX_KEY) {
2034 ret = replay_one_dir_item(wc->trans, root, path,
2035 eb, i, &key);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002036 if (ret)
2037 break;
Chris Masone02119d2008-09-05 16:13:11 -04002038 }
2039 }
2040 btrfs_free_path(path);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002041 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04002042}
2043
Chris Masond3977122009-01-05 21:25:51 -05002044static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
Chris Masone02119d2008-09-05 16:13:11 -04002045 struct btrfs_root *root,
2046 struct btrfs_path *path, int *level,
2047 struct walk_control *wc)
2048{
2049 u64 root_owner;
Chris Masone02119d2008-09-05 16:13:11 -04002050 u64 bytenr;
2051 u64 ptr_gen;
2052 struct extent_buffer *next;
2053 struct extent_buffer *cur;
2054 struct extent_buffer *parent;
2055 u32 blocksize;
2056 int ret = 0;
2057
2058 WARN_ON(*level < 0);
2059 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2060
Chris Masond3977122009-01-05 21:25:51 -05002061 while (*level > 0) {
Chris Masone02119d2008-09-05 16:13:11 -04002062 WARN_ON(*level < 0);
2063 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2064 cur = path->nodes[*level];
2065
2066 if (btrfs_header_level(cur) != *level)
2067 WARN_ON(1);
2068
2069 if (path->slots[*level] >=
2070 btrfs_header_nritems(cur))
2071 break;
2072
2073 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2074 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2075 blocksize = btrfs_level_size(root, *level - 1);
2076
2077 parent = path->nodes[*level];
2078 root_owner = btrfs_header_owner(parent);
Chris Masone02119d2008-09-05 16:13:11 -04002079
2080 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
liubo2a29edc2011-01-26 06:22:08 +00002081 if (!next)
2082 return -ENOMEM;
Chris Masone02119d2008-09-05 16:13:11 -04002083
Chris Masone02119d2008-09-05 16:13:11 -04002084 if (*level == 1) {
Mark Fasheh1e5063d2011-07-12 10:46:06 -07002085 ret = wc->process_func(root, next, wc, ptr_gen);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002086 if (ret) {
2087 free_extent_buffer(next);
Mark Fasheh1e5063d2011-07-12 10:46:06 -07002088 return ret;
Josef Bacikb50c6e22013-04-25 15:55:30 -04002089 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002090
Chris Masone02119d2008-09-05 16:13:11 -04002091 path->slots[*level]++;
2092 if (wc->free) {
Tsutomu Itoh018642a2012-05-29 18:10:13 +09002093 ret = btrfs_read_buffer(next, ptr_gen);
2094 if (ret) {
2095 free_extent_buffer(next);
2096 return ret;
2097 }
Chris Masone02119d2008-09-05 16:13:11 -04002098
2099 btrfs_tree_lock(next);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002100 btrfs_set_lock_blocking(next);
Chris Masonbd681512011-07-16 15:23:14 -04002101 clean_tree_block(trans, root, next);
Chris Masone02119d2008-09-05 16:13:11 -04002102 btrfs_wait_tree_block_writeback(next);
2103 btrfs_tree_unlock(next);
2104
Chris Masone02119d2008-09-05 16:13:11 -04002105 WARN_ON(root_owner !=
2106 BTRFS_TREE_LOG_OBJECTID);
Chris Masone688b7252011-10-31 20:52:39 -04002107 ret = btrfs_free_and_pin_reserved_extent(root,
Chris Masond00aff02008-09-11 15:54:42 -04002108 bytenr, blocksize);
Josef Bacik36508602013-04-25 16:23:32 -04002109 if (ret) {
2110 free_extent_buffer(next);
2111 return ret;
2112 }
Chris Masone02119d2008-09-05 16:13:11 -04002113 }
2114 free_extent_buffer(next);
2115 continue;
2116 }
Tsutomu Itoh018642a2012-05-29 18:10:13 +09002117 ret = btrfs_read_buffer(next, ptr_gen);
2118 if (ret) {
2119 free_extent_buffer(next);
2120 return ret;
2121 }
Chris Masone02119d2008-09-05 16:13:11 -04002122
2123 WARN_ON(*level <= 0);
2124 if (path->nodes[*level-1])
2125 free_extent_buffer(path->nodes[*level-1]);
2126 path->nodes[*level-1] = next;
2127 *level = btrfs_header_level(next);
2128 path->slots[*level] = 0;
2129 cond_resched();
2130 }
2131 WARN_ON(*level < 0);
2132 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2133
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002134 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
Chris Masone02119d2008-09-05 16:13:11 -04002135
2136 cond_resched();
2137 return 0;
2138}
2139
Chris Masond3977122009-01-05 21:25:51 -05002140static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
Chris Masone02119d2008-09-05 16:13:11 -04002141 struct btrfs_root *root,
2142 struct btrfs_path *path, int *level,
2143 struct walk_control *wc)
2144{
2145 u64 root_owner;
Chris Masone02119d2008-09-05 16:13:11 -04002146 int i;
2147 int slot;
2148 int ret;
2149
Chris Masond3977122009-01-05 21:25:51 -05002150 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
Chris Masone02119d2008-09-05 16:13:11 -04002151 slot = path->slots[i];
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002152 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
Chris Masone02119d2008-09-05 16:13:11 -04002153 path->slots[i]++;
2154 *level = i;
2155 WARN_ON(*level == 0);
2156 return 0;
2157 } else {
Zheng Yan31840ae2008-09-23 13:14:14 -04002158 struct extent_buffer *parent;
2159 if (path->nodes[*level] == root->node)
2160 parent = path->nodes[*level];
2161 else
2162 parent = path->nodes[*level + 1];
2163
2164 root_owner = btrfs_header_owner(parent);
Mark Fasheh1e5063d2011-07-12 10:46:06 -07002165 ret = wc->process_func(root, path->nodes[*level], wc,
Chris Masone02119d2008-09-05 16:13:11 -04002166 btrfs_header_generation(path->nodes[*level]));
Mark Fasheh1e5063d2011-07-12 10:46:06 -07002167 if (ret)
2168 return ret;
2169
Chris Masone02119d2008-09-05 16:13:11 -04002170 if (wc->free) {
2171 struct extent_buffer *next;
2172
2173 next = path->nodes[*level];
2174
2175 btrfs_tree_lock(next);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002176 btrfs_set_lock_blocking(next);
Chris Masonbd681512011-07-16 15:23:14 -04002177 clean_tree_block(trans, root, next);
Chris Masone02119d2008-09-05 16:13:11 -04002178 btrfs_wait_tree_block_writeback(next);
2179 btrfs_tree_unlock(next);
2180
Chris Masone02119d2008-09-05 16:13:11 -04002181 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
Chris Masone688b7252011-10-31 20:52:39 -04002182 ret = btrfs_free_and_pin_reserved_extent(root,
Chris Masone02119d2008-09-05 16:13:11 -04002183 path->nodes[*level]->start,
Chris Masond00aff02008-09-11 15:54:42 -04002184 path->nodes[*level]->len);
Josef Bacik36508602013-04-25 16:23:32 -04002185 if (ret)
2186 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04002187 }
2188 free_extent_buffer(path->nodes[*level]);
2189 path->nodes[*level] = NULL;
2190 *level = i + 1;
2191 }
2192 }
2193 return 1;
2194}
2195
2196/*
2197 * drop the reference count on the tree rooted at 'snap'. This traverses
2198 * the tree freeing any blocks that have a ref count of zero after being
2199 * decremented.
2200 */
2201static int walk_log_tree(struct btrfs_trans_handle *trans,
2202 struct btrfs_root *log, struct walk_control *wc)
2203{
2204 int ret = 0;
2205 int wret;
2206 int level;
2207 struct btrfs_path *path;
Chris Masone02119d2008-09-05 16:13:11 -04002208 int orig_level;
2209
2210 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00002211 if (!path)
2212 return -ENOMEM;
Chris Masone02119d2008-09-05 16:13:11 -04002213
2214 level = btrfs_header_level(log->node);
2215 orig_level = level;
2216 path->nodes[level] = log->node;
2217 extent_buffer_get(log->node);
2218 path->slots[level] = 0;
2219
Chris Masond3977122009-01-05 21:25:51 -05002220 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04002221 wret = walk_down_log_tree(trans, log, path, &level, wc);
2222 if (wret > 0)
2223 break;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002224 if (wret < 0) {
Chris Masone02119d2008-09-05 16:13:11 -04002225 ret = wret;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002226 goto out;
2227 }
Chris Masone02119d2008-09-05 16:13:11 -04002228
2229 wret = walk_up_log_tree(trans, log, path, &level, wc);
2230 if (wret > 0)
2231 break;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002232 if (wret < 0) {
Chris Masone02119d2008-09-05 16:13:11 -04002233 ret = wret;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002234 goto out;
2235 }
Chris Masone02119d2008-09-05 16:13:11 -04002236 }
2237
2238 /* was the root node processed? if not, catch it here */
2239 if (path->nodes[orig_level]) {
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002240 ret = wc->process_func(log, path->nodes[orig_level], wc,
Chris Masone02119d2008-09-05 16:13:11 -04002241 btrfs_header_generation(path->nodes[orig_level]));
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002242 if (ret)
2243 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04002244 if (wc->free) {
2245 struct extent_buffer *next;
2246
2247 next = path->nodes[orig_level];
2248
2249 btrfs_tree_lock(next);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002250 btrfs_set_lock_blocking(next);
Chris Masonbd681512011-07-16 15:23:14 -04002251 clean_tree_block(trans, log, next);
Chris Masone02119d2008-09-05 16:13:11 -04002252 btrfs_wait_tree_block_writeback(next);
2253 btrfs_tree_unlock(next);
2254
Chris Masone02119d2008-09-05 16:13:11 -04002255 WARN_ON(log->root_key.objectid !=
2256 BTRFS_TREE_LOG_OBJECTID);
Chris Masone688b7252011-10-31 20:52:39 -04002257 ret = btrfs_free_and_pin_reserved_extent(log, next->start,
Chris Masond00aff02008-09-11 15:54:42 -04002258 next->len);
Josef Bacik36508602013-04-25 16:23:32 -04002259 if (ret)
2260 goto out;
Chris Masone02119d2008-09-05 16:13:11 -04002261 }
2262 }
2263
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002264out:
Chris Masone02119d2008-09-05 16:13:11 -04002265 btrfs_free_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002266 return ret;
2267}
2268
Yan Zheng7237f182009-01-21 12:54:03 -05002269/*
2270 * helper function to update the item for a given subvolumes log root
2271 * in the tree of log roots
2272 */
2273static int update_log_root(struct btrfs_trans_handle *trans,
2274 struct btrfs_root *log)
2275{
2276 int ret;
2277
2278 if (log->log_transid == 1) {
2279 /* insert root item on the first sync */
2280 ret = btrfs_insert_root(trans, log->fs_info->log_root_tree,
2281 &log->root_key, &log->root_item);
2282 } else {
2283 ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
2284 &log->root_key, &log->root_item);
2285 }
2286 return ret;
2287}
2288
Chris Mason12fcfd22009-03-24 10:24:20 -04002289static int wait_log_commit(struct btrfs_trans_handle *trans,
2290 struct btrfs_root *root, unsigned long transid)
Chris Masone02119d2008-09-05 16:13:11 -04002291{
2292 DEFINE_WAIT(wait);
Yan Zheng7237f182009-01-21 12:54:03 -05002293 int index = transid % 2;
Chris Masone02119d2008-09-05 16:13:11 -04002294
Yan Zheng7237f182009-01-21 12:54:03 -05002295 /*
2296 * we only allow two pending log transactions at a time,
2297 * so we know that if ours is more than 2 older than the
2298 * current transaction, we're done
2299 */
Chris Masone02119d2008-09-05 16:13:11 -04002300 do {
Yan Zheng7237f182009-01-21 12:54:03 -05002301 prepare_to_wait(&root->log_commit_wait[index],
2302 &wait, TASK_UNINTERRUPTIBLE);
2303 mutex_unlock(&root->log_mutex);
Chris Mason12fcfd22009-03-24 10:24:20 -04002304
2305 if (root->fs_info->last_trans_log_full_commit !=
2306 trans->transid && root->log_transid < transid + 2 &&
Yan Zheng7237f182009-01-21 12:54:03 -05002307 atomic_read(&root->log_commit[index]))
Chris Masone02119d2008-09-05 16:13:11 -04002308 schedule();
Chris Mason12fcfd22009-03-24 10:24:20 -04002309
Yan Zheng7237f182009-01-21 12:54:03 -05002310 finish_wait(&root->log_commit_wait[index], &wait);
2311 mutex_lock(&root->log_mutex);
Jan Kara6dd70ce2012-01-26 15:01:11 -05002312 } while (root->fs_info->last_trans_log_full_commit !=
2313 trans->transid && root->log_transid < transid + 2 &&
Yan Zheng7237f182009-01-21 12:54:03 -05002314 atomic_read(&root->log_commit[index]));
2315 return 0;
2316}
2317
Jeff Mahoney143bede2012-03-01 14:56:26 +01002318static void wait_for_writer(struct btrfs_trans_handle *trans,
2319 struct btrfs_root *root)
Yan Zheng7237f182009-01-21 12:54:03 -05002320{
2321 DEFINE_WAIT(wait);
Jan Kara6dd70ce2012-01-26 15:01:11 -05002322 while (root->fs_info->last_trans_log_full_commit !=
2323 trans->transid && atomic_read(&root->log_writers)) {
Yan Zheng7237f182009-01-21 12:54:03 -05002324 prepare_to_wait(&root->log_writer_wait,
2325 &wait, TASK_UNINTERRUPTIBLE);
2326 mutex_unlock(&root->log_mutex);
Chris Mason12fcfd22009-03-24 10:24:20 -04002327 if (root->fs_info->last_trans_log_full_commit !=
2328 trans->transid && atomic_read(&root->log_writers))
Yan Zheng7237f182009-01-21 12:54:03 -05002329 schedule();
2330 mutex_lock(&root->log_mutex);
2331 finish_wait(&root->log_writer_wait, &wait);
2332 }
Chris Masone02119d2008-09-05 16:13:11 -04002333}
2334
/*
 * btrfs_sync_log sends a given tree log down to the disk and
 * updates the super blocks to record it.  When this call is done,
 * you know that any inodes previously logged are safely on disk only
 * if it returns 0.
 *
 * Any other return value means you need to call btrfs_commit_transaction.
 * Some of the edge cases for fsyncing directories that have had unlinks
 * or renames done in the past mean that sometimes the only safe
 * fsync is to commit the whole FS.  When btrfs_sync_log returns -EAGAIN,
 * that has happened.
 */
2347int btrfs_sync_log(struct btrfs_trans_handle *trans,
2348 struct btrfs_root *root)
2349{
Yan Zheng7237f182009-01-21 12:54:03 -05002350 int index1;
2351 int index2;
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002352 int mark;
Chris Masone02119d2008-09-05 16:13:11 -04002353 int ret;
Chris Masone02119d2008-09-05 16:13:11 -04002354 struct btrfs_root *log = root->log_root;
Yan Zheng7237f182009-01-21 12:54:03 -05002355 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002356 unsigned long log_transid = 0;
Miao Xiec6adc9c2013-05-28 10:05:39 +00002357 struct blk_plug plug;
Chris Masone02119d2008-09-05 16:13:11 -04002358
Yan Zheng7237f182009-01-21 12:54:03 -05002359 mutex_lock(&root->log_mutex);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002360 log_transid = root->log_transid;
Yan Zheng7237f182009-01-21 12:54:03 -05002361 index1 = root->log_transid % 2;
2362 if (atomic_read(&root->log_commit[index1])) {
Chris Mason12fcfd22009-03-24 10:24:20 -04002363 wait_log_commit(trans, root, root->log_transid);
Yan Zheng7237f182009-01-21 12:54:03 -05002364 mutex_unlock(&root->log_mutex);
2365 return 0;
Chris Masone02119d2008-09-05 16:13:11 -04002366 }
Yan Zheng7237f182009-01-21 12:54:03 -05002367 atomic_set(&root->log_commit[index1], 1);
2368
2369 /* wait for previous tree log sync to complete */
2370 if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
Chris Mason12fcfd22009-03-24 10:24:20 -04002371 wait_log_commit(trans, root, root->log_transid - 1);
Yan, Zheng86df7eb2009-10-14 09:24:59 -04002372 while (1) {
Miao Xie2ecb7922012-09-06 04:04:27 -06002373 int batch = atomic_read(&root->log_batch);
Chris Masoncd354ad2011-10-20 15:45:37 -04002374 /* when we're on an ssd, just kick the log commit out */
2375 if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) {
Yan, Zheng86df7eb2009-10-14 09:24:59 -04002376 mutex_unlock(&root->log_mutex);
2377 schedule_timeout_uninterruptible(1);
2378 mutex_lock(&root->log_mutex);
2379 }
Chris Mason12fcfd22009-03-24 10:24:20 -04002380 wait_for_writer(trans, root);
Miao Xie2ecb7922012-09-06 04:04:27 -06002381 if (batch == atomic_read(&root->log_batch))
Chris Masone02119d2008-09-05 16:13:11 -04002382 break;
2383 }
Chris Masond0c803c2008-09-11 16:17:57 -04002384
Chris Mason12fcfd22009-03-24 10:24:20 -04002385 /* bail out if we need to do a full commit */
2386 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2387 ret = -EAGAIN;
Josef Bacik2ab28f32012-10-12 15:27:49 -04002388 btrfs_free_logged_extents(log, log_transid);
Chris Mason12fcfd22009-03-24 10:24:20 -04002389 mutex_unlock(&root->log_mutex);
2390 goto out;
2391 }
2392
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002393 if (log_transid % 2 == 0)
2394 mark = EXTENT_DIRTY;
2395 else
2396 mark = EXTENT_NEW;
2397
Chris Mason690587d2009-10-13 13:29:19 -04002398 /* we start IO on all the marked extents here, but we don't actually
2399 * wait for them until later.
2400 */
Miao Xiec6adc9c2013-05-28 10:05:39 +00002401 blk_start_plug(&plug);
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002402 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002403 if (ret) {
Miao Xiec6adc9c2013-05-28 10:05:39 +00002404 blk_finish_plug(&plug);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002405 btrfs_abort_transaction(trans, root, ret);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002406 btrfs_free_logged_extents(log, log_transid);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002407 mutex_unlock(&root->log_mutex);
2408 goto out;
2409 }
Yan Zheng7237f182009-01-21 12:54:03 -05002410
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002411 btrfs_set_root_node(&log->root_item, log->node);
Yan Zheng7237f182009-01-21 12:54:03 -05002412
Yan Zheng7237f182009-01-21 12:54:03 -05002413 root->log_transid++;
2414 log->log_transid = root->log_transid;
Josef Bacikff782e02009-10-08 15:30:04 -04002415 root->log_start_pid = 0;
Yan Zheng7237f182009-01-21 12:54:03 -05002416 smp_mb();
2417 /*
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002418 * IO has been started, blocks of the log tree have WRITTEN flag set
2419 * in their headers. new modifications of the log will be written to
2420 * new positions. so it's safe to allow log writers to go in.
Yan Zheng7237f182009-01-21 12:54:03 -05002421 */
2422 mutex_unlock(&root->log_mutex);
2423
2424 mutex_lock(&log_root_tree->log_mutex);
Miao Xie2ecb7922012-09-06 04:04:27 -06002425 atomic_inc(&log_root_tree->log_batch);
Yan Zheng7237f182009-01-21 12:54:03 -05002426 atomic_inc(&log_root_tree->log_writers);
2427 mutex_unlock(&log_root_tree->log_mutex);
2428
2429 ret = update_log_root(trans, log);
Yan Zheng7237f182009-01-21 12:54:03 -05002430
2431 mutex_lock(&log_root_tree->log_mutex);
2432 if (atomic_dec_and_test(&log_root_tree->log_writers)) {
2433 smp_mb();
2434 if (waitqueue_active(&log_root_tree->log_writer_wait))
2435 wake_up(&log_root_tree->log_writer_wait);
2436 }
2437
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002438 if (ret) {
Miao Xiec6adc9c2013-05-28 10:05:39 +00002439 blk_finish_plug(&plug);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002440 if (ret != -ENOSPC) {
2441 btrfs_abort_transaction(trans, root, ret);
2442 mutex_unlock(&log_root_tree->log_mutex);
2443 goto out;
2444 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002445 root->fs_info->last_trans_log_full_commit = trans->transid;
2446 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002447 btrfs_free_logged_extents(log, log_transid);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002448 mutex_unlock(&log_root_tree->log_mutex);
2449 ret = -EAGAIN;
2450 goto out;
2451 }
2452
Yan Zheng7237f182009-01-21 12:54:03 -05002453 index2 = log_root_tree->log_transid % 2;
2454 if (atomic_read(&log_root_tree->log_commit[index2])) {
Miao Xiec6adc9c2013-05-28 10:05:39 +00002455 blk_finish_plug(&plug);
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002456 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
Chris Mason12fcfd22009-03-24 10:24:20 -04002457 wait_log_commit(trans, log_root_tree,
2458 log_root_tree->log_transid);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002459 btrfs_free_logged_extents(log, log_transid);
Yan Zheng7237f182009-01-21 12:54:03 -05002460 mutex_unlock(&log_root_tree->log_mutex);
Chris Masonb31eabd2011-01-31 16:48:24 -05002461 ret = 0;
Yan Zheng7237f182009-01-21 12:54:03 -05002462 goto out;
2463 }
2464 atomic_set(&log_root_tree->log_commit[index2], 1);
2465
Chris Mason12fcfd22009-03-24 10:24:20 -04002466 if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
2467 wait_log_commit(trans, log_root_tree,
2468 log_root_tree->log_transid - 1);
2469 }
Yan Zheng7237f182009-01-21 12:54:03 -05002470
Chris Mason12fcfd22009-03-24 10:24:20 -04002471 wait_for_writer(trans, log_root_tree);
2472
2473 /*
2474 * now that we've moved on to the tree of log tree roots,
2475 * check the full commit flag again
2476 */
2477 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
Miao Xiec6adc9c2013-05-28 10:05:39 +00002478 blk_finish_plug(&plug);
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002479 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002480 btrfs_free_logged_extents(log, log_transid);
Chris Mason12fcfd22009-03-24 10:24:20 -04002481 mutex_unlock(&log_root_tree->log_mutex);
2482 ret = -EAGAIN;
2483 goto out_wake_log_root;
2484 }
Yan Zheng7237f182009-01-21 12:54:03 -05002485
Miao Xiec6adc9c2013-05-28 10:05:39 +00002486 ret = btrfs_write_marked_extents(log_root_tree,
2487 &log_root_tree->dirty_log_pages,
2488 EXTENT_DIRTY | EXTENT_NEW);
2489 blk_finish_plug(&plug);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002490 if (ret) {
2491 btrfs_abort_transaction(trans, root, ret);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002492 btrfs_free_logged_extents(log, log_transid);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002493 mutex_unlock(&log_root_tree->log_mutex);
2494 goto out_wake_log_root;
2495 }
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002496 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
Miao Xiec6adc9c2013-05-28 10:05:39 +00002497 btrfs_wait_marked_extents(log_root_tree,
2498 &log_root_tree->dirty_log_pages,
2499 EXTENT_NEW | EXTENT_DIRTY);
Josef Bacik2ab28f32012-10-12 15:27:49 -04002500 btrfs_wait_logged_extents(log, log_transid);
Chris Masone02119d2008-09-05 16:13:11 -04002501
David Sterba6c417612011-04-13 15:41:04 +02002502 btrfs_set_super_log_root(root->fs_info->super_for_commit,
Yan Zheng7237f182009-01-21 12:54:03 -05002503 log_root_tree->node->start);
David Sterba6c417612011-04-13 15:41:04 +02002504 btrfs_set_super_log_root_level(root->fs_info->super_for_commit,
Yan Zheng7237f182009-01-21 12:54:03 -05002505 btrfs_header_level(log_root_tree->node));
Chris Masone02119d2008-09-05 16:13:11 -04002506
Yan Zheng7237f182009-01-21 12:54:03 -05002507 log_root_tree->log_transid++;
Chris Masone02119d2008-09-05 16:13:11 -04002508 smp_mb();
Yan Zheng7237f182009-01-21 12:54:03 -05002509
2510 mutex_unlock(&log_root_tree->log_mutex);
2511
2512 /*
2513 * nobody else is going to jump in and write the ctree
2514 * super here because the log_commit atomic below is protecting
2515 * us. We must be called with a transaction handle pinning
2516 * the running transaction open, so a full commit can't hop
2517 * in and cause problems either.
2518 */
Arne Jansena2de7332011-03-08 14:14:00 +01002519 btrfs_scrub_pause_super(root);
Stefan Behrens5af3e8c2012-08-01 18:56:49 +02002520 ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
Arne Jansena2de7332011-03-08 14:14:00 +01002521 btrfs_scrub_continue_super(root);
Stefan Behrens5af3e8c2012-08-01 18:56:49 +02002522 if (ret) {
2523 btrfs_abort_transaction(trans, root, ret);
2524 goto out_wake_log_root;
2525 }
Yan Zheng7237f182009-01-21 12:54:03 -05002526
Chris Mason257c62e2009-10-13 13:21:08 -04002527 mutex_lock(&root->log_mutex);
2528 if (root->last_log_commit < log_transid)
2529 root->last_log_commit = log_transid;
2530 mutex_unlock(&root->log_mutex);
2531
Chris Mason12fcfd22009-03-24 10:24:20 -04002532out_wake_log_root:
Yan Zheng7237f182009-01-21 12:54:03 -05002533 atomic_set(&log_root_tree->log_commit[index2], 0);
2534 smp_mb();
2535 if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
2536 wake_up(&log_root_tree->log_commit_wait[index2]);
Chris Masone02119d2008-09-05 16:13:11 -04002537out:
Yan Zheng7237f182009-01-21 12:54:03 -05002538 atomic_set(&root->log_commit[index1], 0);
2539 smp_mb();
2540 if (waitqueue_active(&root->log_commit_wait[index1]))
2541 wake_up(&root->log_commit_wait[index1]);
Chris Masonb31eabd2011-01-31 16:48:24 -05002542 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04002543}
2544
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002545static void free_log_tree(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *log)
Chris Masone02119d2008-09-05 16:13:11 -04002547{
2548 int ret;
Chris Masond0c803c2008-09-11 16:17:57 -04002549 u64 start;
2550 u64 end;
Chris Masone02119d2008-09-05 16:13:11 -04002551 struct walk_control wc = {
2552 .free = 1,
2553 .process_func = process_one_buffer
2554 };
2555
Liu Bo33217192013-02-27 13:28:24 +00002556 if (trans) {
2557 ret = walk_log_tree(trans, log, &wc);
Josef Bacik36508602013-04-25 16:23:32 -04002558
2559 /* I don't think this can happen but just in case */
2560 if (ret)
2561 btrfs_abort_transaction(trans, log, ret);
Liu Bo33217192013-02-27 13:28:24 +00002562 }
Chris Masone02119d2008-09-05 16:13:11 -04002563
Chris Masond3977122009-01-05 21:25:51 -05002564 while (1) {
Chris Masond0c803c2008-09-11 16:17:57 -04002565 ret = find_first_extent_bit(&log->dirty_log_pages,
Josef Bacike6138872012-09-27 17:07:30 -04002566 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
2567 NULL);
Chris Masond0c803c2008-09-11 16:17:57 -04002568 if (ret)
2569 break;
2570
Yan, Zheng8cef4e12009-11-12 09:33:26 +00002571 clear_extent_bits(&log->dirty_log_pages, start, end,
2572 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
Chris Masond0c803c2008-09-11 16:17:57 -04002573 }
2574
Josef Bacik2ab28f32012-10-12 15:27:49 -04002575 /*
2576 * We may have short-circuited the log tree with the full commit logic
2577 * and left ordered extents on our list, so clear these out to keep us
2578 * from leaking inodes and memory.
2579 */
2580 btrfs_free_logged_extents(log, 0);
2581 btrfs_free_logged_extents(log, 1);
2582
Yan Zheng7237f182009-01-21 12:54:03 -05002583 free_extent_buffer(log->node);
2584 kfree(log);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002585}
2586
2587/*
2588 * free all the extents used by the tree log. This should be called
2589 * at commit time of the full transaction
2590 */
2591int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2592{
2593 if (root->log_root) {
2594 free_log_tree(trans, root->log_root);
2595 root->log_root = NULL;
2596 }
2597 return 0;
2598}
2599
2600int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
2601 struct btrfs_fs_info *fs_info)
2602{
2603 if (fs_info->log_root_tree) {
2604 free_log_tree(trans, fs_info->log_root_tree);
2605 fs_info->log_root_tree = NULL;
2606 }
Chris Masone02119d2008-09-05 16:13:11 -04002607 return 0;
2608}
2609
2610/*
Chris Masone02119d2008-09-05 16:13:11 -04002611 * If both a file and directory are logged, and unlinks or renames are
2612 * mixed in, we have a few interesting corners:
2613 *
2614 * create file X in dir Y
2615 * link file X to X.link in dir Y
2616 * fsync file X
2617 * unlink file X but leave X.link
2618 * fsync dir Y
2619 *
2620 * After a crash we would expect only X.link to exist. But file X
2621 * didn't get fsync'd again so the log has back refs for X and X.link.
2622 *
2623 * We solve this by removing directory entries and inode backrefs from the
2624 * log when a file that was logged in the current transaction is
2625 * unlinked. Any later fsync will include the updated log entries, and
2626 * we'll be able to reconstruct the proper directory items from backrefs.
2627 *
2628 * This optimizations allows us to avoid relogging the entire inode
2629 * or the entire directory.
2630 */
2631int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2632 struct btrfs_root *root,
2633 const char *name, int name_len,
2634 struct inode *dir, u64 index)
2635{
2636 struct btrfs_root *log;
2637 struct btrfs_dir_item *di;
2638 struct btrfs_path *path;
2639 int ret;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002640 int err = 0;
Chris Masone02119d2008-09-05 16:13:11 -04002641 int bytes_del = 0;
Li Zefan33345d012011-04-20 10:31:50 +08002642 u64 dir_ino = btrfs_ino(dir);
Chris Masone02119d2008-09-05 16:13:11 -04002643
Chris Mason3a5f1d42008-09-11 15:53:37 -04002644 if (BTRFS_I(dir)->logged_trans < trans->transid)
2645 return 0;
2646
Chris Masone02119d2008-09-05 16:13:11 -04002647 ret = join_running_log_trans(root);
2648 if (ret)
2649 return 0;
2650
2651 mutex_lock(&BTRFS_I(dir)->log_mutex);
2652
2653 log = root->log_root;
2654 path = btrfs_alloc_path();
Tsutomu Itoha62f44a2011-04-25 19:43:51 -04002655 if (!path) {
2656 err = -ENOMEM;
2657 goto out_unlock;
2658 }
liubo2a29edc2011-01-26 06:22:08 +00002659
Li Zefan33345d012011-04-20 10:31:50 +08002660 di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
Chris Masone02119d2008-09-05 16:13:11 -04002661 name, name_len, -1);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002662 if (IS_ERR(di)) {
2663 err = PTR_ERR(di);
2664 goto fail;
2665 }
2666 if (di) {
Chris Masone02119d2008-09-05 16:13:11 -04002667 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2668 bytes_del += name_len;
Josef Bacik36508602013-04-25 16:23:32 -04002669 if (ret) {
2670 err = ret;
2671 goto fail;
2672 }
Chris Masone02119d2008-09-05 16:13:11 -04002673 }
David Sterbab3b4aa72011-04-21 01:20:15 +02002674 btrfs_release_path(path);
Li Zefan33345d012011-04-20 10:31:50 +08002675 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
Chris Masone02119d2008-09-05 16:13:11 -04002676 index, name, name_len, -1);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002677 if (IS_ERR(di)) {
2678 err = PTR_ERR(di);
2679 goto fail;
2680 }
2681 if (di) {
Chris Masone02119d2008-09-05 16:13:11 -04002682 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2683 bytes_del += name_len;
Josef Bacik36508602013-04-25 16:23:32 -04002684 if (ret) {
2685 err = ret;
2686 goto fail;
2687 }
Chris Masone02119d2008-09-05 16:13:11 -04002688 }
2689
2690 /* update the directory size in the log to reflect the names
2691 * we have removed
2692 */
2693 if (bytes_del) {
2694 struct btrfs_key key;
2695
Li Zefan33345d012011-04-20 10:31:50 +08002696 key.objectid = dir_ino;
Chris Masone02119d2008-09-05 16:13:11 -04002697 key.offset = 0;
2698 key.type = BTRFS_INODE_ITEM_KEY;
David Sterbab3b4aa72011-04-21 01:20:15 +02002699 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002700
2701 ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002702 if (ret < 0) {
2703 err = ret;
2704 goto fail;
2705 }
Chris Masone02119d2008-09-05 16:13:11 -04002706 if (ret == 0) {
2707 struct btrfs_inode_item *item;
2708 u64 i_size;
2709
2710 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2711 struct btrfs_inode_item);
2712 i_size = btrfs_inode_size(path->nodes[0], item);
2713 if (i_size > bytes_del)
2714 i_size -= bytes_del;
2715 else
2716 i_size = 0;
2717 btrfs_set_inode_size(path->nodes[0], item, i_size);
2718 btrfs_mark_buffer_dirty(path->nodes[0]);
2719 } else
2720 ret = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02002721 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002722 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002723fail:
Chris Masone02119d2008-09-05 16:13:11 -04002724 btrfs_free_path(path);
Tsutomu Itoha62f44a2011-04-25 19:43:51 -04002725out_unlock:
Chris Masone02119d2008-09-05 16:13:11 -04002726 mutex_unlock(&BTRFS_I(dir)->log_mutex);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002727 if (ret == -ENOSPC) {
2728 root->fs_info->last_trans_log_full_commit = trans->transid;
2729 ret = 0;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002730 } else if (ret < 0)
2731 btrfs_abort_transaction(trans, root, ret);
2732
Chris Mason12fcfd22009-03-24 10:24:20 -04002733 btrfs_end_log_trans(root);
Chris Masone02119d2008-09-05 16:13:11 -04002734
Andi Kleen411fc6b2010-10-29 15:14:31 -04002735 return err;
Chris Masone02119d2008-09-05 16:13:11 -04002736}
2737
2738/* see comments for btrfs_del_dir_entries_in_log */
2739int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2740 struct btrfs_root *root,
2741 const char *name, int name_len,
2742 struct inode *inode, u64 dirid)
2743{
2744 struct btrfs_root *log;
2745 u64 index;
2746 int ret;
2747
Chris Mason3a5f1d42008-09-11 15:53:37 -04002748 if (BTRFS_I(inode)->logged_trans < trans->transid)
2749 return 0;
2750
Chris Masone02119d2008-09-05 16:13:11 -04002751 ret = join_running_log_trans(root);
2752 if (ret)
2753 return 0;
2754 log = root->log_root;
2755 mutex_lock(&BTRFS_I(inode)->log_mutex);
2756
Li Zefan33345d012011-04-20 10:31:50 +08002757 ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
Chris Masone02119d2008-09-05 16:13:11 -04002758 dirid, &index);
2759 mutex_unlock(&BTRFS_I(inode)->log_mutex);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002760 if (ret == -ENOSPC) {
2761 root->fs_info->last_trans_log_full_commit = trans->transid;
2762 ret = 0;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002763 } else if (ret < 0 && ret != -ENOENT)
2764 btrfs_abort_transaction(trans, root, ret);
Chris Mason12fcfd22009-03-24 10:24:20 -04002765 btrfs_end_log_trans(root);
Chris Masone02119d2008-09-05 16:13:11 -04002766
Chris Masone02119d2008-09-05 16:13:11 -04002767 return ret;
2768}
2769
2770/*
2771 * creates a range item in the log for 'dirid'. first_offset and
2772 * last_offset tell us which parts of the key space the log should
2773 * be considered authoritative for.
2774 */
2775static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
2776 struct btrfs_root *log,
2777 struct btrfs_path *path,
2778 int key_type, u64 dirid,
2779 u64 first_offset, u64 last_offset)
2780{
2781 int ret;
2782 struct btrfs_key key;
2783 struct btrfs_dir_log_item *item;
2784
2785 key.objectid = dirid;
2786 key.offset = first_offset;
2787 if (key_type == BTRFS_DIR_ITEM_KEY)
2788 key.type = BTRFS_DIR_LOG_ITEM_KEY;
2789 else
2790 key.type = BTRFS_DIR_LOG_INDEX_KEY;
2791 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002792 if (ret)
2793 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04002794
2795 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2796 struct btrfs_dir_log_item);
2797 btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
2798 btrfs_mark_buffer_dirty(path->nodes[0]);
David Sterbab3b4aa72011-04-21 01:20:15 +02002799 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002800 return 0;
2801}
2802
2803/*
2804 * log all the items included in the current transaction for a given
2805 * directory. This also creates the range items in the log tree required
2806 * to replay anything deleted before the fsync
2807 */
2808static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2809 struct btrfs_root *root, struct inode *inode,
2810 struct btrfs_path *path,
2811 struct btrfs_path *dst_path, int key_type,
2812 u64 min_offset, u64 *last_offset_ret)
2813{
2814 struct btrfs_key min_key;
2815 struct btrfs_key max_key;
2816 struct btrfs_root *log = root->log_root;
2817 struct extent_buffer *src;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002818 int err = 0;
Chris Masone02119d2008-09-05 16:13:11 -04002819 int ret;
2820 int i;
2821 int nritems;
2822 u64 first_offset = min_offset;
2823 u64 last_offset = (u64)-1;
Li Zefan33345d012011-04-20 10:31:50 +08002824 u64 ino = btrfs_ino(inode);
Chris Masone02119d2008-09-05 16:13:11 -04002825
2826 log = root->log_root;
Li Zefan33345d012011-04-20 10:31:50 +08002827 max_key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04002828 max_key.offset = (u64)-1;
2829 max_key.type = key_type;
2830
Li Zefan33345d012011-04-20 10:31:50 +08002831 min_key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04002832 min_key.type = key_type;
2833 min_key.offset = min_offset;
2834
2835 path->keep_locks = 1;
2836
2837 ret = btrfs_search_forward(root, &min_key, &max_key,
Eric Sandeende78b512013-01-31 18:21:12 +00002838 path, trans->transid);
Chris Masone02119d2008-09-05 16:13:11 -04002839
2840 /*
2841 * we didn't find anything from this transaction, see if there
2842 * is anything at all
2843 */
Li Zefan33345d012011-04-20 10:31:50 +08002844 if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) {
2845 min_key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04002846 min_key.type = key_type;
2847 min_key.offset = (u64)-1;
David Sterbab3b4aa72011-04-21 01:20:15 +02002848 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002849 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2850 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02002851 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002852 return ret;
2853 }
Li Zefan33345d012011-04-20 10:31:50 +08002854 ret = btrfs_previous_item(root, path, ino, key_type);
Chris Masone02119d2008-09-05 16:13:11 -04002855
2856 /* if ret == 0 there are items for this type,
2857 * create a range to tell us the last key of this type.
2858 * otherwise, there are no items in this directory after
2859 * *min_offset, and we create a range to indicate that.
2860 */
2861 if (ret == 0) {
2862 struct btrfs_key tmp;
2863 btrfs_item_key_to_cpu(path->nodes[0], &tmp,
2864 path->slots[0]);
Chris Masond3977122009-01-05 21:25:51 -05002865 if (key_type == tmp.type)
Chris Masone02119d2008-09-05 16:13:11 -04002866 first_offset = max(min_offset, tmp.offset) + 1;
Chris Masone02119d2008-09-05 16:13:11 -04002867 }
2868 goto done;
2869 }
2870
2871 /* go backward to find any previous key */
Li Zefan33345d012011-04-20 10:31:50 +08002872 ret = btrfs_previous_item(root, path, ino, key_type);
Chris Masone02119d2008-09-05 16:13:11 -04002873 if (ret == 0) {
2874 struct btrfs_key tmp;
2875 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
2876 if (key_type == tmp.type) {
2877 first_offset = tmp.offset;
2878 ret = overwrite_item(trans, log, dst_path,
2879 path->nodes[0], path->slots[0],
2880 &tmp);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002881 if (ret) {
2882 err = ret;
2883 goto done;
2884 }
Chris Masone02119d2008-09-05 16:13:11 -04002885 }
2886 }
David Sterbab3b4aa72011-04-21 01:20:15 +02002887 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04002888
2889 /* find the first key from this transaction again */
2890 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2891 if (ret != 0) {
2892 WARN_ON(1);
2893 goto done;
2894 }
2895
2896 /*
2897 * we have a block from this transaction, log every item in it
2898 * from our directory
2899 */
Chris Masond3977122009-01-05 21:25:51 -05002900 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04002901 struct btrfs_key tmp;
2902 src = path->nodes[0];
2903 nritems = btrfs_header_nritems(src);
2904 for (i = path->slots[0]; i < nritems; i++) {
2905 btrfs_item_key_to_cpu(src, &min_key, i);
2906
Li Zefan33345d012011-04-20 10:31:50 +08002907 if (min_key.objectid != ino || min_key.type != key_type)
Chris Masone02119d2008-09-05 16:13:11 -04002908 goto done;
2909 ret = overwrite_item(trans, log, dst_path, src, i,
2910 &min_key);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002911 if (ret) {
2912 err = ret;
2913 goto done;
2914 }
Chris Masone02119d2008-09-05 16:13:11 -04002915 }
2916 path->slots[0] = nritems;
2917
2918 /*
2919 * look ahead to the next item and see if it is also
2920 * from this directory and from this transaction
2921 */
2922 ret = btrfs_next_leaf(root, path);
2923 if (ret == 1) {
2924 last_offset = (u64)-1;
2925 goto done;
2926 }
2927 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
Li Zefan33345d012011-04-20 10:31:50 +08002928 if (tmp.objectid != ino || tmp.type != key_type) {
Chris Masone02119d2008-09-05 16:13:11 -04002929 last_offset = (u64)-1;
2930 goto done;
2931 }
2932 if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
2933 ret = overwrite_item(trans, log, dst_path,
2934 path->nodes[0], path->slots[0],
2935 &tmp);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002936 if (ret)
2937 err = ret;
2938 else
2939 last_offset = tmp.offset;
Chris Masone02119d2008-09-05 16:13:11 -04002940 goto done;
2941 }
2942 }
2943done:
David Sterbab3b4aa72011-04-21 01:20:15 +02002944 btrfs_release_path(path);
2945 btrfs_release_path(dst_path);
Chris Masone02119d2008-09-05 16:13:11 -04002946
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002947 if (err == 0) {
2948 *last_offset_ret = last_offset;
2949 /*
2950 * insert the log range keys to indicate where the log
2951 * is valid
2952 */
2953 ret = insert_dir_log_key(trans, log, path, key_type,
Li Zefan33345d012011-04-20 10:31:50 +08002954 ino, first_offset, last_offset);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002955 if (ret)
2956 err = ret;
2957 }
2958 return err;
Chris Masone02119d2008-09-05 16:13:11 -04002959}
2960
2961/*
2962 * logging directories is very similar to logging inodes, We find all the items
2963 * from the current transaction and write them to the log.
2964 *
2965 * The recovery code scans the directory in the subvolume, and if it finds a
2966 * key in the range logged that is not present in the log tree, then it means
2967 * that dir entry was unlinked during the transaction.
2968 *
2969 * In order for that scan to work, we must include one key smaller than
2970 * the smallest logged by this transaction and one key larger than the largest
2971 * key logged by this transaction.
2972 */
2973static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
2974 struct btrfs_root *root, struct inode *inode,
2975 struct btrfs_path *path,
2976 struct btrfs_path *dst_path)
2977{
2978 u64 min_key;
2979 u64 max_key;
2980 int ret;
2981 int key_type = BTRFS_DIR_ITEM_KEY;
2982
2983again:
2984 min_key = 0;
2985 max_key = 0;
Chris Masond3977122009-01-05 21:25:51 -05002986 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04002987 ret = log_dir_items(trans, root, inode, path,
2988 dst_path, key_type, min_key,
2989 &max_key);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04002990 if (ret)
2991 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04002992 if (max_key == (u64)-1)
2993 break;
2994 min_key = max_key + 1;
2995 }
2996
2997 if (key_type == BTRFS_DIR_ITEM_KEY) {
2998 key_type = BTRFS_DIR_INDEX_KEY;
2999 goto again;
3000 }
3001 return 0;
3002}
3003
3004/*
3005 * a helper function to drop items from the log before we relog an
3006 * inode. max_key_type indicates the highest item type to remove.
3007 * This cannot be run for file data extents because it does not
3008 * free the extents they point to.
3009 */
3010static int drop_objectid_items(struct btrfs_trans_handle *trans,
3011 struct btrfs_root *log,
3012 struct btrfs_path *path,
3013 u64 objectid, int max_key_type)
3014{
3015 int ret;
3016 struct btrfs_key key;
3017 struct btrfs_key found_key;
Josef Bacik18ec90d2012-09-28 11:56:28 -04003018 int start_slot;
Chris Masone02119d2008-09-05 16:13:11 -04003019
3020 key.objectid = objectid;
3021 key.type = max_key_type;
3022 key.offset = (u64)-1;
3023
Chris Masond3977122009-01-05 21:25:51 -05003024 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04003025 ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
Josef Bacik36508602013-04-25 16:23:32 -04003026 BUG_ON(ret == 0); /* Logic error */
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003027 if (ret < 0)
Chris Masone02119d2008-09-05 16:13:11 -04003028 break;
3029
3030 if (path->slots[0] == 0)
3031 break;
3032
3033 path->slots[0]--;
3034 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3035 path->slots[0]);
3036
3037 if (found_key.objectid != objectid)
3038 break;
3039
Josef Bacik18ec90d2012-09-28 11:56:28 -04003040 found_key.offset = 0;
3041 found_key.type = 0;
3042 ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
3043 &start_slot);
3044
3045 ret = btrfs_del_items(trans, log, path, start_slot,
3046 path->slots[0] - start_slot + 1);
3047 /*
3048 * If start slot isn't 0 then we don't need to re-search, we've
3049 * found the last guy with the objectid in this tree.
3050 */
3051 if (ret || start_slot != 0)
Tsutomu Itoh65a246c2011-05-19 04:37:44 +00003052 break;
David Sterbab3b4aa72011-04-21 01:20:15 +02003053 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04003054 }
David Sterbab3b4aa72011-04-21 01:20:15 +02003055 btrfs_release_path(path);
Josef Bacik5bdbeb22012-05-29 16:59:49 -04003056 if (ret > 0)
3057 ret = 0;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003058 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04003059}
3060
Josef Bacik94edf4a2012-09-25 14:56:25 -04003061static void fill_inode_item(struct btrfs_trans_handle *trans,
3062 struct extent_buffer *leaf,
3063 struct btrfs_inode_item *item,
3064 struct inode *inode, int log_inode_only)
3065{
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003066 struct btrfs_map_token token;
Josef Bacik94edf4a2012-09-25 14:56:25 -04003067
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003068 btrfs_init_map_token(&token);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003069
3070 if (log_inode_only) {
3071 /* set the generation to zero so the recover code
3072 * can tell the difference between an logging
3073 * just to say 'this inode exists' and a logging
3074 * to say 'update this inode with these values'
3075 */
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003076 btrfs_set_token_inode_generation(leaf, item, 0, &token);
3077 btrfs_set_token_inode_size(leaf, item, 0, &token);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003078 } else {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003079 btrfs_set_token_inode_generation(leaf, item,
3080 BTRFS_I(inode)->generation,
3081 &token);
3082 btrfs_set_token_inode_size(leaf, item, inode->i_size, &token);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003083 }
3084
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003085 btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
3086 btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
3087 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3088 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3089
3090 btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
3091 inode->i_atime.tv_sec, &token);
3092 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
3093 inode->i_atime.tv_nsec, &token);
3094
3095 btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
3096 inode->i_mtime.tv_sec, &token);
3097 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
3098 inode->i_mtime.tv_nsec, &token);
3099
3100 btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
3101 inode->i_ctime.tv_sec, &token);
3102 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
3103 inode->i_ctime.tv_nsec, &token);
3104
3105 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
3106 &token);
3107
3108 btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
3109 btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
3110 btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
3111 btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
3112 btrfs_set_token_inode_block_group(leaf, item, 0, &token);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003113}
3114
Josef Bacika95249b2012-10-11 16:17:34 -04003115static int log_inode_item(struct btrfs_trans_handle *trans,
3116 struct btrfs_root *log, struct btrfs_path *path,
3117 struct inode *inode)
3118{
3119 struct btrfs_inode_item *inode_item;
3120 struct btrfs_key key;
3121 int ret;
3122
3123 memcpy(&key, &BTRFS_I(inode)->location, sizeof(key));
3124 ret = btrfs_insert_empty_item(trans, log, path, &key,
3125 sizeof(*inode_item));
3126 if (ret && ret != -EEXIST)
3127 return ret;
3128 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3129 struct btrfs_inode_item);
3130 fill_inode_item(trans, path->nodes[0], inode_item, inode, 0);
3131 btrfs_release_path(path);
3132 return 0;
3133}
3134
Chris Mason31ff1cd2008-09-11 16:17:57 -04003135static noinline int copy_items(struct btrfs_trans_handle *trans,
Liu Bod2794402012-08-29 01:07:56 -06003136 struct inode *inode,
Chris Mason31ff1cd2008-09-11 16:17:57 -04003137 struct btrfs_path *dst_path,
3138 struct extent_buffer *src,
3139 int start_slot, int nr, int inode_only)
3140{
3141 unsigned long src_offset;
3142 unsigned long dst_offset;
Liu Bod2794402012-08-29 01:07:56 -06003143 struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003144 struct btrfs_file_extent_item *extent;
3145 struct btrfs_inode_item *inode_item;
3146 int ret;
3147 struct btrfs_key *ins_keys;
3148 u32 *ins_sizes;
3149 char *ins_data;
3150 int i;
Chris Masond20f7042008-12-08 16:58:54 -05003151 struct list_head ordered_sums;
Liu Bod2794402012-08-29 01:07:56 -06003152 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
Chris Masond20f7042008-12-08 16:58:54 -05003153
3154 INIT_LIST_HEAD(&ordered_sums);
Chris Mason31ff1cd2008-09-11 16:17:57 -04003155
3156 ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
3157 nr * sizeof(u32), GFP_NOFS);
liubo2a29edc2011-01-26 06:22:08 +00003158 if (!ins_data)
3159 return -ENOMEM;
3160
Chris Mason31ff1cd2008-09-11 16:17:57 -04003161 ins_sizes = (u32 *)ins_data;
3162 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
3163
3164 for (i = 0; i < nr; i++) {
3165 ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot);
3166 btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
3167 }
3168 ret = btrfs_insert_empty_items(trans, log, dst_path,
3169 ins_keys, ins_sizes, nr);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003170 if (ret) {
3171 kfree(ins_data);
3172 return ret;
3173 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003174
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003175 for (i = 0; i < nr; i++, dst_path->slots[0]++) {
Chris Mason31ff1cd2008-09-11 16:17:57 -04003176 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
3177 dst_path->slots[0]);
3178
3179 src_offset = btrfs_item_ptr_offset(src, start_slot + i);
3180
Josef Bacik94edf4a2012-09-25 14:56:25 -04003181 if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
Chris Mason31ff1cd2008-09-11 16:17:57 -04003182 inode_item = btrfs_item_ptr(dst_path->nodes[0],
3183 dst_path->slots[0],
3184 struct btrfs_inode_item);
Josef Bacik94edf4a2012-09-25 14:56:25 -04003185 fill_inode_item(trans, dst_path->nodes[0], inode_item,
3186 inode, inode_only == LOG_INODE_EXISTS);
3187 } else {
3188 copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
3189 src_offset, ins_sizes[i]);
Chris Mason31ff1cd2008-09-11 16:17:57 -04003190 }
Josef Bacik94edf4a2012-09-25 14:56:25 -04003191
Chris Mason31ff1cd2008-09-11 16:17:57 -04003192 /* take a reference on file data extents so that truncates
3193 * or deletes of this inode don't have to relog the inode
3194 * again
3195 */
Liu Bod2794402012-08-29 01:07:56 -06003196 if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY &&
3197 !skip_csum) {
Chris Mason31ff1cd2008-09-11 16:17:57 -04003198 int found_type;
3199 extent = btrfs_item_ptr(src, start_slot + i,
3200 struct btrfs_file_extent_item);
3201
liubo8e531cd2011-05-06 10:36:09 +08003202 if (btrfs_file_extent_generation(src, extent) < trans->transid)
3203 continue;
3204
Chris Mason31ff1cd2008-09-11 16:17:57 -04003205 found_type = btrfs_file_extent_type(src, extent);
Josef Bacik6f1fed72012-09-26 11:07:06 -04003206 if (found_type == BTRFS_FILE_EXTENT_REG) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003207 u64 ds, dl, cs, cl;
3208 ds = btrfs_file_extent_disk_bytenr(src,
3209 extent);
3210 /* ds == 0 is a hole */
3211 if (ds == 0)
3212 continue;
3213
3214 dl = btrfs_file_extent_disk_num_bytes(src,
3215 extent);
3216 cs = btrfs_file_extent_offset(src, extent);
3217 cl = btrfs_file_extent_num_bytes(src,
Joe Perchesa419aef2009-08-18 11:18:35 -07003218 extent);
Chris Mason580afd72008-12-08 19:15:39 -05003219 if (btrfs_file_extent_compression(src,
3220 extent)) {
3221 cs = 0;
3222 cl = dl;
3223 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003224
3225 ret = btrfs_lookup_csums_range(
3226 log->fs_info->csum_root,
3227 ds + cs, ds + cs + cl - 1,
Arne Jansena2de7332011-03-08 14:14:00 +01003228 &ordered_sums, 0);
Josef Bacik36508602013-04-25 16:23:32 -04003229 if (ret) {
3230 btrfs_release_path(dst_path);
3231 kfree(ins_data);
3232 return ret;
3233 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003234 }
3235 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003236 }
3237
3238 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
David Sterbab3b4aa72011-04-21 01:20:15 +02003239 btrfs_release_path(dst_path);
Chris Mason31ff1cd2008-09-11 16:17:57 -04003240 kfree(ins_data);
Chris Masond20f7042008-12-08 16:58:54 -05003241
3242 /*
3243 * we have to do this after the loop above to avoid changing the
3244 * log tree while trying to change the log tree.
3245 */
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003246 ret = 0;
Chris Masond3977122009-01-05 21:25:51 -05003247 while (!list_empty(&ordered_sums)) {
Chris Masond20f7042008-12-08 16:58:54 -05003248 struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
3249 struct btrfs_ordered_sum,
3250 list);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003251 if (!ret)
3252 ret = btrfs_csum_file_blocks(trans, log, sums);
Chris Masond20f7042008-12-08 16:58:54 -05003253 list_del(&sums->list);
3254 kfree(sums);
3255 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003256 return ret;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003257}
3258
Josef Bacik5dc562c2012-08-17 13:14:17 -04003259static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
3260{
3261 struct extent_map *em1, *em2;
3262
3263 em1 = list_entry(a, struct extent_map, list);
3264 em2 = list_entry(b, struct extent_map, list);
3265
3266 if (em1->start < em2->start)
3267 return -1;
3268 else if (em1->start > em2->start)
3269 return 1;
3270 return 0;
3271}
3272
Josef Bacik5dc562c2012-08-17 13:14:17 -04003273static int log_one_extent(struct btrfs_trans_handle *trans,
3274 struct inode *inode, struct btrfs_root *root,
Josef Bacik70c8a912012-10-11 16:54:30 -04003275 struct extent_map *em, struct btrfs_path *path)
Josef Bacik5dc562c2012-08-17 13:14:17 -04003276{
3277 struct btrfs_root *log = root->log_root;
Josef Bacik70c8a912012-10-11 16:54:30 -04003278 struct btrfs_file_extent_item *fi;
3279 struct extent_buffer *leaf;
Josef Bacik2ab28f32012-10-12 15:27:49 -04003280 struct btrfs_ordered_extent *ordered;
Josef Bacik70c8a912012-10-11 16:54:30 -04003281 struct list_head ordered_sums;
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003282 struct btrfs_map_token token;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003283 struct btrfs_key key;
Josef Bacik2ab28f32012-10-12 15:27:49 -04003284 u64 mod_start = em->mod_start;
3285 u64 mod_len = em->mod_len;
3286 u64 csum_offset;
3287 u64 csum_len;
Josef Bacik70c8a912012-10-11 16:54:30 -04003288 u64 extent_offset = em->start - em->orig_start;
3289 u64 block_len;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003290 int ret;
Josef Bacik2ab28f32012-10-12 15:27:49 -04003291 int index = log->log_transid % 2;
Josef Bacik70c8a912012-10-11 16:54:30 -04003292 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003293
Josef Bacik09a2a8f92013-04-05 16:51:15 -04003294 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3295 em->start + em->len, NULL, 0);
3296 if (ret)
3297 return ret;
3298
Josef Bacik70c8a912012-10-11 16:54:30 -04003299 INIT_LIST_HEAD(&ordered_sums);
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003300 btrfs_init_map_token(&token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003301 key.objectid = btrfs_ino(inode);
3302 key.type = BTRFS_EXTENT_DATA_KEY;
3303 key.offset = em->start;
Josef Bacik70c8a912012-10-11 16:54:30 -04003304
3305 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
Josef Bacik09a2a8f92013-04-05 16:51:15 -04003306 if (ret)
Josef Bacik70c8a912012-10-11 16:54:30 -04003307 return ret;
Josef Bacik70c8a912012-10-11 16:54:30 -04003308 leaf = path->nodes[0];
3309 fi = btrfs_item_ptr(leaf, path->slots[0],
3310 struct btrfs_file_extent_item);
Josef Bacik124fe662013-03-01 11:47:21 -05003311
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003312 btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
3313 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003314 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3315 skip_csum = true;
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003316 btrfs_set_token_file_extent_type(leaf, fi,
3317 BTRFS_FILE_EXTENT_PREALLOC,
3318 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003319 } else {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003320 btrfs_set_token_file_extent_type(leaf, fi,
3321 BTRFS_FILE_EXTENT_REG,
3322 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003323 if (em->block_start == 0)
3324 skip_csum = true;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003325 }
3326
Josef Bacik70c8a912012-10-11 16:54:30 -04003327 block_len = max(em->block_len, em->orig_block_len);
3328 if (em->compress_type != BTRFS_COMPRESS_NONE) {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003329 btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
3330 em->block_start,
3331 &token);
3332 btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
3333 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003334 } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003335 btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
3336 em->block_start -
3337 extent_offset, &token);
3338 btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
3339 &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003340 } else {
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003341 btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token);
3342 btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0,
3343 &token);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003344 }
3345
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003346 btrfs_set_token_file_extent_offset(leaf, fi,
3347 em->start - em->orig_start,
3348 &token);
3349 btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
Josef Bacikcc95bef2013-04-04 14:31:27 -04003350 btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
Josef Bacik0b1c6cc2012-10-23 16:03:44 -04003351 btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
3352 &token);
3353 btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
3354 btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
Josef Bacik70c8a912012-10-11 16:54:30 -04003355 btrfs_mark_buffer_dirty(leaf);
3356
Josef Bacik70c8a912012-10-11 16:54:30 -04003357 btrfs_release_path(path);
Josef Bacik70c8a912012-10-11 16:54:30 -04003358 if (ret) {
3359 return ret;
3360 }
3361
3362 if (skip_csum)
3363 return 0;
3364
Liu Bo192000d2013-01-06 03:38:22 +00003365 if (em->compress_type) {
3366 csum_offset = 0;
3367 csum_len = block_len;
3368 }
3369
Josef Bacik2ab28f32012-10-12 15:27:49 -04003370 /*
3371 * First check and see if our csums are on our outstanding ordered
3372 * extents.
3373 */
3374again:
3375 spin_lock_irq(&log->log_extents_lock[index]);
3376 list_for_each_entry(ordered, &log->logged_list[index], log_list) {
3377 struct btrfs_ordered_sum *sum;
3378
3379 if (!mod_len)
3380 break;
3381
3382 if (ordered->inode != inode)
3383 continue;
3384
3385 if (ordered->file_offset + ordered->len <= mod_start ||
3386 mod_start + mod_len <= ordered->file_offset)
3387 continue;
3388
3389 /*
3390 * We are going to copy all the csums on this ordered extent, so
3391 * go ahead and adjust mod_start and mod_len in case this
3392 * ordered extent has already been logged.
3393 */
3394 if (ordered->file_offset > mod_start) {
3395 if (ordered->file_offset + ordered->len >=
3396 mod_start + mod_len)
3397 mod_len = ordered->file_offset - mod_start;
3398 /*
3399 * If we have this case
3400 *
3401 * |--------- logged extent ---------|
3402 * |----- ordered extent ----|
3403 *
3404 * Just don't mess with mod_start and mod_len, we'll
3405 * just end up logging more csums than we need and it
3406 * will be ok.
3407 */
3408 } else {
3409 if (ordered->file_offset + ordered->len <
3410 mod_start + mod_len) {
3411 mod_len = (mod_start + mod_len) -
3412 (ordered->file_offset + ordered->len);
3413 mod_start = ordered->file_offset +
3414 ordered->len;
3415 } else {
3416 mod_len = 0;
3417 }
3418 }
3419
3420 /*
3421 * To keep us from looping for the above case of an ordered
3422 * extent that falls inside of the logged extent.
3423 */
3424 if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
3425 &ordered->flags))
3426 continue;
3427 atomic_inc(&ordered->refs);
3428 spin_unlock_irq(&log->log_extents_lock[index]);
3429 /*
3430 * we've dropped the lock, we must either break or
3431 * start over after this.
3432 */
3433
3434 wait_event(ordered->wait, ordered->csum_bytes_left == 0);
3435
3436 list_for_each_entry(sum, &ordered->list, list) {
3437 ret = btrfs_csum_file_blocks(trans, log, sum);
3438 if (ret) {
3439 btrfs_put_ordered_extent(ordered);
3440 goto unlocked;
3441 }
3442 }
3443 btrfs_put_ordered_extent(ordered);
3444 goto again;
3445
3446 }
3447 spin_unlock_irq(&log->log_extents_lock[index]);
3448unlocked:
3449
3450 if (!mod_len || ret)
3451 return ret;
3452
3453 csum_offset = mod_start - em->start;
3454 csum_len = mod_len;
3455
Josef Bacik70c8a912012-10-11 16:54:30 -04003456 /* block start is already adjusted for the file extent offset. */
3457 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3458 em->block_start + csum_offset,
3459 em->block_start + csum_offset +
3460 csum_len - 1, &ordered_sums, 0);
3461 if (ret)
3462 return ret;
3463
3464 while (!list_empty(&ordered_sums)) {
3465 struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
3466 struct btrfs_ordered_sum,
3467 list);
3468 if (!ret)
3469 ret = btrfs_csum_file_blocks(trans, log, sums);
3470 list_del(&sums->list);
3471 kfree(sums);
3472 }
3473
3474 return ret;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003475}
3476
3477static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3478 struct btrfs_root *root,
3479 struct inode *inode,
Josef Bacik70c8a912012-10-11 16:54:30 -04003480 struct btrfs_path *path)
Josef Bacik5dc562c2012-08-17 13:14:17 -04003481{
Josef Bacik5dc562c2012-08-17 13:14:17 -04003482 struct extent_map *em, *n;
3483 struct list_head extents;
3484 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3485 u64 test_gen;
3486 int ret = 0;
Josef Bacik2ab28f32012-10-12 15:27:49 -04003487 int num = 0;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003488
3489 INIT_LIST_HEAD(&extents);
3490
Josef Bacik5dc562c2012-08-17 13:14:17 -04003491 write_lock(&tree->lock);
3492 test_gen = root->fs_info->last_trans_committed;
3493
3494 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
3495 list_del_init(&em->list);
Josef Bacik2ab28f32012-10-12 15:27:49 -04003496
3497 /*
3498 * Just an arbitrary number, this can be really CPU intensive
3499 * once we start getting a lot of extents, and really once we
3500 * have a bunch of extents we just want to commit since it will
3501 * be faster.
3502 */
3503 if (++num > 32768) {
3504 list_del_init(&tree->modified_extents);
3505 ret = -EFBIG;
3506 goto process;
3507 }
3508
Josef Bacik5dc562c2012-08-17 13:14:17 -04003509 if (em->generation <= test_gen)
3510 continue;
Josef Bacikff44c6e2012-09-14 12:59:20 -04003511 /* Need a ref to keep it from getting evicted from cache */
3512 atomic_inc(&em->refs);
3513 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003514 list_add_tail(&em->list, &extents);
Josef Bacik2ab28f32012-10-12 15:27:49 -04003515 num++;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003516 }
3517
3518 list_sort(NULL, &extents, extent_cmp);
3519
Josef Bacik2ab28f32012-10-12 15:27:49 -04003520process:
Josef Bacik5dc562c2012-08-17 13:14:17 -04003521 while (!list_empty(&extents)) {
3522 em = list_entry(extents.next, struct extent_map, list);
3523
3524 list_del_init(&em->list);
3525
3526 /*
3527 * If we had an error we just need to delete everybody from our
3528 * private list.
3529 */
Josef Bacikff44c6e2012-09-14 12:59:20 -04003530 if (ret) {
Josef Bacik201a9032013-01-24 12:02:07 -05003531 clear_em_logging(tree, em);
Josef Bacikff44c6e2012-09-14 12:59:20 -04003532 free_extent_map(em);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003533 continue;
Josef Bacikff44c6e2012-09-14 12:59:20 -04003534 }
3535
3536 write_unlock(&tree->lock);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003537
Josef Bacik70c8a912012-10-11 16:54:30 -04003538 ret = log_one_extent(trans, inode, root, em, path);
Josef Bacikff44c6e2012-09-14 12:59:20 -04003539 write_lock(&tree->lock);
Josef Bacik201a9032013-01-24 12:02:07 -05003540 clear_em_logging(tree, em);
3541 free_extent_map(em);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003542 }
Josef Bacikff44c6e2012-09-14 12:59:20 -04003543 WARN_ON(!list_empty(&extents));
3544 write_unlock(&tree->lock);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003545
Josef Bacik5dc562c2012-08-17 13:14:17 -04003546 btrfs_release_path(path);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003547 return ret;
3548}
3549
Chris Masone02119d2008-09-05 16:13:11 -04003550/* log a single inode in the tree log.
3551 * At least one parent directory for this inode must exist in the tree
3552 * or be logged already.
3553 *
3554 * Any items from this inode changed by the current transaction are copied
3555 * to the log tree. An extra reference is taken on any extents in this
3556 * file, allowing us to avoid a whole pile of corner cases around logging
3557 * blocks that have been removed from the tree.
3558 *
3559 * See LOG_INODE_ALL and related defines for a description of what inode_only
3560 * does.
3561 *
3562 * This handles both files and directories.
3563 */
Chris Mason12fcfd22009-03-24 10:24:20 -04003564static int btrfs_log_inode(struct btrfs_trans_handle *trans,
Chris Masone02119d2008-09-05 16:13:11 -04003565 struct btrfs_root *root, struct inode *inode,
3566 int inode_only)
3567{
3568 struct btrfs_path *path;
3569 struct btrfs_path *dst_path;
3570 struct btrfs_key min_key;
3571 struct btrfs_key max_key;
3572 struct btrfs_root *log = root->log_root;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003573 struct extent_buffer *src = NULL;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003574 int err = 0;
Chris Masone02119d2008-09-05 16:13:11 -04003575 int ret;
Chris Mason3a5f1d42008-09-11 15:53:37 -04003576 int nritems;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003577 int ins_start_slot = 0;
3578 int ins_nr;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003579 bool fast_search = false;
Li Zefan33345d012011-04-20 10:31:50 +08003580 u64 ino = btrfs_ino(inode);
Chris Masone02119d2008-09-05 16:13:11 -04003581
Chris Masone02119d2008-09-05 16:13:11 -04003582 path = btrfs_alloc_path();
Tsutomu Itoh5df67082011-02-01 09:17:35 +00003583 if (!path)
3584 return -ENOMEM;
Chris Masone02119d2008-09-05 16:13:11 -04003585 dst_path = btrfs_alloc_path();
Tsutomu Itoh5df67082011-02-01 09:17:35 +00003586 if (!dst_path) {
3587 btrfs_free_path(path);
3588 return -ENOMEM;
3589 }
Chris Masone02119d2008-09-05 16:13:11 -04003590
Li Zefan33345d012011-04-20 10:31:50 +08003591 min_key.objectid = ino;
Chris Masone02119d2008-09-05 16:13:11 -04003592 min_key.type = BTRFS_INODE_ITEM_KEY;
3593 min_key.offset = 0;
3594
Li Zefan33345d012011-04-20 10:31:50 +08003595 max_key.objectid = ino;
Chris Mason12fcfd22009-03-24 10:24:20 -04003596
Chris Mason12fcfd22009-03-24 10:24:20 -04003597
Josef Bacik5dc562c2012-08-17 13:14:17 -04003598 /* today the code can only do partial logging of directories */
Miao Xie5269b672012-11-01 07:35:23 +00003599 if (S_ISDIR(inode->i_mode) ||
3600 (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3601 &BTRFS_I(inode)->runtime_flags) &&
3602 inode_only == LOG_INODE_EXISTS))
Chris Masone02119d2008-09-05 16:13:11 -04003603 max_key.type = BTRFS_XATTR_ITEM_KEY;
3604 else
3605 max_key.type = (u8)-1;
3606 max_key.offset = (u64)-1;
3607
Josef Bacik94edf4a2012-09-25 14:56:25 -04003608 /* Only run delayed items if we are a dir or a new file */
3609 if (S_ISDIR(inode->i_mode) ||
3610 BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) {
3611 ret = btrfs_commit_inode_delayed_items(trans, inode);
3612 if (ret) {
3613 btrfs_free_path(path);
3614 btrfs_free_path(dst_path);
3615 return ret;
3616 }
Miao Xie16cdcec2011-04-22 18:12:22 +08003617 }
3618
Chris Masone02119d2008-09-05 16:13:11 -04003619 mutex_lock(&BTRFS_I(inode)->log_mutex);
3620
Josef Bacik2ab28f32012-10-12 15:27:49 -04003621 btrfs_get_logged_extents(log, inode);
3622
Chris Masone02119d2008-09-05 16:13:11 -04003623 /*
3624 * a brute force approach to making sure we get the most uptodate
3625 * copies of everything.
3626 */
3627 if (S_ISDIR(inode->i_mode)) {
3628 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
3629
3630 if (inode_only == LOG_INODE_EXISTS)
3631 max_key_type = BTRFS_XATTR_ITEM_KEY;
Li Zefan33345d012011-04-20 10:31:50 +08003632 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
Chris Masone02119d2008-09-05 16:13:11 -04003633 } else {
Josef Bacik5dc562c2012-08-17 13:14:17 -04003634 if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3635 &BTRFS_I(inode)->runtime_flags)) {
Josef Bacike9976152012-10-11 15:53:56 -04003636 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
3637 &BTRFS_I(inode)->runtime_flags);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003638 ret = btrfs_truncate_inode_items(trans, log,
3639 inode, 0, 0);
Josef Bacika95249b2012-10-11 16:17:34 -04003640 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
3641 &BTRFS_I(inode)->runtime_flags)) {
3642 if (inode_only == LOG_INODE_ALL)
3643 fast_search = true;
3644 max_key.type = BTRFS_XATTR_ITEM_KEY;
3645 ret = drop_objectid_items(trans, log, path, ino,
3646 max_key.type);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003647 } else {
Liu Bo183f37f2012-11-01 06:38:47 +00003648 if (inode_only == LOG_INODE_ALL)
3649 fast_search = true;
Josef Bacika95249b2012-10-11 16:17:34 -04003650 ret = log_inode_item(trans, log, dst_path, inode);
3651 if (ret) {
3652 err = ret;
3653 goto out_unlock;
3654 }
3655 goto log_extents;
Josef Bacik5dc562c2012-08-17 13:14:17 -04003656 }
Josef Bacika95249b2012-10-11 16:17:34 -04003657
Chris Masone02119d2008-09-05 16:13:11 -04003658 }
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003659 if (ret) {
3660 err = ret;
3661 goto out_unlock;
3662 }
Chris Masone02119d2008-09-05 16:13:11 -04003663 path->keep_locks = 1;
3664
Chris Masond3977122009-01-05 21:25:51 -05003665 while (1) {
Chris Mason31ff1cd2008-09-11 16:17:57 -04003666 ins_nr = 0;
Chris Masone02119d2008-09-05 16:13:11 -04003667 ret = btrfs_search_forward(root, &min_key, &max_key,
Eric Sandeende78b512013-01-31 18:21:12 +00003668 path, trans->transid);
Chris Masone02119d2008-09-05 16:13:11 -04003669 if (ret != 0)
3670 break;
Chris Mason3a5f1d42008-09-11 15:53:37 -04003671again:
Chris Mason31ff1cd2008-09-11 16:17:57 -04003672 /* note, ins_nr might be > 0 here, cleanup outside the loop */
Li Zefan33345d012011-04-20 10:31:50 +08003673 if (min_key.objectid != ino)
Chris Masone02119d2008-09-05 16:13:11 -04003674 break;
3675 if (min_key.type > max_key.type)
3676 break;
Chris Mason31ff1cd2008-09-11 16:17:57 -04003677
Chris Masone02119d2008-09-05 16:13:11 -04003678 src = path->nodes[0];
Chris Mason31ff1cd2008-09-11 16:17:57 -04003679 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
3680 ins_nr++;
3681 goto next_slot;
3682 } else if (!ins_nr) {
3683 ins_start_slot = path->slots[0];
3684 ins_nr = 1;
3685 goto next_slot;
Chris Masone02119d2008-09-05 16:13:11 -04003686 }
3687
Liu Bod2794402012-08-29 01:07:56 -06003688 ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
Chris Mason31ff1cd2008-09-11 16:17:57 -04003689 ins_nr, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003690 if (ret) {
3691 err = ret;
3692 goto out_unlock;
3693 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003694 ins_nr = 1;
3695 ins_start_slot = path->slots[0];
3696next_slot:
Chris Masone02119d2008-09-05 16:13:11 -04003697
Chris Mason3a5f1d42008-09-11 15:53:37 -04003698 nritems = btrfs_header_nritems(path->nodes[0]);
3699 path->slots[0]++;
3700 if (path->slots[0] < nritems) {
3701 btrfs_item_key_to_cpu(path->nodes[0], &min_key,
3702 path->slots[0]);
3703 goto again;
3704 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003705 if (ins_nr) {
Liu Bod2794402012-08-29 01:07:56 -06003706 ret = copy_items(trans, inode, dst_path, src,
Chris Mason31ff1cd2008-09-11 16:17:57 -04003707 ins_start_slot,
3708 ins_nr, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003709 if (ret) {
3710 err = ret;
3711 goto out_unlock;
3712 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003713 ins_nr = 0;
3714 }
David Sterbab3b4aa72011-04-21 01:20:15 +02003715 btrfs_release_path(path);
Chris Mason3a5f1d42008-09-11 15:53:37 -04003716
Chris Masone02119d2008-09-05 16:13:11 -04003717 if (min_key.offset < (u64)-1)
3718 min_key.offset++;
3719 else if (min_key.type < (u8)-1)
3720 min_key.type++;
3721 else if (min_key.objectid < (u64)-1)
3722 min_key.objectid++;
3723 else
3724 break;
3725 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003726 if (ins_nr) {
Liu Bod2794402012-08-29 01:07:56 -06003727 ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
Chris Mason31ff1cd2008-09-11 16:17:57 -04003728 ins_nr, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003729 if (ret) {
3730 err = ret;
3731 goto out_unlock;
3732 }
Chris Mason31ff1cd2008-09-11 16:17:57 -04003733 ins_nr = 0;
3734 }
Josef Bacik5dc562c2012-08-17 13:14:17 -04003735
Josef Bacika95249b2012-10-11 16:17:34 -04003736log_extents:
Josef Bacik5dc562c2012-08-17 13:14:17 -04003737 if (fast_search) {
Josef Bacik5dc562c2012-08-17 13:14:17 -04003738 btrfs_release_path(dst_path);
Josef Bacik70c8a912012-10-11 16:54:30 -04003739 ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003740 if (ret) {
3741 err = ret;
3742 goto out_unlock;
3743 }
Liu Bo06d3d222012-08-27 10:52:19 -06003744 } else {
3745 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3746 struct extent_map *em, *n;
3747
Miao Xiebbe14262012-11-01 07:34:54 +00003748 write_lock(&tree->lock);
Liu Bo06d3d222012-08-27 10:52:19 -06003749 list_for_each_entry_safe(em, n, &tree->modified_extents, list)
3750 list_del_init(&em->list);
Miao Xiebbe14262012-11-01 07:34:54 +00003751 write_unlock(&tree->lock);
Josef Bacik5dc562c2012-08-17 13:14:17 -04003752 }
3753
Chris Mason9623f9a2008-09-11 17:42:42 -04003754 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
David Sterbab3b4aa72011-04-21 01:20:15 +02003755 btrfs_release_path(path);
3756 btrfs_release_path(dst_path);
Chris Masone02119d2008-09-05 16:13:11 -04003757 ret = log_directory_changes(trans, root, inode, path, dst_path);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003758 if (ret) {
3759 err = ret;
3760 goto out_unlock;
3761 }
Chris Masone02119d2008-09-05 16:13:11 -04003762 }
Chris Mason3a5f1d42008-09-11 15:53:37 -04003763 BTRFS_I(inode)->logged_trans = trans->transid;
Liu Bo46d8bc32012-08-29 01:07:55 -06003764 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003765out_unlock:
Josef Bacik2ab28f32012-10-12 15:27:49 -04003766 if (err)
3767 btrfs_free_logged_extents(log, log->log_transid);
Chris Masone02119d2008-09-05 16:13:11 -04003768 mutex_unlock(&BTRFS_I(inode)->log_mutex);
3769
3770 btrfs_free_path(path);
3771 btrfs_free_path(dst_path);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003772 return err;
Chris Masone02119d2008-09-05 16:13:11 -04003773}
3774
Chris Mason12fcfd22009-03-24 10:24:20 -04003775/*
3776 * follow the dentry parent pointers up the chain and see if any
3777 * of the directories in it require a full commit before they can
3778 * be logged. Returns zero if nothing special needs to be done or 1 if
3779 * a full commit is required.
3780 */
3781static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
3782 struct inode *inode,
3783 struct dentry *parent,
3784 struct super_block *sb,
3785 u64 last_committed)
Chris Masone02119d2008-09-05 16:13:11 -04003786{
Chris Mason12fcfd22009-03-24 10:24:20 -04003787 int ret = 0;
3788 struct btrfs_root *root;
Josef Bacik6a912212010-11-20 09:48:00 +00003789 struct dentry *old_parent = NULL;
Chris Masone02119d2008-09-05 16:13:11 -04003790
Chris Masonaf4176b2009-03-24 10:24:31 -04003791 /*
3792 * for regular files, if its inode is already on disk, we don't
3793 * have to worry about the parents at all. This is because
3794 * we can use the last_unlink_trans field to record renames
3795 * and other fun in this file.
3796 */
3797 if (S_ISREG(inode->i_mode) &&
3798 BTRFS_I(inode)->generation <= last_committed &&
3799 BTRFS_I(inode)->last_unlink_trans <= last_committed)
3800 goto out;
3801
Chris Mason12fcfd22009-03-24 10:24:20 -04003802 if (!S_ISDIR(inode->i_mode)) {
3803 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
3804 goto out;
3805 inode = parent->d_inode;
3806 }
3807
3808 while (1) {
3809 BTRFS_I(inode)->logged_trans = trans->transid;
3810 smp_mb();
3811
3812 if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
3813 root = BTRFS_I(inode)->root;
3814
3815 /*
3816 * make sure any commits to the log are forced
3817 * to be full commits
3818 */
3819 root->fs_info->last_trans_log_full_commit =
3820 trans->transid;
3821 ret = 1;
3822 break;
3823 }
3824
3825 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
3826 break;
3827
Yan, Zheng76dda932009-09-21 16:00:26 -04003828 if (IS_ROOT(parent))
Chris Mason12fcfd22009-03-24 10:24:20 -04003829 break;
3830
Josef Bacik6a912212010-11-20 09:48:00 +00003831 parent = dget_parent(parent);
3832 dput(old_parent);
3833 old_parent = parent;
Chris Mason12fcfd22009-03-24 10:24:20 -04003834 inode = parent->d_inode;
3835
3836 }
Josef Bacik6a912212010-11-20 09:48:00 +00003837 dput(old_parent);
Chris Mason12fcfd22009-03-24 10:24:20 -04003838out:
Chris Masone02119d2008-09-05 16:13:11 -04003839 return ret;
3840}
3841
3842/*
3843 * helper function around btrfs_log_inode to make sure newly created
3844 * parent directories also end up in the log. A minimal inode and backref
3845 * only logging is done of any parent directories that are older than
3846 * the last committed transaction
3847 */
Eric Sandeen48a3b632013-04-25 20:41:01 +00003848static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3849 struct btrfs_root *root, struct inode *inode,
3850 struct dentry *parent, int exists_only)
Chris Masone02119d2008-09-05 16:13:11 -04003851{
Chris Mason12fcfd22009-03-24 10:24:20 -04003852 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
Chris Masone02119d2008-09-05 16:13:11 -04003853 struct super_block *sb;
Josef Bacik6a912212010-11-20 09:48:00 +00003854 struct dentry *old_parent = NULL;
Chris Mason12fcfd22009-03-24 10:24:20 -04003855 int ret = 0;
3856 u64 last_committed = root->fs_info->last_trans_committed;
3857
3858 sb = inode->i_sb;
3859
Sage Weil3a5e1402009-04-02 16:49:40 -04003860 if (btrfs_test_opt(root, NOTREELOG)) {
3861 ret = 1;
3862 goto end_no_trans;
3863 }
3864
Chris Mason12fcfd22009-03-24 10:24:20 -04003865 if (root->fs_info->last_trans_log_full_commit >
3866 root->fs_info->last_trans_committed) {
3867 ret = 1;
3868 goto end_no_trans;
3869 }
3870
Yan, Zheng76dda932009-09-21 16:00:26 -04003871 if (root != BTRFS_I(inode)->root ||
3872 btrfs_root_refs(&root->root_item) == 0) {
3873 ret = 1;
3874 goto end_no_trans;
3875 }
3876
Chris Mason12fcfd22009-03-24 10:24:20 -04003877 ret = check_parent_dirs_for_sync(trans, inode, parent,
3878 sb, last_committed);
3879 if (ret)
3880 goto end_no_trans;
Chris Masone02119d2008-09-05 16:13:11 -04003881
Josef Bacik22ee6982012-05-29 16:57:49 -04003882 if (btrfs_inode_in_log(inode, trans->transid)) {
Chris Mason257c62e2009-10-13 13:21:08 -04003883 ret = BTRFS_NO_LOG_SYNC;
3884 goto end_no_trans;
3885 }
3886
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003887 ret = start_log_trans(trans, root);
3888 if (ret)
3889 goto end_trans;
Chris Mason12fcfd22009-03-24 10:24:20 -04003890
3891 ret = btrfs_log_inode(trans, root, inode, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003892 if (ret)
3893 goto end_trans;
Chris Mason12fcfd22009-03-24 10:24:20 -04003894
Chris Masonaf4176b2009-03-24 10:24:31 -04003895 /*
3896 * for regular files, if its inode is already on disk, we don't
3897 * have to worry about the parents at all. This is because
3898 * we can use the last_unlink_trans field to record renames
3899 * and other fun in this file.
3900 */
3901 if (S_ISREG(inode->i_mode) &&
3902 BTRFS_I(inode)->generation <= last_committed &&
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003903 BTRFS_I(inode)->last_unlink_trans <= last_committed) {
3904 ret = 0;
3905 goto end_trans;
3906 }
Chris Masonaf4176b2009-03-24 10:24:31 -04003907
3908 inode_only = LOG_INODE_EXISTS;
Chris Masond3977122009-01-05 21:25:51 -05003909 while (1) {
Chris Mason12fcfd22009-03-24 10:24:20 -04003910 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
Chris Masone02119d2008-09-05 16:13:11 -04003911 break;
3912
Chris Mason12fcfd22009-03-24 10:24:20 -04003913 inode = parent->d_inode;
Yan, Zheng76dda932009-09-21 16:00:26 -04003914 if (root != BTRFS_I(inode)->root)
3915 break;
3916
Chris Mason12fcfd22009-03-24 10:24:20 -04003917 if (BTRFS_I(inode)->generation >
3918 root->fs_info->last_trans_committed) {
3919 ret = btrfs_log_inode(trans, root, inode, inode_only);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003920 if (ret)
3921 goto end_trans;
Chris Mason12fcfd22009-03-24 10:24:20 -04003922 }
Yan, Zheng76dda932009-09-21 16:00:26 -04003923 if (IS_ROOT(parent))
Chris Masone02119d2008-09-05 16:13:11 -04003924 break;
Chris Mason12fcfd22009-03-24 10:24:20 -04003925
Josef Bacik6a912212010-11-20 09:48:00 +00003926 parent = dget_parent(parent);
3927 dput(old_parent);
3928 old_parent = parent;
Chris Masone02119d2008-09-05 16:13:11 -04003929 }
Chris Mason12fcfd22009-03-24 10:24:20 -04003930 ret = 0;
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003931end_trans:
Josef Bacik6a912212010-11-20 09:48:00 +00003932 dput(old_parent);
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003933 if (ret < 0) {
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003934 root->fs_info->last_trans_log_full_commit = trans->transid;
3935 ret = 1;
3936 }
Chris Mason12fcfd22009-03-24 10:24:20 -04003937 btrfs_end_log_trans(root);
3938end_no_trans:
3939 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04003940}
3941
3942/*
3943 * it is not safe to log dentry if the chunk root has added new
3944 * chunks. This returns 0 if the dentry was logged, and 1 otherwise.
3945 * If this returns 1, you must commit the transaction to safely get your
3946 * data on disk.
3947 */
3948int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
3949 struct btrfs_root *root, struct dentry *dentry)
3950{
Josef Bacik6a912212010-11-20 09:48:00 +00003951 struct dentry *parent = dget_parent(dentry);
3952 int ret;
3953
3954 ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
3955 dput(parent);
3956
3957 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04003958}
3959
/*
 * should be called during mount to recover and replay any log trees
 * from the FS
 */
3964int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3965{
3966 int ret;
3967 struct btrfs_path *path;
3968 struct btrfs_trans_handle *trans;
3969 struct btrfs_key key;
3970 struct btrfs_key found_key;
3971 struct btrfs_key tmp_key;
3972 struct btrfs_root *log;
3973 struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
3974 struct walk_control wc = {
3975 .process_func = process_one_buffer,
3976 .stage = 0,
3977 };
3978
Chris Masone02119d2008-09-05 16:13:11 -04003979 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00003980 if (!path)
3981 return -ENOMEM;
3982
3983 fs_info->log_root_recovering = 1;
Chris Masone02119d2008-09-05 16:13:11 -04003984
Yan, Zheng4a500fd2010-05-16 10:49:59 -04003985 trans = btrfs_start_transaction(fs_info->tree_root, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01003986 if (IS_ERR(trans)) {
3987 ret = PTR_ERR(trans);
3988 goto error;
3989 }
Chris Masone02119d2008-09-05 16:13:11 -04003990
3991 wc.trans = trans;
3992 wc.pin = 1;
3993
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00003994 ret = walk_log_tree(trans, log_root_tree, &wc);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01003995 if (ret) {
3996 btrfs_error(fs_info, ret, "Failed to pin buffers while "
3997 "recovering log root tree.");
3998 goto error;
3999 }
Chris Masone02119d2008-09-05 16:13:11 -04004000
4001again:
4002 key.objectid = BTRFS_TREE_LOG_OBJECTID;
4003 key.offset = (u64)-1;
4004 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
4005
Chris Masond3977122009-01-05 21:25:51 -05004006 while (1) {
Chris Masone02119d2008-09-05 16:13:11 -04004007 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004008
4009 if (ret < 0) {
4010 btrfs_error(fs_info, ret,
4011 "Couldn't find tree log root.");
4012 goto error;
4013 }
Chris Masone02119d2008-09-05 16:13:11 -04004014 if (ret > 0) {
4015 if (path->slots[0] == 0)
4016 break;
4017 path->slots[0]--;
4018 }
4019 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
4020 path->slots[0]);
David Sterbab3b4aa72011-04-21 01:20:15 +02004021 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04004022 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
4023 break;
4024
Miao Xiecb517ea2013-05-15 07:48:19 +00004025 log = btrfs_read_fs_root(log_root_tree, &found_key);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004026 if (IS_ERR(log)) {
4027 ret = PTR_ERR(log);
4028 btrfs_error(fs_info, ret,
4029 "Couldn't read tree log root.");
4030 goto error;
4031 }
Chris Masone02119d2008-09-05 16:13:11 -04004032
4033 tmp_key.objectid = found_key.offset;
4034 tmp_key.type = BTRFS_ROOT_ITEM_KEY;
4035 tmp_key.offset = (u64)-1;
4036
4037 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004038 if (IS_ERR(wc.replay_dest)) {
4039 ret = PTR_ERR(wc.replay_dest);
Josef Bacikb50c6e22013-04-25 15:55:30 -04004040 free_extent_buffer(log->node);
4041 free_extent_buffer(log->commit_root);
4042 kfree(log);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004043 btrfs_error(fs_info, ret, "Couldn't read target root "
4044 "for tree log recovery.");
4045 goto error;
4046 }
Chris Masone02119d2008-09-05 16:13:11 -04004047
Yan Zheng07d400a2009-01-06 11:42:00 -05004048 wc.replay_dest->log_root = log;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004049 btrfs_record_root_in_trans(trans, wc.replay_dest);
Chris Masone02119d2008-09-05 16:13:11 -04004050 ret = walk_log_tree(trans, log, &wc);
Chris Masone02119d2008-09-05 16:13:11 -04004051
Josef Bacikb50c6e22013-04-25 15:55:30 -04004052 if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
Chris Masone02119d2008-09-05 16:13:11 -04004053 ret = fixup_inode_link_counts(trans, wc.replay_dest,
4054 path);
Chris Masone02119d2008-09-05 16:13:11 -04004055 }
Chris Masone02119d2008-09-05 16:13:11 -04004056
4057 key.offset = found_key.offset - 1;
Yan Zheng07d400a2009-01-06 11:42:00 -05004058 wc.replay_dest->log_root = NULL;
Chris Masone02119d2008-09-05 16:13:11 -04004059 free_extent_buffer(log->node);
Chris Masonb263c2c2009-06-11 11:24:47 -04004060 free_extent_buffer(log->commit_root);
Chris Masone02119d2008-09-05 16:13:11 -04004061 kfree(log);
4062
Josef Bacikb50c6e22013-04-25 15:55:30 -04004063 if (ret)
4064 goto error;
4065
Chris Masone02119d2008-09-05 16:13:11 -04004066 if (found_key.offset == 0)
4067 break;
4068 }
David Sterbab3b4aa72011-04-21 01:20:15 +02004069 btrfs_release_path(path);
Chris Masone02119d2008-09-05 16:13:11 -04004070
4071 /* step one is to pin it all, step two is to replay just inodes */
4072 if (wc.pin) {
4073 wc.pin = 0;
4074 wc.process_func = replay_one_buffer;
4075 wc.stage = LOG_WALK_REPLAY_INODES;
4076 goto again;
4077 }
4078 /* step three is to replay everything */
4079 if (wc.stage < LOG_WALK_REPLAY_ALL) {
4080 wc.stage++;
4081 goto again;
4082 }
4083
4084 btrfs_free_path(path);
4085
Josef Bacikabefa552013-04-24 16:40:05 -04004086 /* step 4: commit the transaction, which also unpins the blocks */
4087 ret = btrfs_commit_transaction(trans, fs_info->tree_root);
4088 if (ret)
4089 return ret;
4090
Chris Masone02119d2008-09-05 16:13:11 -04004091 free_extent_buffer(log_root_tree->node);
4092 log_root_tree->log_root = NULL;
4093 fs_info->log_root_recovering = 0;
Chris Masone02119d2008-09-05 16:13:11 -04004094 kfree(log_root_tree);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004095
Josef Bacikabefa552013-04-24 16:40:05 -04004096 return 0;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004097error:
Josef Bacikb50c6e22013-04-25 15:55:30 -04004098 if (wc.trans)
4099 btrfs_end_transaction(wc.trans, fs_info->tree_root);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004100 btrfs_free_path(path);
4101 return ret;
Chris Masone02119d2008-09-05 16:13:11 -04004102}
Chris Mason12fcfd22009-03-24 10:24:20 -04004103
4104/*
4105 * there are some corner cases where we want to force a full
4106 * commit instead of allowing a directory to be logged.
4107 *
4108 * They revolve around files there were unlinked from the directory, and
4109 * this function updates the parent directory so that a full commit is
4110 * properly done if it is fsync'd later after the unlinks are done.
4111 */
4112void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
4113 struct inode *dir, struct inode *inode,
4114 int for_rename)
4115{
4116 /*
Chris Masonaf4176b2009-03-24 10:24:31 -04004117 * when we're logging a file, if it hasn't been renamed
4118 * or unlinked, and its inode is fully committed on disk,
4119 * we don't have to worry about walking up the directory chain
4120 * to log its parents.
4121 *
4122 * So, we use the last_unlink_trans field to put this transid
4123 * into the file. When the file is logged we check it and
4124 * don't log the parents if the file is fully on disk.
4125 */
4126 if (S_ISREG(inode->i_mode))
4127 BTRFS_I(inode)->last_unlink_trans = trans->transid;
4128
4129 /*
Chris Mason12fcfd22009-03-24 10:24:20 -04004130 * if this directory was already logged any new
4131 * names for this file/dir will get recorded
4132 */
4133 smp_mb();
4134 if (BTRFS_I(dir)->logged_trans == trans->transid)
4135 return;
4136
4137 /*
4138 * if the inode we're about to unlink was logged,
4139 * the log will be properly updated for any new names
4140 */
4141 if (BTRFS_I(inode)->logged_trans == trans->transid)
4142 return;
4143
4144 /*
4145 * when renaming files across directories, if the directory
4146 * there we're unlinking from gets fsync'd later on, there's
4147 * no way to find the destination directory later and fsync it
4148 * properly. So, we have to be conservative and force commits
4149 * so the new name gets discovered.
4150 */
4151 if (for_rename)
4152 goto record;
4153
4154 /* we can safely do the unlink without any special recording */
4155 return;
4156
4157record:
4158 BTRFS_I(dir)->last_unlink_trans = trans->transid;
4159}
4160
4161/*
4162 * Call this after adding a new name for a file and it will properly
4163 * update the log to reflect the new name.
4164 *
4165 * It will return zero if all goes well, and it will return 1 if a
4166 * full transaction commit is required.
4167 */
4168int btrfs_log_new_name(struct btrfs_trans_handle *trans,
4169 struct inode *inode, struct inode *old_dir,
4170 struct dentry *parent)
4171{
4172 struct btrfs_root * root = BTRFS_I(inode)->root;
4173
4174 /*
Chris Masonaf4176b2009-03-24 10:24:31 -04004175 * this will force the logging code to walk the dentry chain
4176 * up for the file
4177 */
4178 if (S_ISREG(inode->i_mode))
4179 BTRFS_I(inode)->last_unlink_trans = trans->transid;
4180
4181 /*
Chris Mason12fcfd22009-03-24 10:24:20 -04004182 * if this inode hasn't been logged and directory we're renaming it
4183 * from hasn't been logged, we don't need to log it
4184 */
4185 if (BTRFS_I(inode)->logged_trans <=
4186 root->fs_info->last_trans_committed &&
4187 (!old_dir || BTRFS_I(old_dir)->logged_trans <=
4188 root->fs_info->last_trans_committed))
4189 return 0;
4190
4191 return btrfs_log_inode_parent(trans, root, inode, parent, 1);
4192}
4193