blob: c9c96c7825dc0e007d80bd1bc34004a3d86a051a [file] [log] [blame]
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -07001/*
2 * recovery.c - NILFS recovery logic
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 */
22
23#include <linux/buffer_head.h>
24#include <linux/blkdev.h>
25#include <linux/swap.h>
26#include <linux/crc32.h>
27#include "nilfs.h"
28#include "segment.h"
29#include "sufile.h"
30#include "page.h"
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -070031#include "segbuf.h"
32
/*
 * Segment check result
 *
 * Status codes produced while validating a partial segment.
 * NILFS_SEG_VALID is zero, so a result can be tested with a plain
 * "if (ret)"; every other code names the check that failed.
 */
enum {
	NILFS_SEG_VALID = 0,
	NILFS_SEG_NO_SUPER_ROOT = 1,
	NILFS_SEG_FAIL_IO = 2,
	NILFS_SEG_FAIL_MAGIC = 3,
	NILFS_SEG_FAIL_SEQ = 4,
	NILFS_SEG_FAIL_CHECKSUM_SEGSUM = 5,
	NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT = 6,
	NILFS_SEG_FAIL_CHECKSUM_FULL = 7,
	NILFS_SEG_FAIL_CONSISTENCY = 8,
};
47
48/* work structure for recovery */
49struct nilfs_recovery_block {
50 ino_t ino; /* Inode number of the file that this block
51 belongs to */
52 sector_t blocknr; /* block number */
53 __u64 vblocknr; /* virtual block number */
54 unsigned long blkoff; /* File offset of the data block (per block) */
55 struct list_head list;
56};
57
58
59static int nilfs_warn_segment_error(int err)
60{
61 switch (err) {
62 case NILFS_SEG_FAIL_IO:
63 printk(KERN_WARNING
64 "NILFS warning: I/O error on loading last segment\n");
65 return -EIO;
66 case NILFS_SEG_FAIL_MAGIC:
67 printk(KERN_WARNING
68 "NILFS warning: Segment magic number invalid\n");
69 break;
70 case NILFS_SEG_FAIL_SEQ:
71 printk(KERN_WARNING
72 "NILFS warning: Sequence number mismatch\n");
73 break;
74 case NILFS_SEG_FAIL_CHECKSUM_SEGSUM:
75 printk(KERN_WARNING
76 "NILFS warning: Checksum error in segment summary\n");
77 break;
78 case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
79 printk(KERN_WARNING
80 "NILFS warning: Checksum error in super root\n");
81 break;
82 case NILFS_SEG_FAIL_CHECKSUM_FULL:
83 printk(KERN_WARNING
84 "NILFS warning: Checksum error in segment payload\n");
85 break;
86 case NILFS_SEG_FAIL_CONSISTENCY:
87 printk(KERN_WARNING
88 "NILFS warning: Inconsistent segment\n");
89 break;
90 case NILFS_SEG_NO_SUPER_ROOT:
91 printk(KERN_WARNING
92 "NILFS warning: No super root in the last segment\n");
93 break;
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -070094 }
95 return -EINVAL;
96}
97
98static void store_segsum_info(struct nilfs_segsum_info *ssi,
99 struct nilfs_segment_summary *sum,
100 unsigned int blocksize)
101{
102 ssi->flags = le16_to_cpu(sum->ss_flags);
103 ssi->seg_seq = le64_to_cpu(sum->ss_seq);
104 ssi->ctime = le64_to_cpu(sum->ss_create);
105 ssi->next = le64_to_cpu(sum->ss_next);
106 ssi->nblocks = le32_to_cpu(sum->ss_nblocks);
107 ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo);
108 ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes);
109
110 ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
111 ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
112}
113
114/**
115 * calc_crc_cont - check CRC of blocks continuously
116 * @sbi: nilfs_sb_info
117 * @bhs: buffer head of start block
118 * @sum: place to store result
119 * @offset: offset bytes in the first block
120 * @check_bytes: number of bytes to be checked
121 * @start: DBN of start block
122 * @nblock: number of blocks to be checked
123 */
124static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs,
125 u32 *sum, unsigned long offset, u64 check_bytes,
126 sector_t start, unsigned long nblock)
127{
128 unsigned long blocksize = sbi->s_super->s_blocksize;
129 unsigned long size;
130 u32 crc;
131
132 BUG_ON(offset >= blocksize);
133 check_bytes -= offset;
134 size = min_t(u64, check_bytes, blocksize - offset);
135 crc = crc32_le(sbi->s_nilfs->ns_crc_seed,
136 (unsigned char *)bhs->b_data + offset, size);
137 if (--nblock > 0) {
138 do {
139 struct buffer_head *bh
140 = sb_bread(sbi->s_super, ++start);
141 if (!bh)
142 return -EIO;
143 check_bytes -= size;
144 size = min_t(u64, check_bytes, blocksize);
145 crc = crc32_le(crc, bh->b_data, size);
146 brelse(bh);
147 } while (--nblock > 0);
148 }
149 *sum = crc;
150 return 0;
151}
152
153/**
154 * nilfs_read_super_root_block - read super root block
155 * @sb: super_block
156 * @sr_block: disk block number of the super root block
157 * @pbh: address of a buffer_head pointer to return super root buffer
158 * @check: CRC check flag
159 */
160int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
161 struct buffer_head **pbh, int check)
162{
163 struct buffer_head *bh_sr;
164 struct nilfs_super_root *sr;
165 u32 crc;
166 int ret;
167
168 *pbh = NULL;
169 bh_sr = sb_bread(sb, sr_block);
170 if (unlikely(!bh_sr)) {
171 ret = NILFS_SEG_FAIL_IO;
172 goto failed;
173 }
174
175 sr = (struct nilfs_super_root *)bh_sr->b_data;
176 if (check) {
177 unsigned bytes = le16_to_cpu(sr->sr_bytes);
178
179 if (bytes == 0 || bytes > sb->s_blocksize) {
180 ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
181 goto failed_bh;
182 }
183 if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc,
184 sizeof(sr->sr_sum), bytes, sr_block, 1)) {
185 ret = NILFS_SEG_FAIL_IO;
186 goto failed_bh;
187 }
188 if (crc != le32_to_cpu(sr->sr_sum)) {
189 ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
190 goto failed_bh;
191 }
192 }
193 *pbh = bh_sr;
194 return 0;
195
196 failed_bh:
197 brelse(bh_sr);
198
199 failed:
200 return nilfs_warn_segment_error(ret);
201}
202
203/**
204 * load_segment_summary - read segment summary of the specified partial segment
205 * @sbi: nilfs_sb_info
206 * @pseg_start: start disk block number of partial segment
207 * @seg_seq: sequence number requested
208 * @ssi: pointer to nilfs_segsum_info struct to store information
209 * @full_check: full check flag
210 * (0: only checks segment summary CRC, 1: data CRC)
211 */
212static int
213load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start,
214 u64 seg_seq, struct nilfs_segsum_info *ssi,
215 int full_check)
216{
217 struct buffer_head *bh_sum;
218 struct nilfs_segment_summary *sum;
219 unsigned long offset, nblock;
220 u64 check_bytes;
221 u32 crc, crc_sum;
222 int ret = NILFS_SEG_FAIL_IO;
223
224 bh_sum = sb_bread(sbi->s_super, pseg_start);
225 if (!bh_sum)
226 goto out;
227
228 sum = (struct nilfs_segment_summary *)bh_sum->b_data;
229
230 /* Check consistency of segment summary */
231 if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) {
232 ret = NILFS_SEG_FAIL_MAGIC;
233 goto failed;
234 }
235 store_segsum_info(ssi, sum, sbi->s_super->s_blocksize);
236 if (seg_seq != ssi->seg_seq) {
237 ret = NILFS_SEG_FAIL_SEQ;
238 goto failed;
239 }
240 if (full_check) {
241 offset = sizeof(sum->ss_datasum);
242 check_bytes =
243 ((u64)ssi->nblocks << sbi->s_super->s_blocksize_bits);
244 nblock = ssi->nblocks;
245 crc_sum = le32_to_cpu(sum->ss_datasum);
246 ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
247 } else { /* only checks segment summary */
248 offset = sizeof(sum->ss_datasum) + sizeof(sum->ss_sumsum);
249 check_bytes = ssi->sumbytes;
250 nblock = ssi->nsumblk;
251 crc_sum = le32_to_cpu(sum->ss_sumsum);
252 ret = NILFS_SEG_FAIL_CHECKSUM_SEGSUM;
253 }
254
255 if (unlikely(nblock == 0 ||
256 nblock > sbi->s_nilfs->ns_blocks_per_segment)) {
257 /* This limits the number of blocks read in the CRC check */
258 ret = NILFS_SEG_FAIL_CONSISTENCY;
259 goto failed;
260 }
261 if (calc_crc_cont(sbi, bh_sum, &crc, offset, check_bytes,
262 pseg_start, nblock)) {
263 ret = NILFS_SEG_FAIL_IO;
264 goto failed;
265 }
266 if (crc == crc_sum)
267 ret = 0;
268 failed:
269 brelse(bh_sum);
270 out:
271 return ret;
272}
273
274static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
275 unsigned int *offset, unsigned int bytes)
276{
277 void *ptr;
278 sector_t blocknr;
279
280 BUG_ON((*pbh)->b_size < *offset);
281 if (bytes > (*pbh)->b_size - *offset) {
282 blocknr = (*pbh)->b_blocknr;
283 brelse(*pbh);
284 *pbh = sb_bread(sb, blocknr + 1);
285 if (unlikely(!*pbh))
286 return NULL;
287 *offset = 0;
288 }
289 ptr = (*pbh)->b_data + *offset;
290 *offset += bytes;
291 return ptr;
292}
293
294static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
295 unsigned int *offset, unsigned int bytes,
296 unsigned long count)
297{
298 unsigned int rest_item_in_current_block
299 = ((*pbh)->b_size - *offset) / bytes;
300
301 if (count <= rest_item_in_current_block) {
302 *offset += bytes * count;
303 } else {
304 sector_t blocknr = (*pbh)->b_blocknr;
305 unsigned int nitem_per_block = (*pbh)->b_size / bytes;
306 unsigned int bcnt;
307
308 count -= rest_item_in_current_block;
309 bcnt = DIV_ROUND_UP(count, nitem_per_block);
310 *offset = bytes * (count - (bcnt - 1) * nitem_per_block);
311
312 brelse(*pbh);
313 *pbh = sb_bread(sb, blocknr + bcnt);
314 }
315}
316
317static int
318collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
319 struct nilfs_segsum_info *ssi,
320 struct list_head *head)
321{
322 struct buffer_head *bh;
323 unsigned int offset;
324 unsigned long nfinfo = ssi->nfinfo;
325 sector_t blocknr = sum_blocknr + ssi->nsumblk;
326 ino_t ino;
327 int err = -EIO;
328
329 if (!nfinfo)
330 return 0;
331
332 bh = sb_bread(sbi->s_super, sum_blocknr);
333 if (unlikely(!bh))
334 goto out;
335
336 offset = le16_to_cpu(
337 ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
338 for (;;) {
339 unsigned long nblocks, ndatablk, nnodeblk;
340 struct nilfs_finfo *finfo;
341
342 finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo));
343 if (unlikely(!finfo))
344 goto out;
345
346 ino = le64_to_cpu(finfo->fi_ino);
347 nblocks = le32_to_cpu(finfo->fi_nblocks);
348 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
349 nnodeblk = nblocks - ndatablk;
350
351 while (ndatablk-- > 0) {
352 struct nilfs_recovery_block *rb;
353 struct nilfs_binfo_v *binfo;
354
355 binfo = segsum_get(sbi->s_super, &bh, &offset,
356 sizeof(*binfo));
357 if (unlikely(!binfo))
358 goto out;
359
360 rb = kmalloc(sizeof(*rb), GFP_NOFS);
361 if (unlikely(!rb)) {
362 err = -ENOMEM;
363 goto out;
364 }
365 rb->ino = ino;
366 rb->blocknr = blocknr++;
367 rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
368 rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
369 /* INIT_LIST_HEAD(&rb->list); */
370 list_add_tail(&rb->list, head);
371 }
372 if (--nfinfo == 0)
373 break;
374 blocknr += nnodeblk; /* always 0 for the data sync segments */
375 segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64),
376 nnodeblk);
377 if (unlikely(!bh))
378 goto out;
379 }
380 err = 0;
381 out:
382 brelse(bh); /* brelse(NULL) is just ignored */
383 return err;
384}
385
386static void dispose_recovery_list(struct list_head *head)
387{
388 while (!list_empty(head)) {
389 struct nilfs_recovery_block *rb
390 = list_entry(head->next,
391 struct nilfs_recovery_block, list);
392 list_del(&rb->list);
393 kfree(rb);
394 }
395}
396
Ryusuke Konishi654137d2009-05-17 19:07:21 +0900397struct nilfs_segment_entry {
398 struct list_head list;
399 __u64 segnum;
400};
401
402static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
403{
404 struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS);
405
406 if (unlikely(!ent))
407 return -ENOMEM;
408
409 ent->segnum = segnum;
410 INIT_LIST_HEAD(&ent->list);
411 list_add_tail(&ent->list, head);
412 return 0;
413}
414
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700415void nilfs_dispose_segment_list(struct list_head *head)
416{
417 while (!list_empty(head)) {
418 struct nilfs_segment_entry *ent
419 = list_entry(head->next,
420 struct nilfs_segment_entry, list);
421 list_del(&ent->list);
Ryusuke Konishi654137d2009-05-17 19:07:21 +0900422 kfree(ent);
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700423 }
424}
425
426static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
Ryusuke Konishi85c2a742009-04-28 23:38:46 +0900427 struct nilfs_sb_info *sbi,
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700428 struct nilfs_recovery_info *ri)
429{
430 struct list_head *head = &ri->ri_used_segments;
431 struct nilfs_segment_entry *ent, *n;
432 struct inode *sufile = nilfs->ns_sufile;
433 __u64 segnum[4];
434 int err;
435 int i;
436
437 segnum[0] = nilfs->ns_segnum;
438 segnum[1] = nilfs->ns_nextnum;
439 segnum[2] = ri->ri_segnum;
440 segnum[3] = ri->ri_nextnum;
441
Ryusuke Konishi85c2a742009-04-28 23:38:46 +0900442 nilfs_attach_writer(nilfs, sbi);
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700443 /*
444 * Releasing the next segment of the latest super root.
445 * The next segment is invalidated by this recovery.
446 */
447 err = nilfs_sufile_free(sufile, segnum[1]);
448 if (unlikely(err))
449 goto failed;
450
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700451 for (i = 1; i < 4; i++) {
Ryusuke Konishi654137d2009-05-17 19:07:21 +0900452 err = nilfs_segment_list_add(head, segnum[i]);
453 if (unlikely(err))
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700454 goto failed;
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700455 }
456
457 /*
458 * Collecting segments written after the latest super root.
Ryusuke Konishi2c2e52f2009-04-06 19:01:54 -0700459 * These are marked dirty to avoid being reallocated in the next write.
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700460 */
461 list_for_each_entry_safe(ent, n, head, list) {
Ryusuke Konishic85399c2009-04-05 18:30:58 +0900462 if (ent->segnum != segnum[0]) {
463 err = nilfs_sufile_scrap(sufile, ent->segnum);
464 if (unlikely(err))
465 goto failed;
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700466 }
Ryusuke Konishi2c2e52f2009-04-06 19:01:54 -0700467 list_del(&ent->list);
Ryusuke Konishi654137d2009-05-17 19:07:21 +0900468 kfree(ent);
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700469 }
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700470
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700471 /* Allocate new segments for recovery */
472 err = nilfs_sufile_alloc(sufile, &segnum[0]);
473 if (unlikely(err))
474 goto failed;
475
476 nilfs->ns_pseg_offset = 0;
477 nilfs->ns_seg_seq = ri->ri_seq + 2;
478 nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700479
480 failed:
481 /* No need to recover sufile because it will be destroyed on error */
Ryusuke Konishi85c2a742009-04-28 23:38:46 +0900482 nilfs_detach_writer(nilfs, sbi);
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700483 return err;
484}
485
486static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi,
487 struct nilfs_recovery_block *rb,
488 struct page *page)
489{
490 struct buffer_head *bh_org;
491 void *kaddr;
492
493 bh_org = sb_bread(sbi->s_super, rb->blocknr);
494 if (unlikely(!bh_org))
495 return -EIO;
496
497 kaddr = kmap_atomic(page, KM_USER0);
498 memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
499 kunmap_atomic(kaddr, KM_USER0);
500 brelse(bh_org);
501 return 0;
502}
503
504static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
505 struct list_head *head,
506 unsigned long *nr_salvaged_blocks)
507{
508 struct inode *inode;
509 struct nilfs_recovery_block *rb, *n;
510 unsigned blocksize = sbi->s_super->s_blocksize;
511 struct page *page;
512 loff_t pos;
513 int err = 0, err2 = 0;
514
515 list_for_each_entry_safe(rb, n, head, list) {
516 inode = nilfs_iget(sbi->s_super, rb->ino);
517 if (IS_ERR(inode)) {
518 err = PTR_ERR(inode);
519 inode = NULL;
520 goto failed_inode;
521 }
522
523 pos = rb->blkoff << inode->i_blkbits;
524 page = NULL;
525 err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
526 0, &page, NULL, nilfs_get_block);
527 if (unlikely(err))
528 goto failed_inode;
529
530 err = nilfs_recovery_copy_block(sbi, rb, page);
531 if (unlikely(err))
532 goto failed_page;
533
534 err = nilfs_set_file_dirty(sbi, inode, 1);
535 if (unlikely(err))
536 goto failed_page;
537
538 block_write_end(NULL, inode->i_mapping, pos, blocksize,
539 blocksize, page, NULL);
540
541 unlock_page(page);
542 page_cache_release(page);
543
544 (*nr_salvaged_blocks)++;
545 goto next;
546
547 failed_page:
548 unlock_page(page);
549 page_cache_release(page);
550
551 failed_inode:
552 printk(KERN_WARNING
553 "NILFS warning: error recovering data block "
554 "(err=%d, ino=%lu, block-offset=%llu)\n",
Heiko Carstensb5696e52009-09-03 17:42:48 +0200555 err, (unsigned long)rb->ino,
556 (unsigned long long)rb->blkoff);
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700557 if (!err2)
558 err2 = err;
559 next:
560 iput(inode); /* iput(NULL) is just ignored */
561 list_del_init(&rb->list);
562 kfree(rb);
563 }
564 return err2;
565}
566
567/**
568 * nilfs_do_roll_forward - salvage logical segments newer than the latest
569 * checkpoint
570 * @sbi: nilfs_sb_info
571 * @nilfs: the_nilfs
572 * @ri: pointer to a nilfs_recovery_info
573 */
574static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
575 struct nilfs_sb_info *sbi,
576 struct nilfs_recovery_info *ri)
577{
578 struct nilfs_segsum_info ssi;
579 sector_t pseg_start;
580 sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */
581 unsigned long nsalvaged_blocks = 0;
582 u64 seg_seq;
583 __u64 segnum, nextnum = 0;
584 int empty_seg = 0;
585 int err = 0, ret;
586 LIST_HEAD(dsync_blocks); /* list of data blocks to be recovered */
587 enum {
588 RF_INIT_ST,
589 RF_DSYNC_ST, /* scanning data-sync segments */
590 };
591 int state = RF_INIT_ST;
592
593 nilfs_attach_writer(nilfs, sbi);
594 pseg_start = ri->ri_lsegs_start;
595 seg_seq = ri->ri_lsegs_start_seq;
596 segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
597 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
598
599 while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {
600
601 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
602 if (ret) {
603 if (ret == NILFS_SEG_FAIL_IO) {
604 err = -EIO;
605 goto failed;
606 }
607 goto strayed;
608 }
609 if (unlikely(NILFS_SEG_HAS_SR(&ssi)))
610 goto confused;
611
612 /* Found a valid partial segment; do recovery actions */
613 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
614 empty_seg = 0;
615 nilfs->ns_ctime = ssi.ctime;
616 if (!(ssi.flags & NILFS_SS_GC))
617 nilfs->ns_nongc_ctime = ssi.ctime;
618
619 switch (state) {
620 case RF_INIT_ST:
621 if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi))
622 goto try_next_pseg;
623 state = RF_DSYNC_ST;
624 /* Fall through */
625 case RF_DSYNC_ST:
626 if (!NILFS_SEG_DSYNC(&ssi))
627 goto confused;
628
629 err = collect_blocks_from_segsum(
630 sbi, pseg_start, &ssi, &dsync_blocks);
631 if (unlikely(err))
632 goto failed;
633 if (NILFS_SEG_LOGEND(&ssi)) {
634 err = recover_dsync_blocks(
635 sbi, &dsync_blocks, &nsalvaged_blocks);
636 if (unlikely(err))
637 goto failed;
638 state = RF_INIT_ST;
639 }
640 break; /* Fall through to try_next_pseg */
641 }
642
643 try_next_pseg:
644 if (pseg_start == ri->ri_lsegs_end)
645 break;
646 pseg_start += ssi.nblocks;
647 if (pseg_start < seg_end)
648 continue;
649 goto feed_segment;
650
651 strayed:
652 if (pseg_start == ri->ri_lsegs_end)
653 break;
654
655 feed_segment:
656 /* Looking to the next full segment */
657 if (empty_seg++)
658 break;
659 seg_seq++;
660 segnum = nextnum;
661 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
662 pseg_start = seg_start;
663 }
664
665 if (nsalvaged_blocks) {
666 printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
667 sbi->s_super->s_id, nsalvaged_blocks);
668 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
669 }
670 out:
671 dispose_recovery_list(&dsync_blocks);
672 nilfs_detach_writer(sbi->s_nilfs, sbi);
673 return err;
674
675 confused:
676 err = -EINVAL;
677 failed:
678 printk(KERN_ERR
679 "NILFS (device %s): Error roll-forwarding "
680 "(err=%d, pseg block=%llu). ",
681 sbi->s_super->s_id, err, (unsigned long long)pseg_start);
682 goto out;
683}
684
685static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
686 struct nilfs_sb_info *sbi,
687 struct nilfs_recovery_info *ri)
688{
689 struct buffer_head *bh;
690 int err;
691
692 if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
693 nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
694 return;
695
696 bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start);
697 BUG_ON(!bh);
698 memset(bh->b_data, 0, bh->b_size);
699 set_buffer_dirty(bh);
700 err = sync_dirty_buffer(bh);
701 if (unlikely(err))
702 printk(KERN_WARNING
703 "NILFS warning: buffer sync write failed during "
704 "post-cleaning of recovery.\n");
705 brelse(bh);
706}
707
708/**
709 * nilfs_recover_logical_segments - salvage logical segments written after
710 * the latest super root
711 * @nilfs: the_nilfs
712 * @sbi: nilfs_sb_info
713 * @ri: pointer to a nilfs_recovery_info struct to store search results.
714 *
715 * Return Value: On success, 0 is returned. On error, one of the following
716 * negative error code is returned.
717 *
718 * %-EINVAL - Inconsistent filesystem state.
719 *
720 * %-EIO - I/O error
721 *
722 * %-ENOSPC - No space left on device (only in a panic state).
723 *
724 * %-ERESTARTSYS - Interrupted.
725 *
726 * %-ENOMEM - Insufficient memory available.
727 */
728int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
729 struct nilfs_sb_info *sbi,
730 struct nilfs_recovery_info *ri)
731{
732 int err;
733
734 if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
735 return 0;
736
737 err = nilfs_attach_checkpoint(sbi, ri->ri_cno);
738 if (unlikely(err)) {
739 printk(KERN_ERR
740 "NILFS: error loading the latest checkpoint.\n");
741 return err;
742 }
743
744 err = nilfs_do_roll_forward(nilfs, sbi, ri);
745 if (unlikely(err))
746 goto failed;
747
748 if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
Ryusuke Konishi85c2a742009-04-28 23:38:46 +0900749 err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri);
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700750 if (unlikely(err)) {
751 printk(KERN_ERR "NILFS: Error preparing segments for "
752 "recovery.\n");
753 goto failed;
754 }
755
Ryusuke Konishicece5522009-04-06 19:01:58 -0700756 err = nilfs_attach_segment_constructor(sbi);
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700757 if (unlikely(err))
758 goto failed;
759
760 set_nilfs_discontinued(nilfs);
761 err = nilfs_construct_segment(sbi->s_super);
762 nilfs_detach_segment_constructor(sbi);
763
764 if (unlikely(err)) {
765 printk(KERN_ERR "NILFS: Oops! recovery failed. "
766 "(err=%d)\n", err);
767 goto failed;
768 }
769
770 nilfs_finish_roll_forward(nilfs, sbi, ri);
771 }
772
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700773 failed:
774 nilfs_detach_checkpoint(sbi);
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700775 return err;
776}
777
778/**
779 * nilfs_search_super_root - search the latest valid super root
780 * @nilfs: the_nilfs
781 * @sbi: nilfs_sb_info
782 * @ri: pointer to a nilfs_recovery_info struct to store search results.
783 *
784 * nilfs_search_super_root() looks for the latest super-root from a partial
785 * segment pointed by the superblock. It sets up struct the_nilfs through
786 * this search. It fills nilfs_recovery_info (ri) required for recovery.
787 *
788 * Return Value: On success, 0 is returned. On error, one of the following
789 * negative error code is returned.
790 *
791 * %-EINVAL - No valid segment found
792 *
793 * %-EIO - I/O error
794 */
795int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
796 struct nilfs_recovery_info *ri)
797{
798 struct nilfs_segsum_info ssi;
799 sector_t pseg_start, pseg_end, sr_pseg_start = 0;
800 sector_t seg_start, seg_end; /* range of full segment (block number) */
Ryusuke Konishi050b4142009-11-19 22:24:48 +0900801 sector_t b, end;
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700802 u64 seg_seq;
803 __u64 segnum, nextnum = 0;
804 __u64 cno;
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700805 LIST_HEAD(segments);
806 int empty_seg = 0, scan_newer = 0;
807 int ret;
808
809 pseg_start = nilfs->ns_last_pseg;
810 seg_seq = nilfs->ns_last_seq;
811 cno = nilfs->ns_last_cno;
812 segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
813
814 /* Calculate range of segment */
815 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
816
Ryusuke Konishi050b4142009-11-19 22:24:48 +0900817 /* Read ahead segment */
818 b = seg_start;
819 while (b <= seg_end)
820 sb_breadahead(sbi->s_super, b++);
821
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700822 for (;;) {
823 /* Load segment summary */
824 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
825 if (ret) {
826 if (ret == NILFS_SEG_FAIL_IO)
827 goto failed;
828 goto strayed;
829 }
830 pseg_end = pseg_start + ssi.nblocks - 1;
831 if (unlikely(pseg_end > seg_end)) {
832 ret = NILFS_SEG_FAIL_CONSISTENCY;
833 goto strayed;
834 }
835
836 /* A valid partial segment */
837 ri->ri_pseg_start = pseg_start;
838 ri->ri_seq = seg_seq;
839 ri->ri_segnum = segnum;
840 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
841 ri->ri_nextnum = nextnum;
842 empty_seg = 0;
843
Ryusuke Konishi050b4142009-11-19 22:24:48 +0900844 if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) {
845 /* This will never happen because a superblock
846 (last_segment) always points to a pseg
847 having a super root. */
848 ret = NILFS_SEG_FAIL_CONSISTENCY;
849 goto failed;
850 }
851
852 if (pseg_start == seg_start) {
853 nilfs_get_segment_range(nilfs, nextnum, &b, &end);
854 while (b <= end)
855 sb_breadahead(sbi->s_super, b++);
856 }
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700857 if (!NILFS_SEG_HAS_SR(&ssi)) {
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700858 if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
859 ri->ri_lsegs_start = pseg_start;
860 ri->ri_lsegs_start_seq = seg_seq;
861 }
862 if (NILFS_SEG_LOGEND(&ssi))
863 ri->ri_lsegs_end = pseg_start;
864 goto try_next_pseg;
865 }
866
867 /* A valid super root was found. */
868 ri->ri_cno = cno++;
869 ri->ri_super_root = pseg_end;
870 ri->ri_lsegs_start = ri->ri_lsegs_end = 0;
871
872 nilfs_dispose_segment_list(&segments);
873 nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start)
874 + ssi.nblocks - seg_start;
875 nilfs->ns_seg_seq = seg_seq;
876 nilfs->ns_segnum = segnum;
877 nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */
878 nilfs->ns_ctime = ssi.ctime;
879 nilfs->ns_nextnum = nextnum;
880
881 if (scan_newer)
882 ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
Ryusuke Konishi2c2e52f2009-04-06 19:01:54 -0700883 else {
Ryusuke Konishi2c2e52f2009-04-06 19:01:54 -0700884 if (nilfs->ns_mount_state & NILFS_VALID_FS)
885 goto super_root_found;
886 scan_newer = 1;
887 }
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700888
889 /* reset region for roll-forward */
890 pseg_start += ssi.nblocks;
891 if (pseg_start < seg_end)
892 continue;
893 goto feed_segment;
894
895 try_next_pseg:
896 /* Standing on a course, or met an inconsistent state */
897 pseg_start += ssi.nblocks;
898 if (pseg_start < seg_end)
899 continue;
900 goto feed_segment;
901
902 strayed:
903 /* Off the trail */
904 if (!scan_newer)
905 /*
906 * This can happen if a checkpoint was written without
907 * barriers, or as a result of an I/O failure.
908 */
909 goto failed;
910
911 feed_segment:
912 /* Looking to the next full segment */
913 if (empty_seg++)
914 goto super_root_found; /* found a valid super root */
915
Ryusuke Konishi654137d2009-05-17 19:07:21 +0900916 ret = nilfs_segment_list_add(&segments, segnum);
917 if (unlikely(ret))
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700918 goto failed;
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700919
920 seg_seq++;
921 segnum = nextnum;
922 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
923 pseg_start = seg_start;
924 }
925
926 super_root_found:
927 /* Updating pointers relating to the latest checkpoint */
Ryusuke Konishi0935db72009-11-29 02:39:11 +0900928 list_splice_tail(&segments, &ri->ri_used_segments);
Ryusuke Konishi0f3e1c72009-04-06 19:01:38 -0700929 nilfs->ns_last_pseg = sr_pseg_start;
930 nilfs->ns_last_seq = nilfs->ns_seg_seq;
931 nilfs->ns_last_cno = ri->ri_cno;
932 return 0;
933
934 failed:
935 nilfs_dispose_segment_list(&segments);
936 return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
937}