blob: 06e124603807cd10ff871c0f329b2a5b922b3482 [file] [log] [blame]
Theodore Ts'o3b5386d2000-08-14 14:25:19 +00001/*
2 * linux/fs/recovery.c
3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 *
6 * Copyright 1999 Red Hat Software --- All Rights Reserved
7 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Journal recovery routines for the generic filesystem journaling code;
13 * part of the ext2fs journaling system.
14 */
15
16#ifndef __KERNEL__
17#include "jfs.h"
18#else
19#include <linux/sched.h>
20#include <linux/fs.h>
21#include <linux/jfs.h>
22#include <linux/errno.h>
23#include <linux/malloc.h>
24#include <linux/locks.h>
25#include <linux/buffer.h>
26
27
/*
 * Drop our reference on each of the first n buffer heads in b[].
 * Buffers are released from the highest index down to index 0.
 */
static void brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	for (idx = n - 1; idx >= 0; idx--)
		brelse(b[idx]);
}
34
35
36/*
37 * When reading from the journal, we are going through the block device
38 * layer directly and so there is no readahead being done for us. We
39 * need to implement any readahead ourselves if we want it to happen at
40 * all. Recovery is basically one long sequential read, so make sure we
41 * do the IO in reasonably large chunks.
42 *
43 * This is not so critical that we need to be enormously clever about
44 * the readahead size, though. 128K is a purely arbitrary, good-enough
45 * fixed value.
46 */
47
48static int do_readahead(journal_t *journal, unsigned int start)
49{
50 int err;
51 unsigned int max, nbufs, next, blocknr;
52 struct buffer_head *bh;
53
54 #define MAXBUF 8
55 struct buffer_head * bufs[MAXBUF];
56
57 /* Do up to 128K of readahead */
58 max = start + (128 * 1024 / journal->j_blocksize);
59 if (max > journal->j_maxlen)
60 max = journal->j_maxlen;
61
62 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at
63 * a time to the block device IO layer. */
64
65 nbufs = 0;
66
67 for (next = start; next < max; next++) {
68 blocknr = next;
69 if (journal->j_inode)
70 blocknr = bmap(journal->j_inode, next);
71 if (!blocknr) {
72 printk (KERN_ERR "JFS: bad block at offset %u\n",
73 next);
74 err = -EIO;
75 goto failed;
76 }
77
78 bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
79 if (!bh) {
80 printk(KERN_ERR "JFS: readahead getblk failed\n");
81 err = -ENOMEM;
82 goto failed;
83 }
84
85 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
86 bufs[nbufs++] = bh;
87 if (nbufs == MAXBUF) {
88 ll_rw_block(READ, nbufs, bufs);
89 brelse_array(bufs, nbufs);
90 nbufs = 0;
91 }
92 } else
93 brelse(bh);
94 }
95
96 if (nbufs)
97 ll_rw_block(READ, nbufs, bufs);
98 err = 0;
99
100failed:
101 if (nbufs)
102 brelse_array(bufs, nbufs);
103 return err;
104}
105#endif
106
107/*
108 * Read a block from the journal
109 */
110
111static int jread(struct buffer_head **bhp, journal_t *journal,
112 unsigned int offset)
113{
114 unsigned int blocknr;
115 struct buffer_head *bh;
116
117 *bhp = NULL;
118
119 if (offset >= journal->j_maxlen)
120 return -EINVAL;
121
122 blocknr = offset;
123 if (journal->j_inode)
124 blocknr = bmap(journal->j_inode, offset);
125
126 if (!blocknr) {
127 printk (KERN_ERR "JFS: bad block at offset %u\n",
128 offset);
129 return -EIO;
130 }
131
132 bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
133 if (!bh)
134 return -ENOMEM;
135
136 if (!buffer_uptodate(bh)) {
137 /* If this is a brand new buffer, start readahead.
138 Otherwise, we assume we are already reading it. */
139 if (!buffer_req(bh))
140 do_readahead(journal, offset);
141 wait_on_buffer(bh);
142 }
143
144 if (!buffer_uptodate(bh)) {
145 printk (KERN_ERR "JFS: Failed to read block at offset %u\n",
146 offset);
147 brelse(bh);
148 return -EIO;
149 }
150
151 *bhp = bh;
152 return 0;
153}
154
155
156/*
157 * Count the number of in-use tags in a journal descriptor block.
158 */
159
160int count_tags(struct buffer_head *bh, int size)
161{
162 char * tagp;
163 journal_block_tag_t * tag;
164 int nr = 0;
165
166 tagp = &bh->b_data[sizeof(journal_header_t)];
167
168 while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
169 tag = (journal_block_tag_t *) tagp;
170
171 nr++;
172 tagp += sizeof(journal_block_tag_t);
173 if (!(tag->t_flags & htonl(JFS_FLAG_SAME_UUID)))
174 tagp += 16;
175
176 if (tag->t_flags & htonl(JFS_FLAG_LAST_TAG))
177 break;
178 }
179
180 return nr;
181}
182
183
/*
 * Make sure we wrap around the log correctly!  Once `var' has reached
 * or passed journal->j_last, pull it back by the distance between
 * j_first and j_last.  `var' must be a modifiable lvalue and is
 * evaluated more than once.
 */
#define wrap(journal, var)						\
do {									\
	if ((journal)->j_last <= (var))					\
		(var) -= (journal)->j_last - (journal)->j_first;	\
} while (0)
190
191/*
192 * journal_recover
193 *
194 * The primary function for recovering the log contents when mounting a
195 * journaled device.
196 */
197
198int journal_recover(journal_t *journal)
199{
200 unsigned int first_commit_ID, next_commit_ID;
201 unsigned long next_log_block;
202 unsigned long transaction_start;
203 int err, success = 0;
204 journal_superblock_t * jsb;
205 journal_header_t * tmp;
206 struct buffer_head * bh;
207
208 /* Precompute the maximum metadata descriptors in a descriptor block */
209 int MAX_BLOCKS_PER_DESC;
210 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
211 / sizeof(journal_block_tag_t));
212
213 /*
214 * First thing is to establish what we expect to find in the log
215 * (in terms of transaction IDs), and where (in terms of log
216 * block offsets): query the superblock.
217 */
218
219 jsb = journal->j_superblock;
220 next_commit_ID = ntohl(jsb->s_sequence);
221 next_log_block = ntohl(jsb->s_start);
222
223 first_commit_ID = next_commit_ID;
224
225 /*
226 * The journal superblock's s_start field (the current log head)
227 * is always zero if, and only if, the journal was cleanly
228 * unmounted.
229 */
230
231 if (!jsb->s_start) {
232 jfs_debug(1, "No recovery required, last transaction %d\n",
233 ntohl(jsb->s_sequence));
234 journal->j_transaction_sequence = ++next_commit_ID;
235 return 0;
236 }
237
238 jfs_debug(1, "Starting recovery\n");
239
240 /*
241 * Now we walk through the log, transaction by transaction,
242 * making sure that each transaction has a commit block in the
243 * expected place. Each complete transaction gets replayed back
244 * into the main filesystem.
245 */
246
247 while (1) {
248 jfs_debug(2, "Looking for commit ID %u at %lu/%lu\n",
249 next_commit_ID, next_log_block, journal->j_last);
250 transaction_start = next_log_block;
251
252 while (next_log_block < journal->j_last) {
253 /* Skip over each chunk of the transaction
254 * looking either the next descriptor block or
255 * the final commit record. */
256
257 jfs_debug(3, "JFS: checking block %ld\n",
258 next_log_block);
259 err = jread(&bh, journal, next_log_block);
260 if (err)
261 goto failed;
262
263 /* What kind of buffer is it?
264 *
265 * If it is a descriptor block, work out the
266 * expected location of the next and skip to it.
267 *
268 * If it is the right commit block, end the
269 * search and start recovering the transaction.
270 *
271 * Any non-control block, or an unexpected
272 * control block is interpreted as old data from
273 * a previous wrap of the log: stop recovery at
274 * this point.
275 */
276
277 tmp = (journal_header_t *) bh->b_data;
278
279 if (tmp->h_magic == htonl(JFS_MAGIC_NUMBER)) {
280 int blocktype = ntohl(tmp->h_blocktype);
281 jfs_debug(3, "Found magic %d\n", blocktype);
282
283 if (blocktype == JFS_DESCRIPTOR_BLOCK) {
284 /* Work out where the next descriptor
285 * should be. */
286 next_log_block++;
287 next_log_block += count_tags(bh, journal->j_blocksize);
288 wrap(journal, next_log_block);
289 brelse(bh);
290 continue;
291 } else if (blocktype == JFS_COMMIT_BLOCK) {
292 unsigned int sequence = tmp->h_sequence;
293 brelse(bh);
294 if (sequence == htonl(next_commit_ID))
295 goto commit;
296 jfs_debug(2, "found sequence %d, "
297 "expected %d.\n",
298 ntohl(sequence),
299 next_commit_ID);
300 goto finished;
301 }
302 }
303
304 /* We didn't recognise it? OK, we've gone off
305 * the tail of the log in that case. */
306 brelse(bh);
307 break;
308 }
309
310 goto finished;
311
312 commit:
313 jfs_debug(2, "Found transaction %d\n", next_commit_ID);
314
315 /* OK, we have a transaction to commit. Rewind to the
316 * start of it, gather up all of the buffers in each
317 * transaction segment, and replay the segments one by
318 * one. */
319
320 next_log_block = transaction_start;
321
322 while (1) {
323 int flags;
324 char * tagp;
325 journal_block_tag_t * tag;
326 struct buffer_head * obh;
327 struct buffer_head * nbh;
328
329 err = jread(&bh, journal, next_log_block++);
330 wrap(journal, next_log_block);
331 if (err)
332 goto failed;
333
334 tmp = (journal_header_t *) bh->b_data;
335 /* should never happen - we just checked above - AED */
336 J_ASSERT(tmp->h_magic == htonl(JFS_MAGIC_NUMBER));
337
338 /* If it is the commit block, then we are all done! */
339 if (tmp->h_blocktype == htonl(JFS_COMMIT_BLOCK)) {
340 brelse(bh);
341 break;
342 }
343
344 /* A descriptor block: we can now write all of
345 * the data blocks. Yay, useful work is finally
346 * getting done here! */
347
348 tagp = &bh->b_data[sizeof(journal_header_t)];
349
350 while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
351 <= journal->j_blocksize) {
352 tag = (journal_block_tag_t *) tagp;
353 flags = ntohl(tag->t_flags);
354
355 err = jread(&obh, journal, next_log_block++);
356 wrap(journal, next_log_block);
357 if (err) {
358 /* Recover what we can, but
359 * report failure at the end. */
360 success = err;
361 printk (KERN_ERR
362 "JFS: IO error recovering "
363 "block %ld in log\n",
364 next_log_block-1);
365 } else {
366 /* can never happen if jread OK - AED */
367 J_ASSERT(obh != NULL);
368
369 /* And find a buffer for the new data
370 * being restored */
371 nbh = getblk(journal->j_dev,
372 ntohl(tag->t_blocknr),
373 journal->j_blocksize);
374 if (nbh == NULL) {
375 printk(KERN_ERR
376 "JFS: Out of memory "
377 "during recovery.\n");
378 err = -ENOMEM;
379 brelse(bh);
380 brelse(obh);
381 goto failed;
382 }
383
384 memcpy(nbh->b_data, obh->b_data,
385 journal->j_blocksize);
386 if (flags & JFS_FLAG_ESCAPE) {
387 * ((unsigned int *) bh->b_data) = htonl(JFS_MAGIC_NUMBER);
388 }
389
390 mark_buffer_dirty(nbh, 1);
391 /* ll_rw_block(WRITE, 1, &nbh); */
392 brelse(obh);
393 brelse(nbh);
394 }
395
396 tagp += sizeof(journal_block_tag_t);
397 if (!(flags & JFS_FLAG_SAME_UUID))
398 tagp += 16;
399
400 if (flags & JFS_FLAG_LAST_TAG)
401 break;
402
403 } /* end of tag loop */
404
405 brelse(bh);
406
407 } /* end of descriptor block loop */
408
409 /* We have now replayed that entire transaction: start
410 * looking for the next transaction. */
411 next_commit_ID++;
412 }
413
414 finished:
415 err = success;
416 fsync_dev(journal->j_dev);
417
418 failed:
419
420 /* Restart the log at the next transaction ID, thus invalidating
421 * any existing commit records in the log. */
422 jfs_debug(0, "JFS: recovery, exit status %d, "
423 "recovered transactions %u to %u\n",
424 err, first_commit_ID, next_commit_ID);
425 journal->j_transaction_sequence = ++next_commit_ID;
426
427 return err;
428}