Blame - fs/jbd/recovery.c - kernel/msm-4.9

blob: 80d7f53fd0a759dec8a019c60bb96badf7f927bb [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/fs/recovery.c
				3	*
				4	* Written by Stephen C. Tweedie <sct@redhat.com>, 1999
				5	*
				6	* Copyright 1999-2000 Red Hat Software --- All Rights Reserved
				7	*
				8	* This file is part of the Linux kernel and is made available under
				9	* the terms of the GNU General Public License, version 2, or at your
				10	* option, any later version, incorporated herein by reference.
				11	*
				12	* Journal recovery routines for the generic filesystem journaling code;
				13	* part of the ext2fs journaling system.
				14	*/
				15
				16	#ifndef __KERNEL__
				17	#include "jfs_user.h"
				18	#else
				19	#include <linux/time.h>
				20	#include <linux/fs.h>
				21	#include <linux/jbd.h>
				22	#include <linux/errno.h>
				23	#include <linux/slab.h>
				24	#endif
				25
				26	/*
				27	* Maintain information about the progress of the recovery job, so that
				28	* the different passes can carry information between them.
				29	*/
				30	struct recovery_info
				31	{
				32	tid_t start_transaction;
				33	tid_t end_transaction;
				34
				35	int nr_replays;
				36	int nr_revokes;
				37	int nr_revoke_hits;
				38	};
				39
				40	enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
				41	static int do_one_pass(journal_t *journal,
				42	struct recovery_info *info, enum passtype pass);
				43	static int scan_revoke_records(journal_t , struct buffer_head ,
				44	tid_t, struct recovery_info *);
				45
				46	#ifdef __KERNEL__
				47
				48	/* Release readahead buffers after use */
				49	void journal_brelse_array(struct buffer_head *b[], int n)
				50	{
				51	while (--n >= 0)
				52	brelse (b[n]);
				53	}
				54
				55
				56	/*
				57	* When reading from the journal, we are going through the block device
				58	* layer directly and so there is no readahead being done for us. We
				59	* need to implement any readahead ourselves if we want it to happen at
				60	* all. Recovery is basically one long sequential read, so make sure we
				61	* do the IO in reasonably large chunks.
				62	*
				63	* This is not so critical that we need to be enormously clever about
				64	* the readahead size, though. 128K is a purely arbitrary, good-enough
				65	* fixed value.
				66	*/
				67
				68	#define MAXBUF 8
				69	static int do_readahead(journal_t *journal, unsigned int start)
				70	{
				71	int err;
				72	unsigned int max, nbufs, next;
				73	unsigned long blocknr;
				74	struct buffer_head *bh;
				75
				76	struct buffer_head * bufs[MAXBUF];
				77
				78	/* Do up to 128K of readahead */
				79	max = start + (128 * 1024 / journal->j_blocksize);
				80	if (max > journal->j_maxlen)
				81	max = journal->j_maxlen;
				82
				83	/* Do the readahead itself. We'll submit MAXBUF buffer_heads at
				84	* a time to the block device IO layer. */
				85
				86	nbufs = 0;
				87
				88	for (next = start; next < max; next++) {
				89	err = journal_bmap(journal, next, &blocknr);
				90
				91	if (err) {
				92	printk (KERN_ERR "JBD: bad block at offset %u\n",
				93	next);
				94	goto failed;
				95	}
				96
				97	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
				98	if (!bh) {
				99	err = -ENOMEM;
				100	goto failed;
				101	}
				102
				103	if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
				104	bufs[nbufs++] = bh;
				105	if (nbufs == MAXBUF) {
				106	ll_rw_block(READ, nbufs, bufs);
				107	journal_brelse_array(bufs, nbufs);
				108	nbufs = 0;
				109	}
				110	} else
				111	brelse(bh);
				112	}
				113
				114	if (nbufs)
				115	ll_rw_block(READ, nbufs, bufs);
				116	err = 0;
				117
				118	failed:
				119	if (nbufs)
				120	journal_brelse_array(bufs, nbufs);
				121	return err;
				122	}
				123
				124	#endif /* __KERNEL__ */
				125
				126
				127	/*
				128	* Read a block from the journal
				129	*/
				130
				131	static int jread(struct buffer_head *bhp, journal_t journal,
				132	unsigned int offset)
				133	{
				134	int err;
				135	unsigned long blocknr;
				136	struct buffer_head *bh;
				137
				138	*bhp = NULL;
				139
				140	if (offset >= journal->j_maxlen) {
				141	printk(KERN_ERR "JBD: corrupted journal superblock\n");
				142	return -EIO;
				143	}
				144
				145	err = journal_bmap(journal, offset, &blocknr);
				146
				147	if (err) {
				148	printk (KERN_ERR "JBD: bad block at offset %u\n",
				149	offset);
				150	return err;
				151	}
				152
				153	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
				154	if (!bh)
				155	return -ENOMEM;
				156
				157	if (!buffer_uptodate(bh)) {
				158	/* If this is a brand new buffer, start readahead.
				159	Otherwise, we assume we are already reading it. */
				160	if (!buffer_req(bh))
				161	do_readahead(journal, offset);
				162	wait_on_buffer(bh);
				163	}
				164
				165	if (!buffer_uptodate(bh)) {
				166	printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
				167	offset);
				168	brelse(bh);
				169	return -EIO;
				170	}
				171
				172	*bhp = bh;
				173	return 0;
				174	}
				175
				176
				177	/*
				178	* Count the number of in-use tags in a journal descriptor block.
				179	*/
				180
				181	static int count_tags(struct buffer_head *bh, int size)
				182	{
				183	char * tagp;
				184	journal_block_tag_t * tag;
				185	int nr = 0;
				186
				187	tagp = &bh->b_data[sizeof(journal_header_t)];
				188
				189	while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
				190	tag = (journal_block_tag_t *) tagp;
				191
				192	nr++;
				193	tagp += sizeof(journal_block_tag_t);
				194	if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))
				195	tagp += 16;
				196
				197	if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))
				198	break;
				199	}
				200
				201	return nr;
				202	}
				203
				204
				205	/* Make sure we wrap around the log correctly! */
				206	#define wrap(journal, var) \
				207	do { \
				208	if (var >= (journal)->j_last) \
				209	var -= ((journal)->j_last - (journal)->j_first); \
				210	} while (0)
				211
				212	/**
Randy Dunlap	6c8bec6	2005-11-07 01:01:04 -0800	[diff] [blame^]	213	* journal_recover - recovers a on-disk journal
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	214	* @journal: the journal to recover
				215	*
				216	* The primary function for recovering the log contents when mounting a
				217	* journaled device.
				218	*
				219	* Recovery is done in three passes. In the first pass, we look for the
				220	* end of the log. In the second, we assemble the list of revoke
				221	* blocks. In the third and final pass, we replay any un-revoked blocks
				222	* in the log.
				223	*/
				224	int journal_recover(journal_t *journal)
				225	{
				226	int err;
				227	journal_superblock_t * sb;
				228
				229	struct recovery_info info;
				230
				231	memset(&info, 0, sizeof(info));
				232	sb = journal->j_superblock;
				233
				234	/*
				235	* The journal superblock's s_start field (the current log head)
				236	* is always zero if, and only if, the journal was cleanly
				237	* unmounted.
				238	*/
				239
				240	if (!sb->s_start) {
				241	jbd_debug(1, "No recovery required, last transaction %d\n",
				242	be32_to_cpu(sb->s_sequence));
				243	journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
				244	return 0;
				245	}
				246
				247	err = do_one_pass(journal, &info, PASS_SCAN);
				248	if (!err)
				249	err = do_one_pass(journal, &info, PASS_REVOKE);
				250	if (!err)
				251	err = do_one_pass(journal, &info, PASS_REPLAY);
				252
				253	jbd_debug(0, "JBD: recovery, exit status %d, "
				254	"recovered transactions %u to %u\n",
				255	err, info.start_transaction, info.end_transaction);
				256	jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
				257	info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
				258
				259	/* Restart the log at the next transaction ID, thus invalidating
				260	* any existing commit records in the log. */
				261	journal->j_transaction_sequence = ++info.end_transaction;
				262
				263	journal_clear_revoke(journal);
				264	sync_blockdev(journal->j_fs_dev);
				265	return err;
				266	}
				267
				268	/**
Randy Dunlap	6c8bec6	2005-11-07 01:01:04 -0800	[diff] [blame^]	269	* journal_skip_recovery - Start journal and wipe exiting records
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	270	* @journal: journal to startup
				271	*
				272	* Locate any valid recovery information from the journal and set up the
				273	* journal structures in memory to ignore it (presumably because the
				274	* caller has evidence that it is out of date).
				275	* This function does'nt appear to be exorted..
				276	*
				277	* We perform one pass over the journal to allow us to tell the user how
				278	* much recovery information is being erased, and to let us initialise
				279	* the journal transaction sequence numbers to the next unused ID.
				280	*/
				281	int journal_skip_recovery(journal_t *journal)
				282	{
				283	int err;
				284	journal_superblock_t * sb;
				285
				286	struct recovery_info info;
				287
				288	memset (&info, 0, sizeof(info));
				289	sb = journal->j_superblock;
				290
				291	err = do_one_pass(journal, &info, PASS_SCAN);
				292
				293	if (err) {
				294	printk(KERN_ERR "JBD: error %d scanning journal\n", err);
				295	++journal->j_transaction_sequence;
				296	} else {
				297	#ifdef CONFIG_JBD_DEBUG
				298	int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
				299	#endif
				300	jbd_debug(0,
				301	"JBD: ignoring %d transaction%s from the journal.\n",
				302	dropped, (dropped == 1) ? "" : "s");
				303	journal->j_transaction_sequence = ++info.end_transaction;
				304	}
				305
				306	journal->j_tail = 0;
				307	return err;
				308	}
				309
				310	static int do_one_pass(journal_t *journal,
				311	struct recovery_info *info, enum passtype pass)
				312	{
				313	unsigned int first_commit_ID, next_commit_ID;
				314	unsigned long next_log_block;
				315	int err, success = 0;
				316	journal_superblock_t * sb;
				317	journal_header_t * tmp;
				318	struct buffer_head * bh;
				319	unsigned int sequence;
				320	int blocktype;
				321
				322	/* Precompute the maximum metadata descriptors in a descriptor block */
				323	int MAX_BLOCKS_PER_DESC;
				324	MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
				325	/ sizeof(journal_block_tag_t));
				326
				327	/*
				328	* First thing is to establish what we expect to find in the log
				329	* (in terms of transaction IDs), and where (in terms of log
				330	* block offsets): query the superblock.
				331	*/
				332
				333	sb = journal->j_superblock;
				334	next_commit_ID = be32_to_cpu(sb->s_sequence);
				335	next_log_block = be32_to_cpu(sb->s_start);
				336
				337	first_commit_ID = next_commit_ID;
				338	if (pass == PASS_SCAN)
				339	info->start_transaction = first_commit_ID;
				340
				341	jbd_debug(1, "Starting recovery pass %d\n", pass);
				342
				343	/*
				344	* Now we walk through the log, transaction by transaction,
				345	* making sure that each transaction has a commit block in the
				346	* expected place. Each complete transaction gets replayed back
				347	* into the main filesystem.
				348	*/
				349
				350	while (1) {
				351	int flags;
				352	char * tagp;
				353	journal_block_tag_t * tag;
				354	struct buffer_head * obh;
				355	struct buffer_head * nbh;
				356
				357	cond_resched(); /* We're under lock_kernel() */
				358
				359	/* If we already know where to stop the log traversal,
				360	* check right now that we haven't gone past the end of
				361	* the log. */
				362
				363	if (pass != PASS_SCAN)
				364	if (tid_geq(next_commit_ID, info->end_transaction))
				365	break;
				366
				367	jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
				368	next_commit_ID, next_log_block, journal->j_last);
				369
				370	/* Skip over each chunk of the transaction looking
				371	* either the next descriptor block or the final commit
				372	* record. */
				373
				374	jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
				375	err = jread(&bh, journal, next_log_block);
				376	if (err)
				377	goto failed;
				378
				379	next_log_block++;
				380	wrap(journal, next_log_block);
				381
				382	/* What kind of buffer is it?
				383	*
				384	* If it is a descriptor block, check that it has the
				385	* expected sequence number. Otherwise, we're all done
				386	* here. */
				387
				388	tmp = (journal_header_t *)bh->b_data;
				389
				390	if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) {
				391	brelse(bh);
				392	break;
				393	}
				394
				395	blocktype = be32_to_cpu(tmp->h_blocktype);
				396	sequence = be32_to_cpu(tmp->h_sequence);
				397	jbd_debug(3, "Found magic %d, sequence %d\n",
				398	blocktype, sequence);
				399
				400	if (sequence != next_commit_ID) {
				401	brelse(bh);
				402	break;
				403	}
				404
				405	/* OK, we have a valid descriptor block which matches
				406	* all of the sequence number checks. What are we going
				407	* to do with it? That depends on the pass... */
				408
				409	switch(blocktype) {
				410	case JFS_DESCRIPTOR_BLOCK:
				411	/* If it is a valid descriptor block, replay it
				412	* in pass REPLAY; otherwise, just skip over the
				413	* blocks it describes. */
				414	if (pass != PASS_REPLAY) {
				415	next_log_block +=
				416	count_tags(bh, journal->j_blocksize);
				417	wrap(journal, next_log_block);
				418	brelse(bh);
				419	continue;
				420	}
				421
				422	/* A descriptor block: we can now write all of
				423	* the data blocks. Yay, useful work is finally
				424	* getting done here! */
				425
				426	tagp = &bh->b_data[sizeof(journal_header_t)];
				427	while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
				428	<= journal->j_blocksize) {
				429	unsigned long io_block;
				430
				431	tag = (journal_block_tag_t *) tagp;
				432	flags = be32_to_cpu(tag->t_flags);
				433
				434	io_block = next_log_block++;
				435	wrap(journal, next_log_block);
				436	err = jread(&obh, journal, io_block);
				437	if (err) {
				438	/* Recover what we can, but
				439	* report failure at the end. */
				440	success = err;
				441	printk (KERN_ERR
				442	"JBD: IO error %d recovering "
				443	"block %ld in log\n",
				444	err, io_block);
				445	} else {
				446	unsigned long blocknr;
				447
				448	J_ASSERT(obh != NULL);
				449	blocknr = be32_to_cpu(tag->t_blocknr);
				450
				451	/* If the block has been
				452	* revoked, then we're all done
				453	* here. */
				454	if (journal_test_revoke
				455	(journal, blocknr,
				456	next_commit_ID)) {
				457	brelse(obh);
				458	++info->nr_revoke_hits;
				459	goto skip_write;
				460	}
				461
				462	/* Find a buffer for the new
				463	* data being restored */
				464	nbh = __getblk(journal->j_fs_dev,
				465	blocknr,
				466	journal->j_blocksize);
				467	if (nbh == NULL) {
				468	printk(KERN_ERR
				469	"JBD: Out of memory "
				470	"during recovery.\n");
				471	err = -ENOMEM;
				472	brelse(bh);
				473	brelse(obh);
				474	goto failed;
				475	}
				476
				477	lock_buffer(nbh);
				478	memcpy(nbh->b_data, obh->b_data,
				479	journal->j_blocksize);
				480	if (flags & JFS_FLAG_ESCAPE) {
				481	((__be32 )bh->b_data) =
				482	cpu_to_be32(JFS_MAGIC_NUMBER);
				483	}
				484
				485	BUFFER_TRACE(nbh, "marking dirty");
				486	set_buffer_uptodate(nbh);
				487	mark_buffer_dirty(nbh);
				488	BUFFER_TRACE(nbh, "marking uptodate");
				489	++info->nr_replays;
				490	/* ll_rw_block(WRITE, 1, &nbh); */
				491	unlock_buffer(nbh);
				492	brelse(obh);
				493	brelse(nbh);
				494	}
				495
				496	skip_write:
				497	tagp += sizeof(journal_block_tag_t);
				498	if (!(flags & JFS_FLAG_SAME_UUID))
				499	tagp += 16;
				500
				501	if (flags & JFS_FLAG_LAST_TAG)
				502	break;
				503	}
				504
				505	brelse(bh);
				506	continue;
				507
				508	case JFS_COMMIT_BLOCK:
				509	/* Found an expected commit block: not much to
				510	* do other than move on to the next sequence
				511	* number. */
				512	brelse(bh);
				513	next_commit_ID++;
				514	continue;
				515
				516	case JFS_REVOKE_BLOCK:
				517	/* If we aren't in the REVOKE pass, then we can
				518	* just skip over this block. */
				519	if (pass != PASS_REVOKE) {
				520	brelse(bh);
				521	continue;
				522	}
				523
				524	err = scan_revoke_records(journal, bh,
				525	next_commit_ID, info);
				526	brelse(bh);
				527	if (err)
				528	goto failed;
				529	continue;
				530
				531	default:
				532	jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
				533	blocktype);
				534	goto done;
				535	}
				536	}
				537
				538	done:
				539	/*
				540	* We broke out of the log scan loop: either we came to the
				541	* known end of the log or we found an unexpected block in the
				542	* log. If the latter happened, then we know that the "current"
				543	* transaction marks the end of the valid log.
				544	*/
				545
				546	if (pass == PASS_SCAN)
				547	info->end_transaction = next_commit_ID;
				548	else {
				549	/* It's really bad news if different passes end up at
				550	* different places (but possible due to IO errors). */
				551	if (info->end_transaction != next_commit_ID) {
				552	printk (KERN_ERR "JBD: recovery pass %d ended at "
				553	"transaction %u, expected %u\n",
				554	pass, next_commit_ID, info->end_transaction);
				555	if (!success)
				556	success = -EIO;
				557	}
				558	}
				559
				560	return success;
				561
				562	failed:
				563	return err;
				564	}
				565
				566
				567	/* Scan a revoke record, marking all blocks mentioned as revoked. */
				568
				569	static int scan_revoke_records(journal_t journal, struct buffer_head bh,
				570	tid_t sequence, struct recovery_info *info)
				571	{
				572	journal_revoke_header_t *header;
				573	int offset, max;
				574
				575	header = (journal_revoke_header_t *) bh->b_data;
				576	offset = sizeof(journal_revoke_header_t);
				577	max = be32_to_cpu(header->r_count);
				578
				579	while (offset < max) {
				580	unsigned long blocknr;
				581	int err;
				582
				583	blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
				584	offset += 4;
				585	err = journal_set_revoke(journal, blocknr, sequence);
				586	if (err)
				587	return err;
				588	++info->nr_revokes;
				589	}
				590	return 0;
				591	}