Blame - fs/ocfs2/journal.c - fp2-dev/kernel/msm

blob: f31c7e8c19c32bf091117a8fbd114b69c200f7ae [file] [log] [blame]

Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1	/* -*- mode: c; c-basic-offset: 8; -*-
				2	* vim: noexpandtab sw=8 ts=8 sts=0:
				3	*
				4	* journal.c
				5	*
				6	* Defines functions of journalling api
				7	*
				8	* Copyright (C) 2003, 2004 Oracle. All rights reserved.
				9	*
				10	* This program is free software; you can redistribute it and/or
				11	* modify it under the terms of the GNU General Public
				12	* License as published by the Free Software Foundation; either
				13	* version 2 of the License, or (at your option) any later version.
				14	*
				15	* This program is distributed in the hope that it will be useful,
				16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				18	* General Public License for more details.
				19	*
				20	* You should have received a copy of the GNU General Public
				21	* License along with this program; if not, write to the
				22	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				23	* Boston, MA 02111-1307, USA.
				24	*/
				25
				26	#include <linux/fs.h>
				27	#include <linux/types.h>
				28	#include <linux/slab.h>
				29	#include <linux/highmem.h>
				30	#include <linux/kthread.h>
				31
				32	#define MLOG_MASK_PREFIX ML_JOURNAL
				33	#include <cluster/masklog.h>
				34
				35	#include "ocfs2.h"
				36
				37	#include "alloc.h"
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	38	#include "dir.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	39	#include "dlmglue.h"
				40	#include "extent_map.h"
				41	#include "heartbeat.h"
				42	#include "inode.h"
				43	#include "journal.h"
				44	#include "localalloc.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	45	#include "slot_map.h"
				46	#include "super.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	47	#include "sysfile.h"
				48
				49	#include "buffer_head_io.h"
				50
/* Spinlock with external linkage; nothing in the visible part of this
 * file takes it. */
DEFINE_SPINLOCK(trans_inc_lock);

/* Prototypes for file-local helpers that are used ahead of their
 * definitions. */
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_recover_node(struct ocfs2_super *osb, int node_num);
static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, int dirty);
static int ocfs2_trylock_journal(struct ocfs2_super *osb, int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb, int slot);
static int ocfs2_commit_thread(void *arg);
				66
				67	static int ocfs2_commit_cache(struct ocfs2_super *osb)
				68	{
				69	int status = 0;
				70	unsigned int flushed;
				71	unsigned long old_id;
				72	struct ocfs2_journal *journal = NULL;
				73
				74	mlog_entry_void();
				75
				76	journal = osb->journal;
				77
				78	/* Flush all pending commits and checkpoint the journal. */
				79	down_write(&journal->j_trans_barrier);
				80
				81	if (atomic_read(&journal->j_num_trans) == 0) {
				82	up_write(&journal->j_trans_barrier);
				83	mlog(0, "No transactions for me to flush!\n");
				84	goto finally;
				85	}
				86
				87	journal_lock_updates(journal->j_journal);
				88	status = journal_flush(journal->j_journal);
				89	journal_unlock_updates(journal->j_journal);
				90	if (status < 0) {
				91	up_write(&journal->j_trans_barrier);
				92	mlog_errno(status);
				93	goto finally;
				94	}
				95
				96	old_id = ocfs2_inc_trans_id(journal);
				97
				98	flushed = atomic_read(&journal->j_num_trans);
				99	atomic_set(&journal->j_num_trans, 0);
				100	up_write(&journal->j_trans_barrier);
				101
				102	mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n",
				103	journal->j_trans_id, flushed);
				104
Mark Fasheh	34d024f	2007-09-24 15:56:19 -0700	[diff] [blame]	105	ocfs2_wake_downconvert_thread(osb);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	106	wake_up(&journal->j_checkpointed);
				107	finally:
				108	mlog_exit(status);
				109	return status;
				110	}
				111
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	112	/* pass it NULL and it will allocate a new handle object for you. If
				113	* you pass it a handle however, it may still return error, in which
				114	* case it has free'd the passed handle for you. */
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	115	handle_t ocfs2_start_trans(struct ocfs2_super osb, int max_buffs)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	116	{
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	117	journal_t *journal = osb->journal->j_journal;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	118	handle_t *handle;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	119
Eric Sesterhenn / snakebyte	ebdec83	2006-01-27 10:32:52 +0100	[diff] [blame]	120	BUG_ON(!osb \|\| !osb->journal->j_journal);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	121
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	122	if (ocfs2_is_hard_readonly(osb))
				123	return ERR_PTR(-EROFS);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	124
				125	BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
				126	BUG_ON(max_buffs <= 0);
				127
				128	/* JBD might support this, but our journalling code doesn't yet. */
				129	if (journal_current_handle()) {
				130	mlog(ML_ERROR, "Recursive transaction attempted!\n");
				131	BUG();
				132	}
				133
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	134	down_read(&osb->journal->j_trans_barrier);
				135
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	136	handle = journal_start(journal, max_buffs);
				137	if (IS_ERR(handle)) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	138	up_read(&osb->journal->j_trans_barrier);
				139
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	140	mlog_errno(PTR_ERR(handle));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	141
				142	if (is_journal_aborted(journal)) {
				143	ocfs2_abort(osb->sb, "Detected aborted journal");
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	144	handle = ERR_PTR(-EROFS);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	145	}
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	146	} else {
				147	if (!ocfs2_mount_local(osb))
				148	atomic_inc(&(osb->journal->j_num_trans));
				149	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	150
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	151	return handle;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	152	}
				153
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	154	int ocfs2_commit_trans(struct ocfs2_super *osb,
				155	handle_t *handle)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	156	{
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	157	int ret;
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	158	struct ocfs2_journal *journal = osb->journal;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	159
				160	BUG_ON(!handle);
				161
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	162	ret = journal_stop(handle);
				163	if (ret < 0)
				164	mlog_errno(ret);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	165
				166	up_read(&journal->j_trans_barrier);
				167
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	168	return ret;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	169	}
				170
				171	/*
				172	* 'nblocks' is what you want to add to the current
				173	* transaction. extend_trans will either extend the current handle by
				174	* nblocks, or commit it and start a new one with nblocks credits.
				175	*
Mark Fasheh	e8aed34	2007-12-03 16:43:01 -0800	[diff] [blame]	176	* This might call journal_restart() which will commit dirty buffers
				177	* and then restart the transaction. Before calling
				178	* ocfs2_extend_trans(), any changed blocks should have been
				179	* dirtied. After calling it, all blocks which need to be changed must
				180	* go through another set of journal_access/journal_dirty calls.
				181	*
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	182	* WARNING: This will not release any semaphores or disk locks taken
				183	* during the transaction, so make sure they were taken before
				184	* start_trans or we'll have ordering deadlocks.
				185	*
				186	* WARNING2: Note that we do not drop j_trans_barrier here. This is
				187	* good because transaction ids haven't yet been recorded on the
				188	* cluster locks associated with this handle.
				189	*/
Mark Fasheh	1fc5814	2006-10-05 14:15:36 -0700	[diff] [blame]	190	int ocfs2_extend_trans(handle_t *handle, int nblocks)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	191	{
				192	int status;
				193
				194	BUG_ON(!handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	195	BUG_ON(!nblocks);
				196
				197	mlog_entry_void();
				198
				199	mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);
				200
Mark Fasheh	0879c58	2007-12-03 16:42:19 -0800	[diff] [blame]	201	#ifdef OCFS2_DEBUG_FS
				202	status = 1;
				203	#else
Mark Fasheh	1fc5814	2006-10-05 14:15:36 -0700	[diff] [blame]	204	status = journal_extend(handle, nblocks);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	205	if (status < 0) {
				206	mlog_errno(status);
				207	goto bail;
				208	}
Mark Fasheh	0879c58	2007-12-03 16:42:19 -0800	[diff] [blame]	209	#endif
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	210
				211	if (status > 0) {
				212	mlog(0, "journal_extend failed, trying journal_restart\n");
Mark Fasheh	1fc5814	2006-10-05 14:15:36 -0700	[diff] [blame]	213	status = journal_restart(handle, nblocks);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	214	if (status < 0) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	215	mlog_errno(status);
				216	goto bail;
				217	}
Mark Fasheh	01ddf1e	2006-10-05 13:54:39 -0700	[diff] [blame]	218	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	219
				220	status = 0;
				221	bail:
				222
				223	mlog_exit(status);
				224	return status;
				225	}
				226
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	227	int ocfs2_journal_access(handle_t *handle,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	228	struct inode *inode,
				229	struct buffer_head *bh,
				230	int type)
				231	{
				232	int status;
				233
				234	BUG_ON(!inode);
				235	BUG_ON(!handle);
				236	BUG_ON(!bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	237
Badari Pulavarty	205f87f	2006-03-26 01:38:00 -0800	[diff] [blame]	238	mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n",
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	239	(unsigned long long)bh->b_blocknr, type,
				240	(type == OCFS2_JOURNAL_ACCESS_CREATE) ?
				241	"OCFS2_JOURNAL_ACCESS_CREATE" :
				242	"OCFS2_JOURNAL_ACCESS_WRITE",
				243	bh->b_size);
				244
				245	/* we can safely remove this assertion after testing. */
				246	if (!buffer_uptodate(bh)) {
				247	mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
				248	mlog(ML_ERROR, "b_blocknr=%llu\n",
				249	(unsigned long long)bh->b_blocknr);
				250	BUG();
				251	}
				252
				253	/* Set the current transaction information on the inode so
				254	* that the locking code knows whether it can drop it's locks
				255	* on this inode or not. We're protected from the commit
				256	* thread updating the current transaction id until
				257	* ocfs2_commit_trans() because ocfs2_start_trans() took
				258	* j_trans_barrier for us. */
				259	ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode);
				260
Mark Fasheh	251b6ec	2006-01-10 15:41:43 -0800	[diff] [blame]	261	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	262	switch (type) {
				263	case OCFS2_JOURNAL_ACCESS_CREATE:
				264	case OCFS2_JOURNAL_ACCESS_WRITE:
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	265	status = journal_get_write_access(handle, bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	266	break;
				267
				268	case OCFS2_JOURNAL_ACCESS_UNDO:
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	269	status = journal_get_undo_access(handle, bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	270	break;
				271
				272	default:
				273	status = -EINVAL;
				274	mlog(ML_ERROR, "Uknown access type!\n");
				275	}
Mark Fasheh	251b6ec	2006-01-10 15:41:43 -0800	[diff] [blame]	276	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	277
				278	if (status < 0)
				279	mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
				280	status, type);
				281
				282	mlog_exit(status);
				283	return status;
				284	}
				285
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	286	int ocfs2_journal_dirty(handle_t *handle,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	287	struct buffer_head *bh)
				288	{
				289	int status;
				290
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	291	mlog_entry("(bh->b_blocknr=%llu)\n",
				292	(unsigned long long)bh->b_blocknr);
				293
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	294	status = journal_dirty_metadata(handle, bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	295	if (status < 0)
				296	mlog(ML_ERROR, "Could not dirty metadata buffer. "
				297	"(bh->b_blocknr=%llu)\n",
				298	(unsigned long long)bh->b_blocknr);
				299
				300	mlog_exit(status);
				301	return status;
				302	}
				303
				304	int ocfs2_journal_dirty_data(handle_t *handle,
				305	struct buffer_head *bh)
				306	{
				307	int err = journal_dirty_data(handle, bh);
				308	if (err)
				309	mlog_errno(err);
				310	/* TODO: When we can handle it, abort the handle and go RO on
				311	* error here. */
				312
				313	return err;
				314	}
				315
Mark Fasheh	d147b3d	2007-11-07 14:40:36 -0800	[diff] [blame]	316	#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	317
				318	void ocfs2_set_journal_params(struct ocfs2_super *osb)
				319	{
				320	journal_t *journal = osb->journal->j_journal;
Mark Fasheh	d147b3d	2007-11-07 14:40:36 -0800	[diff] [blame]	321	unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
				322
				323	if (osb->osb_commit_interval)
				324	commit_interval = osb->osb_commit_interval;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	325
				326	spin_lock(&journal->j_state_lock);
Mark Fasheh	d147b3d	2007-11-07 14:40:36 -0800	[diff] [blame]	327	journal->j_commit_interval = commit_interval;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	328	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
				329	journal->j_flags \|= JFS_BARRIER;
				330	else
				331	journal->j_flags &= ~JFS_BARRIER;
				332	spin_unlock(&journal->j_state_lock);
				333	}
				334
				335	int ocfs2_journal_init(struct ocfs2_journal journal, int dirty)
				336	{
				337	int status = -1;
				338	struct inode inode = NULL; / the journal inode */
				339	journal_t *j_journal = NULL;
				340	struct ocfs2_dinode *di = NULL;
				341	struct buffer_head *bh = NULL;
				342	struct ocfs2_super *osb;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	343	int inode_lock = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	344
				345	mlog_entry_void();
				346
				347	BUG_ON(!journal);
				348
				349	osb = journal->j_osb;
				350
				351	/* already have the inode for our journal */
				352	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
				353	osb->slot_num);
				354	if (inode == NULL) {
				355	status = -EACCES;
				356	mlog_errno(status);
				357	goto done;
				358	}
				359	if (is_bad_inode(inode)) {
				360	mlog(ML_ERROR, "access error (bad inode)\n");
				361	iput(inode);
				362	inode = NULL;
				363	status = -EACCES;
				364	goto done;
				365	}
				366
				367	SET_INODE_JOURNAL(inode);
				368	OCFS2_I(inode)->ip_open_count++;
				369
Mark Fasheh	6eff579	2006-01-18 10:31:47 -0800	[diff] [blame]	370	/* Skip recovery waits here - journal inode metadata never
				371	* changes in a live cluster so it can be considered an
				372	* exception to the rule. */
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	373	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	374	if (status < 0) {
				375	if (status != -ERESTARTSYS)
				376	mlog(ML_ERROR, "Could not get lock on journal!\n");
				377	goto done;
				378	}
				379
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	380	inode_lock = 1;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	381	di = (struct ocfs2_dinode *)bh->b_data;
				382
				383	if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) {
				384	mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
				385	inode->i_size);
				386	status = -EINVAL;
				387	goto done;
				388	}
				389
				390	mlog(0, "inode->i_size = %lld\n", inode->i_size);
Andrew Morton	5515eff	2006-03-26 01:37:53 -0800	[diff] [blame]	391	mlog(0, "inode->i_blocks = %llu\n",
				392	(unsigned long long)inode->i_blocks);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	393	mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
				394
				395	/* call the kernels journal init function now */
				396	j_journal = journal_init_inode(inode);
				397	if (j_journal == NULL) {
				398	mlog(ML_ERROR, "Linux journal layer error\n");
				399	status = -EINVAL;
				400	goto done;
				401	}
				402
				403	mlog(0, "Returned from journal_init_inode\n");
				404	mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
				405
				406	*dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
				407	OCFS2_JOURNAL_DIRTY_FL);
				408
				409	journal->j_journal = j_journal;
				410	journal->j_inode = inode;
				411	journal->j_bh = bh;
				412
				413	ocfs2_set_journal_params(osb);
				414
				415	journal->j_state = OCFS2_JOURNAL_LOADED;
				416
				417	status = 0;
				418	done:
				419	if (status < 0) {
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	420	if (inode_lock)
				421	ocfs2_inode_unlock(inode, 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	422	if (bh != NULL)
				423	brelse(bh);
				424	if (inode) {
				425	OCFS2_I(inode)->ip_open_count--;
				426	iput(inode);
				427	}
				428	}
				429
				430	mlog_exit(status);
				431	return status;
				432	}
				433
				434	static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
				435	int dirty)
				436	{
				437	int status;
				438	unsigned int flags;
				439	struct ocfs2_journal *journal = osb->journal;
				440	struct buffer_head *bh = journal->j_bh;
				441	struct ocfs2_dinode *fe;
				442
				443	mlog_entry_void();
				444
				445	fe = (struct ocfs2_dinode *)bh->b_data;
				446	if (!OCFS2_IS_VALID_DINODE(fe)) {
				447	/* This is called from startup/shutdown which will
				448	* handle the errors in a specific manner, so no need
				449	* to call ocfs2_error() here. */
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	450	mlog(ML_ERROR, "Journal dinode %llu has invalid "
Mark Fasheh	1ca1a11	2007-04-27 16:01:25 -0700	[diff] [blame]	451	"signature: %.*s",
				452	(unsigned long long)le64_to_cpu(fe->i_blkno), 7,
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	453	fe->i_signature);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	454	status = -EIO;
				455	goto out;
				456	}
				457
				458	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
				459	if (dirty)
				460	flags \|= OCFS2_JOURNAL_DIRTY_FL;
				461	else
				462	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
				463	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
				464
				465	status = ocfs2_write_block(osb, bh, journal->j_inode);
				466	if (status < 0)
				467	mlog_errno(status);
				468
				469	out:
				470	mlog_exit(status);
				471	return status;
				472	}
				473
				474	/*
				475	* If the journal has been kmalloc'd it needs to be freed after this
				476	* call.
				477	*/
				478	void ocfs2_journal_shutdown(struct ocfs2_super *osb)
				479	{
				480	struct ocfs2_journal *journal = NULL;
				481	int status = 0;
				482	struct inode *inode = NULL;
				483	int num_running_trans = 0;
				484
				485	mlog_entry_void();
				486
Eric Sesterhenn / snakebyte	ebdec83	2006-01-27 10:32:52 +0100	[diff] [blame]	487	BUG_ON(!osb);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	488
				489	journal = osb->journal;
				490	if (!journal)
				491	goto done;
				492
				493	inode = journal->j_inode;
				494
				495	if (journal->j_state != OCFS2_JOURNAL_LOADED)
				496	goto done;
				497
				498	/* need to inc inode use count as journal_destroy will iput. */
				499	if (!igrab(inode))
				500	BUG();
				501
				502	num_running_trans = atomic_read(&(osb->journal->j_num_trans));
				503	if (num_running_trans > 0)
				504	mlog(0, "Shutting down journal: must wait on %d "
				505	"running transactions!\n",
				506	num_running_trans);
				507
				508	/* Do a commit_cache here. It will flush our journal, and
				509	* release any locks that are still held.
				510	* set the SHUTDOWN flag and release the trans lock.
				511	* the commit thread will take the trans lock for us below. */
				512	journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;
				513
				514	/* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not
				515	* drop the trans_lock (which we want to hold until we
				516	* completely destroy the journal. */
				517	if (osb->commit_task) {
				518	/* Wait for the commit thread */
				519	mlog(0, "Waiting for ocfs2commit to exit....\n");
				520	kthread_stop(osb->commit_task);
				521	osb->commit_task = NULL;
				522	}
				523
				524	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
				525
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	526	if (ocfs2_mount_local(osb)) {
				527	journal_lock_updates(journal->j_journal);
				528	status = journal_flush(journal->j_journal);
				529	journal_unlock_updates(journal->j_journal);
				530	if (status < 0)
				531	mlog_errno(status);
				532	}
				533
				534	if (status == 0) {
				535	/*
				536	* Do not toggle if flush was unsuccessful otherwise
				537	* will leave dirty metadata in a "clean" journal
				538	*/
				539	status = ocfs2_journal_toggle_dirty(osb, 0);
				540	if (status < 0)
				541	mlog_errno(status);
				542	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	543
				544	/* Shutdown the kernel journal system */
				545	journal_destroy(journal->j_journal);
				546
				547	OCFS2_I(inode)->ip_open_count--;
				548
				549	/* unlock our journal */
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	550	ocfs2_inode_unlock(inode, 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	551
				552	brelse(journal->j_bh);
				553	journal->j_bh = NULL;
				554
				555	journal->j_state = OCFS2_JOURNAL_FREE;
				556
				557	// up_write(&journal->j_trans_barrier);
				558	done:
				559	if (inode)
				560	iput(inode);
				561	mlog_exit_void();
				562	}
				563
				564	static void ocfs2_clear_journal_error(struct super_block *sb,
				565	journal_t *journal,
				566	int slot)
				567	{
				568	int olderr;
				569
				570	olderr = journal_errno(journal);
				571	if (olderr) {
				572	mlog(ML_ERROR, "File system error %d recorded in "
				573	"journal %u.\n", olderr, slot);
				574	mlog(ML_ERROR, "File system on device %s needs checking.\n",
				575	sb->s_id);
				576
				577	journal_ack_err(journal);
				578	journal_clear_err(journal);
				579	}
				580	}
				581
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	582	int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	583	{
				584	int status = 0;
				585	struct ocfs2_super *osb;
				586
				587	mlog_entry_void();
				588
				589	if (!journal)
				590	BUG();
				591
				592	osb = journal->j_osb;
				593
				594	status = journal_load(journal->j_journal);
				595	if (status < 0) {
				596	mlog(ML_ERROR, "Failed to load journal!\n");
				597	goto done;
				598	}
				599
				600	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
				601
				602	status = ocfs2_journal_toggle_dirty(osb, 1);
				603	if (status < 0) {
				604	mlog_errno(status);
				605	goto done;
				606	}
				607
				608	/* Launch the commit thread */
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	609	if (!local) {
				610	osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
				611	"ocfs2cmt");
				612	if (IS_ERR(osb->commit_task)) {
				613	status = PTR_ERR(osb->commit_task);
				614	osb->commit_task = NULL;
				615	mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
				616	"error=%d", status);
				617	goto done;
				618	}
				619	} else
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	620	osb->commit_task = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	621
				622	done:
				623	mlog_exit(status);
				624	return status;
				625	}
				626
				627
				628	/* 'full' flag tells us whether we clear out all blocks or if we just
				629	* mark the journal clean */
				630	int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
				631	{
				632	int status;
				633
				634	mlog_entry_void();
				635
Eric Sesterhenn / snakebyte	ebdec83	2006-01-27 10:32:52 +0100	[diff] [blame]	636	BUG_ON(!journal);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	637
				638	status = journal_wipe(journal->j_journal, full);
				639	if (status < 0) {
				640	mlog_errno(status);
				641	goto bail;
				642	}
				643
				644	status = ocfs2_journal_toggle_dirty(journal->j_osb, 0);
				645	if (status < 0)
				646	mlog_errno(status);
				647
				648	bail:
				649	mlog_exit(status);
				650	return status;
				651	}
				652
				653	/*
				654	* JBD Might read a cached version of another nodes journal file. We
				655	* don't want this as this file changes often and we get no
				656	* notification on those changes. The only way to be sure that we've
				657	* got the most up to date version of those blocks then is to force
				658	* read them off disk. Just searching through the buffer cache won't
				659	* work as there may be pages backing this file which are still marked
				660	* up to date. We know things can't change on this file underneath us
				661	* as we have the lock by now :)
				662	*/
				663	static int ocfs2_force_read_journal(struct inode *inode)
				664	{
				665	int status = 0;
Mark Fasheh	4f902c3	2007-03-09 16:26:50 -0800	[diff] [blame]	666	int i;
Mark Fasheh	8110b07	2007-03-22 16:53:23 -0700	[diff] [blame]	667	u64 v_blkno, p_blkno, p_blocks, num_blocks;
Mark Fasheh	4f902c3	2007-03-09 16:26:50 -0800	[diff] [blame]	668	#define CONCURRENT_JOURNAL_FILL 32ULL
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	669	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
				670
				671	mlog_entry_void();
				672
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	673	memset(bhs, 0, sizeof(struct buffer_head ) CONCURRENT_JOURNAL_FILL);
				674
Mark Fasheh	8110b07	2007-03-22 16:53:23 -0700	[diff] [blame]	675	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	676	v_blkno = 0;
Mark Fasheh	8110b07	2007-03-22 16:53:23 -0700	[diff] [blame]	677	while (v_blkno < num_blocks) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	678	status = ocfs2_extent_map_get_blocks(inode, v_blkno,
Mark Fasheh	49cb8d2	2007-03-09 16:21:46 -0800	[diff] [blame]	679	&p_blkno, &p_blocks, NULL);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	680	if (status < 0) {
				681	mlog_errno(status);
				682	goto bail;
				683	}
				684
				685	if (p_blocks > CONCURRENT_JOURNAL_FILL)
				686	p_blocks = CONCURRENT_JOURNAL_FILL;
				687
Mark Fasheh	dd4a2c2	2006-04-12 14:24:05 -0700	[diff] [blame]	688	/* We are reading journal data which should not
				689	* be put in the uptodate cache */
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	690	status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
				691	p_blkno, p_blocks, bhs, 0,
Mark Fasheh	dd4a2c2	2006-04-12 14:24:05 -0700	[diff] [blame]	692	NULL);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	693	if (status < 0) {
				694	mlog_errno(status);
				695	goto bail;
				696	}
				697
				698	for(i = 0; i < p_blocks; i++) {
				699	brelse(bhs[i]);
				700	bhs[i] = NULL;
				701	}
				702
				703	v_blkno += p_blocks;
				704	}
				705
				706	bail:
				707	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
				708	if (bhs[i])
				709	brelse(bhs[i]);
				710	mlog_exit(status);
				711	return status;
				712	}
				713
				714	struct ocfs2_la_recovery_item {
				715	struct list_head lri_list;
				716	int lri_slot;
				717	struct ocfs2_dinode *lri_la_dinode;
				718	struct ocfs2_dinode *lri_tl_dinode;
				719	};
				720
				721	/* Does the second half of the recovery process. By this point, the
				722	* node is marked clean and can actually be considered recovered,
				723	* hence it's no longer in the recovery map, but there's still some
				724	* cleanup we can do which shouldn't happen within the recovery thread
				725	* as locking in that context becomes very difficult if we are to take
				726	* recovering nodes into account.
				727	*
				728	* NOTE: This function can and will sleep on recovery of other nodes
				729	* during cluster locking, just like any other ocfs2 process.
				730	*/
David Howells	c402895	2006-11-22 14:57:56 +0000	[diff] [blame]	731	void ocfs2_complete_recovery(struct work_struct *work)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	732	{
				733	int ret;
David Howells	c402895	2006-11-22 14:57:56 +0000	[diff] [blame]	734	struct ocfs2_journal *journal =
				735	container_of(work, struct ocfs2_journal, j_recovery_work);
				736	struct ocfs2_super *osb = journal->j_osb;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	737	struct ocfs2_dinode la_dinode, tl_dinode;
Christoph Hellwig	800deef	2007-05-17 16:03:13 +0200	[diff] [blame]	738	struct ocfs2_la_recovery_item item, n;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	739	LIST_HEAD(tmp_la_list);
				740
				741	mlog_entry_void();
				742
				743	mlog(0, "completing recovery from keventd\n");
				744
				745	spin_lock(&journal->j_lock);
				746	list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
				747	spin_unlock(&journal->j_lock);
				748
Christoph Hellwig	800deef	2007-05-17 16:03:13 +0200	[diff] [blame]	749	list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	750	list_del_init(&item->lri_list);
				751
				752	mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
				753
				754	la_dinode = item->lri_la_dinode;
				755	if (la_dinode) {
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	756	mlog(0, "Clean up local alloc %llu\n",
Mark Fasheh	1ca1a11	2007-04-27 16:01:25 -0700	[diff] [blame]	757	(unsigned long long)le64_to_cpu(la_dinode->i_blkno));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	758
				759	ret = ocfs2_complete_local_alloc_recovery(osb,
				760	la_dinode);
				761	if (ret < 0)
				762	mlog_errno(ret);
				763
				764	kfree(la_dinode);
				765	}
				766
				767	tl_dinode = item->lri_tl_dinode;
				768	if (tl_dinode) {
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	769	mlog(0, "Clean up truncate log %llu\n",
Mark Fasheh	1ca1a11	2007-04-27 16:01:25 -0700	[diff] [blame]	770	(unsigned long long)le64_to_cpu(tl_dinode->i_blkno));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	771
				772	ret = ocfs2_complete_truncate_log_recovery(osb,
				773	tl_dinode);
				774	if (ret < 0)
				775	mlog_errno(ret);
				776
				777	kfree(tl_dinode);
				778	}
				779
				780	ret = ocfs2_recover_orphans(osb, item->lri_slot);
				781	if (ret < 0)
				782	mlog_errno(ret);
				783
				784	kfree(item);
				785	}
				786
				787	mlog(0, "Recovery completion\n");
				788	mlog_exit_void();
				789	}
				790
				791	/* NOTE: This function always eats your references to la_dinode and
				792	* tl_dinode, either manually on error, or by passing them to
				793	* ocfs2_complete_recovery */
				794	static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
				795	int slot_num,
				796	struct ocfs2_dinode *la_dinode,
				797	struct ocfs2_dinode *tl_dinode)
				798	{
				799	struct ocfs2_la_recovery_item *item;
				800
Sunil Mushran	afae00ab	2006-04-12 14:37:00 -0700	[diff] [blame]	801	item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	802	if (!item) {
				803	/* Though we wish to avoid it, we are in fact safe in
				804	* skipping local alloc cleanup as fsck.ocfs2 is more
				805	* than capable of reclaiming unused space. */
				806	if (la_dinode)
				807	kfree(la_dinode);
				808
				809	if (tl_dinode)
				810	kfree(tl_dinode);
				811
				812	mlog_errno(-ENOMEM);
				813	return;
				814	}
				815
				816	INIT_LIST_HEAD(&item->lri_list);
				817	item->lri_la_dinode = la_dinode;
				818	item->lri_slot = slot_num;
				819	item->lri_tl_dinode = tl_dinode;
				820
				821	spin_lock(&journal->j_lock);
				822	list_add_tail(&item->lri_list, &journal->j_la_cleanups);
				823	queue_work(ocfs2_wq, &journal->j_recovery_work);
				824	spin_unlock(&journal->j_lock);
				825	}
				826
				827	/* Called by the mount code to queue recovery the last part of
				828	* recovery for it's own slot. */
				829	void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
				830	{
				831	struct ocfs2_journal *journal = osb->journal;
				832
				833	if (osb->dirty) {
				834	/* No need to queue up our truncate_log as regular
				835	* cleanup will catch that. */
				836	ocfs2_queue_recovery_completion(journal,
				837	osb->slot_num,
				838	osb->local_alloc_copy,
				839	NULL);
				840	ocfs2_schedule_truncate_log_flush(osb, 0);
				841
				842	osb->local_alloc_copy = NULL;
				843	osb->dirty = 0;
				844	}
				845	}
				846
				847	static int __ocfs2_recovery_thread(void *arg)
				848	{
				849	int status, node_num;
				850	struct ocfs2_super *osb = arg;
				851
				852	mlog_entry_void();
				853
				854	status = ocfs2_wait_on_mount(osb);
				855	if (status < 0) {
				856	goto bail;
				857	}
				858
				859	restart:
				860	status = ocfs2_super_lock(osb, 1);
				861	if (status < 0) {
				862	mlog_errno(status);
				863	goto bail;
				864	}
				865
				866	while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
				867	node_num = ocfs2_node_map_first_set_bit(osb,
				868	&osb->recovery_map);
				869	if (node_num == O2NM_INVALID_NODE_NUM) {
				870	mlog(0, "Out of nodes to recover.\n");
				871	break;
				872	}
				873
				874	status = ocfs2_recover_node(osb, node_num);
				875	if (status < 0) {
				876	mlog(ML_ERROR,
				877	"Error %d recovering node %d on device (%u,%u)!\n",
				878	status, node_num,
				879	MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
				880	mlog(ML_ERROR, "Volume requires unmount.\n");
				881	continue;
				882	}
				883
				884	ocfs2_recovery_map_clear(osb, node_num);
				885	}
				886	ocfs2_super_unlock(osb, 1);
				887
				888	/* We always run recovery on our own orphan dir - the dead
Mark Fasheh	34d024f	2007-09-24 15:56:19 -0700	[diff] [blame]	889	* node(s) may have disallowd a previos inode delete. Re-processing
				890	* is therefore required. */
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	891	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
				892	NULL);
				893
				894	bail:
Arjan van de Ven	c74ec2f	2006-01-13 21:54:23 -0800	[diff] [blame]	895	mutex_lock(&osb->recovery_lock);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	896	if (!status &&
				897	!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
Arjan van de Ven	c74ec2f	2006-01-13 21:54:23 -0800	[diff] [blame]	898	mutex_unlock(&osb->recovery_lock);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	899	goto restart;
				900	}
				901
				902	osb->recovery_thread_task = NULL;
				903	mb(); /* sync with ocfs2_recovery_thread_running */
				904	wake_up(&osb->recovery_event);
				905
Arjan van de Ven	c74ec2f	2006-01-13 21:54:23 -0800	[diff] [blame]	906	mutex_unlock(&osb->recovery_lock);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	907
				908	mlog_exit(status);
				909	/* no one is callint kthread_stop() for us so the kthread() api
				910	* requires that we call do_exit(). And it isn't exported, but
				911	* complete_and_exit() seems to be a minimal wrapper around it. */
				912	complete_and_exit(NULL, status);
				913	return status;
				914	}
				915
				916	void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
				917	{
				918	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
				919	node_num, osb->node_num);
				920
Arjan van de Ven	c74ec2f	2006-01-13 21:54:23 -0800	[diff] [blame]	921	mutex_lock(&osb->recovery_lock);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	922	if (osb->disable_recovery)
				923	goto out;
				924
				925	/* People waiting on recovery will wait on
				926	* the recovery map to empty. */
				927	if (!ocfs2_recovery_map_set(osb, node_num))
				928	mlog(0, "node %d already be in recovery.\n", node_num);
				929
				930	mlog(0, "starting recovery thread...\n");
				931
				932	if (osb->recovery_thread_task)
				933	goto out;
				934
				935	osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
Mark Fasheh	7842704	2006-05-04 12:03:26 -0700	[diff] [blame]	936	"ocfs2rec");
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	937	if (IS_ERR(osb->recovery_thread_task)) {
				938	mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
				939	osb->recovery_thread_task = NULL;
				940	}
				941
				942	out:
Arjan van de Ven	c74ec2f	2006-01-13 21:54:23 -0800	[diff] [blame]	943	mutex_unlock(&osb->recovery_lock);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	944	wake_up(&osb->recovery_event);
				945
				946	mlog_exit_void();
				947	}
				948
				949	/* Does the actual journal replay and marks the journal inode as
				950	* clean. Will only replay if the journal inode is marked dirty. */
				951	static int ocfs2_replay_journal(struct ocfs2_super *osb,
				952	int node_num,
				953	int slot_num)
				954	{
				955	int status;
				956	int got_lock = 0;
				957	unsigned int flags;
				958	struct inode *inode = NULL;
				959	struct ocfs2_dinode *fe;
				960	journal_t *journal = NULL;
				961	struct buffer_head *bh = NULL;
				962
				963	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
				964	slot_num);
				965	if (inode == NULL) {
				966	status = -EACCES;
				967	mlog_errno(status);
				968	goto done;
				969	}
				970	if (is_bad_inode(inode)) {
				971	status = -EACCES;
				972	iput(inode);
				973	inode = NULL;
				974	mlog_errno(status);
				975	goto done;
				976	}
				977	SET_INODE_JOURNAL(inode);
				978
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	979	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	980	if (status < 0) {
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	981	mlog(0, "status returned from ocfs2_inode_lock=%d\n", status);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	982	if (status != -ERESTARTSYS)
				983	mlog(ML_ERROR, "Could not lock journal!\n");
				984	goto done;
				985	}
				986	got_lock = 1;
				987
				988	fe = (struct ocfs2_dinode *) bh->b_data;
				989
				990	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
				991
				992	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
				993	mlog(0, "No recovery required for node %d\n", node_num);
				994	goto done;
				995	}
				996
				997	mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
				998	node_num, slot_num,
				999	MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
				1000
				1001	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
				1002
				1003	status = ocfs2_force_read_journal(inode);
				1004	if (status < 0) {
				1005	mlog_errno(status);
				1006	goto done;
				1007	}
				1008
				1009	mlog(0, "calling journal_init_inode\n");
				1010	journal = journal_init_inode(inode);
				1011	if (journal == NULL) {
				1012	mlog(ML_ERROR, "Linux journal layer error\n");
				1013	status = -EIO;
				1014	goto done;
				1015	}
				1016
				1017	status = journal_load(journal);
				1018	if (status < 0) {
				1019	mlog_errno(status);
				1020	if (!igrab(inode))
				1021	BUG();
				1022	journal_destroy(journal);
				1023	goto done;
				1024	}
				1025
				1026	ocfs2_clear_journal_error(osb->sb, journal, slot_num);
				1027
				1028	/* wipe the journal */
				1029	mlog(0, "flushing the journal.\n");
				1030	journal_lock_updates(journal);
				1031	status = journal_flush(journal);
				1032	journal_unlock_updates(journal);
				1033	if (status < 0)
				1034	mlog_errno(status);
				1035
				1036	/* This will mark the node clean */
				1037	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
				1038	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
				1039	fe->id1.journal1.ij_flags = cpu_to_le32(flags);
				1040
				1041	status = ocfs2_write_block(osb, bh, inode);
				1042	if (status < 0)
				1043	mlog_errno(status);
				1044
				1045	if (!igrab(inode))
				1046	BUG();
				1047
				1048	journal_destroy(journal);
				1049
				1050	done:
				1051	/* drop the lock on this nodes journal */
				1052	if (got_lock)
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1053	ocfs2_inode_unlock(inode, 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1054
				1055	if (inode)
				1056	iput(inode);
				1057
				1058	if (bh)
				1059	brelse(bh);
				1060
				1061	mlog_exit(status);
				1062	return status;
				1063	}
				1064
				1065	/*
				1066	* Do the most important parts of node recovery:
				1067	* - Replay it's journal
				1068	* - Stamp a clean local allocator file
				1069	* - Stamp a clean truncate log
				1070	* - Mark the node clean
				1071	*
				1072	* If this function completes without error, a node in OCFS2 can be
				1073	* said to have been safely recovered. As a result, failure during the
				1074	* second part of a nodes recovery process (local alloc recovery) is
				1075	* far less concerning.
				1076	*/
				1077	static int ocfs2_recover_node(struct ocfs2_super *osb,
				1078	int node_num)
				1079	{
				1080	int status = 0;
				1081	int slot_num;
				1082	struct ocfs2_slot_info *si = osb->slot_info;
				1083	struct ocfs2_dinode *la_copy = NULL;
				1084	struct ocfs2_dinode *tl_copy = NULL;
				1085
				1086	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
				1087	node_num, osb->node_num);
				1088
				1089	mlog(0, "checking node %d\n", node_num);
				1090
				1091	/* Should not ever be called to recover ourselves -- in that
				1092	* case we should've called ocfs2_journal_load instead. */
Eric Sesterhenn / snakebyte	ebdec83	2006-01-27 10:32:52 +0100	[diff] [blame]	1093	BUG_ON(osb->node_num == node_num);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1094
				1095	slot_num = ocfs2_node_num_to_slot(si, node_num);
				1096	if (slot_num == OCFS2_INVALID_SLOT) {
				1097	status = 0;
				1098	mlog(0, "no slot for this node, so no recovery required.\n");
				1099	goto done;
				1100	}
				1101
				1102	mlog(0, "node %d was using slot %d\n", node_num, slot_num);
				1103
				1104	status = ocfs2_replay_journal(osb, node_num, slot_num);
				1105	if (status < 0) {
				1106	mlog_errno(status);
				1107	goto done;
				1108	}
				1109
				1110	/* Stamp a clean local alloc file AFTER recovering the journal... */
				1111	status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
				1112	if (status < 0) {
				1113	mlog_errno(status);
				1114	goto done;
				1115	}
				1116
				1117	/* An error from begin_truncate_log_recovery is not
				1118	* serious enough to warrant halting the rest of
				1119	* recovery. */
				1120	status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
				1121	if (status < 0)
				1122	mlog_errno(status);
				1123
				1124	/* Likewise, this would be a strange but ultimately not so
				1125	* harmful place to get an error... */
				1126	ocfs2_clear_slot(si, slot_num);
				1127	status = ocfs2_update_disk_slots(osb, si);
				1128	if (status < 0)
				1129	mlog_errno(status);
				1130
				1131	/* This will kfree the memory pointed to by la_copy and tl_copy */
				1132	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
				1133	tl_copy);
				1134
				1135	status = 0;
				1136	done:
				1137
				1138	mlog_exit(status);
				1139	return status;
				1140	}
				1141
				1142	/* Test node liveness by trylocking his journal. If we get the lock,
				1143	* we drop it here. Return 0 if we got the lock, -EAGAIN if node is
				1144	* still alive (we couldn't get the lock) and < 0 on error. */
				1145	static int ocfs2_trylock_journal(struct ocfs2_super *osb,
				1146	int slot_num)
				1147	{
				1148	int status, flags;
				1149	struct inode *inode = NULL;
				1150
				1151	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
				1152	slot_num);
				1153	if (inode == NULL) {
				1154	mlog(ML_ERROR, "access error\n");
				1155	status = -EACCES;
				1156	goto bail;
				1157	}
				1158	if (is_bad_inode(inode)) {
				1159	mlog(ML_ERROR, "access error (bad inode)\n");
				1160	iput(inode);
				1161	inode = NULL;
				1162	status = -EACCES;
				1163	goto bail;
				1164	}
				1165	SET_INODE_JOURNAL(inode);
				1166
				1167	flags = OCFS2_META_LOCK_RECOVERY \| OCFS2_META_LOCK_NOQUEUE;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1168	status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1169	if (status < 0) {
				1170	if (status != -EAGAIN)
				1171	mlog_errno(status);
				1172	goto bail;
				1173	}
				1174
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1175	ocfs2_inode_unlock(inode, 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1176	bail:
				1177	if (inode)
				1178	iput(inode);
				1179
				1180	return status;
				1181	}
				1182
				1183	/* Call this underneath ocfs2_super_lock. It also assumes that the
				1184	* slot info struct has been updated from disk. */
				1185	int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
				1186	{
				1187	int status, i, node_num;
				1188	struct ocfs2_slot_info *si = osb->slot_info;
				1189
				1190	/* This is called with the super block cluster lock, so we
				1191	* know that the slot map can't change underneath us. */
				1192
				1193	spin_lock(&si->si_lock);
				1194	for(i = 0; i < si->si_num_slots; i++) {
				1195	if (i == osb->slot_num)
				1196	continue;
				1197	if (ocfs2_is_empty_slot(si, i))
				1198	continue;
				1199
				1200	node_num = si->si_global_node_nums[i];
				1201	if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num))
				1202	continue;
				1203	spin_unlock(&si->si_lock);
				1204
				1205	/* Ok, we have a slot occupied by another node which
				1206	* is not in the recovery map. We trylock his journal
				1207	* file here to test if he's alive. */
				1208	status = ocfs2_trylock_journal(osb, i);
				1209	if (!status) {
				1210	/* Since we're called from mount, we know that
				1211	* the recovery thread can't race us on
				1212	* setting / checking the recovery bits. */
				1213	ocfs2_recovery_thread(osb, node_num);
				1214	} else if ((status < 0) && (status != -EAGAIN)) {
				1215	mlog_errno(status);
				1216	goto bail;
				1217	}
				1218
				1219	spin_lock(&si->si_lock);
				1220	}
				1221	spin_unlock(&si->si_lock);
				1222
				1223	status = 0;
				1224	bail:
				1225	mlog_exit(status);
				1226	return status;
				1227	}
				1228
/* Private state handed to ocfs2_orphan_filldir through
 * ocfs2_dir_foreach. */
struct ocfs2_orphan_filldir_priv {
	/* Most recently queued orphan; the list is chained through
	 * ip_next_orphan. */
	struct inode		*head;
	/* Super block passed to ocfs2_iget for each entry. */
	struct ocfs2_super	*osb;
};
				1233
				1234	static int ocfs2_orphan_filldir(void priv, const char name, int name_len,
				1235	loff_t pos, u64 ino, unsigned type)
				1236	{
				1237	struct ocfs2_orphan_filldir_priv *p = priv;
				1238	struct inode *iter;
				1239
				1240	if (name_len == 1 && !strncmp(".", name, 1))
				1241	return 0;
				1242	if (name_len == 2 && !strncmp("..", name, 2))
				1243	return 0;
				1244
				1245	/* Skip bad inodes so that recovery can continue */
				1246	iter = ocfs2_iget(p->osb, ino,
Jan Kara	5fa0613	2008-01-11 00:11:45 +0100	[diff] [blame]	1247	OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
Mark Fasheh	5eae5b9	2007-09-10 17:50:51 -0700	[diff] [blame]	1248	if (IS_ERR(iter))
				1249	return 0;
				1250
				1251	mlog(0, "queue orphan %llu\n",
				1252	(unsigned long long)OCFS2_I(iter)->ip_blkno);
				1253	/* No locking is required for the next_orphan queue as there
				1254	* is only ever a single process doing orphan recovery. */
				1255	OCFS2_I(iter)->ip_next_orphan = p->head;
				1256	p->head = iter;
				1257
				1258	return 0;
				1259	}
				1260
Mark Fasheh	b4df6ed	2006-02-22 17:35:08 -0800	[diff] [blame]	1261	static int ocfs2_queue_orphans(struct ocfs2_super *osb,
				1262	int slot,
				1263	struct inode **head)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1264	{
Mark Fasheh	b4df6ed	2006-02-22 17:35:08 -0800	[diff] [blame]	1265	int status;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1266	struct inode *orphan_dir_inode = NULL;
Mark Fasheh	5eae5b9	2007-09-10 17:50:51 -0700	[diff] [blame]	1267	struct ocfs2_orphan_filldir_priv priv;
				1268	loff_t pos = 0;
				1269
				1270	priv.osb = osb;
				1271	priv.head = *head;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1272
				1273	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
				1274	ORPHAN_DIR_SYSTEM_INODE,
				1275	slot);
				1276	if (!orphan_dir_inode) {
				1277	status = -ENOENT;
				1278	mlog_errno(status);
Mark Fasheh	b4df6ed	2006-02-22 17:35:08 -0800	[diff] [blame]	1279	return status;
				1280	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1281
Jes Sorensen	1b1dcc1	2006-01-09 15:59:24 -0800	[diff] [blame]	1282	mutex_lock(&orphan_dir_inode->i_mutex);
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1283	status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1284	if (status < 0) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1285	mlog_errno(status);
				1286	goto out;
				1287	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1288
Mark Fasheh	5eae5b9	2007-09-10 17:50:51 -0700	[diff] [blame]	1289	status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv,
				1290	ocfs2_orphan_filldir);
				1291	if (status) {
				1292	mlog_errno(status);
Mark Fasheh	a86370f	2007-12-03 14:06:23 -0800	[diff] [blame]	1293	goto out_cluster;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1294	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1295
Mark Fasheh	5eae5b9	2007-09-10 17:50:51 -0700	[diff] [blame]	1296	*head = priv.head;
				1297
Mark Fasheh	a86370f	2007-12-03 14:06:23 -0800	[diff] [blame]	1298	out_cluster:
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1299	ocfs2_inode_unlock(orphan_dir_inode, 0);
Mark Fasheh	b4df6ed	2006-02-22 17:35:08 -0800	[diff] [blame]	1300	out:
				1301	mutex_unlock(&orphan_dir_inode->i_mutex);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1302	iput(orphan_dir_inode);
Mark Fasheh	b4df6ed	2006-02-22 17:35:08 -0800	[diff] [blame]	1303	return status;
				1304	}
				1305
				1306	static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
				1307	int slot)
				1308	{
				1309	int ret;
				1310
				1311	spin_lock(&osb->osb_lock);
				1312	ret = !osb->osb_orphan_wipes[slot];
				1313	spin_unlock(&osb->osb_lock);
				1314	return ret;
				1315	}
				1316
				1317	static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
				1318	int slot)
				1319	{
				1320	spin_lock(&osb->osb_lock);
				1321	/* Mark ourselves such that new processes in delete_inode()
				1322	* know to quit early. */
				1323	ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
				1324	while (osb->osb_orphan_wipes[slot]) {
				1325	/* If any processes are already in the middle of an
				1326	* orphan wipe on this dir, then we need to wait for
				1327	* them. */
				1328	spin_unlock(&osb->osb_lock);
				1329	wait_event_interruptible(osb->osb_wipe_event,
				1330	ocfs2_orphan_recovery_can_continue(osb, slot));
				1331	spin_lock(&osb->osb_lock);
				1332	}
				1333	spin_unlock(&osb->osb_lock);
				1334	}
				1335
				1336	static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
				1337	int slot)
				1338	{
				1339	ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
				1340	}
				1341
				1342	/*
				1343	* Orphan recovery. Each mounted node has it's own orphan dir which we
				1344	* must run during recovery. Our strategy here is to build a list of
				1345	* the inodes in the orphan dir and iget/iput them. The VFS does
				1346	* (most) of the rest of the work.
				1347	*
				1348	* Orphan recovery can happen at any time, not just mount so we have a
				1349	* couple of extra considerations.
				1350	*
				1351	* - We grab as many inodes as we can under the orphan dir lock -
				1352	* doing iget() outside the orphan dir risks getting a reference on
				1353	* an invalid inode.
				1354	* - We must be sure not to deadlock with other processes on the
				1355	* system wanting to run delete_inode(). This can happen when they go
				1356	* to lock the orphan dir and the orphan recovery process attempts to
				1357	* iget() inside the orphan dir lock. This can be avoided by
				1358	* advertising our state to ocfs2_delete_inode().
				1359	*/
				1360	static int ocfs2_recover_orphans(struct ocfs2_super *osb,
				1361	int slot)
				1362	{
				1363	int ret = 0;
				1364	struct inode *inode = NULL;
				1365	struct inode *iter;
				1366	struct ocfs2_inode_info *oi;
				1367
				1368	mlog(0, "Recover inodes from orphan dir in slot %d\n", slot);
				1369
				1370	ocfs2_mark_recovering_orphan_dir(osb, slot);
				1371	ret = ocfs2_queue_orphans(osb, slot, &inode);
				1372	ocfs2_clear_recovering_orphan_dir(osb, slot);
				1373
				1374	/* Error here should be noted, but we want to continue with as
				1375	* many queued inodes as we've got. */
				1376	if (ret)
				1377	mlog_errno(ret);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1378
				1379	while (inode) {
				1380	oi = OCFS2_I(inode);
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1381	mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1382
				1383	iter = oi->ip_next_orphan;
				1384
				1385	spin_lock(&oi->ip_lock);
Mark Fasheh	34d024f	2007-09-24 15:56:19 -0700	[diff] [blame]	1386	/* The remote delete code may have set these on the
				1387	* assumption that the other node would wipe them
				1388	* successfully. If they are still in the node's
				1389	* orphan dir, we need to reset that state. */
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1390	oi->ip_flags &= ~(OCFS2_INODE_DELETED\|OCFS2_INODE_SKIP_DELETE);
				1391
				1392	/* Set the proper information to get us going into
				1393	* ocfs2_delete_inode. */
				1394	oi->ip_flags \|= OCFS2_INODE_MAYBE_ORPHANED;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1395	spin_unlock(&oi->ip_lock);
				1396
				1397	iput(inode);
				1398
				1399	inode = iter;
				1400	}
				1401
Mark Fasheh	b4df6ed	2006-02-22 17:35:08 -0800	[diff] [blame]	1402	return ret;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1403	}
				1404
				1405	static int ocfs2_wait_on_mount(struct ocfs2_super *osb)
				1406	{
				1407	/* This check is good because ocfs2 will wait on our recovery
				1408	* thread before changing it to something other than MOUNTED
				1409	* or DISABLED. */
				1410	wait_event(osb->osb_mount_event,
				1411	atomic_read(&osb->vol_state) == VOLUME_MOUNTED \|\|
				1412	atomic_read(&osb->vol_state) == VOLUME_DISABLED);
				1413
				1414	/* If there's an error on mount, then we may never get to the
				1415	* MOUNTED flag, but this is set right before
				1416	* dismount_volume() so we can trust it. */
				1417	if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
				1418	mlog(0, "mount error, exiting!\n");
				1419	return -EBUSY;
				1420	}
				1421
				1422	return 0;
				1423	}
				1424
				1425	static int ocfs2_commit_thread(void *arg)
				1426	{
				1427	int status;
				1428	struct ocfs2_super *osb = arg;
				1429	struct ocfs2_journal *journal = osb->journal;
				1430
				1431	/* we can trust j_num_trans here because _should_stop() is only set in
				1432	* shutdown and nobody other than ourselves should be able to start
				1433	* transactions. committing on shutdown might take a few iterations
				1434	* as final transactions put deleted inodes on the list */
				1435	while (!(kthread_should_stop() &&
				1436	atomic_read(&journal->j_num_trans) == 0)) {
				1437
Mark Fasheh	745ae8ba	2006-02-09 13:23:39 -0800	[diff] [blame]	1438	wait_event_interruptible(osb->checkpoint_event,
				1439	atomic_read(&journal->j_num_trans)
				1440	\|\| kthread_should_stop());
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1441
				1442	status = ocfs2_commit_cache(osb);
				1443	if (status < 0)
				1444	mlog_errno(status);
				1445
				1446	if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
				1447	mlog(ML_KTHREAD,
				1448	"commit_thread: %u transactions pending on "
				1449	"shutdown\n",
				1450	atomic_read(&journal->j_num_trans));
				1451	}
				1452	}
				1453
				1454	return 0;
				1455	}
				1456
				1457	/* Look for a dirty journal without taking any cluster locks. Used for
				1458	* hard readonly access to determine whether the file system journals
				1459	* require recovery. */
				1460	int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
				1461	{
				1462	int ret = 0;
				1463	unsigned int slot;
				1464	struct buffer_head *di_bh;
				1465	struct ocfs2_dinode *di;
				1466	struct inode *journal = NULL;
				1467
				1468	for(slot = 0; slot < osb->max_slots; slot++) {
				1469	journal = ocfs2_get_system_file_inode(osb,
				1470	JOURNAL_SYSTEM_INODE,
				1471	slot);
				1472	if (!journal \|\| is_bad_inode(journal)) {
				1473	ret = -EACCES;
				1474	mlog_errno(ret);
				1475	goto out;
				1476	}
				1477
				1478	di_bh = NULL;
				1479	ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
				1480	0, journal);
				1481	if (ret < 0) {
				1482	mlog_errno(ret);
				1483	goto out;
				1484	}
				1485
				1486	di = (struct ocfs2_dinode *) di_bh->b_data;
				1487
				1488	if (le32_to_cpu(di->id1.journal1.ij_flags) &
				1489	OCFS2_JOURNAL_DIRTY_FL)
				1490	ret = -EROFS;
				1491
				1492	brelse(di_bh);
				1493	if (ret)
				1494	break;
				1495	}
				1496
				1497	out:
				1498	if (journal)
				1499	iput(journal);
				1500
				1501	return ret;
				1502	}