Blame - fs/ocfs2/super.c - fp2-dev/kernel/msm

blob: 6534f92424dd213a4471a8da553439f9843daa37 [file] [log] [blame]

Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1	/* -*- mode: c; c-basic-offset: 8; -*-
				2	* vim: noexpandtab sw=8 ts=8 sts=0:
				3	*
				4	* super.c
				5	*
				6	* load/unload driver, mount/dismount volumes
				7	*
				8	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
				9	*
				10	* This program is free software; you can redistribute it and/or
				11	* modify it under the terms of the GNU General Public
				12	* License as published by the Free Software Foundation; either
				13	* version 2 of the License, or (at your option) any later version.
				14	*
				15	* This program is distributed in the hope that it will be useful,
				16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				18	* General Public License for more details.
				19	*
				20	* You should have received a copy of the GNU General Public
				21	* License along with this program; if not, write to the
				22	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				23	* Boston, MA 02111-1307, USA.
				24	*/
				25
				26	#include <linux/module.h>
				27	#include <linux/fs.h>
				28	#include <linux/types.h>
				29	#include <linux/slab.h>
				30	#include <linux/highmem.h>
				31	#include <linux/utsname.h>
				32	#include <linux/init.h>
				33	#include <linux/random.h>
				34	#include <linux/statfs.h>
				35	#include <linux/moduleparam.h>
				36	#include <linux/blkdev.h>
				37	#include <linux/socket.h>
				38	#include <linux/inet.h>
				39	#include <linux/parser.h>
				40	#include <linux/crc32.h>
				41	#include <linux/debugfs.h>
				42
				43	#include <cluster/nodemanager.h>
				44
				45	#define MLOG_MASK_PREFIX ML_SUPER
				46	#include <cluster/masklog.h>
				47
				48	#include "ocfs2.h"
				49
				50	/* this should be the only file to include a version 1 header */
				51	#include "ocfs1_fs_compat.h"
				52
				53	#include "alloc.h"
				54	#include "dlmglue.h"
				55	#include "export.h"
				56	#include "extent_map.h"
				57	#include "heartbeat.h"
				58	#include "inode.h"
				59	#include "journal.h"
				60	#include "localalloc.h"
				61	#include "namei.h"
				62	#include "slot_map.h"
				63	#include "super.h"
				64	#include "sysfile.h"
				65	#include "uptodate.h"
				66	#include "ver.h"
				67	#include "vote.h"
				68
				69	#include "buffer_head_io.h"
				70
Christoph Lameter	e18b890	2006-12-06 20:33:20 -0800	[diff] [blame]	71	static struct kmem_cache *ocfs2_inode_cachep = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	72
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	73	/* OCFS2 needs to schedule several differnt types of work which
				74	* require cluster locking, disk I/O, recovery waits, etc. Since these
				75	* types of work tend to be heavy we avoid using the kernel events
				76	* workqueue and schedule on our own. */
				77	struct workqueue_struct *ocfs2_wq = NULL;
				78
				79	static struct dentry *ocfs2_debugfs_root = NULL;
				80
				81	MODULE_AUTHOR("Oracle");
				82	MODULE_LICENSE("GPL");
				83
				84	static int ocfs2_parse_options(struct super_block sb, char options,
				85	unsigned long *mount_opt, int is_remount);
				86	static void ocfs2_put_super(struct super_block *sb);
				87	static int ocfs2_mount_volume(struct super_block *sb);
				88	static int ocfs2_remount(struct super_block sb, int flags, char *data);
				89	static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err);
				90	static int ocfs2_initialize_mem_caches(void);
				91	static void ocfs2_free_mem_caches(void);
				92	static void ocfs2_delete_osb(struct ocfs2_super *osb);
				93
David Howells	726c334	2006-06-23 02:02:58 -0700	[diff] [blame]	94	static int ocfs2_statfs(struct dentry dentry, struct kstatfs buf);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	95
				96	static int ocfs2_sync_fs(struct super_block *sb, int wait);
				97
				98	static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb);
				99	static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb);
				100	static int ocfs2_release_system_inodes(struct ocfs2_super *osb);
				101	static int ocfs2_fill_local_node_info(struct ocfs2_super *osb);
				102	static int ocfs2_check_volume(struct ocfs2_super *osb);
				103	static int ocfs2_verify_volume(struct ocfs2_dinode *di,
				104	struct buffer_head *bh,
				105	u32 sectsize);
				106	static int ocfs2_initialize_super(struct super_block *sb,
				107	struct buffer_head *bh,
				108	int sector_size);
				109	static int ocfs2_get_sector(struct super_block *sb,
				110	struct buffer_head **bh,
				111	int block,
				112	int sect_size);
				113	static void ocfs2_write_super(struct super_block *sb);
				114	static struct inode ocfs2_alloc_inode(struct super_block sb);
				115	static void ocfs2_destroy_inode(struct inode *inode);
				116
				117	static unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
				118
Josef 'Jeff' Sipek	ee9b6d6	2007-02-12 00:55:41 -0800	[diff] [blame]	119	static const struct super_operations ocfs2_sops = {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	120	.statfs = ocfs2_statfs,
				121	.alloc_inode = ocfs2_alloc_inode,
				122	.destroy_inode = ocfs2_destroy_inode,
				123	.drop_inode = ocfs2_drop_inode,
				124	.clear_inode = ocfs2_clear_inode,
				125	.delete_inode = ocfs2_delete_inode,
				126	.sync_fs = ocfs2_sync_fs,
				127	.write_super = ocfs2_write_super,
				128	.put_super = ocfs2_put_super,
				129	.remount_fs = ocfs2_remount,
				130	};
				131
				132	enum {
				133	Opt_barrier,
				134	Opt_err_panic,
				135	Opt_err_ro,
				136	Opt_intr,
				137	Opt_nointr,
				138	Opt_hb_none,
				139	Opt_hb_local,
				140	Opt_data_ordered,
				141	Opt_data_writeback,
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	142	Opt_atime_quantum,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	143	Opt_err,
				144	};
				145
				146	static match_table_t tokens = {
				147	{Opt_barrier, "barrier=%u"},
				148	{Opt_err_panic, "errors=panic"},
				149	{Opt_err_ro, "errors=remount-ro"},
				150	{Opt_intr, "intr"},
				151	{Opt_nointr, "nointr"},
				152	{Opt_hb_none, OCFS2_HB_NONE},
				153	{Opt_hb_local, OCFS2_HB_LOCAL},
				154	{Opt_data_ordered, "data=ordered"},
				155	{Opt_data_writeback, "data=writeback"},
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	156	{Opt_atime_quantum, "atime_quantum=%u"},
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	157	{Opt_err, NULL}
				158	};
				159
				160	/*
				161	* write_super and sync_fs ripped right out of ext3.
				162	*/
				163	static void ocfs2_write_super(struct super_block *sb)
				164	{
Ingo Molnar	7892f2f	2006-01-09 15:59:25 -0800	[diff] [blame]	165	if (mutex_trylock(&sb->s_lock) != 0)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	166	BUG();
				167	sb->s_dirt = 0;
				168	}
				169
				170	static int ocfs2_sync_fs(struct super_block *sb, int wait)
				171	{
				172	int status = 0;
				173	tid_t target;
				174	struct ocfs2_super *osb = OCFS2_SB(sb);
				175
				176	sb->s_dirt = 0;
				177
				178	if (ocfs2_is_hard_readonly(osb))
				179	return -EROFS;
				180
				181	if (wait) {
				182	status = ocfs2_flush_truncate_log(osb);
				183	if (status < 0)
				184	mlog_errno(status);
				185	} else {
				186	ocfs2_schedule_truncate_log_flush(osb, 0);
				187	}
				188
				189	if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) {
				190	if (wait)
				191	log_wait_commit(OCFS2_SB(sb)->journal->j_journal,
				192	target);
				193	}
				194	return 0;
				195	}
				196
				197	static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
				198	{
				199	struct inode *new = NULL;
				200	int status = 0;
				201	int i;
				202
				203	mlog_entry_void();
				204
Mark Fasheh	24c19ef	2006-09-22 17:28:19 -0700	[diff] [blame]	205	new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	206	if (IS_ERR(new)) {
				207	status = PTR_ERR(new);
				208	mlog_errno(status);
				209	goto bail;
				210	}
				211	osb->root_inode = new;
				212
Mark Fasheh	24c19ef	2006-09-22 17:28:19 -0700	[diff] [blame]	213	new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	214	if (IS_ERR(new)) {
				215	status = PTR_ERR(new);
				216	mlog_errno(status);
				217	goto bail;
				218	}
				219	osb->sys_root_inode = new;
				220
				221	for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
				222	i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
				223	new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
				224	if (!new) {
				225	ocfs2_release_system_inodes(osb);
				226	status = -EINVAL;
				227	mlog_errno(status);
				228	/* FIXME: Should ERROR_RO_FS */
				229	mlog(ML_ERROR, "Unable to load system inode %d, "
				230	"possibly corrupt fs?", i);
				231	goto bail;
				232	}
				233	// the array now has one ref, so drop this one
				234	iput(new);
				235	}
				236
				237	bail:
				238	mlog_exit(status);
				239	return status;
				240	}
				241
				242	static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
				243	{
				244	struct inode *new = NULL;
				245	int status = 0;
				246	int i;
				247
				248	mlog_entry_void();
				249
				250	for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
				251	i < NUM_SYSTEM_INODES;
				252	i++) {
				253	new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
				254	if (!new) {
				255	ocfs2_release_system_inodes(osb);
				256	status = -EINVAL;
				257	mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n",
				258	status, i, osb->slot_num);
				259	goto bail;
				260	}
				261	/* the array now has one ref, so drop this one */
				262	iput(new);
				263	}
				264
				265	bail:
				266	mlog_exit(status);
				267	return status;
				268	}
				269
				270	static int ocfs2_release_system_inodes(struct ocfs2_super *osb)
				271	{
				272	int status = 0, i;
				273	struct inode *inode;
				274
				275	mlog_entry_void();
				276
				277	for (i = 0; i < NUM_SYSTEM_INODES; i++) {
				278	inode = osb->system_inodes[i];
				279	if (inode) {
				280	iput(inode);
				281	osb->system_inodes[i] = NULL;
				282	}
				283	}
				284
				285	inode = osb->sys_root_inode;
				286	if (inode) {
				287	iput(inode);
				288	osb->sys_root_inode = NULL;
				289	}
				290
				291	inode = osb->root_inode;
				292	if (inode) {
				293	iput(inode);
				294	osb->root_inode = NULL;
				295	}
				296
				297	mlog_exit(status);
				298	return status;
				299	}
				300
				301	/* We're allocating fs objects, use GFP_NOFS */
				302	static struct inode ocfs2_alloc_inode(struct super_block sb)
				303	{
				304	struct ocfs2_inode_info *oi;
				305
Christoph Lameter	e6b4f8d	2006-12-06 20:33:14 -0800	[diff] [blame]	306	oi = kmem_cache_alloc(ocfs2_inode_cachep, GFP_NOFS);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	307	if (!oi)
				308	return NULL;
				309
				310	return &oi->vfs_inode;
				311	}
				312
				313	static void ocfs2_destroy_inode(struct inode *inode)
				314	{
				315	kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
				316	}
				317
				318	/* From xfs_super.c:xfs_max_file_offset
				319	* Copyright (c) 2000-2004 Silicon Graphics, Inc.
				320	*/
				321	static unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
				322	{
				323	unsigned int pagefactor = 1;
				324	unsigned int bitshift = BITS_PER_LONG - 1;
				325
				326	/* Figure out maximum filesize, on Linux this can depend on
				327	* the filesystem blocksize (on 32 bit platforms).
				328	* __block_prepare_write does this in an [unsigned] long...
				329	* page->index << (PAGE_CACHE_SHIFT - bbits)
				330	* So, for page sized blocks (4K on 32 bit platforms),
				331	* this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
				332	* (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
				333	* but for smaller blocksizes it is less (bbits = log2 bsize).
				334	* Note1: get_block_t takes a long (implicit cast from above)
				335	* Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
				336	* can optionally convert the [unsigned] long from above into
				337	* an [unsigned] long long.
				338	*/
				339
				340	#if BITS_PER_LONG == 32
				341	# if defined(CONFIG_LBD)
Alexey Dobriyan	2ecd05a	2006-10-11 01:22:05 -0700	[diff] [blame]	342	BUILD_BUG_ON(sizeof(sector_t) != 8);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	343	pagefactor = PAGE_CACHE_SIZE;
				344	bitshift = BITS_PER_LONG;
				345	# else
				346	pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
				347	# endif
				348	#endif
				349
				350	return (((unsigned long long)pagefactor) << bitshift) - 1;
				351	}
				352
				353	static int ocfs2_remount(struct super_block sb, int flags, char *data)
				354	{
				355	int incompat_features;
				356	int ret = 0;
				357	unsigned long parsed_options;
				358	struct ocfs2_super *osb = OCFS2_SB(sb);
				359
				360	if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
				361	ret = -EINVAL;
				362	goto out;
				363	}
				364
				365	if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) !=
				366	(parsed_options & OCFS2_MOUNT_HB_LOCAL)) {
				367	ret = -EINVAL;
				368	mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
				369	goto out;
				370	}
				371
				372	if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) !=
				373	(parsed_options & OCFS2_MOUNT_DATA_WRITEBACK)) {
				374	ret = -EINVAL;
				375	mlog(ML_ERROR, "Cannot change data mode on remount\n");
				376	goto out;
				377	}
				378
				379	/* We're going to/from readonly mode. */
				380	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
				381	/* Lock here so the check of HARD_RO and the potential
				382	* setting of SOFT_RO is atomic. */
				383	spin_lock(&osb->osb_lock);
				384	if (osb->osb_flags & OCFS2_OSB_HARD_RO) {
				385	mlog(ML_ERROR, "Remount on readonly device is forbidden.\n");
				386	ret = -EROFS;
				387	goto unlock_osb;
				388	}
				389
				390	if (*flags & MS_RDONLY) {
				391	mlog(0, "Going to ro mode.\n");
				392	sb->s_flags \|= MS_RDONLY;
				393	osb->osb_flags \|= OCFS2_OSB_SOFT_RO;
				394	} else {
				395	mlog(0, "Making ro filesystem writeable.\n");
				396
				397	if (osb->osb_flags & OCFS2_OSB_ERROR_FS) {
				398	mlog(ML_ERROR, "Cannot remount RDWR "
				399	"filesystem due to previous errors.\n");
				400	ret = -EROFS;
				401	goto unlock_osb;
				402	}
				403	incompat_features = OCFS2_HAS_RO_COMPAT_FEATURE(sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP);
				404	if (incompat_features) {
				405	mlog(ML_ERROR, "Cannot remount RDWR because "
				406	"of unsupported optional features "
				407	"(%x).\n", incompat_features);
				408	ret = -EINVAL;
				409	goto unlock_osb;
				410	}
				411	sb->s_flags &= ~MS_RDONLY;
				412	osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;
				413	}
				414	unlock_osb:
				415	spin_unlock(&osb->osb_lock);
				416	}
				417
				418	if (!ret) {
				419	if (!ocfs2_is_hard_readonly(osb))
				420	ocfs2_set_journal_params(osb);
				421
				422	/* Only save off the new mount options in case of a successful
				423	* remount. */
				424	osb->s_mount_opt = parsed_options;
				425	}
				426	out:
				427	return ret;
				428	}
				429
				430	static int ocfs2_sb_probe(struct super_block *sb,
				431	struct buffer_head **bh,
				432	int *sector_size)
				433	{
				434	int status = 0, tmpstat;
				435	struct ocfs1_vol_disk_hdr *hdr;
				436	struct ocfs2_dinode *di;
				437	int blksize;
				438
				439	*bh = NULL;
				440
				441	/* may be > 512 */
				442	*sector_size = bdev_hardsect_size(sb->s_bdev);
				443	if (*sector_size > OCFS2_MAX_BLOCKSIZE) {
				444	mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n",
				445	*sector_size, OCFS2_MAX_BLOCKSIZE);
				446	status = -EINVAL;
				447	goto bail;
				448	}
				449
				450	/* Can this really happen? */
				451	if (*sector_size < OCFS2_MIN_BLOCKSIZE)
				452	*sector_size = OCFS2_MIN_BLOCKSIZE;
				453
				454	/* check block zero for old format */
				455	status = ocfs2_get_sector(sb, bh, 0, *sector_size);
				456	if (status < 0) {
				457	mlog_errno(status);
				458	goto bail;
				459	}
				460	hdr = (struct ocfs1_vol_disk_hdr ) (bh)->b_data;
				461	if (hdr->major_version == OCFS1_MAJOR_VERSION) {
				462	mlog(ML_ERROR, "incompatible version: %u.%u\n",
				463	hdr->major_version, hdr->minor_version);
				464	status = -EINVAL;
				465	}
				466	if (memcmp(hdr->signature, OCFS1_VOLUME_SIGNATURE,
				467	strlen(OCFS1_VOLUME_SIGNATURE)) == 0) {
				468	mlog(ML_ERROR, "incompatible volume signature: %8s\n",
				469	hdr->signature);
				470	status = -EINVAL;
				471	}
				472	brelse(*bh);
				473	*bh = NULL;
				474	if (status < 0) {
				475	mlog(ML_ERROR, "This is an ocfs v1 filesystem which must be "
				476	"upgraded before mounting with ocfs v2\n");
				477	goto bail;
				478	}
				479
				480	/*
				481	* Now check at magic offset for 512, 1024, 2048, 4096
				482	* blocksizes. 4096 is the maximum blocksize because it is
				483	* the minimum clustersize.
				484	*/
				485	status = -EINVAL;
				486	for (blksize = *sector_size;
				487	blksize <= OCFS2_MAX_BLOCKSIZE;
				488	blksize <<= 1) {
				489	tmpstat = ocfs2_get_sector(sb, bh,
				490	OCFS2_SUPER_BLOCK_BLKNO,
				491	blksize);
				492	if (tmpstat < 0) {
				493	status = tmpstat;
				494	mlog_errno(status);
				495	goto bail;
				496	}
				497	di = (struct ocfs2_dinode ) (bh)->b_data;
				498	status = ocfs2_verify_volume(di, *bh, blksize);
				499	if (status >= 0)
				500	goto bail;
				501	brelse(*bh);
				502	*bh = NULL;
				503	if (status != -EAGAIN)
				504	break;
				505	}
				506
				507	bail:
				508	return status;
				509	}
				510
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	511	static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
				512	{
				513	if (ocfs2_mount_local(osb)) {
				514	if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
				515	mlog(ML_ERROR, "Cannot heartbeat on a locally "
				516	"mounted device.\n");
				517	return -EINVAL;
				518	}
				519	}
				520
				521	if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
				522	if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) {
				523	mlog(ML_ERROR, "Heartbeat has to be started to mount "
				524	"a read-write clustered device.\n");
				525	return -EINVAL;
				526	}
				527	}
				528
				529	return 0;
				530	}
				531
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	532	static int ocfs2_fill_super(struct super_block sb, void data, int silent)
				533	{
				534	struct dentry *root;
				535	int status, sector_size;
				536	unsigned long parsed_opt;
				537	struct inode *inode = NULL;
				538	struct ocfs2_super *osb = NULL;
				539	struct buffer_head *bh = NULL;
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	540	char nodestr[8];
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	541
				542	mlog_entry("%p, %p, %i", sb, data, silent);
				543
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	544	if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	545	status = -EINVAL;
				546	goto read_super_error;
				547	}
				548
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	549	/* for now we only have one cluster/node, make sure we see it
				550	* in the heartbeat universe */
				551	if (parsed_opt & OCFS2_MOUNT_HB_LOCAL) {
				552	if (!o2hb_check_local_node_heartbeating()) {
				553	status = -EINVAL;
				554	goto read_super_error;
				555	}
				556	}
				557
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	558	/* probe for superblock */
				559	status = ocfs2_sb_probe(sb, &bh, &sector_size);
				560	if (status < 0) {
				561	mlog(ML_ERROR, "superblock probe failed!\n");
				562	goto read_super_error;
				563	}
				564
				565	status = ocfs2_initialize_super(sb, bh, sector_size);
				566	osb = OCFS2_SB(sb);
				567	if (status < 0) {
				568	mlog_errno(status);
				569	goto read_super_error;
				570	}
				571	brelse(bh);
				572	bh = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	573	osb->s_mount_opt = parsed_opt;
				574
				575	sb->s_magic = OCFS2_SUPER_MAGIC;
				576
				577	/* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
				578	* heartbeat=none */
				579	if (bdev_read_only(sb->s_bdev)) {
				580	if (!(sb->s_flags & MS_RDONLY)) {
				581	status = -EACCES;
				582	mlog(ML_ERROR, "Readonly device detected but readonly "
				583	"mount was not specified.\n");
				584	goto read_super_error;
				585	}
				586
				587	/* You should not be able to start a local heartbeat
				588	* on a readonly device. */
				589	if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
				590	status = -EROFS;
				591	mlog(ML_ERROR, "Local heartbeat specified on readonly "
				592	"device.\n");
				593	goto read_super_error;
				594	}
				595
				596	status = ocfs2_check_journals_nolocks(osb);
				597	if (status < 0) {
				598	if (status == -EROFS)
				599	mlog(ML_ERROR, "Recovery required on readonly "
				600	"file system, but write access is "
				601	"unavailable.\n");
				602	else
				603	mlog_errno(status);
				604	goto read_super_error;
				605	}
				606
				607	ocfs2_set_ro_flag(osb, 1);
				608
				609	printk(KERN_NOTICE "Readonly device detected. No cluster "
				610	"services will be utilized for this mount. Recovery "
				611	"will be skipped.\n");
				612	}
				613
				614	if (!ocfs2_is_hard_readonly(osb)) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	615	if (sb->s_flags & MS_RDONLY)
				616	ocfs2_set_ro_flag(osb, 0);
				617	}
				618
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	619	status = ocfs2_verify_heartbeat(osb);
				620	if (status < 0) {
				621	mlog_errno(status);
				622	goto read_super_error;
				623	}
				624
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	625	osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
				626	ocfs2_debugfs_root);
				627	if (!osb->osb_debug_root) {
				628	status = -EINVAL;
				629	mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n");
				630	goto read_super_error;
				631	}
				632
				633	status = ocfs2_mount_volume(sb);
				634	if (osb->root_inode)
				635	inode = igrab(osb->root_inode);
				636
				637	if (status < 0)
				638	goto read_super_error;
				639
				640	if (!inode) {
				641	status = -EIO;
				642	mlog_errno(status);
				643	goto read_super_error;
				644	}
				645
				646	root = d_alloc_root(inode);
				647	if (!root) {
				648	status = -ENOMEM;
				649	mlog_errno(status);
				650	goto read_super_error;
				651	}
				652
				653	sb->s_root = root;
				654
				655	ocfs2_complete_mount_recovery(osb);
				656
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	657	if (ocfs2_mount_local(osb))
				658	snprintf(nodestr, sizeof(nodestr), "local");
				659	else
				660	snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num);
				661
				662	printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) "
Sunil Mushran	781ee3e	2006-04-27 16:41:31 -0700	[diff] [blame]	663	"with %s data mode.\n",
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	664	osb->dev_str, nodestr, osb->slot_num,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	665	osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
				666	"ordered");
				667
				668	atomic_set(&osb->vol_state, VOLUME_MOUNTED);
				669	wake_up(&osb->osb_mount_event);
				670
				671	mlog_exit(status);
				672	return status;
				673
				674	read_super_error:
				675	if (bh != NULL)
				676	brelse(bh);
				677
				678	if (inode)
				679	iput(inode);
				680
				681	if (osb) {
				682	atomic_set(&osb->vol_state, VOLUME_DISABLED);
				683	wake_up(&osb->osb_mount_event);
				684	ocfs2_dismount_volume(sb, 1);
				685	}
				686
				687	mlog_exit(status);
				688	return status;
				689	}
				690
David Howells	454e239	2006-06-23 02:02:57 -0700	[diff] [blame]	691	static int ocfs2_get_sb(struct file_system_type *fs_type,
				692	int flags,
				693	const char *dev_name,
				694	void *data,
				695	struct vfsmount *mnt)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	696	{
David Howells	454e239	2006-06-23 02:02:57 -0700	[diff] [blame]	697	return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super,
				698	mnt);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	699	}
				700
				701	static struct file_system_type ocfs2_fs_type = {
				702	.owner = THIS_MODULE,
				703	.name = "ocfs2",
				704	.get_sb = ocfs2_get_sb, /* is this called when we mount
				705	* the fs? */
				706	.kill_sb = kill_block_super, /* set to the generic one
				707	* right now, but do we
				708	* need to change that? */
Mark Fasheh	1ba9da2	2006-09-08 14:22:54 -0700	[diff] [blame]	709	.fs_flags = FS_REQUIRES_DEV\|FS_RENAME_DOES_D_MOVE,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	710	.next = NULL
				711	};
				712
				713	static int ocfs2_parse_options(struct super_block *sb,
				714	char *options,
				715	unsigned long *mount_opt,
				716	int is_remount)
				717	{
				718	int status;
				719	char *p;
				720
				721	mlog_entry("remount: %d, options: \"%s\"\n", is_remount,
				722	options ? options : "(none)");
				723
				724	*mount_opt = 0;
				725
				726	if (!options) {
				727	status = 1;
				728	goto bail;
				729	}
				730
				731	while ((p = strsep(&options, ",")) != NULL) {
				732	int token, option;
				733	substring_t args[MAX_OPT_ARGS];
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	734	struct ocfs2_super * osb = OCFS2_SB(sb);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	735
				736	if (!*p)
				737	continue;
				738
				739	token = match_token(p, tokens, args);
				740	switch (token) {
				741	case Opt_hb_local:
				742	*mount_opt \|= OCFS2_MOUNT_HB_LOCAL;
				743	break;
				744	case Opt_hb_none:
				745	*mount_opt &= ~OCFS2_MOUNT_HB_LOCAL;
				746	break;
				747	case Opt_barrier:
				748	if (match_int(&args[0], &option)) {
				749	status = 0;
				750	goto bail;
				751	}
				752	if (option)
				753	*mount_opt \|= OCFS2_MOUNT_BARRIER;
				754	else
				755	*mount_opt &= ~OCFS2_MOUNT_BARRIER;
				756	break;
				757	case Opt_intr:
				758	*mount_opt &= ~OCFS2_MOUNT_NOINTR;
				759	break;
				760	case Opt_nointr:
				761	*mount_opt \|= OCFS2_MOUNT_NOINTR;
				762	break;
				763	case Opt_err_panic:
				764	*mount_opt \|= OCFS2_MOUNT_ERRORS_PANIC;
				765	break;
				766	case Opt_err_ro:
				767	*mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
				768	break;
				769	case Opt_data_ordered:
				770	*mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
				771	break;
				772	case Opt_data_writeback:
				773	*mount_opt \|= OCFS2_MOUNT_DATA_WRITEBACK;
				774	break;
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	775	case Opt_atime_quantum:
				776	if (match_int(&args[0], &option)) {
				777	status = 0;
				778	goto bail;
				779	}
				780	if (option >= 0)
				781	osb->s_atime_quantum = option;
				782	else
				783	osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
				784	break;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	785	default:
				786	mlog(ML_ERROR,
				787	"Unrecognized mount option \"%s\" "
				788	"or missing value\n", p);
				789	status = 0;
				790	goto bail;
				791	}
				792	}
				793
				794	status = 1;
				795
				796	bail:
				797	mlog_exit(status);
				798	return status;
				799	}
				800
				801	static int __init ocfs2_init(void)
				802	{
				803	int status;
				804
				805	mlog_entry_void();
				806
				807	ocfs2_print_version();
				808
				809	if (init_ocfs2_extent_maps())
				810	return -ENOMEM;
				811
				812	status = init_ocfs2_uptodate_cache();
				813	if (status < 0) {
				814	mlog_errno(status);
				815	goto leave;
				816	}
				817
				818	status = ocfs2_initialize_mem_caches();
				819	if (status < 0) {
				820	mlog_errno(status);
				821	goto leave;
				822	}
				823
				824	ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
				825	if (!ocfs2_wq) {
				826	status = -ENOMEM;
				827	goto leave;
				828	}
				829
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	830	ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
				831	if (!ocfs2_debugfs_root) {
				832	status = -EFAULT;
				833	mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
				834	}
				835
				836	leave:
				837	if (status < 0) {
				838	ocfs2_free_mem_caches();
				839	exit_ocfs2_uptodate_cache();
				840	exit_ocfs2_extent_maps();
				841	}
				842
				843	mlog_exit(status);
				844
				845	if (status >= 0) {
				846	return register_filesystem(&ocfs2_fs_type);
				847	} else
				848	return -1;
				849	}
				850
				851	static void __exit ocfs2_exit(void)
				852	{
				853	mlog_entry_void();
				854
				855	if (ocfs2_wq) {
				856	flush_workqueue(ocfs2_wq);
				857	destroy_workqueue(ocfs2_wq);
				858	}
				859
				860	debugfs_remove(ocfs2_debugfs_root);
				861
				862	ocfs2_free_mem_caches();
				863
				864	unregister_filesystem(&ocfs2_fs_type);
				865
				866	exit_ocfs2_extent_maps();
				867
				868	exit_ocfs2_uptodate_cache();
				869
				870	mlog_exit_void();
				871	}
				872
				873	static void ocfs2_put_super(struct super_block *sb)
				874	{
				875	mlog_entry("(0x%p)\n", sb);
				876
				877	ocfs2_sync_blockdev(sb);
				878	ocfs2_dismount_volume(sb, 0);
				879
				880	mlog_exit_void();
				881	}
				882
David Howells	726c334	2006-06-23 02:02:58 -0700	[diff] [blame]	883	static int ocfs2_statfs(struct dentry dentry, struct kstatfs buf)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	884	{
				885	struct ocfs2_super *osb;
				886	u32 numbits, freebits;
				887	int status;
				888	struct ocfs2_dinode *bm_lock;
				889	struct buffer_head *bh = NULL;
				890	struct inode *inode = NULL;
				891
David Howells	726c334	2006-06-23 02:02:58 -0700	[diff] [blame]	892	mlog_entry("(%p, %p)\n", dentry->d_sb, buf);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	893
David Howells	726c334	2006-06-23 02:02:58 -0700	[diff] [blame]	894	osb = OCFS2_SB(dentry->d_sb);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	895
				896	inode = ocfs2_get_system_file_inode(osb,
				897	GLOBAL_BITMAP_SYSTEM_INODE,
				898	OCFS2_INVALID_SLOT);
				899	if (!inode) {
				900	mlog(ML_ERROR, "failed to get bitmap inode\n");
				901	status = -EIO;
				902	goto bail;
				903	}
				904
Mark Fasheh	4bcec18	2006-10-09 16:02:40 -0700	[diff] [blame]	905	status = ocfs2_meta_lock(inode, &bh, 0);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	906	if (status < 0) {
				907	mlog_errno(status);
				908	goto bail;
				909	}
				910
				911	bm_lock = (struct ocfs2_dinode *) bh->b_data;
				912
				913	numbits = le32_to_cpu(bm_lock->id1.bitmap1.i_total);
				914	freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used);
				915
				916	buf->f_type = OCFS2_SUPER_MAGIC;
David Howells	726c334	2006-06-23 02:02:58 -0700	[diff] [blame]	917	buf->f_bsize = dentry->d_sb->s_blocksize;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	918	buf->f_namelen = OCFS2_MAX_FILENAME_LEN;
				919	buf->f_blocks = ((sector_t) numbits) *
				920	(osb->s_clustersize >> osb->sb->s_blocksize_bits);
				921	buf->f_bfree = ((sector_t) freebits) *
				922	(osb->s_clustersize >> osb->sb->s_blocksize_bits);
				923	buf->f_bavail = buf->f_bfree;
				924	buf->f_files = numbits;
				925	buf->f_ffree = freebits;
				926
				927	brelse(bh);
				928
				929	ocfs2_meta_unlock(inode, 0);
				930	status = 0;
				931	bail:
				932	if (inode)
				933	iput(inode);
				934
				935	mlog_exit(status);
				936
				937	return status;
				938	}
				939
				940	static void ocfs2_inode_init_once(void *data,
Christoph Lameter	e18b890	2006-12-06 20:33:20 -0800	[diff] [blame]	941	struct kmem_cache *cachep,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	942	unsigned long flags)
				943	{
				944	struct ocfs2_inode_info *oi = data;
				945
				946	if ((flags & (SLAB_CTOR_VERIFY\|SLAB_CTOR_CONSTRUCTOR)) ==
				947	SLAB_CTOR_CONSTRUCTOR) {
				948	oi->ip_flags = 0;
				949	oi->ip_open_count = 0;
				950	spin_lock_init(&oi->ip_lock);
				951	ocfs2_extent_map_init(&oi->vfs_inode);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	952	INIT_LIST_HEAD(&oi->ip_io_markers);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	953	oi->ip_created_trans = 0;
				954	oi->ip_last_trans = 0;
				955	oi->ip_dir_start_lookup = 0;
				956
				957	init_rwsem(&oi->ip_alloc_sem);
Mark Fasheh	251b6ec	2006-01-10 15:41:43 -0800	[diff] [blame]	958	mutex_init(&oi->ip_io_mutex);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	959
				960	oi->ip_blkno = 0ULL;
				961	oi->ip_clusters = 0;
				962
				963	ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
				964	ocfs2_lock_res_init_once(&oi->ip_meta_lockres);
				965	ocfs2_lock_res_init_once(&oi->ip_data_lockres);
				966
				967	ocfs2_metadata_cache_init(&oi->vfs_inode);
				968
				969	inode_init_once(&oi->vfs_inode);
				970	}
				971	}
				972
				973	static int ocfs2_initialize_mem_caches(void)
				974	{
				975	ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache",
Paul Jackson	fffb60f	2006-03-24 03:16:06 -0800	[diff] [blame]	976	sizeof(struct ocfs2_inode_info),
				977	0,
				978	(SLAB_HWCACHE_ALIGN\|SLAB_RECLAIM_ACCOUNT\|
				979	SLAB_MEM_SPREAD),
				980	ocfs2_inode_init_once, NULL);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	981	if (!ocfs2_inode_cachep)
				982	return -ENOMEM;
				983
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	984	return 0;
				985	}
				986
				987	static void ocfs2_free_mem_caches(void)
				988	{
				989	if (ocfs2_inode_cachep)
				990	kmem_cache_destroy(ocfs2_inode_cachep);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	991
				992	ocfs2_inode_cachep = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	993	}
				994
				995	static int ocfs2_get_sector(struct super_block *sb,
				996	struct buffer_head **bh,
				997	int block,
				998	int sect_size)
				999	{
				1000	if (!sb_set_blocksize(sb, sect_size)) {
				1001	mlog(ML_ERROR, "unable to set blocksize\n");
				1002	return -EIO;
				1003	}
				1004
				1005	*bh = sb_getblk(sb, block);
				1006	if (!*bh) {
				1007	mlog_errno(-EIO);
				1008	return -EIO;
				1009	}
				1010	lock_buffer(*bh);
				1011	if (!buffer_dirty(*bh))
				1012	clear_buffer_uptodate(*bh);
				1013	unlock_buffer(*bh);
				1014	ll_rw_block(READ, 1, bh);
				1015	wait_on_buffer(*bh);
				1016	return 0;
				1017	}
				1018
				1019	/* ocfs2 1.0 only allows one cluster and node identity per kernel image. */
				1020	static int ocfs2_fill_local_node_info(struct ocfs2_super *osb)
				1021	{
				1022	int status;
				1023
				1024	/* XXX hold a ref on the node while mounte? easy enough, if
				1025	* desirable. */
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	1026	if (ocfs2_mount_local(osb))
				1027	osb->node_num = 0;
				1028	else
				1029	osb->node_num = o2nm_this_node();
				1030
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1031	if (osb->node_num == O2NM_MAX_NODES) {
				1032	mlog(ML_ERROR, "could not find this host's node number\n");
				1033	status = -ENOENT;
				1034	goto bail;
				1035	}
				1036
Sunil Mushran	781ee3e	2006-04-27 16:41:31 -0700	[diff] [blame]	1037	mlog(0, "I am node %d\n", osb->node_num);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1038
				1039	status = 0;
				1040	bail:
				1041	return status;
				1042	}
				1043
/*
 * Bring the volume behind @sb online.
 *
 * Runs the mount steps in order and stops at the first one that fails.
 * The super block cluster lock is taken part way through and is always
 * released again before returning.
 *
 * Returns 0 on success (also when the volume is hard read-only, in
 * which case nothing is done) or the negative status of the step that
 * failed.
 */
static int ocfs2_mount_volume(struct super_block *sb)
{
	int status = 0;
	int unlock_super = 0;
	struct ocfs2_super *osb = OCFS2_SB(sb);

	mlog_entry_void();

	if (ocfs2_is_hard_readonly(osb))
		goto leave;

	status = ocfs2_fill_local_node_info(osb);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	status = ocfs2_register_hb_callbacks(osb);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	status = ocfs2_dlm_init(osb);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	/* requires vote_thread to be running. */
	status = ocfs2_register_net_handlers(osb);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	status = ocfs2_super_lock(osb, 1);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}
	/* From here on the exit path must drop the super block lock. */
	unlock_super = 1;

	/* This will load up the node map and add ourselves to it. */
	status = ocfs2_find_slot(osb);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	ocfs2_populate_mounted_map(osb);

	/* load all node-local system inodes */
	status = ocfs2_init_local_system_inodes(osb);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	status = ocfs2_check_volume(osb);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	status = ocfs2_truncate_log_init(osb);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	/* A local mount does not send the mount vote below. */
	if (ocfs2_mount_local(osb))
		goto leave;

	/* This should be sent after we recovered our journal as it
	 * will cause other nodes to unmark us as needing
	 * recovery. However, we need to send it before dropping the
	 * super block lock as otherwise their recovery threads might
	 * try to clean us up while we're live! */
	status = ocfs2_request_mount_vote(osb);
	if (status < 0)
		mlog_errno(status);

leave:
	if (unlock_super)
		ocfs2_super_unlock(osb, 1);

	mlog_exit(status);
	return status;
}
				1134
				1135	/* we can't grab the goofy sem lock from inside wait_event, so we use
				1136	* memory barriers to make sure that we'll see the null task before
				1137	* being woken up */
				1138	static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
				1139	{
				1140	mb();
				1141	return osb->recovery_thread_task != NULL;
				1142	}
				1143
/*
 * Shut the volume behind @sb down and free its ocfs2_super.
 *
 * @mnt_err: non-zero when called because a mount failed; in that case
 *           ocfs2_stop_heartbeat() is not called.
 *
 * On the normal path the osb is freed and sb->s_fs_info is cleared, so
 * neither may be used by the caller afterwards.
 */
static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
{
	int tmp;
	struct ocfs2_super *osb = NULL;
	char nodestr[8];

	mlog_entry("(0x%p)\n", sb);

	BUG_ON(!sb);
	osb = OCFS2_SB(sb);
	BUG_ON(!osb);

	ocfs2_shutdown_local_alloc(osb);

	ocfs2_truncate_log_shutdown(osb);

	/* disable any new recovery threads and wait for any currently
	 * running ones to exit. Do this before setting the vol_state. */
	mutex_lock(&osb->recovery_lock);
	osb->disable_recovery = 1;
	mutex_unlock(&osb->recovery_lock);
	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));

	/* At this point, we know that no more recovery threads can be
	 * launched, so wait for any recovery completion work to
	 * complete. */
	flush_workqueue(ocfs2_wq);

	ocfs2_journal_shutdown(osb);

	ocfs2_sync_blockdev(sb);

	/* No dlm means we've failed during mount, so skip all the
	 * steps which depended on that to complete. */
	if (osb->dlm) {
		tmp = ocfs2_super_lock(osb, 1);
		if (tmp < 0) {
			mlog_errno(tmp);
			/* NOTE(review): this return skips the rest of the
			 * teardown below, including ocfs2_delete_osb() and
			 * kfree(osb) - confirm that is intended. */
			return;
		}

		tmp = ocfs2_request_umount_vote(osb);
		if (tmp < 0)
			mlog_errno(tmp);

		/* Only give the slot back if one was actually taken. */
		if (osb->slot_num != OCFS2_INVALID_SLOT)
			ocfs2_put_slot(osb);

		ocfs2_super_unlock(osb, 1);
	}

	ocfs2_release_system_inodes(osb);

	if (osb->dlm) {
		ocfs2_unregister_net_handlers(osb);

		ocfs2_dlm_shutdown(osb);
	}

	ocfs2_clear_hb_callbacks(osb);

	debugfs_remove(osb->osb_debug_root);

	if (!mnt_err)
		ocfs2_stop_heartbeat(osb);

	atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);

	/* Build the node description for the log line below. */
	if (ocfs2_mount_local(osb))
		snprintf(nodestr, sizeof(nodestr), "local");
	else
		snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num);

	printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n",
	       osb->dev_str, nodestr);

	/* Release what hangs off the osb, then the osb itself. */
	ocfs2_delete_osb(osb);
	kfree(osb);
	sb->s_dev = 0;
	sb->s_fs_info = NULL;
}
				1225
				1226	static int ocfs2_setup_osb_uuid(struct ocfs2_super osb, const unsigned char uuid,
				1227	unsigned uuid_bytes)
				1228	{
				1229	int i, ret;
				1230	char *ptr;
				1231
				1232	BUG_ON(uuid_bytes != OCFS2_VOL_UUID_LEN);
				1233
Robert P. J. Day	cd86128	2006-12-13 00:34:52 -0800	[diff] [blame]	1234	osb->uuid_str = kzalloc(OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1235	if (osb->uuid_str == NULL)
				1236	return -ENOMEM;
				1237
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1238	for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) {
				1239	/* print with null */
				1240	ret = snprintf(ptr, 3, "%02X", uuid[i]);
				1241	if (ret != 2) /* drop super cleans up */
				1242	return -EINVAL;
				1243	/* then only advance past the last char */
				1244	ptr += 2;
				1245	}
				1246
				1247	return 0;
				1248	}
				1249
				1250	static int ocfs2_initialize_super(struct super_block *sb,
				1251	struct buffer_head *bh,
				1252	int sector_size)
				1253	{
				1254	int status = 0;
				1255	int i;
				1256	struct ocfs2_dinode *di = NULL;
				1257	struct inode *inode = NULL;
				1258	struct buffer_head *bitmap_bh = NULL;
				1259	struct ocfs2_journal *journal;
				1260	__le32 uuid_net_key;
				1261	struct ocfs2_super *osb;
				1262
				1263	mlog_entry_void();
				1264
Robert P. J. Day	cd86128	2006-12-13 00:34:52 -0800	[diff] [blame]	1265	osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1266	if (!osb) {
				1267	status = -ENOMEM;
				1268	mlog_errno(status);
				1269	goto bail;
				1270	}
				1271
				1272	sb->s_fs_info = osb;
				1273	sb->s_op = &ocfs2_sops;
				1274	sb->s_export_op = &ocfs2_export_ops;
				1275	sb->s_flags \|= MS_NOATIME;
				1276	/* this is needed to support O_LARGEFILE */
				1277	sb->s_maxbytes = ocfs2_max_file_offset(sb->s_blocksize_bits);
				1278
				1279	osb->sb = sb;
				1280	/* Save off for ocfs2_rw_direct */
				1281	osb->s_sectsize_bits = blksize_bits(sector_size);
Eric Sesterhenn / snakebyte	ebdec83	2006-01-27 10:32:52 +0100	[diff] [blame]	1282	BUG_ON(!osb->s_sectsize_bits);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1283
				1284	osb->net_response_ids = 0;
				1285	spin_lock_init(&osb->net_response_lock);
				1286	INIT_LIST_HEAD(&osb->net_response_list);
				1287
				1288	INIT_LIST_HEAD(&osb->osb_net_handlers);
				1289	init_waitqueue_head(&osb->recovery_event);
				1290	spin_lock_init(&osb->vote_task_lock);
				1291	init_waitqueue_head(&osb->vote_event);
				1292	osb->vote_work_sequence = 0;
				1293	osb->vote_wake_sequence = 0;
				1294	INIT_LIST_HEAD(&osb->blocked_lock_list);
				1295	osb->blocked_lock_count = 0;
				1296	INIT_LIST_HEAD(&osb->vote_list);
				1297	spin_lock_init(&osb->osb_lock);
				1298
				1299	atomic_set(&osb->alloc_stats.moves, 0);
				1300	atomic_set(&osb->alloc_stats.local_data, 0);
				1301	atomic_set(&osb->alloc_stats.bitmap_data, 0);
				1302	atomic_set(&osb->alloc_stats.bg_allocs, 0);
				1303	atomic_set(&osb->alloc_stats.bg_extends, 0);
				1304
				1305	ocfs2_init_node_maps(osb);
				1306
				1307	snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
				1308	MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
				1309
Arjan van de Ven	c74ec2f	2006-01-13 21:54:23 -0800	[diff] [blame]	1310	mutex_init(&osb->recovery_lock);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1311
				1312	osb->disable_recovery = 0;
				1313	osb->recovery_thread_task = NULL;
				1314
				1315	init_waitqueue_head(&osb->checkpoint_event);
				1316	atomic_set(&osb->needs_checkpoint, 0);
				1317
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	1318	osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
				1319
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1320	osb->node_num = O2NM_INVALID_NODE_NUM;
				1321	osb->slot_num = OCFS2_INVALID_SLOT;
				1322
				1323	osb->local_alloc_state = OCFS2_LA_UNUSED;
				1324	osb->local_alloc_bh = NULL;
				1325
				1326	ocfs2_setup_hb_callbacks(osb);
				1327
				1328	init_waitqueue_head(&osb->osb_mount_event);
				1329
				1330	osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
				1331	if (!osb->vol_label) {
				1332	mlog(ML_ERROR, "unable to alloc vol label\n");
				1333	status = -ENOMEM;
				1334	goto bail;
				1335	}
				1336
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1337	di = (struct ocfs2_dinode *)bh->b_data;
				1338
				1339	osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
				1340	if (osb->max_slots > OCFS2_MAX_SLOTS \|\| osb->max_slots == 0) {
				1341	mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
				1342	osb->max_slots);
				1343	status = -EINVAL;
				1344	goto bail;
				1345	}
Sunil Mushran	781ee3e	2006-04-27 16:41:31 -0700	[diff] [blame]	1346	mlog(0, "max_slots for this device: %u\n", osb->max_slots);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1347
Mark Fasheh	b4df6ed	2006-02-22 17:35:08 -0800	[diff] [blame]	1348	init_waitqueue_head(&osb->osb_wipe_event);
				1349	osb->osb_orphan_wipes = kcalloc(osb->max_slots,
				1350	sizeof(*osb->osb_orphan_wipes),
				1351	GFP_KERNEL);
				1352	if (!osb->osb_orphan_wipes) {
				1353	status = -ENOMEM;
				1354	mlog_errno(status);
				1355	goto bail;
				1356	}
				1357
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1358	osb->s_feature_compat =
				1359	le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
				1360	osb->s_feature_ro_compat =
				1361	le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_ro_compat);
				1362	osb->s_feature_incompat =
				1363	le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_incompat);
				1364
				1365	if ((i = OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_INCOMPAT_SUPP))) {
				1366	mlog(ML_ERROR, "couldn't mount because of unsupported "
				1367	"optional features (%x).\n", i);
				1368	status = -EINVAL;
				1369	goto bail;
				1370	}
				1371	if (!(osb->sb->s_flags & MS_RDONLY) &&
				1372	(i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) {
				1373	mlog(ML_ERROR, "couldn't mount RDWR because of "
				1374	"unsupported optional features (%x).\n", i);
				1375	status = -EINVAL;
				1376	goto bail;
				1377	}
				1378
				1379	get_random_bytes(&osb->s_next_generation, sizeof(u32));
				1380
				1381	/* FIXME
				1382	* This should be done in ocfs2_journal_init(), but unknown
				1383	* ordering issues will cause the filesystem to crash.
				1384	* If anyone wants to figure out what part of the code
				1385	* refers to osb->journal before ocfs2_journal_init() is run,
				1386	* be my guest.
				1387	*/
				1388	/* initialize our journal structure */
				1389
Robert P. J. Day	cd86128	2006-12-13 00:34:52 -0800	[diff] [blame]	1390	journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1391	if (!journal) {
				1392	mlog(ML_ERROR, "unable to alloc journal\n");
				1393	status = -ENOMEM;
				1394	goto bail;
				1395	}
				1396	osb->journal = journal;
				1397	journal->j_osb = osb;
				1398
				1399	atomic_set(&journal->j_num_trans, 0);
				1400	init_rwsem(&journal->j_trans_barrier);
				1401	init_waitqueue_head(&journal->j_checkpointed);
				1402	spin_lock_init(&journal->j_lock);
				1403	journal->j_trans_id = (unsigned long) 1;
				1404	INIT_LIST_HEAD(&journal->j_la_cleanups);
David Howells	c402895	2006-11-22 14:57:56 +0000	[diff] [blame]	1405	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1406	journal->j_state = OCFS2_JOURNAL_FREE;
				1407
				1408	/* get some pseudo constants for clustersize bits */
				1409	osb->s_clustersize_bits =
				1410	le32_to_cpu(di->id2.i_super.s_clustersize_bits);
				1411	osb->s_clustersize = 1 << osb->s_clustersize_bits;
				1412	mlog(0, "clusterbits=%d\n", osb->s_clustersize_bits);
				1413
				1414	if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE \|\|
				1415	osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) {
				1416	mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n",
				1417	osb->s_clustersize);
				1418	status = -EINVAL;
				1419	goto bail;
				1420	}
				1421
				1422	if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1)
				1423	> (u32)~0UL) {
				1424	mlog(ML_ERROR, "Volume might try to write to blocks beyond "
				1425	"what jbd can address in 32 bits.\n");
				1426	status = -EINVAL;
				1427	goto bail;
				1428	}
				1429
				1430	if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid,
				1431	sizeof(di->id2.i_super.s_uuid))) {
				1432	mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n");
				1433	status = -ENOMEM;
				1434	goto bail;
				1435	}
				1436
Mark Fasheh	7842704	2006-05-04 12:03:26 -0700	[diff] [blame]	1437	memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1438	osb->net_key = le32_to_cpu(uuid_net_key);
				1439
				1440	strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
				1441	osb->vol_label[63] = '\0';
				1442	osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
				1443	osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
				1444	osb->first_cluster_group_blkno =
				1445	le64_to_cpu(di->id2.i_super.s_first_cluster_group);
				1446	osb->fs_generation = le32_to_cpu(di->i_fs_generation);
				1447	mlog(0, "vol_label: %s\n", osb->vol_label);
				1448	mlog(0, "uuid: %s\n", osb->uuid_str);
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1449	mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n",
				1450	(unsigned long long)osb->root_blkno,
				1451	(unsigned long long)osb->system_dir_blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1452
				1453	osb->osb_dlm_debug = ocfs2_new_dlm_debug();
				1454	if (!osb->osb_dlm_debug) {
				1455	status = -ENOMEM;
				1456	mlog_errno(status);
				1457	goto bail;
				1458	}
				1459
				1460	atomic_set(&osb->vol_state, VOLUME_INIT);
				1461
				1462	/* load root, system_dir, and all global system inodes */
				1463	status = ocfs2_init_global_system_inodes(osb);
				1464	if (status < 0) {
				1465	mlog_errno(status);
				1466	goto bail;
				1467	}
				1468
				1469	/*
				1470	* global bitmap
				1471	*/
				1472	inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE,
				1473	OCFS2_INVALID_SLOT);
				1474	if (!inode) {
				1475	status = -EINVAL;
				1476	mlog_errno(status);
				1477	goto bail;
				1478	}
				1479
				1480	osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
				1481
Mark Fasheh	101ebf2	2006-05-02 17:54:45 -0700	[diff] [blame]	1482	/* We don't have a cluster lock on the bitmap here because
				1483	* we're only interested in static information and the extra
				1484	* complexity at mount time isn't worht it. Don't pass the
				1485	* inode in to the read function though as we don't want it to
				1486	* be put in the cache. */
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1487	status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0,
Mark Fasheh	101ebf2	2006-05-02 17:54:45 -0700	[diff] [blame]	1488	NULL);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1489	iput(inode);
				1490	if (status < 0) {
				1491	mlog_errno(status);
				1492	goto bail;
				1493	}
				1494
				1495	di = (struct ocfs2_dinode *) bitmap_bh->b_data;
				1496	osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1497	brelse(bitmap_bh);
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1498	mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
				1499	(unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1500
				1501	status = ocfs2_init_slot_info(osb);
				1502	if (status < 0) {
				1503	mlog_errno(status);
				1504	goto bail;
				1505	}
				1506
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1507	bail:
				1508	mlog_exit(status);
				1509	return status;
				1510	}
				1511
				1512	/*
				1513	* will return: -EAGAIN if it is ok to keep searching for superblocks
				1514	* -EINVAL if there is a bad superblock
				1515	* 0 on success
				1516	*/
				1517	static int ocfs2_verify_volume(struct ocfs2_dinode *di,
				1518	struct buffer_head *bh,
				1519	u32 blksz)
				1520	{
				1521	int status = -EAGAIN;
				1522
				1523	mlog_entry_void();
				1524
				1525	if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
				1526	strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
				1527	status = -EINVAL;
				1528	if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
				1529	mlog(ML_ERROR, "found superblock with incorrect block "
				1530	"size: found %u, should be %u\n",
				1531	1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
				1532	blksz);
				1533	} else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
				1534	OCFS2_MAJOR_REV_LEVEL \|\|
				1535	le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
				1536	OCFS2_MINOR_REV_LEVEL) {
				1537	mlog(ML_ERROR, "found superblock with bad version: "
				1538	"found %u.%u, should be %u.%u\n",
				1539	le16_to_cpu(di->id2.i_super.s_major_rev_level),
				1540	le16_to_cpu(di->id2.i_super.s_minor_rev_level),
				1541	OCFS2_MAJOR_REV_LEVEL,
				1542	OCFS2_MINOR_REV_LEVEL);
				1543	} else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) {
				1544	mlog(ML_ERROR, "bad block number on superblock: "
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1545	"found %llu, should be %llu\n",
				1546	(unsigned long long)di->i_blkno,
				1547	(unsigned long long)bh->b_blocknr);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1548	} else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 \|\|
				1549	le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
				1550	mlog(ML_ERROR, "bad cluster size found: %u\n",
				1551	1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits));
				1552	} else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) {
				1553	mlog(ML_ERROR, "bad root_blkno: 0\n");
				1554	} else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) {
				1555	mlog(ML_ERROR, "bad system_dir_blkno: 0\n");
				1556	} else if (le16_to_cpu(di->id2.i_super.s_max_slots) > OCFS2_MAX_SLOTS) {
				1557	mlog(ML_ERROR,
				1558	"Superblock slots found greater than file system "
				1559	"maximum: found %u, max %u\n",
				1560	le16_to_cpu(di->id2.i_super.s_max_slots),
				1561	OCFS2_MAX_SLOTS);
				1562	} else {
				1563	/* found it! */
				1564	status = 0;
				1565	}
				1566	}
				1567
				1568	mlog_exit(status);
				1569	return status;
				1570	}
				1571
				1572	static int ocfs2_check_volume(struct ocfs2_super *osb)
				1573	{
				1574	int status = 0;
				1575	int dirty;
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	1576	int local;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1577	struct ocfs2_dinode local_alloc = NULL; / only used if we
				1578	* recover
				1579	* ourselves. */
				1580
				1581	mlog_entry_void();
				1582
				1583	/* Init our journal object. */
				1584	status = ocfs2_journal_init(osb->journal, &dirty);
				1585	if (status < 0) {
				1586	mlog(ML_ERROR, "Could not initialize journal!\n");
				1587	goto finally;
				1588	}
				1589
				1590	/* If the journal was unmounted cleanly then we don't want to
				1591	* recover anything. Otherwise, journal_load will do that
				1592	* dirty work for us :) */
				1593	if (!dirty) {
				1594	status = ocfs2_journal_wipe(osb->journal, 0);
				1595	if (status < 0) {
				1596	mlog_errno(status);
				1597	goto finally;
				1598	}
				1599	} else {
				1600	mlog(ML_NOTICE, "File system was not unmounted cleanly, "
				1601	"recovering volume.\n");
				1602	}
				1603
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	1604	local = ocfs2_mount_local(osb);
				1605
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1606	/* will play back anything left in the journal. */
Sunil Mushran	c271c5c	2006-12-05 17:56:35 -0800	[diff] [blame]	1607	ocfs2_journal_load(osb->journal, local);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1608
				1609	if (dirty) {
				1610	/* recover my local alloc if we didn't unmount cleanly. */
				1611	status = ocfs2_begin_local_alloc_recovery(osb,
				1612	osb->slot_num,
				1613	&local_alloc);
				1614	if (status < 0) {
				1615	mlog_errno(status);
				1616	goto finally;
				1617	}
				1618	/* we complete the recovery process after we've marked
				1619	* ourselves as mounted. */
				1620	}
				1621
				1622	mlog(0, "Journal loaded.\n");
				1623
				1624	status = ocfs2_load_local_alloc(osb);
				1625	if (status < 0) {
				1626	mlog_errno(status);
				1627	goto finally;
				1628	}
				1629
				1630	if (dirty) {
				1631	/* Recovery will be completed after we've mounted the
				1632	* rest of the volume. */
				1633	osb->dirty = 1;
				1634	osb->local_alloc_copy = local_alloc;
				1635	local_alloc = NULL;
				1636	}
				1637
				1638	/* go through each journal, trylock it and if you get the
				1639	* lock, and it's marked as dirty, set the bit in the recover
				1640	* map and launch a recovery thread for it. */
				1641	status = ocfs2_mark_dead_nodes(osb);
				1642	if (status < 0)
				1643	mlog_errno(status);
				1644
				1645	finally:
				1646	if (local_alloc)
				1647	kfree(local_alloc);
				1648
				1649	mlog_exit(status);
				1650	return status;
				1651	}
				1652
				1653	/*
				1654	* The routine gets called from dismount or close whenever a dismount on
				1655	* volume is requested and the osb open count becomes 1.
				1656	* It will remove the osb from the global list and also free up all the
				1657	* initialized resources and fileobject.
				1658	*/
				1659	static void ocfs2_delete_osb(struct ocfs2_super *osb)
				1660	{
				1661	mlog_entry_void();
				1662
				1663	/* This function assumes that the caller has the main osb resource */
				1664
				1665	if (osb->slot_info)
				1666	ocfs2_free_slot_info(osb->slot_info);
				1667
Mark Fasheh	b4df6ed	2006-02-22 17:35:08 -0800	[diff] [blame]	1668	kfree(osb->osb_orphan_wipes);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1669	/* FIXME
				1670	* This belongs in journal shutdown, but because we have to
				1671	* allocate osb->journal at the start of ocfs2_initalize_osb(),
				1672	* we free it here.
				1673	*/
				1674	kfree(osb->journal);
				1675	if (osb->local_alloc_copy)
				1676	kfree(osb->local_alloc_copy);
				1677	kfree(osb->uuid_str);
				1678	ocfs2_put_dlm_debug(osb->osb_dlm_debug);
				1679	memset(osb, 0, sizeof(struct ocfs2_super));
				1680
				1681	mlog_exit_void();
				1682	}
				1683
				1684	/* Put OCFS2 into a readonly state, or (if the user specifies it),
				1685	* panic(). We do not support continue-on-error operation. */
				1686	static void ocfs2_handle_error(struct super_block *sb)
				1687	{
				1688	struct ocfs2_super *osb = OCFS2_SB(sb);
				1689
				1690	if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC)
				1691	panic("OCFS2: (device %s): panic forced after error\n",
				1692	sb->s_id);
				1693
				1694	ocfs2_set_osb_flag(osb, OCFS2_OSB_ERROR_FS);
				1695
				1696	if (sb->s_flags & MS_RDONLY &&
				1697	(ocfs2_is_soft_readonly(osb) \|\|
				1698	ocfs2_is_hard_readonly(osb)))
				1699	return;
				1700
				1701	printk(KERN_CRIT "File system is now read-only due to the potential "
				1702	"of on-disk corruption. Please run fsck.ocfs2 once the file "
				1703	"system is unmounted.\n");
				1704	sb->s_flags \|= MS_RDONLY;
				1705	ocfs2_set_ro_flag(osb, 0);
				1706	}
				1707
/*
 * Scratch buffer for the formatted message, used by __ocfs2_error() and
 * __ocfs2_abort().
 * NOTE(review): neither function takes a lock around it, so concurrent
 * callers could overwrite each other's text - confirm whether callers
 * are serialised elsewhere.
 */
static char error_buf[1024];

/*
 * Log a file system error for @sb and then apply the error policy.
 *
 * @sb:       super block the error was detected on
 * @function: name of the function reporting the error, shown in the log
 * @fmt:      printf-style format for the message, followed by its args
 *
 * The message is truncated to fit error_buf. After logging,
 * ocfs2_handle_error() is called.
 */
void __ocfs2_error(struct super_block *sb,
		   const char *function,
		   const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
	va_end(args);

	/* Not using mlog here because we want to show the actual
	 * function the error came from. */
	printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n",
	       sb->s_id, function, error_buf);

	ocfs2_handle_error(sb);
}
				1727
				1728	/* Handle critical errors. This is intentionally more drastic than
				1729	* ocfs2_handle_error, so we only use for things like journal errors,
				1730	* etc. */
				1731	void __ocfs2_abort(struct super_block* sb,
				1732	const char *function,
				1733	const char *fmt, ...)
				1734	{
				1735	va_list args;
				1736
				1737	va_start(args, fmt);
Alexey Dobriyan	4a6e617	2006-12-06 20:37:04 -0800	[diff] [blame]	1738	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1739	va_end(args);
				1740
				1741	printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n",
				1742	sb->s_id, function, error_buf);
				1743
				1744	/* We don't have the cluster support yet to go straight to
				1745	* hard readonly in here. Until then, we want to keep
				1746	* ocfs2_abort() so that we can at least mark critical
				1747	* errors.
				1748	*
				1749	* TODO: This should abort the journal and alert other nodes
				1750	* that our slot needs recovery. */
				1751
				1752	/* Force a panic(). This stinks, but it's better than letting
				1753	* things continue without having a proper hard readonly
				1754	* here. */
				1755	OCFS2_SB(sb)->s_mount_opt \|= OCFS2_MOUNT_ERRORS_PANIC;
				1756	ocfs2_handle_error(sb);
				1757	}
				1758
/* Register ocfs2_init() and ocfs2_exit() as the module's init and exit
 * functions. */
module_init(ocfs2_init);
module_exit(ocfs2_exit);