Blame - fs/ocfs2/super.c - kernel/msm-4.19

blob: 046824b6b6256267d8a9d15275b034e7abeae025 [file] [log] [blame]

Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1	/* -- mode: c; c-basic-offset: 8; --
				2	* vim: noexpandtab sw=8 ts=8 sts=0:
				3	*
				4	* super.c
				5	*
				6	* load/unload driver, mount/dismount volumes
				7	*
				8	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
				9	*
				10	* This program is free software; you can redistribute it and/or
				11	* modify it under the terms of the GNU General Public
				12	* License as published by the Free Software Foundation; either
				13	* version 2 of the License, or (at your option) any later version.
				14	*
				15	* This program is distributed in the hope that it will be useful,
				16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				18	* General Public License for more details.
				19	*
				20	* You should have received a copy of the GNU General Public
				21	* License along with this program; if not, write to the
				22	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				23	* Boston, MA 021110-1307, USA.
				24	*/
				25
				26	#include <linux/module.h>
				27	#include <linux/fs.h>
				28	#include <linux/types.h>
				29	#include <linux/slab.h>
				30	#include <linux/highmem.h>
				31	#include <linux/utsname.h>
				32	#include <linux/init.h>
				33	#include <linux/random.h>
				34	#include <linux/statfs.h>
				35	#include <linux/moduleparam.h>
				36	#include <linux/blkdev.h>
				37	#include <linux/socket.h>
				38	#include <linux/inet.h>
				39	#include <linux/parser.h>
				40	#include <linux/crc32.h>
				41	#include <linux/debugfs.h>
				42
				43	#include <cluster/nodemanager.h>
				44
				45	#define MLOG_MASK_PREFIX ML_SUPER
				46	#include <cluster/masklog.h>
				47
				48	#include "ocfs2.h"
				49
				50	/* this should be the only file to include a version 1 header */
				51	#include "ocfs1_fs_compat.h"
				52
				53	#include "alloc.h"
				54	#include "dlmglue.h"
				55	#include "export.h"
				56	#include "extent_map.h"
				57	#include "heartbeat.h"
				58	#include "inode.h"
				59	#include "journal.h"
				60	#include "localalloc.h"
				61	#include "namei.h"
				62	#include "slot_map.h"
				63	#include "super.h"
				64	#include "sysfile.h"
				65	#include "uptodate.h"
				66	#include "ver.h"
				67	#include "vote.h"
				68
				69	#include "buffer_head_io.h"
				70
				71	/*
				72	* Globals
				73	*/
				74	static spinlock_t ocfs2_globals_lock = SPIN_LOCK_UNLOCKED;
				75
				76	static u32 osb_id; /* Keeps track of next available OSB Id */
				77
				78	static kmem_cache_t *ocfs2_inode_cachep = NULL;
				79
				80	kmem_cache_t *ocfs2_lock_cache = NULL;
				81
				82	/* OCFS2 needs to schedule several differnt types of work which
				83	* require cluster locking, disk I/O, recovery waits, etc. Since these
				84	* types of work tend to be heavy we avoid using the kernel events
				85	* workqueue and schedule on our own. */
				86	struct workqueue_struct *ocfs2_wq = NULL;
				87
				88	static struct dentry *ocfs2_debugfs_root = NULL;
				89
				90	MODULE_AUTHOR("Oracle");
				91	MODULE_LICENSE("GPL");
				92
				93	static int ocfs2_parse_options(struct super_block sb, char options,
				94	unsigned long *mount_opt, int is_remount);
				95	static void ocfs2_put_super(struct super_block *sb);
				96	static int ocfs2_mount_volume(struct super_block *sb);
				97	static int ocfs2_remount(struct super_block sb, int flags, char *data);
				98	static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err);
				99	static int ocfs2_initialize_mem_caches(void);
				100	static void ocfs2_free_mem_caches(void);
				101	static void ocfs2_delete_osb(struct ocfs2_super *osb);
				102
				103	static int ocfs2_statfs(struct super_block sb, struct kstatfs buf);
				104
				105	static int ocfs2_sync_fs(struct super_block *sb, int wait);
				106
				107	static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb);
				108	static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb);
				109	static int ocfs2_release_system_inodes(struct ocfs2_super *osb);
				110	static int ocfs2_fill_local_node_info(struct ocfs2_super *osb);
				111	static int ocfs2_check_volume(struct ocfs2_super *osb);
				112	static int ocfs2_verify_volume(struct ocfs2_dinode *di,
				113	struct buffer_head *bh,
				114	u32 sectsize);
				115	static int ocfs2_initialize_super(struct super_block *sb,
				116	struct buffer_head *bh,
				117	int sector_size);
				118	static int ocfs2_get_sector(struct super_block *sb,
				119	struct buffer_head **bh,
				120	int block,
				121	int sect_size);
				122	static void ocfs2_write_super(struct super_block *sb);
				123	static struct inode ocfs2_alloc_inode(struct super_block sb);
				124	static void ocfs2_destroy_inode(struct inode *inode);
				125
				126	static unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
				127
				128	static struct super_operations ocfs2_sops = {
				129	.statfs = ocfs2_statfs,
				130	.alloc_inode = ocfs2_alloc_inode,
				131	.destroy_inode = ocfs2_destroy_inode,
				132	.drop_inode = ocfs2_drop_inode,
				133	.clear_inode = ocfs2_clear_inode,
				134	.delete_inode = ocfs2_delete_inode,
				135	.sync_fs = ocfs2_sync_fs,
				136	.write_super = ocfs2_write_super,
				137	.put_super = ocfs2_put_super,
				138	.remount_fs = ocfs2_remount,
				139	};
				140
				141	enum {
				142	Opt_barrier,
				143	Opt_err_panic,
				144	Opt_err_ro,
				145	Opt_intr,
				146	Opt_nointr,
				147	Opt_hb_none,
				148	Opt_hb_local,
				149	Opt_data_ordered,
				150	Opt_data_writeback,
				151	Opt_err,
				152	};
				153
				154	static match_table_t tokens = {
				155	{Opt_barrier, "barrier=%u"},
				156	{Opt_err_panic, "errors=panic"},
				157	{Opt_err_ro, "errors=remount-ro"},
				158	{Opt_intr, "intr"},
				159	{Opt_nointr, "nointr"},
				160	{Opt_hb_none, OCFS2_HB_NONE},
				161	{Opt_hb_local, OCFS2_HB_LOCAL},
				162	{Opt_data_ordered, "data=ordered"},
				163	{Opt_data_writeback, "data=writeback"},
				164	{Opt_err, NULL}
				165	};
				166
				167	/*
				168	* write_super and sync_fs ripped right out of ext3.
				169	*/
				170	static void ocfs2_write_super(struct super_block *sb)
				171	{
Ingo Molnar	7892f2f4	2006-01-09 15:59:25 -0800	[diff] [blame]	172	if (mutex_trylock(&sb->s_lock) != 0)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	173	BUG();
				174	sb->s_dirt = 0;
				175	}
				176
				177	static int ocfs2_sync_fs(struct super_block *sb, int wait)
				178	{
				179	int status = 0;
				180	tid_t target;
				181	struct ocfs2_super *osb = OCFS2_SB(sb);
				182
				183	sb->s_dirt = 0;
				184
				185	if (ocfs2_is_hard_readonly(osb))
				186	return -EROFS;
				187
				188	if (wait) {
				189	status = ocfs2_flush_truncate_log(osb);
				190	if (status < 0)
				191	mlog_errno(status);
				192	} else {
				193	ocfs2_schedule_truncate_log_flush(osb, 0);
				194	}
				195
				196	if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) {
				197	if (wait)
				198	log_wait_commit(OCFS2_SB(sb)->journal->j_journal,
				199	target);
				200	}
				201	return 0;
				202	}
				203
				204	static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
				205	{
				206	struct inode *new = NULL;
				207	int status = 0;
				208	int i;
				209
				210	mlog_entry_void();
				211
				212	new = ocfs2_iget(osb, osb->root_blkno);
				213	if (IS_ERR(new)) {
				214	status = PTR_ERR(new);
				215	mlog_errno(status);
				216	goto bail;
				217	}
				218	osb->root_inode = new;
				219
				220	new = ocfs2_iget(osb, osb->system_dir_blkno);
				221	if (IS_ERR(new)) {
				222	status = PTR_ERR(new);
				223	mlog_errno(status);
				224	goto bail;
				225	}
				226	osb->sys_root_inode = new;
				227
				228	for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
				229	i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
				230	new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
				231	if (!new) {
				232	ocfs2_release_system_inodes(osb);
				233	status = -EINVAL;
				234	mlog_errno(status);
				235	/* FIXME: Should ERROR_RO_FS */
				236	mlog(ML_ERROR, "Unable to load system inode %d, "
				237	"possibly corrupt fs?", i);
				238	goto bail;
				239	}
				240	// the array now has one ref, so drop this one
				241	iput(new);
				242	}
				243
				244	bail:
				245	mlog_exit(status);
				246	return status;
				247	}
				248
				249	static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
				250	{
				251	struct inode *new = NULL;
				252	int status = 0;
				253	int i;
				254
				255	mlog_entry_void();
				256
				257	for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
				258	i < NUM_SYSTEM_INODES;
				259	i++) {
				260	new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
				261	if (!new) {
				262	ocfs2_release_system_inodes(osb);
				263	status = -EINVAL;
				264	mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n",
				265	status, i, osb->slot_num);
				266	goto bail;
				267	}
				268	/* the array now has one ref, so drop this one */
				269	iput(new);
				270	}
				271
				272	bail:
				273	mlog_exit(status);
				274	return status;
				275	}
				276
				277	static int ocfs2_release_system_inodes(struct ocfs2_super *osb)
				278	{
				279	int status = 0, i;
				280	struct inode *inode;
				281
				282	mlog_entry_void();
				283
				284	for (i = 0; i < NUM_SYSTEM_INODES; i++) {
				285	inode = osb->system_inodes[i];
				286	if (inode) {
				287	iput(inode);
				288	osb->system_inodes[i] = NULL;
				289	}
				290	}
				291
				292	inode = osb->sys_root_inode;
				293	if (inode) {
				294	iput(inode);
				295	osb->sys_root_inode = NULL;
				296	}
				297
				298	inode = osb->root_inode;
				299	if (inode) {
				300	iput(inode);
				301	osb->root_inode = NULL;
				302	}
				303
				304	mlog_exit(status);
				305	return status;
				306	}
				307
				308	/* We're allocating fs objects, use GFP_NOFS */
				309	static struct inode ocfs2_alloc_inode(struct super_block sb)
				310	{
				311	struct ocfs2_inode_info *oi;
				312
				313	oi = kmem_cache_alloc(ocfs2_inode_cachep, SLAB_NOFS);
				314	if (!oi)
				315	return NULL;
				316
				317	return &oi->vfs_inode;
				318	}
				319
				320	static void ocfs2_destroy_inode(struct inode *inode)
				321	{
				322	kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
				323	}
				324
				325	/* From xfs_super.c:xfs_max_file_offset
				326	* Copyright (c) 2000-2004 Silicon Graphics, Inc.
				327	*/
				328	static unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
				329	{
				330	unsigned int pagefactor = 1;
				331	unsigned int bitshift = BITS_PER_LONG - 1;
				332
				333	/* Figure out maximum filesize, on Linux this can depend on
				334	* the filesystem blocksize (on 32 bit platforms).
				335	* __block_prepare_write does this in an [unsigned] long...
				336	* page->index << (PAGE_CACHE_SHIFT - bbits)
				337	* So, for page sized blocks (4K on 32 bit platforms),
				338	* this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
				339	* (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
				340	* but for smaller blocksizes it is less (bbits = log2 bsize).
				341	* Note1: get_block_t takes a long (implicit cast from above)
				342	* Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
				343	* can optionally convert the [unsigned] long from above into
				344	* an [unsigned] long long.
				345	*/
				346
				347	#if BITS_PER_LONG == 32
				348	# if defined(CONFIG_LBD)
				349	BUG_ON(sizeof(sector_t) != 8);
				350	pagefactor = PAGE_CACHE_SIZE;
				351	bitshift = BITS_PER_LONG;
				352	# else
				353	pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
				354	# endif
				355	#endif
				356
				357	return (((unsigned long long)pagefactor) << bitshift) - 1;
				358	}
				359
				360	static int ocfs2_remount(struct super_block sb, int flags, char *data)
				361	{
				362	int incompat_features;
				363	int ret = 0;
				364	unsigned long parsed_options;
				365	struct ocfs2_super *osb = OCFS2_SB(sb);
				366
				367	if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
				368	ret = -EINVAL;
				369	goto out;
				370	}
				371
				372	if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) !=
				373	(parsed_options & OCFS2_MOUNT_HB_LOCAL)) {
				374	ret = -EINVAL;
				375	mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
				376	goto out;
				377	}
				378
				379	if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) !=
				380	(parsed_options & OCFS2_MOUNT_DATA_WRITEBACK)) {
				381	ret = -EINVAL;
				382	mlog(ML_ERROR, "Cannot change data mode on remount\n");
				383	goto out;
				384	}
				385
				386	/* We're going to/from readonly mode. */
				387	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
				388	/* Lock here so the check of HARD_RO and the potential
				389	* setting of SOFT_RO is atomic. */
				390	spin_lock(&osb->osb_lock);
				391	if (osb->osb_flags & OCFS2_OSB_HARD_RO) {
				392	mlog(ML_ERROR, "Remount on readonly device is forbidden.\n");
				393	ret = -EROFS;
				394	goto unlock_osb;
				395	}
				396
				397	if (*flags & MS_RDONLY) {
				398	mlog(0, "Going to ro mode.\n");
				399	sb->s_flags \|= MS_RDONLY;
				400	osb->osb_flags \|= OCFS2_OSB_SOFT_RO;
				401	} else {
				402	mlog(0, "Making ro filesystem writeable.\n");
				403
				404	if (osb->osb_flags & OCFS2_OSB_ERROR_FS) {
				405	mlog(ML_ERROR, "Cannot remount RDWR "
				406	"filesystem due to previous errors.\n");
				407	ret = -EROFS;
				408	goto unlock_osb;
				409	}
				410	incompat_features = OCFS2_HAS_RO_COMPAT_FEATURE(sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP);
				411	if (incompat_features) {
				412	mlog(ML_ERROR, "Cannot remount RDWR because "
				413	"of unsupported optional features "
				414	"(%x).\n", incompat_features);
				415	ret = -EINVAL;
				416	goto unlock_osb;
				417	}
				418	sb->s_flags &= ~MS_RDONLY;
				419	osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;
				420	}
				421	unlock_osb:
				422	spin_unlock(&osb->osb_lock);
				423	}
				424
				425	if (!ret) {
				426	if (!ocfs2_is_hard_readonly(osb))
				427	ocfs2_set_journal_params(osb);
				428
				429	/* Only save off the new mount options in case of a successful
				430	* remount. */
				431	osb->s_mount_opt = parsed_options;
				432	}
				433	out:
				434	return ret;
				435	}
				436
				437	static int ocfs2_sb_probe(struct super_block *sb,
				438	struct buffer_head **bh,
				439	int *sector_size)
				440	{
				441	int status = 0, tmpstat;
				442	struct ocfs1_vol_disk_hdr *hdr;
				443	struct ocfs2_dinode *di;
				444	int blksize;
				445
				446	*bh = NULL;
				447
				448	/* may be > 512 */
				449	*sector_size = bdev_hardsect_size(sb->s_bdev);
				450	if (*sector_size > OCFS2_MAX_BLOCKSIZE) {
				451	mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n",
				452	*sector_size, OCFS2_MAX_BLOCKSIZE);
				453	status = -EINVAL;
				454	goto bail;
				455	}
				456
				457	/* Can this really happen? */
				458	if (*sector_size < OCFS2_MIN_BLOCKSIZE)
				459	*sector_size = OCFS2_MIN_BLOCKSIZE;
				460
				461	/* check block zero for old format */
				462	status = ocfs2_get_sector(sb, bh, 0, *sector_size);
				463	if (status < 0) {
				464	mlog_errno(status);
				465	goto bail;
				466	}
				467	hdr = (struct ocfs1_vol_disk_hdr ) (bh)->b_data;
				468	if (hdr->major_version == OCFS1_MAJOR_VERSION) {
				469	mlog(ML_ERROR, "incompatible version: %u.%u\n",
				470	hdr->major_version, hdr->minor_version);
				471	status = -EINVAL;
				472	}
				473	if (memcmp(hdr->signature, OCFS1_VOLUME_SIGNATURE,
				474	strlen(OCFS1_VOLUME_SIGNATURE)) == 0) {
				475	mlog(ML_ERROR, "incompatible volume signature: %8s\n",
				476	hdr->signature);
				477	status = -EINVAL;
				478	}
				479	brelse(*bh);
				480	*bh = NULL;
				481	if (status < 0) {
				482	mlog(ML_ERROR, "This is an ocfs v1 filesystem which must be "
				483	"upgraded before mounting with ocfs v2\n");
				484	goto bail;
				485	}
				486
				487	/*
				488	* Now check at magic offset for 512, 1024, 2048, 4096
				489	* blocksizes. 4096 is the maximum blocksize because it is
				490	* the minimum clustersize.
				491	*/
				492	status = -EINVAL;
				493	for (blksize = *sector_size;
				494	blksize <= OCFS2_MAX_BLOCKSIZE;
				495	blksize <<= 1) {
				496	tmpstat = ocfs2_get_sector(sb, bh,
				497	OCFS2_SUPER_BLOCK_BLKNO,
				498	blksize);
				499	if (tmpstat < 0) {
				500	status = tmpstat;
				501	mlog_errno(status);
				502	goto bail;
				503	}
				504	di = (struct ocfs2_dinode ) (bh)->b_data;
				505	status = ocfs2_verify_volume(di, *bh, blksize);
				506	if (status >= 0)
				507	goto bail;
				508	brelse(*bh);
				509	*bh = NULL;
				510	if (status != -EAGAIN)
				511	break;
				512	}
				513
				514	bail:
				515	return status;
				516	}
				517
				518	static int ocfs2_fill_super(struct super_block sb, void data, int silent)
				519	{
				520	struct dentry *root;
				521	int status, sector_size;
				522	unsigned long parsed_opt;
				523	struct inode *inode = NULL;
				524	struct ocfs2_super *osb = NULL;
				525	struct buffer_head *bh = NULL;
				526
				527	mlog_entry("%p, %p, %i", sb, data, silent);
				528
				529	/* for now we only have one cluster/node, make sure we see it
				530	* in the heartbeat universe */
				531	if (!o2hb_check_local_node_heartbeating()) {
				532	status = -EINVAL;
				533	goto read_super_error;
				534	}
				535
				536	/* probe for superblock */
				537	status = ocfs2_sb_probe(sb, &bh, &sector_size);
				538	if (status < 0) {
				539	mlog(ML_ERROR, "superblock probe failed!\n");
				540	goto read_super_error;
				541	}
				542
				543	status = ocfs2_initialize_super(sb, bh, sector_size);
				544	osb = OCFS2_SB(sb);
				545	if (status < 0) {
				546	mlog_errno(status);
				547	goto read_super_error;
				548	}
				549	brelse(bh);
				550	bh = NULL;
				551
				552	if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
				553	status = -EINVAL;
				554	goto read_super_error;
				555	}
				556	osb->s_mount_opt = parsed_opt;
				557
				558	sb->s_magic = OCFS2_SUPER_MAGIC;
				559
				560	/* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
				561	* heartbeat=none */
				562	if (bdev_read_only(sb->s_bdev)) {
				563	if (!(sb->s_flags & MS_RDONLY)) {
				564	status = -EACCES;
				565	mlog(ML_ERROR, "Readonly device detected but readonly "
				566	"mount was not specified.\n");
				567	goto read_super_error;
				568	}
				569
				570	/* You should not be able to start a local heartbeat
				571	* on a readonly device. */
				572	if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
				573	status = -EROFS;
				574	mlog(ML_ERROR, "Local heartbeat specified on readonly "
				575	"device.\n");
				576	goto read_super_error;
				577	}
				578
				579	status = ocfs2_check_journals_nolocks(osb);
				580	if (status < 0) {
				581	if (status == -EROFS)
				582	mlog(ML_ERROR, "Recovery required on readonly "
				583	"file system, but write access is "
				584	"unavailable.\n");
				585	else
				586	mlog_errno(status);
				587	goto read_super_error;
				588	}
				589
				590	ocfs2_set_ro_flag(osb, 1);
				591
				592	printk(KERN_NOTICE "Readonly device detected. No cluster "
				593	"services will be utilized for this mount. Recovery "
				594	"will be skipped.\n");
				595	}
				596
				597	if (!ocfs2_is_hard_readonly(osb)) {
				598	/* If this isn't a hard readonly mount, then we need
				599	* to make sure that heartbeat is in a valid state,
				600	* and that we mark ourselves soft readonly is -oro
				601	* was specified. */
				602	if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
				603	mlog(ML_ERROR, "No heartbeat for device (%s)\n",
				604	sb->s_id);
				605	status = -EINVAL;
				606	goto read_super_error;
				607	}
				608
				609	if (sb->s_flags & MS_RDONLY)
				610	ocfs2_set_ro_flag(osb, 0);
				611	}
				612
				613	osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
				614	ocfs2_debugfs_root);
				615	if (!osb->osb_debug_root) {
				616	status = -EINVAL;
				617	mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n");
				618	goto read_super_error;
				619	}
				620
				621	status = ocfs2_mount_volume(sb);
				622	if (osb->root_inode)
				623	inode = igrab(osb->root_inode);
				624
				625	if (status < 0)
				626	goto read_super_error;
				627
				628	if (!inode) {
				629	status = -EIO;
				630	mlog_errno(status);
				631	goto read_super_error;
				632	}
				633
				634	root = d_alloc_root(inode);
				635	if (!root) {
				636	status = -ENOMEM;
				637	mlog_errno(status);
				638	goto read_super_error;
				639	}
				640
				641	sb->s_root = root;
				642
				643	ocfs2_complete_mount_recovery(osb);
				644
				645	printk("ocfs2: Mounting device (%u,%u) on (node %d, slot %d) with %s "
				646	"data mode.\n",
				647	MAJOR(sb->s_dev), MINOR(sb->s_dev), osb->node_num,
				648	osb->slot_num,
				649	osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
				650	"ordered");
				651
				652	atomic_set(&osb->vol_state, VOLUME_MOUNTED);
				653	wake_up(&osb->osb_mount_event);
				654
				655	mlog_exit(status);
				656	return status;
				657
				658	read_super_error:
				659	if (bh != NULL)
				660	brelse(bh);
				661
				662	if (inode)
				663	iput(inode);
				664
				665	if (osb) {
				666	atomic_set(&osb->vol_state, VOLUME_DISABLED);
				667	wake_up(&osb->osb_mount_event);
				668	ocfs2_dismount_volume(sb, 1);
				669	}
				670
				671	mlog_exit(status);
				672	return status;
				673	}
				674
				675	static struct super_block ocfs2_get_sb(struct file_system_type fs_type,
				676	int flags,
				677	const char *dev_name,
				678	void *data)
				679	{
				680	return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
				681	}
				682
				683	static struct file_system_type ocfs2_fs_type = {
				684	.owner = THIS_MODULE,
				685	.name = "ocfs2",
				686	.get_sb = ocfs2_get_sb, /* is this called when we mount
				687	* the fs? */
				688	.kill_sb = kill_block_super, /* set to the generic one
				689	* right now, but do we
				690	* need to change that? */
				691	.fs_flags = FS_REQUIRES_DEV,
				692	.next = NULL
				693	};
				694
				695	static int ocfs2_parse_options(struct super_block *sb,
				696	char *options,
				697	unsigned long *mount_opt,
				698	int is_remount)
				699	{
				700	int status;
				701	char *p;
				702
				703	mlog_entry("remount: %d, options: \"%s\"\n", is_remount,
				704	options ? options : "(none)");
				705
				706	*mount_opt = 0;
				707
				708	if (!options) {
				709	status = 1;
				710	goto bail;
				711	}
				712
				713	while ((p = strsep(&options, ",")) != NULL) {
				714	int token, option;
				715	substring_t args[MAX_OPT_ARGS];
				716
				717	if (!*p)
				718	continue;
				719
				720	token = match_token(p, tokens, args);
				721	switch (token) {
				722	case Opt_hb_local:
				723	*mount_opt \|= OCFS2_MOUNT_HB_LOCAL;
				724	break;
				725	case Opt_hb_none:
				726	*mount_opt &= ~OCFS2_MOUNT_HB_LOCAL;
				727	break;
				728	case Opt_barrier:
				729	if (match_int(&args[0], &option)) {
				730	status = 0;
				731	goto bail;
				732	}
				733	if (option)
				734	*mount_opt \|= OCFS2_MOUNT_BARRIER;
				735	else
				736	*mount_opt &= ~OCFS2_MOUNT_BARRIER;
				737	break;
				738	case Opt_intr:
				739	*mount_opt &= ~OCFS2_MOUNT_NOINTR;
				740	break;
				741	case Opt_nointr:
				742	*mount_opt \|= OCFS2_MOUNT_NOINTR;
				743	break;
				744	case Opt_err_panic:
				745	*mount_opt \|= OCFS2_MOUNT_ERRORS_PANIC;
				746	break;
				747	case Opt_err_ro:
				748	*mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
				749	break;
				750	case Opt_data_ordered:
				751	*mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
				752	break;
				753	case Opt_data_writeback:
				754	*mount_opt \|= OCFS2_MOUNT_DATA_WRITEBACK;
				755	break;
				756	default:
				757	mlog(ML_ERROR,
				758	"Unrecognized mount option \"%s\" "
				759	"or missing value\n", p);
				760	status = 0;
				761	goto bail;
				762	}
				763	}
				764
				765	status = 1;
				766
				767	bail:
				768	mlog_exit(status);
				769	return status;
				770	}
				771
				772	static int __init ocfs2_init(void)
				773	{
				774	int status;
				775
				776	mlog_entry_void();
				777
				778	ocfs2_print_version();
				779
				780	if (init_ocfs2_extent_maps())
				781	return -ENOMEM;
				782
				783	status = init_ocfs2_uptodate_cache();
				784	if (status < 0) {
				785	mlog_errno(status);
				786	goto leave;
				787	}
				788
				789	status = ocfs2_initialize_mem_caches();
				790	if (status < 0) {
				791	mlog_errno(status);
				792	goto leave;
				793	}
				794
				795	ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
				796	if (!ocfs2_wq) {
				797	status = -ENOMEM;
				798	goto leave;
				799	}
				800
				801	spin_lock(&ocfs2_globals_lock);
				802	osb_id = 0;
				803	spin_unlock(&ocfs2_globals_lock);
				804
				805	ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
				806	if (!ocfs2_debugfs_root) {
				807	status = -EFAULT;
				808	mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
				809	}
				810
				811	leave:
				812	if (status < 0) {
				813	ocfs2_free_mem_caches();
				814	exit_ocfs2_uptodate_cache();
				815	exit_ocfs2_extent_maps();
				816	}
				817
				818	mlog_exit(status);
				819
				820	if (status >= 0) {
				821	return register_filesystem(&ocfs2_fs_type);
				822	} else
				823	return -1;
				824	}
				825
				826	static void __exit ocfs2_exit(void)
				827	{
				828	mlog_entry_void();
				829
				830	if (ocfs2_wq) {
				831	flush_workqueue(ocfs2_wq);
				832	destroy_workqueue(ocfs2_wq);
				833	}
				834
				835	debugfs_remove(ocfs2_debugfs_root);
				836
				837	ocfs2_free_mem_caches();
				838
				839	unregister_filesystem(&ocfs2_fs_type);
				840
				841	exit_ocfs2_extent_maps();
				842
				843	exit_ocfs2_uptodate_cache();
				844
				845	mlog_exit_void();
				846	}
				847
				848	static void ocfs2_put_super(struct super_block *sb)
				849	{
				850	mlog_entry("(0x%p)\n", sb);
				851
				852	ocfs2_sync_blockdev(sb);
				853	ocfs2_dismount_volume(sb, 0);
				854
				855	mlog_exit_void();
				856	}
				857
				858	static int ocfs2_statfs(struct super_block sb, struct kstatfs buf)
				859	{
				860	struct ocfs2_super *osb;
				861	u32 numbits, freebits;
				862	int status;
				863	struct ocfs2_dinode *bm_lock;
				864	struct buffer_head *bh = NULL;
				865	struct inode *inode = NULL;
				866
				867	mlog_entry("(%p, %p)\n", sb, buf);
				868
				869	osb = OCFS2_SB(sb);
				870
				871	inode = ocfs2_get_system_file_inode(osb,
				872	GLOBAL_BITMAP_SYSTEM_INODE,
				873	OCFS2_INVALID_SLOT);
				874	if (!inode) {
				875	mlog(ML_ERROR, "failed to get bitmap inode\n");
				876	status = -EIO;
				877	goto bail;
				878	}
				879
				880	status = ocfs2_meta_lock(inode, NULL, &bh, 0);
				881	if (status < 0) {
				882	mlog_errno(status);
				883	goto bail;
				884	}
				885
				886	bm_lock = (struct ocfs2_dinode *) bh->b_data;
				887
				888	numbits = le32_to_cpu(bm_lock->id1.bitmap1.i_total);
				889	freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used);
				890
				891	buf->f_type = OCFS2_SUPER_MAGIC;
				892	buf->f_bsize = sb->s_blocksize;
				893	buf->f_namelen = OCFS2_MAX_FILENAME_LEN;
				894	buf->f_blocks = ((sector_t) numbits) *
				895	(osb->s_clustersize >> osb->sb->s_blocksize_bits);
				896	buf->f_bfree = ((sector_t) freebits) *
				897	(osb->s_clustersize >> osb->sb->s_blocksize_bits);
				898	buf->f_bavail = buf->f_bfree;
				899	buf->f_files = numbits;
				900	buf->f_ffree = freebits;
				901
				902	brelse(bh);
				903
				904	ocfs2_meta_unlock(inode, 0);
				905	status = 0;
				906	bail:
				907	if (inode)
				908	iput(inode);
				909
				910	mlog_exit(status);
				911
				912	return status;
				913	}
				914
				915	static void ocfs2_inode_init_once(void *data,
				916	kmem_cache_t *cachep,
				917	unsigned long flags)
				918	{
				919	struct ocfs2_inode_info *oi = data;
				920
				921	if ((flags & (SLAB_CTOR_VERIFY\|SLAB_CTOR_CONSTRUCTOR)) ==
				922	SLAB_CTOR_CONSTRUCTOR) {
				923	oi->ip_flags = 0;
				924	oi->ip_open_count = 0;
				925	spin_lock_init(&oi->ip_lock);
				926	ocfs2_extent_map_init(&oi->vfs_inode);
				927	INIT_LIST_HEAD(&oi->ip_handle_list);
				928	INIT_LIST_HEAD(&oi->ip_io_markers);
				929	oi->ip_handle = NULL;
				930	oi->ip_created_trans = 0;
				931	oi->ip_last_trans = 0;
				932	oi->ip_dir_start_lookup = 0;
				933
				934	init_rwsem(&oi->ip_alloc_sem);
Mark Fasheh	251b6ec	2006-01-10 15:41:43 -0800	[diff] [blame]	935	mutex_init(&oi->ip_io_mutex);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	936
				937	oi->ip_blkno = 0ULL;
				938	oi->ip_clusters = 0;
				939
				940	ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
				941	ocfs2_lock_res_init_once(&oi->ip_meta_lockres);
				942	ocfs2_lock_res_init_once(&oi->ip_data_lockres);
				943
				944	ocfs2_metadata_cache_init(&oi->vfs_inode);
				945
				946	inode_init_once(&oi->vfs_inode);
				947	}
				948	}
				949
				950	static int ocfs2_initialize_mem_caches(void)
				951	{
				952	ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache",
				953	sizeof(struct ocfs2_inode_info),
				954	0, SLAB_HWCACHE_ALIGN\|SLAB_RECLAIM_ACCOUNT,
				955	ocfs2_inode_init_once, NULL);
				956	if (!ocfs2_inode_cachep)
				957	return -ENOMEM;
				958
				959	ocfs2_lock_cache = kmem_cache_create("ocfs2_lock",
				960	sizeof(struct ocfs2_journal_lock),
				961	0,
				962	SLAB_NO_REAP\|SLAB_HWCACHE_ALIGN,
				963	NULL, NULL);
				964	if (!ocfs2_lock_cache)
				965	return -ENOMEM;
				966
				967	return 0;
				968	}
				969
				970	static void ocfs2_free_mem_caches(void)
				971	{
				972	if (ocfs2_inode_cachep)
				973	kmem_cache_destroy(ocfs2_inode_cachep);
				974	if (ocfs2_lock_cache)
				975	kmem_cache_destroy(ocfs2_lock_cache);
				976
				977	ocfs2_inode_cachep = NULL;
				978	ocfs2_lock_cache = NULL;
				979	}
				980
				981	static int ocfs2_get_sector(struct super_block *sb,
				982	struct buffer_head **bh,
				983	int block,
				984	int sect_size)
				985	{
				986	if (!sb_set_blocksize(sb, sect_size)) {
				987	mlog(ML_ERROR, "unable to set blocksize\n");
				988	return -EIO;
				989	}
				990
				991	*bh = sb_getblk(sb, block);
				992	if (!*bh) {
				993	mlog_errno(-EIO);
				994	return -EIO;
				995	}
				996	lock_buffer(*bh);
				997	if (!buffer_dirty(*bh))
				998	clear_buffer_uptodate(*bh);
				999	unlock_buffer(*bh);
				1000	ll_rw_block(READ, 1, bh);
				1001	wait_on_buffer(*bh);
				1002	return 0;
				1003	}
				1004
				1005	/* ocfs2 1.0 only allows one cluster and node identity per kernel image. */
				1006	static int ocfs2_fill_local_node_info(struct ocfs2_super *osb)
				1007	{
				1008	int status;
				1009
				1010	/* XXX hold a ref on the node while mounte? easy enough, if
				1011	* desirable. */
				1012	osb->node_num = o2nm_this_node();
				1013	if (osb->node_num == O2NM_MAX_NODES) {
				1014	mlog(ML_ERROR, "could not find this host's node number\n");
				1015	status = -ENOENT;
				1016	goto bail;
				1017	}
				1018
				1019	mlog(ML_NOTICE, "I am node %d\n", osb->node_num);
				1020
				1021	status = 0;
				1022	bail:
				1023	return status;
				1024	}
				1025
				1026	static int ocfs2_mount_volume(struct super_block *sb)
				1027	{
				1028	int status = 0;
				1029	int unlock_super = 0;
				1030	struct ocfs2_super *osb = OCFS2_SB(sb);
				1031
				1032	mlog_entry_void();
				1033
				1034	if (ocfs2_is_hard_readonly(osb))
				1035	goto leave;
				1036
				1037	status = ocfs2_fill_local_node_info(osb);
				1038	if (status < 0) {
				1039	mlog_errno(status);
				1040	goto leave;
				1041	}
				1042
				1043	status = ocfs2_register_hb_callbacks(osb);
				1044	if (status < 0) {
				1045	mlog_errno(status);
				1046	goto leave;
				1047	}
				1048
				1049	status = ocfs2_dlm_init(osb);
				1050	if (status < 0) {
				1051	mlog_errno(status);
				1052	goto leave;
				1053	}
				1054
				1055	/* requires vote_thread to be running. */
				1056	status = ocfs2_register_net_handlers(osb);
				1057	if (status < 0) {
				1058	mlog_errno(status);
				1059	goto leave;
				1060	}
				1061
				1062	status = ocfs2_super_lock(osb, 1);
				1063	if (status < 0) {
				1064	mlog_errno(status);
				1065	goto leave;
				1066	}
				1067	unlock_super = 1;
				1068
				1069	/* This will load up the node map and add ourselves to it. */
				1070	status = ocfs2_find_slot(osb);
				1071	if (status < 0) {
				1072	mlog_errno(status);
				1073	goto leave;
				1074	}
				1075
				1076	ocfs2_populate_mounted_map(osb);
				1077
				1078	/* load all node-local system inodes */
				1079	status = ocfs2_init_local_system_inodes(osb);
				1080	if (status < 0) {
				1081	mlog_errno(status);
				1082	goto leave;
				1083	}
				1084
				1085	status = ocfs2_check_volume(osb);
				1086	if (status < 0) {
				1087	mlog_errno(status);
				1088	goto leave;
				1089	}
				1090
				1091	status = ocfs2_truncate_log_init(osb);
				1092	if (status < 0) {
				1093	mlog_errno(status);
				1094	goto leave;
				1095	}
				1096
				1097	/* This should be sent after we recovered our journal as it
				1098	* will cause other nodes to unmark us as needing
				1099	* recovery. However, we need to send it before dropping the
				1100	* super block lock as otherwise their recovery threads might
				1101	* try to clean us up while we're live! */
				1102	status = ocfs2_request_mount_vote(osb);
				1103	if (status < 0)
				1104	mlog_errno(status);
				1105
				1106	leave:
				1107	if (unlock_super)
				1108	ocfs2_super_unlock(osb, 1);
				1109
				1110	mlog_exit(status);
				1111	return status;
				1112	}
				1113
				1114	/* we can't grab the goofy sem lock from inside wait_event, so we use
				1115	* memory barriers to make sure that we'll see the null task before
				1116	* being woken up */
				1117	static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
				1118	{
				1119	mb();
				1120	return osb->recovery_thread_task != NULL;
				1121	}
				1122
				1123	static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
				1124	{
				1125	int tmp;
				1126	struct ocfs2_super *osb = NULL;
				1127
				1128	mlog_entry("(0x%p)\n", sb);
				1129
				1130	BUG_ON(!sb);
				1131	osb = OCFS2_SB(sb);
				1132	BUG_ON(!osb);
				1133
				1134	ocfs2_shutdown_local_alloc(osb);
				1135
				1136	ocfs2_truncate_log_shutdown(osb);
				1137
				1138	/* disable any new recovery threads and wait for any currently
				1139	* running ones to exit. Do this before setting the vol_state. */
Arjan van de Ven	c74ec2f	2006-01-13 21:54:23 -0800	[diff] [blame]	1140	mutex_lock(&osb->recovery_lock);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1141	osb->disable_recovery = 1;
Arjan van de Ven	c74ec2f	2006-01-13 21:54:23 -0800	[diff] [blame]	1142	mutex_unlock(&osb->recovery_lock);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1143	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
				1144
				1145	/* At this point, we know that no more recovery threads can be
				1146	* launched, so wait for any recovery completion work to
				1147	* complete. */
				1148	flush_workqueue(ocfs2_wq);
				1149
				1150	ocfs2_journal_shutdown(osb);
				1151
				1152	ocfs2_sync_blockdev(sb);
				1153
				1154	/* No dlm means we've failed during mount, so skip all the
				1155	* steps which depended on that to complete. */
				1156	if (osb->dlm) {
				1157	tmp = ocfs2_super_lock(osb, 1);
				1158	if (tmp < 0) {
				1159	mlog_errno(tmp);
				1160	return;
				1161	}
				1162
				1163	tmp = ocfs2_request_umount_vote(osb);
				1164	if (tmp < 0)
				1165	mlog_errno(tmp);
				1166
				1167	if (osb->slot_num != OCFS2_INVALID_SLOT)
				1168	ocfs2_put_slot(osb);
				1169
				1170	ocfs2_super_unlock(osb, 1);
				1171	}
				1172
				1173	ocfs2_release_system_inodes(osb);
				1174
				1175	if (osb->dlm) {
				1176	ocfs2_unregister_net_handlers(osb);
				1177
				1178	ocfs2_dlm_shutdown(osb);
				1179	}
				1180
				1181	ocfs2_clear_hb_callbacks(osb);
				1182
				1183	debugfs_remove(osb->osb_debug_root);
				1184
				1185	if (!mnt_err)
				1186	ocfs2_stop_heartbeat(osb);
				1187
				1188	atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);
				1189
				1190	printk("ocfs2: Unmounting device (%u,%u) on (node %d)\n",
				1191	MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev), osb->node_num);
				1192
				1193	ocfs2_delete_osb(osb);
				1194	kfree(osb);
				1195	sb->s_dev = 0;
				1196	sb->s_fs_info = NULL;
				1197	}
				1198
				1199	static int ocfs2_setup_osb_uuid(struct ocfs2_super osb, const unsigned char uuid,
				1200	unsigned uuid_bytes)
				1201	{
				1202	int i, ret;
				1203	char *ptr;
				1204
				1205	BUG_ON(uuid_bytes != OCFS2_VOL_UUID_LEN);
				1206
				1207	osb->uuid_str = kcalloc(1, OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL);
				1208	if (osb->uuid_str == NULL)
				1209	return -ENOMEM;
				1210
				1211	memcpy(osb->uuid, uuid, OCFS2_VOL_UUID_LEN);
				1212
				1213	for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) {
				1214	/* print with null */
				1215	ret = snprintf(ptr, 3, "%02X", uuid[i]);
				1216	if (ret != 2) /* drop super cleans up */
				1217	return -EINVAL;
				1218	/* then only advance past the last char */
				1219	ptr += 2;
				1220	}
				1221
				1222	return 0;
				1223	}
				1224
				1225	static int ocfs2_initialize_super(struct super_block *sb,
				1226	struct buffer_head *bh,
				1227	int sector_size)
				1228	{
				1229	int status = 0;
				1230	int i;
				1231	struct ocfs2_dinode *di = NULL;
				1232	struct inode *inode = NULL;
				1233	struct buffer_head *bitmap_bh = NULL;
				1234	struct ocfs2_journal *journal;
				1235	__le32 uuid_net_key;
				1236	struct ocfs2_super *osb;
				1237
				1238	mlog_entry_void();
				1239
				1240	osb = kcalloc(1, sizeof(struct ocfs2_super), GFP_KERNEL);
				1241	if (!osb) {
				1242	status = -ENOMEM;
				1243	mlog_errno(status);
				1244	goto bail;
				1245	}
				1246
				1247	sb->s_fs_info = osb;
				1248	sb->s_op = &ocfs2_sops;
				1249	sb->s_export_op = &ocfs2_export_ops;
				1250	sb->s_flags \|= MS_NOATIME;
				1251	/* this is needed to support O_LARGEFILE */
				1252	sb->s_maxbytes = ocfs2_max_file_offset(sb->s_blocksize_bits);
				1253
				1254	osb->sb = sb;
				1255	/* Save off for ocfs2_rw_direct */
				1256	osb->s_sectsize_bits = blksize_bits(sector_size);
Eric Sesterhenn / snakebyte	ebdec83	2006-01-27 10:32:52 +0100	[diff] [blame]	1257	BUG_ON(!osb->s_sectsize_bits);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1258
				1259	osb->net_response_ids = 0;
				1260	spin_lock_init(&osb->net_response_lock);
				1261	INIT_LIST_HEAD(&osb->net_response_list);
				1262
				1263	INIT_LIST_HEAD(&osb->osb_net_handlers);
				1264	init_waitqueue_head(&osb->recovery_event);
				1265	spin_lock_init(&osb->vote_task_lock);
				1266	init_waitqueue_head(&osb->vote_event);
				1267	osb->vote_work_sequence = 0;
				1268	osb->vote_wake_sequence = 0;
				1269	INIT_LIST_HEAD(&osb->blocked_lock_list);
				1270	osb->blocked_lock_count = 0;
				1271	INIT_LIST_HEAD(&osb->vote_list);
				1272	spin_lock_init(&osb->osb_lock);
				1273
				1274	atomic_set(&osb->alloc_stats.moves, 0);
				1275	atomic_set(&osb->alloc_stats.local_data, 0);
				1276	atomic_set(&osb->alloc_stats.bitmap_data, 0);
				1277	atomic_set(&osb->alloc_stats.bg_allocs, 0);
				1278	atomic_set(&osb->alloc_stats.bg_extends, 0);
				1279
				1280	ocfs2_init_node_maps(osb);
				1281
				1282	snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
				1283	MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
				1284
Arjan van de Ven	c74ec2f	2006-01-13 21:54:23 -0800	[diff] [blame]	1285	mutex_init(&osb->recovery_lock);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1286
				1287	osb->disable_recovery = 0;
				1288	osb->recovery_thread_task = NULL;
				1289
				1290	init_waitqueue_head(&osb->checkpoint_event);
				1291	atomic_set(&osb->needs_checkpoint, 0);
				1292
				1293	osb->node_num = O2NM_INVALID_NODE_NUM;
				1294	osb->slot_num = OCFS2_INVALID_SLOT;
				1295
				1296	osb->local_alloc_state = OCFS2_LA_UNUSED;
				1297	osb->local_alloc_bh = NULL;
				1298
				1299	ocfs2_setup_hb_callbacks(osb);
				1300
				1301	init_waitqueue_head(&osb->osb_mount_event);
				1302
				1303	osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
				1304	if (!osb->vol_label) {
				1305	mlog(ML_ERROR, "unable to alloc vol label\n");
				1306	status = -ENOMEM;
				1307	goto bail;
				1308	}
				1309
				1310	osb->uuid = kmalloc(OCFS2_VOL_UUID_LEN, GFP_KERNEL);
				1311	if (!osb->uuid) {
				1312	mlog(ML_ERROR, "unable to alloc uuid\n");
				1313	status = -ENOMEM;
				1314	goto bail;
				1315	}
				1316
				1317	di = (struct ocfs2_dinode *)bh->b_data;
				1318
				1319	osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
				1320	if (osb->max_slots > OCFS2_MAX_SLOTS \|\| osb->max_slots == 0) {
				1321	mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
				1322	osb->max_slots);
				1323	status = -EINVAL;
				1324	goto bail;
				1325	}
				1326	mlog(ML_NOTICE, "max_slots for this device: %u\n", osb->max_slots);
				1327
				1328	osb->s_feature_compat =
				1329	le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
				1330	osb->s_feature_ro_compat =
				1331	le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_ro_compat);
				1332	osb->s_feature_incompat =
				1333	le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_incompat);
				1334
				1335	if ((i = OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_INCOMPAT_SUPP))) {
				1336	mlog(ML_ERROR, "couldn't mount because of unsupported "
				1337	"optional features (%x).\n", i);
				1338	status = -EINVAL;
				1339	goto bail;
				1340	}
				1341	if (!(osb->sb->s_flags & MS_RDONLY) &&
				1342	(i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) {
				1343	mlog(ML_ERROR, "couldn't mount RDWR because of "
				1344	"unsupported optional features (%x).\n", i);
				1345	status = -EINVAL;
				1346	goto bail;
				1347	}
				1348
				1349	get_random_bytes(&osb->s_next_generation, sizeof(u32));
				1350
				1351	/* FIXME
				1352	* This should be done in ocfs2_journal_init(), but unknown
				1353	* ordering issues will cause the filesystem to crash.
				1354	* If anyone wants to figure out what part of the code
				1355	* refers to osb->journal before ocfs2_journal_init() is run,
				1356	* be my guest.
				1357	*/
				1358	/* initialize our journal structure */
				1359
				1360	journal = kcalloc(1, sizeof(struct ocfs2_journal), GFP_KERNEL);
				1361	if (!journal) {
				1362	mlog(ML_ERROR, "unable to alloc journal\n");
				1363	status = -ENOMEM;
				1364	goto bail;
				1365	}
				1366	osb->journal = journal;
				1367	journal->j_osb = osb;
				1368
				1369	atomic_set(&journal->j_num_trans, 0);
				1370	init_rwsem(&journal->j_trans_barrier);
				1371	init_waitqueue_head(&journal->j_checkpointed);
				1372	spin_lock_init(&journal->j_lock);
				1373	journal->j_trans_id = (unsigned long) 1;
				1374	INIT_LIST_HEAD(&journal->j_la_cleanups);
				1375	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery, osb);
				1376	journal->j_state = OCFS2_JOURNAL_FREE;
				1377
				1378	/* get some pseudo constants for clustersize bits */
				1379	osb->s_clustersize_bits =
				1380	le32_to_cpu(di->id2.i_super.s_clustersize_bits);
				1381	osb->s_clustersize = 1 << osb->s_clustersize_bits;
				1382	mlog(0, "clusterbits=%d\n", osb->s_clustersize_bits);
				1383
				1384	if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE \|\|
				1385	osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) {
				1386	mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n",
				1387	osb->s_clustersize);
				1388	status = -EINVAL;
				1389	goto bail;
				1390	}
				1391
				1392	if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1)
				1393	> (u32)~0UL) {
				1394	mlog(ML_ERROR, "Volume might try to write to blocks beyond "
				1395	"what jbd can address in 32 bits.\n");
				1396	status = -EINVAL;
				1397	goto bail;
				1398	}
				1399
				1400	if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid,
				1401	sizeof(di->id2.i_super.s_uuid))) {
				1402	mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n");
				1403	status = -ENOMEM;
				1404	goto bail;
				1405	}
				1406
				1407	memcpy(&uuid_net_key, &osb->uuid[i], sizeof(osb->net_key));
				1408	osb->net_key = le32_to_cpu(uuid_net_key);
				1409
				1410	strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
				1411	osb->vol_label[63] = '\0';
				1412	osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
				1413	osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
				1414	osb->first_cluster_group_blkno =
				1415	le64_to_cpu(di->id2.i_super.s_first_cluster_group);
				1416	osb->fs_generation = le32_to_cpu(di->i_fs_generation);
				1417	mlog(0, "vol_label: %s\n", osb->vol_label);
				1418	mlog(0, "uuid: %s\n", osb->uuid_str);
				1419	mlog(0, "root_blkno=%"MLFu64", system_dir_blkno=%"MLFu64"\n",
				1420	osb->root_blkno, osb->system_dir_blkno);
				1421
				1422	osb->osb_dlm_debug = ocfs2_new_dlm_debug();
				1423	if (!osb->osb_dlm_debug) {
				1424	status = -ENOMEM;
				1425	mlog_errno(status);
				1426	goto bail;
				1427	}
				1428
				1429	atomic_set(&osb->vol_state, VOLUME_INIT);
				1430
				1431	/* load root, system_dir, and all global system inodes */
				1432	status = ocfs2_init_global_system_inodes(osb);
				1433	if (status < 0) {
				1434	mlog_errno(status);
				1435	goto bail;
				1436	}
				1437
				1438	/*
				1439	* global bitmap
				1440	*/
				1441	inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE,
				1442	OCFS2_INVALID_SLOT);
				1443	if (!inode) {
				1444	status = -EINVAL;
				1445	mlog_errno(status);
				1446	goto bail;
				1447	}
				1448
				1449	osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
				1450
				1451	status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0,
				1452	inode);
				1453	iput(inode);
				1454	if (status < 0) {
				1455	mlog_errno(status);
				1456	goto bail;
				1457	}
				1458
				1459	di = (struct ocfs2_dinode *) bitmap_bh->b_data;
				1460	osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
				1461	osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total);
				1462	brelse(bitmap_bh);
				1463	mlog(0, "cluster bitmap inode: %"MLFu64", clusters per group: %u\n",
				1464	osb->bitmap_blkno, osb->bitmap_cpg);
				1465
				1466	status = ocfs2_init_slot_info(osb);
				1467	if (status < 0) {
				1468	mlog_errno(status);
				1469	goto bail;
				1470	}
				1471
				1472	/* Link this osb onto the global linked list of all osb structures. */
				1473	/* The Global Link List is mainted for the whole driver . */
				1474	spin_lock(&ocfs2_globals_lock);
				1475	osb->osb_id = osb_id;
				1476	if (osb_id < OCFS2_MAX_OSB_ID)
				1477	osb_id++;
				1478	else {
				1479	mlog(ML_ERROR, "Too many volumes mounted\n");
				1480	status = -ENOMEM;
				1481	}
				1482	spin_unlock(&ocfs2_globals_lock);
				1483
				1484	bail:
				1485	mlog_exit(status);
				1486	return status;
				1487	}
				1488
				1489	/*
				1490	* will return: -EAGAIN if it is ok to keep searching for superblocks
				1491	* -EINVAL if there is a bad superblock
				1492	* 0 on success
				1493	*/
				1494	static int ocfs2_verify_volume(struct ocfs2_dinode *di,
				1495	struct buffer_head *bh,
				1496	u32 blksz)
				1497	{
				1498	int status = -EAGAIN;
				1499
				1500	mlog_entry_void();
				1501
				1502	if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
				1503	strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
				1504	status = -EINVAL;
				1505	if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
				1506	mlog(ML_ERROR, "found superblock with incorrect block "
				1507	"size: found %u, should be %u\n",
				1508	1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
				1509	blksz);
				1510	} else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
				1511	OCFS2_MAJOR_REV_LEVEL \|\|
				1512	le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
				1513	OCFS2_MINOR_REV_LEVEL) {
				1514	mlog(ML_ERROR, "found superblock with bad version: "
				1515	"found %u.%u, should be %u.%u\n",
				1516	le16_to_cpu(di->id2.i_super.s_major_rev_level),
				1517	le16_to_cpu(di->id2.i_super.s_minor_rev_level),
				1518	OCFS2_MAJOR_REV_LEVEL,
				1519	OCFS2_MINOR_REV_LEVEL);
				1520	} else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) {
				1521	mlog(ML_ERROR, "bad block number on superblock: "
				1522	"found %"MLFu64", should be %llu\n",
				1523	di->i_blkno, (unsigned long long)bh->b_blocknr);
				1524	} else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 \|\|
				1525	le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
				1526	mlog(ML_ERROR, "bad cluster size found: %u\n",
				1527	1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits));
				1528	} else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) {
				1529	mlog(ML_ERROR, "bad root_blkno: 0\n");
				1530	} else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) {
				1531	mlog(ML_ERROR, "bad system_dir_blkno: 0\n");
				1532	} else if (le16_to_cpu(di->id2.i_super.s_max_slots) > OCFS2_MAX_SLOTS) {
				1533	mlog(ML_ERROR,
				1534	"Superblock slots found greater than file system "
				1535	"maximum: found %u, max %u\n",
				1536	le16_to_cpu(di->id2.i_super.s_max_slots),
				1537	OCFS2_MAX_SLOTS);
				1538	} else {
				1539	/* found it! */
				1540	status = 0;
				1541	}
				1542	}
				1543
				1544	mlog_exit(status);
				1545	return status;
				1546	}
				1547
				1548	static int ocfs2_check_volume(struct ocfs2_super *osb)
				1549	{
				1550	int status = 0;
				1551	int dirty;
				1552	struct ocfs2_dinode local_alloc = NULL; / only used if we
				1553	* recover
				1554	* ourselves. */
				1555
				1556	mlog_entry_void();
				1557
				1558	/* Init our journal object. */
				1559	status = ocfs2_journal_init(osb->journal, &dirty);
				1560	if (status < 0) {
				1561	mlog(ML_ERROR, "Could not initialize journal!\n");
				1562	goto finally;
				1563	}
				1564
				1565	/* If the journal was unmounted cleanly then we don't want to
				1566	* recover anything. Otherwise, journal_load will do that
				1567	* dirty work for us :) */
				1568	if (!dirty) {
				1569	status = ocfs2_journal_wipe(osb->journal, 0);
				1570	if (status < 0) {
				1571	mlog_errno(status);
				1572	goto finally;
				1573	}
				1574	} else {
				1575	mlog(ML_NOTICE, "File system was not unmounted cleanly, "
				1576	"recovering volume.\n");
				1577	}
				1578
				1579	/* will play back anything left in the journal. */
				1580	ocfs2_journal_load(osb->journal);
				1581
				1582	if (dirty) {
				1583	/* recover my local alloc if we didn't unmount cleanly. */
				1584	status = ocfs2_begin_local_alloc_recovery(osb,
				1585	osb->slot_num,
				1586	&local_alloc);
				1587	if (status < 0) {
				1588	mlog_errno(status);
				1589	goto finally;
				1590	}
				1591	/* we complete the recovery process after we've marked
				1592	* ourselves as mounted. */
				1593	}
				1594
				1595	mlog(0, "Journal loaded.\n");
				1596
				1597	status = ocfs2_load_local_alloc(osb);
				1598	if (status < 0) {
				1599	mlog_errno(status);
				1600	goto finally;
				1601	}
				1602
				1603	if (dirty) {
				1604	/* Recovery will be completed after we've mounted the
				1605	* rest of the volume. */
				1606	osb->dirty = 1;
				1607	osb->local_alloc_copy = local_alloc;
				1608	local_alloc = NULL;
				1609	}
				1610
				1611	/* go through each journal, trylock it and if you get the
				1612	* lock, and it's marked as dirty, set the bit in the recover
				1613	* map and launch a recovery thread for it. */
				1614	status = ocfs2_mark_dead_nodes(osb);
				1615	if (status < 0)
				1616	mlog_errno(status);
				1617
				1618	finally:
				1619	if (local_alloc)
				1620	kfree(local_alloc);
				1621
				1622	mlog_exit(status);
				1623	return status;
				1624	}
				1625
				1626	/*
				1627	* The routine gets called from dismount or close whenever a dismount on
				1628	* volume is requested and the osb open count becomes 1.
				1629	* It will remove the osb from the global list and also free up all the
				1630	* initialized resources and fileobject.
				1631	*/
				1632	static void ocfs2_delete_osb(struct ocfs2_super *osb)
				1633	{
				1634	mlog_entry_void();
				1635
				1636	/* This function assumes that the caller has the main osb resource */
				1637
				1638	if (osb->slot_info)
				1639	ocfs2_free_slot_info(osb->slot_info);
				1640
				1641	/* FIXME
				1642	* This belongs in journal shutdown, but because we have to
				1643	* allocate osb->journal at the start of ocfs2_initalize_osb(),
				1644	* we free it here.
				1645	*/
				1646	kfree(osb->journal);
				1647	if (osb->local_alloc_copy)
				1648	kfree(osb->local_alloc_copy);
				1649	kfree(osb->uuid_str);
				1650	ocfs2_put_dlm_debug(osb->osb_dlm_debug);
				1651	memset(osb, 0, sizeof(struct ocfs2_super));
				1652
				1653	mlog_exit_void();
				1654	}
				1655
				1656	/* Put OCFS2 into a readonly state, or (if the user specifies it),
				1657	* panic(). We do not support continue-on-error operation. */
				1658	static void ocfs2_handle_error(struct super_block *sb)
				1659	{
				1660	struct ocfs2_super *osb = OCFS2_SB(sb);
				1661
				1662	if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC)
				1663	panic("OCFS2: (device %s): panic forced after error\n",
				1664	sb->s_id);
				1665
				1666	ocfs2_set_osb_flag(osb, OCFS2_OSB_ERROR_FS);
				1667
				1668	if (sb->s_flags & MS_RDONLY &&
				1669	(ocfs2_is_soft_readonly(osb) \|\|
				1670	ocfs2_is_hard_readonly(osb)))
				1671	return;
				1672
				1673	printk(KERN_CRIT "File system is now read-only due to the potential "
				1674	"of on-disk corruption. Please run fsck.ocfs2 once the file "
				1675	"system is unmounted.\n");
				1676	sb->s_flags \|= MS_RDONLY;
				1677	ocfs2_set_ro_flag(osb, 0);
				1678	}
				1679
				1680	static char error_buf[1024];
				1681
				1682	void __ocfs2_error(struct super_block *sb,
				1683	const char *function,
				1684	const char *fmt, ...)
				1685	{
				1686	va_list args;
				1687
				1688	va_start(args, fmt);
				1689	vsprintf(error_buf, fmt, args);
				1690	va_end(args);
				1691
				1692	/* Not using mlog here because we want to show the actual
				1693	* function the error came from. */
				1694	printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n",
				1695	sb->s_id, function, error_buf);
				1696
				1697	ocfs2_handle_error(sb);
				1698	}
				1699
				1700	/* Handle critical errors. This is intentionally more drastic than
				1701	* ocfs2_handle_error, so we only use for things like journal errors,
				1702	* etc. */
				1703	void __ocfs2_abort(struct super_block* sb,
				1704	const char *function,
				1705	const char *fmt, ...)
				1706	{
				1707	va_list args;
				1708
				1709	va_start(args, fmt);
				1710	vsprintf(error_buf, fmt, args);
				1711	va_end(args);
				1712
				1713	printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n",
				1714	sb->s_id, function, error_buf);
				1715
				1716	/* We don't have the cluster support yet to go straight to
				1717	* hard readonly in here. Until then, we want to keep
				1718	* ocfs2_abort() so that we can at least mark critical
				1719	* errors.
				1720	*
				1721	* TODO: This should abort the journal and alert other nodes
				1722	* that our slot needs recovery. */
				1723
				1724	/* Force a panic(). This stinks, but it's better than letting
				1725	* things continue without having a proper hard readonly
				1726	* here. */
				1727	OCFS2_SB(sb)->s_mount_opt \|= OCFS2_MOUNT_ERRORS_PANIC;
				1728	ocfs2_handle_error(sb);
				1729	}
				1730
				/* Module entry points: ocfs2_init is registered as the load hook and
				 * ocfs2_exit as the unload hook (both are defined outside this section). */
				1731	module_init(ocfs2_init);
				1732	module_exit(ocfs2_exit);