Blame - fs/ext3/super.c - kernel/msm-4.19

blob: 3c3c6e399fb3df167e7d45936df8e290227d0262 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/fs/ext3/super.c
				3	*
				4	* Copyright (C) 1992, 1993, 1994, 1995
				5	* Remy Card (card@masi.ibp.fr)
				6	* Laboratoire MASI - Institut Blaise Pascal
				7	* Universite Pierre et Marie Curie (Paris VI)
				8	*
				9	* from
				10	*
				11	* linux/fs/minix/inode.c
				12	*
				13	* Copyright (C) 1991, 1992 Linus Torvalds
				14	*
				15	* Big-endian to little-endian byte-swapping/bitmaps by
				16	* David S. Miller (davem@caip.rutgers.edu), 1995
				17	*/
				18
				19	#include <linux/config.h>
				20	#include <linux/module.h>
				21	#include <linux/string.h>
				22	#include <linux/fs.h>
				23	#include <linux/time.h>
				24	#include <linux/jbd.h>
				25	#include <linux/ext3_fs.h>
				26	#include <linux/ext3_jbd.h>
				27	#include <linux/slab.h>
				28	#include <linux/init.h>
				29	#include <linux/blkdev.h>
				30	#include <linux/parser.h>
				31	#include <linux/smp_lock.h>
				32	#include <linux/buffer_head.h>
				33	#include <linux/vfs.h>
				34	#include <linux/random.h>
				35	#include <linux/mount.h>
				36	#include <linux/namei.h>
				37	#include <linux/quotaops.h>
				38	#include <asm/uaccess.h>
				39	#include "xattr.h"
				40	#include "acl.h"
				41
				42	static int ext3_load_journal(struct super_block , struct ext3_super_block );
				43	static int ext3_create_journal(struct super_block , struct ext3_super_block ,
				44	int);
				45	static void ext3_commit_super (struct super_block * sb,
				46	struct ext3_super_block * es,
				47	int sync);
				48	static void ext3_mark_recovery_complete(struct super_block * sb,
				49	struct ext3_super_block * es);
				50	static void ext3_clear_journal_err(struct super_block * sb,
				51	struct ext3_super_block * es);
				52	static int ext3_sync_fs(struct super_block *sb, int wait);
				53	static const char ext3_decode_error(struct super_block sb, int errno,
				54	char nbuf[16]);
				55	static int ext3_remount (struct super_block * sb, int * flags, char * data);
				56	static int ext3_statfs (struct super_block * sb, struct kstatfs * buf);
				57	static void ext3_unlockfs(struct super_block *sb);
				58	static void ext3_write_super (struct super_block * sb);
				59	static void ext3_write_super_lockfs(struct super_block *sb);
				60
				61	/*
				62	* Wrappers for journal_start/end.
				63	*
				64	* The only special thing we need to do here is to make sure that all
				65	* journal_end calls result in the superblock being marked dirty, so
				66	* that sync() will call the filesystem's write_super callback if
				67	* appropriate.
				68	*/
				69	handle_t ext3_journal_start_sb(struct super_block sb, int nblocks)
				70	{
				71	journal_t *journal;
				72
				73	if (sb->s_flags & MS_RDONLY)
				74	return ERR_PTR(-EROFS);
				75
				76	/* Special case here: if the journal has aborted behind our
				77	* backs (eg. EIO in the commit thread), then we still need to
				78	* take the FS itself readonly cleanly. */
				79	journal = EXT3_SB(sb)->s_journal;
				80	if (is_journal_aborted(journal)) {
				81	ext3_abort(sb, __FUNCTION__,
				82	"Detected aborted journal");
				83	return ERR_PTR(-EROFS);
				84	}
				85
				86	return journal_start(journal, nblocks);
				87	}
				88
				89	/*
				90	* The only special thing we need to do here is to make sure that all
				91	* journal_stop calls result in the superblock being marked dirty, so
				92	* that sync() will call the filesystem's write_super callback if
				93	* appropriate.
				94	*/
				95	int __ext3_journal_stop(const char where, handle_t handle)
				96	{
				97	struct super_block *sb;
				98	int err;
				99	int rc;
				100
				101	sb = handle->h_transaction->t_journal->j_private;
				102	err = handle->h_err;
				103	rc = journal_stop(handle);
				104
				105	if (!err)
				106	err = rc;
				107	if (err)
				108	__ext3_std_error(sb, where, err);
				109	return err;
				110	}
				111
				112	void ext3_journal_abort_handle(const char caller, const char err_fn,
				113	struct buffer_head bh, handle_t handle, int err)
				114	{
				115	char nbuf[16];
				116	const char *errstr = ext3_decode_error(NULL, err, nbuf);
				117
				118	if (bh)
				119	BUFFER_TRACE(bh, "abort");
				120
				121	if (!handle->h_err)
				122	handle->h_err = err;
				123
				124	if (is_handle_aborted(handle))
				125	return;
				126
				127	printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
				128	caller, errstr, err_fn);
				129
				130	journal_abort_handle(handle);
				131	}
				132
				133	/* Deal with the reporting of failure conditions on a filesystem such as
				134	* inconsistencies detected or read IO failures.
				135	*
				136	* On ext2, we can store the error state of the filesystem in the
				137	* superblock. That is not possible on ext3, because we may have other
				138	* write ordering constraints on the superblock which prevent us from
				139	* writing it out straight away; and given that the journal is about to
				140	* be aborted, we can't rely on the current, or future, transactions to
				141	* write out the superblock safely.
				142	*
				143	* We'll just use the journal_abort() error code to record an error in
				144	* the journal instead. On recovery, the journal will complain about
				145	* that error until we've noted it down and cleared it.
				146	*/
				147
				148	static void ext3_handle_error(struct super_block *sb)
				149	{
				150	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				151
				152	EXT3_SB(sb)->s_mount_state \|= EXT3_ERROR_FS;
				153	es->s_state \|= cpu_to_le16(EXT3_ERROR_FS);
				154
				155	if (sb->s_flags & MS_RDONLY)
				156	return;
				157
				158	if (test_opt (sb, ERRORS_RO)) {
				159	printk (KERN_CRIT "Remounting filesystem read-only\n");
				160	sb->s_flags \|= MS_RDONLY;
				161	} else {
				162	journal_t *journal = EXT3_SB(sb)->s_journal;
				163
				164	EXT3_SB(sb)->s_mount_opt \|= EXT3_MOUNT_ABORT;
				165	if (journal)
				166	journal_abort(journal, -EIO);
				167	}
				168	if (test_opt(sb, ERRORS_PANIC))
				169	panic("EXT3-fs (device %s): panic forced after error\n",
				170	sb->s_id);
				171	ext3_commit_super(sb, es, 1);
				172	}
				173
				174	void ext3_error (struct super_block * sb, const char * function,
				175	const char * fmt, ...)
				176	{
				177	va_list args;
				178
				179	va_start(args, fmt);
				180	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
				181	vprintk(fmt, args);
				182	printk("\n");
				183	va_end(args);
				184
				185	ext3_handle_error(sb);
				186	}
				187
				188	static const char ext3_decode_error(struct super_block sb, int errno,
				189	char nbuf[16])
				190	{
				191	char *errstr = NULL;
				192
				193	switch (errno) {
				194	case -EIO:
				195	errstr = "IO failure";
				196	break;
				197	case -ENOMEM:
				198	errstr = "Out of memory";
				199	break;
				200	case -EROFS:
				201	if (!sb \|\| EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
				202	errstr = "Journal has aborted";
				203	else
				204	errstr = "Readonly filesystem";
				205	break;
				206	default:
				207	/* If the caller passed in an extra buffer for unknown
				208	* errors, textualise them now. Else we just return
				209	* NULL. */
				210	if (nbuf) {
				211	/* Check for truncated error codes... */
				212	if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				213	errstr = nbuf;
				214	}
				215	break;
				216	}
				217
				218	return errstr;
				219	}
				220
				221	/* __ext3_std_error decodes expected errors from journaling functions
				222	* automatically and invokes the appropriate error response. */
				223
				224	void __ext3_std_error (struct super_block * sb, const char * function,
				225	int errno)
				226	{
				227	char nbuf[16];
Stephen Tweedie	3012162	2005-05-18 11:47:17 -0400	[diff] [blame]	228	const char *errstr;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	229
Stephen Tweedie	3012162	2005-05-18 11:47:17 -0400	[diff] [blame]	230	/* Special case: if the error is EROFS, and we're not already
				231	* inside a transaction, then there's really no point in logging
				232	* an error. */
				233	if (errno == -EROFS && journal_current_handle() == NULL &&
				234	(sb->s_flags & MS_RDONLY))
				235	return;
				236
				237	errstr = ext3_decode_error(sb, errno, nbuf);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	238	printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
				239	sb->s_id, function, errstr);
				240
				241	ext3_handle_error(sb);
				242	}
				243
				244	/*
				245	* ext3_abort is a much stronger failure handler than ext3_error. The
				246	* abort function may be used to deal with unrecoverable failures such
				247	* as journal IO errors or ENOMEM at a critical moment in log management.
				248	*
				249	* We unconditionally force the filesystem into an ABORT|READONLY state,
				250	* unless the error response on the fs has been set to panic in which
				251	* case we take the easy way out and panic immediately.
				252	*/
				253
				254	void ext3_abort (struct super_block * sb, const char * function,
				255	const char * fmt, ...)
				256	{
				257	va_list args;
				258
				259	printk (KERN_CRIT "ext3_abort called.\n");
				260
				261	va_start(args, fmt);
				262	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
				263	vprintk(fmt, args);
				264	printk("\n");
				265	va_end(args);
				266
				267	if (test_opt(sb, ERRORS_PANIC))
				268	panic("EXT3-fs panic from previous error\n");
				269
				270	if (sb->s_flags & MS_RDONLY)
				271	return;
				272
				273	printk(KERN_CRIT "Remounting filesystem read-only\n");
				274	EXT3_SB(sb)->s_mount_state \|= EXT3_ERROR_FS;
				275	sb->s_flags \|= MS_RDONLY;
				276	EXT3_SB(sb)->s_mount_opt \|= EXT3_MOUNT_ABORT;
				277	journal_abort(EXT3_SB(sb)->s_journal, -EIO);
				278	}
				279
				280	void ext3_warning (struct super_block * sb, const char * function,
				281	const char * fmt, ...)
				282	{
				283	va_list args;
				284
				285	va_start(args, fmt);
				286	printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ",
				287	sb->s_id, function);
				288	vprintk(fmt, args);
				289	printk("\n");
				290	va_end(args);
				291	}
				292
				293	void ext3_update_dynamic_rev(struct super_block *sb)
				294	{
				295	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				296
				297	if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
				298	return;
				299
				300	ext3_warning(sb, __FUNCTION__,
				301	"updating to rev %d because of new feature flag, "
				302	"running e2fsck is recommended",
				303	EXT3_DYNAMIC_REV);
				304
				305	es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
				306	es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
				307	es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
				308	/* leave es->s_feature_compat flags alone /
				309	/* es->s_uuid will be set by e2fsck if empty */
				310
				311	/*
				312	* The rest of the superblock fields should be zero, and if not it
				313	* means they are likely already in use, so leave them alone. We
				314	* can leave it up to e2fsck to clean up any inconsistencies there.
				315	*/
				316	}
				317
				318	/*
				319	* Open the external journal device
				320	*/
				321	static struct block_device *ext3_blkdev_get(dev_t dev)
				322	{
				323	struct block_device *bdev;
				324	char b[BDEVNAME_SIZE];
				325
				326	bdev = open_by_devnum(dev, FMODE_READ\|FMODE_WRITE);
				327	if (IS_ERR(bdev))
				328	goto fail;
				329	return bdev;
				330
				331	fail:
				332	printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n",
				333	__bdevname(dev, b), PTR_ERR(bdev));
				334	return NULL;
				335	}
				336
				337	/*
				338	* Release the journal device
				339	*/
				340	static int ext3_blkdev_put(struct block_device *bdev)
				341	{
				342	bd_release(bdev);
				343	return blkdev_put(bdev);
				344	}
				345
				346	static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
				347	{
				348	struct block_device *bdev;
				349	int ret = -ENODEV;
				350
				351	bdev = sbi->journal_bdev;
				352	if (bdev) {
				353	ret = ext3_blkdev_put(bdev);
				354	sbi->journal_bdev = NULL;
				355	}
				356	return ret;
				357	}
				358
				359	static inline struct inode orphan_list_entry(struct list_head l)
				360	{
				361	return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
				362	}
				363
				364	static void dump_orphan_list(struct super_block sb, struct ext3_sb_info sbi)
				365	{
				366	struct list_head *l;
				367
				368	printk(KERN_ERR "sb orphan head is %d\n",
				369	le32_to_cpu(sbi->s_es->s_last_orphan));
				370
				371	printk(KERN_ERR "sb_info orphan list:\n");
				372	list_for_each(l, &sbi->s_orphan) {
				373	struct inode *inode = orphan_list_entry(l);
				374	printk(KERN_ERR " "
				375	"inode %s:%ld at %p: mode %o, nlink %d, next %d\n",
				376	inode->i_sb->s_id, inode->i_ino, inode,
				377	inode->i_mode, inode->i_nlink,
				378	NEXT_ORPHAN(inode));
				379	}
				380	}
				381
				382	static void ext3_put_super (struct super_block * sb)
				383	{
				384	struct ext3_sb_info *sbi = EXT3_SB(sb);
				385	struct ext3_super_block *es = sbi->s_es;
				386	int i;
				387
				388	ext3_xattr_put_super(sb);
				389	journal_destroy(sbi->s_journal);
				390	if (!(sb->s_flags & MS_RDONLY)) {
				391	EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				392	es->s_state = cpu_to_le16(sbi->s_mount_state);
				393	BUFFER_TRACE(sbi->s_sbh, "marking dirty");
				394	mark_buffer_dirty(sbi->s_sbh);
				395	ext3_commit_super(sb, es, 1);
				396	}
				397
				398	for (i = 0; i < sbi->s_gdb_count; i++)
				399	brelse(sbi->s_group_desc[i]);
				400	kfree(sbi->s_group_desc);
				401	percpu_counter_destroy(&sbi->s_freeblocks_counter);
				402	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				403	percpu_counter_destroy(&sbi->s_dirs_counter);
				404	brelse(sbi->s_sbh);
				405	#ifdef CONFIG_QUOTA
				406	for (i = 0; i < MAXQUOTAS; i++)
				407	kfree(sbi->s_qf_names[i]);
				408	#endif
				409
				410	/* Debugging code just in case the in-memory inode orphan list
				411	* isn't empty. The on-disk one can be non-empty if we've
				412	* detected an error and taken the fs readonly, but the
				413	* in-memory list had better be clean by this point. */
				414	if (!list_empty(&sbi->s_orphan))
				415	dump_orphan_list(sb, sbi);
				416	J_ASSERT(list_empty(&sbi->s_orphan));
				417
				418	invalidate_bdev(sb->s_bdev, 0);
				419	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
				420	/*
				421	* Invalidate the journal device's buffers. We don't want them
				422	* floating about in memory - the physical journal device may
				423	* hotswapped, and it breaks the `ro-after' testing code.
				424	*/
				425	sync_blockdev(sbi->journal_bdev);
				426	invalidate_bdev(sbi->journal_bdev, 0);
				427	ext3_blkdev_remove(sbi);
				428	}
				429	sb->s_fs_info = NULL;
				430	kfree(sbi);
				431	return;
				432	}
				433
				434	static kmem_cache_t *ext3_inode_cachep;
				435
				436	/*
				437	* Called inside transaction, so use GFP_NOFS
				438	*/
				439	static struct inode ext3_alloc_inode(struct super_block sb)
				440	{
				441	struct ext3_inode_info *ei;
				442
				443	ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS);
				444	if (!ei)
				445	return NULL;
				446	#ifdef CONFIG_EXT3_FS_POSIX_ACL
				447	ei->i_acl = EXT3_ACL_NOT_CACHED;
				448	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
				449	#endif
				450	ei->i_block_alloc_info = NULL;
				451	ei->vfs_inode.i_version = 1;
				452	return &ei->vfs_inode;
				453	}
				454
				455	static void ext3_destroy_inode(struct inode *inode)
				456	{
				457	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
				458	}
				459
				460	static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
				461	{
				462	struct ext3_inode_info ei = (struct ext3_inode_info ) foo;
				463
				464	if ((flags & (SLAB_CTOR_VERIFY\|SLAB_CTOR_CONSTRUCTOR)) ==
				465	SLAB_CTOR_CONSTRUCTOR) {
				466	INIT_LIST_HEAD(&ei->i_orphan);
				467	#ifdef CONFIG_EXT3_FS_XATTR
				468	init_rwsem(&ei->xattr_sem);
				469	#endif
				470	init_MUTEX(&ei->truncate_sem);
				471	inode_init_once(&ei->vfs_inode);
				472	}
				473	}
				474
				475	static int init_inodecache(void)
				476	{
				477	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
				478	sizeof(struct ext3_inode_info),
				479	0, SLAB_RECLAIM_ACCOUNT,
				480	init_once, NULL);
				481	if (ext3_inode_cachep == NULL)
				482	return -ENOMEM;
				483	return 0;
				484	}
				485
				486	static void destroy_inodecache(void)
				487	{
				488	if (kmem_cache_destroy(ext3_inode_cachep))
				489	printk(KERN_INFO "ext3_inode_cache: not all structures were freed\n");
				490	}
				491
				492	static void ext3_clear_inode(struct inode *inode)
				493	{
				494	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
				495	#ifdef CONFIG_EXT3_FS_POSIX_ACL
				496	if (EXT3_I(inode)->i_acl &&
				497	EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
				498	posix_acl_release(EXT3_I(inode)->i_acl);
				499	EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
				500	}
				501	if (EXT3_I(inode)->i_default_acl &&
				502	EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
				503	posix_acl_release(EXT3_I(inode)->i_default_acl);
				504	EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
				505	}
				506	#endif
				507	ext3_discard_reservation(inode);
				508	EXT3_I(inode)->i_block_alloc_info = NULL;
				509	kfree(rsv);
				510	}
				511
				512	#ifdef CONFIG_QUOTA
				513
				514	#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
				515	#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
				516
				517	static int ext3_dquot_initialize(struct inode *inode, int type);
				518	static int ext3_dquot_drop(struct inode *inode);
				519	static int ext3_write_dquot(struct dquot *dquot);
				520	static int ext3_acquire_dquot(struct dquot *dquot);
				521	static int ext3_release_dquot(struct dquot *dquot);
				522	static int ext3_mark_dquot_dirty(struct dquot *dquot);
				523	static int ext3_write_info(struct super_block *sb, int type);
				524	static int ext3_quota_on(struct super_block sb, int type, int format_id, char path);
				525	static int ext3_quota_on_mount(struct super_block *sb, int type);
				526	static ssize_t ext3_quota_read(struct super_block sb, int type, char data,
				527	size_t len, loff_t off);
				528	static ssize_t ext3_quota_write(struct super_block *sb, int type,
				529	const char *data, size_t len, loff_t off);
				530
				531	static struct dquot_operations ext3_quota_operations = {
				532	.initialize = ext3_dquot_initialize,
				533	.drop = ext3_dquot_drop,
				534	.alloc_space = dquot_alloc_space,
				535	.alloc_inode = dquot_alloc_inode,
				536	.free_space = dquot_free_space,
				537	.free_inode = dquot_free_inode,
				538	.transfer = dquot_transfer,
				539	.write_dquot = ext3_write_dquot,
				540	.acquire_dquot = ext3_acquire_dquot,
				541	.release_dquot = ext3_release_dquot,
				542	.mark_dirty = ext3_mark_dquot_dirty,
				543	.write_info = ext3_write_info
				544	};
				545
				546	static struct quotactl_ops ext3_qctl_operations = {
				547	.quota_on = ext3_quota_on,
				548	.quota_off = vfs_quota_off,
				549	.quota_sync = vfs_quota_sync,
				550	.get_info = vfs_get_dqinfo,
				551	.set_info = vfs_set_dqinfo,
				552	.get_dqblk = vfs_get_dqblk,
				553	.set_dqblk = vfs_set_dqblk
				554	};
				555	#endif
				556
				557	static struct super_operations ext3_sops = {
				558	.alloc_inode = ext3_alloc_inode,
				559	.destroy_inode = ext3_destroy_inode,
				560	.read_inode = ext3_read_inode,
				561	.write_inode = ext3_write_inode,
				562	.dirty_inode = ext3_dirty_inode,
				563	.delete_inode = ext3_delete_inode,
				564	.put_super = ext3_put_super,
				565	.write_super = ext3_write_super,
				566	.sync_fs = ext3_sync_fs,
				567	.write_super_lockfs = ext3_write_super_lockfs,
				568	.unlockfs = ext3_unlockfs,
				569	.statfs = ext3_statfs,
				570	.remount_fs = ext3_remount,
				571	.clear_inode = ext3_clear_inode,
				572	#ifdef CONFIG_QUOTA
				573	.quota_read = ext3_quota_read,
				574	.quota_write = ext3_quota_write,
				575	#endif
				576	};
				577
				578	struct dentry ext3_get_parent(struct dentry child);
				579	static struct export_operations ext3_export_ops = {
				580	.get_parent = ext3_get_parent,
				581	};
				582
				583	enum {
				584	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
				585	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
				586	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
				587	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
				588	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
				589	Opt_commit, Opt_journal_update, Opt_journal_inum,
				590	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
				591	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	592	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	593	Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
				594	};
				595
				596	static match_table_t tokens = {
				597	{Opt_bsd_df, "bsddf"},
				598	{Opt_minix_df, "minixdf"},
				599	{Opt_grpid, "grpid"},
				600	{Opt_grpid, "bsdgroups"},
				601	{Opt_nogrpid, "nogrpid"},
				602	{Opt_nogrpid, "sysvgroups"},
				603	{Opt_resgid, "resgid=%u"},
				604	{Opt_resuid, "resuid=%u"},
				605	{Opt_sb, "sb=%u"},
				606	{Opt_err_cont, "errors=continue"},
				607	{Opt_err_panic, "errors=panic"},
				608	{Opt_err_ro, "errors=remount-ro"},
				609	{Opt_nouid32, "nouid32"},
				610	{Opt_nocheck, "nocheck"},
				611	{Opt_nocheck, "check=none"},
				612	{Opt_check, "check"},
				613	{Opt_debug, "debug"},
				614	{Opt_oldalloc, "oldalloc"},
				615	{Opt_orlov, "orlov"},
				616	{Opt_user_xattr, "user_xattr"},
				617	{Opt_nouser_xattr, "nouser_xattr"},
				618	{Opt_acl, "acl"},
				619	{Opt_noacl, "noacl"},
				620	{Opt_reservation, "reservation"},
				621	{Opt_noreservation, "noreservation"},
				622	{Opt_noload, "noload"},
				623	{Opt_nobh, "nobh"},
				624	{Opt_commit, "commit=%u"},
				625	{Opt_journal_update, "journal=update"},
				626	{Opt_journal_inum, "journal=%u"},
				627	{Opt_abort, "abort"},
				628	{Opt_data_journal, "data=journal"},
				629	{Opt_data_ordered, "data=ordered"},
				630	{Opt_data_writeback, "data=writeback"},
				631	{Opt_offusrjquota, "usrjquota="},
				632	{Opt_usrjquota, "usrjquota=%s"},
				633	{Opt_offgrpjquota, "grpjquota="},
				634	{Opt_grpjquota, "grpjquota=%s"},
				635	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
				636	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	637	{Opt_quota, "grpquota"},
				638	{Opt_noquota, "noquota"},
				639	{Opt_quota, "quota"},
				640	{Opt_quota, "usrquota"},
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	641	{Opt_barrier, "barrier=%u"},
				642	{Opt_err, NULL},
				643	{Opt_resize, "resize"},
				644	};
				645
				646	static unsigned long get_sb_block(void **data)
				647	{
				648	unsigned long sb_block;
				649	char options = (char ) *data;
				650
				651	if (!options \|\| strncmp(options, "sb=", 3) != 0)
				652	return 1; /* Default location */
				653	options += 3;
				654	sb_block = simple_strtoul(options, &options, 0);
				655	if (options && options != ',') {
				656	printk("EXT3-fs: Invalid sb specification: %s\n",
				657	(char ) data);
				658	return 1;
				659	}
				660	if (*options == ',')
				661	options++;
				662	data = (void ) options;
				663	return sb_block;
				664	}
				665
				666	static int parse_options (char * options, struct super_block *sb,
				667	unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
				668	{
				669	struct ext3_sb_info *sbi = EXT3_SB(sb);
				670	char * p;
				671	substring_t args[MAX_OPT_ARGS];
				672	int data_opt = 0;
				673	int option;
				674	#ifdef CONFIG_QUOTA
				675	int qtype;
				676	char *qname;
				677	#endif
				678
				679	if (!options)
				680	return 1;
				681
				682	while ((p = strsep (&options, ",")) != NULL) {
				683	int token;
				684	if (!*p)
				685	continue;
				686
				687	token = match_token(p, tokens, args);
				688	switch (token) {
				689	case Opt_bsd_df:
				690	clear_opt (sbi->s_mount_opt, MINIX_DF);
				691	break;
				692	case Opt_minix_df:
				693	set_opt (sbi->s_mount_opt, MINIX_DF);
				694	break;
				695	case Opt_grpid:
				696	set_opt (sbi->s_mount_opt, GRPID);
				697	break;
				698	case Opt_nogrpid:
				699	clear_opt (sbi->s_mount_opt, GRPID);
				700	break;
				701	case Opt_resuid:
				702	if (match_int(&args[0], &option))
				703	return 0;
				704	sbi->s_resuid = option;
				705	break;
				706	case Opt_resgid:
				707	if (match_int(&args[0], &option))
				708	return 0;
				709	sbi->s_resgid = option;
				710	break;
				711	case Opt_sb:
				712	/* handled by get_sb_block() instead of here */
				713	/* sb_block = match_int(&args[0]); /
				714	break;
				715	case Opt_err_panic:
				716	clear_opt (sbi->s_mount_opt, ERRORS_CONT);
				717	clear_opt (sbi->s_mount_opt, ERRORS_RO);
				718	set_opt (sbi->s_mount_opt, ERRORS_PANIC);
				719	break;
				720	case Opt_err_ro:
				721	clear_opt (sbi->s_mount_opt, ERRORS_CONT);
				722	clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
				723	set_opt (sbi->s_mount_opt, ERRORS_RO);
				724	break;
				725	case Opt_err_cont:
				726	clear_opt (sbi->s_mount_opt, ERRORS_RO);
				727	clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
				728	set_opt (sbi->s_mount_opt, ERRORS_CONT);
				729	break;
				730	case Opt_nouid32:
				731	set_opt (sbi->s_mount_opt, NO_UID32);
				732	break;
				733	case Opt_check:
				734	#ifdef CONFIG_EXT3_CHECK
				735	set_opt (sbi->s_mount_opt, CHECK);
				736	#else
				737	printk(KERN_ERR
				738	"EXT3 Check option not supported\n");
				739	#endif
				740	break;
				741	case Opt_nocheck:
				742	clear_opt (sbi->s_mount_opt, CHECK);
				743	break;
				744	case Opt_debug:
				745	set_opt (sbi->s_mount_opt, DEBUG);
				746	break;
				747	case Opt_oldalloc:
				748	set_opt (sbi->s_mount_opt, OLDALLOC);
				749	break;
				750	case Opt_orlov:
				751	clear_opt (sbi->s_mount_opt, OLDALLOC);
				752	break;
				753	#ifdef CONFIG_EXT3_FS_XATTR
				754	case Opt_user_xattr:
				755	set_opt (sbi->s_mount_opt, XATTR_USER);
				756	break;
				757	case Opt_nouser_xattr:
				758	clear_opt (sbi->s_mount_opt, XATTR_USER);
				759	break;
				760	#else
				761	case Opt_user_xattr:
				762	case Opt_nouser_xattr:
				763	printk("EXT3 (no)user_xattr options not supported\n");
				764	break;
				765	#endif
				766	#ifdef CONFIG_EXT3_FS_POSIX_ACL
				767	case Opt_acl:
				768	set_opt(sbi->s_mount_opt, POSIX_ACL);
				769	break;
				770	case Opt_noacl:
				771	clear_opt(sbi->s_mount_opt, POSIX_ACL);
				772	break;
				773	#else
				774	case Opt_acl:
				775	case Opt_noacl:
				776	printk("EXT3 (no)acl options not supported\n");
				777	break;
				778	#endif
				779	case Opt_reservation:
				780	set_opt(sbi->s_mount_opt, RESERVATION);
				781	break;
				782	case Opt_noreservation:
				783	clear_opt(sbi->s_mount_opt, RESERVATION);
				784	break;
				785	case Opt_journal_update:
				786	/* @@@ FIXME */
				787	/* Eventually we will want to be able to create
				788	a journal file here. For now, only allow the
				789	user to specify an existing inode to be the
				790	journal file. */
				791	if (is_remount) {
				792	printk(KERN_ERR "EXT3-fs: cannot specify "
				793	"journal on remount\n");
				794	return 0;
				795	}
				796	set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
				797	break;
				798	case Opt_journal_inum:
				799	if (is_remount) {
				800	printk(KERN_ERR "EXT3-fs: cannot specify "
				801	"journal on remount\n");
				802	return 0;
				803	}
				804	if (match_int(&args[0], &option))
				805	return 0;
				806	*inum = option;
				807	break;
				808	case Opt_noload:
				809	set_opt (sbi->s_mount_opt, NOLOAD);
				810	break;
				811	case Opt_commit:
				812	if (match_int(&args[0], &option))
				813	return 0;
				814	if (option < 0)
				815	return 0;
				816	if (option == 0)
				817	option = JBD_DEFAULT_MAX_COMMIT_AGE;
				818	sbi->s_commit_interval = HZ * option;
				819	break;
				820	case Opt_data_journal:
				821	data_opt = EXT3_MOUNT_JOURNAL_DATA;
				822	goto datacheck;
				823	case Opt_data_ordered:
				824	data_opt = EXT3_MOUNT_ORDERED_DATA;
				825	goto datacheck;
				826	case Opt_data_writeback:
				827	data_opt = EXT3_MOUNT_WRITEBACK_DATA;
				828	datacheck:
				829	if (is_remount) {
				830	if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
				831	!= data_opt) {
				832	printk(KERN_ERR
				833	"EXT3-fs: cannot change data "
				834	"mode on remount\n");
				835	return 0;
				836	}
				837	} else {
				838	sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
				839	sbi->s_mount_opt \|= data_opt;
				840	}
				841	break;
				842	#ifdef CONFIG_QUOTA
				843	case Opt_usrjquota:
				844	qtype = USRQUOTA;
				845	goto set_qf_name;
				846	case Opt_grpjquota:
				847	qtype = GRPQUOTA;
				848	set_qf_name:
				849	if (sb_any_quota_enabled(sb)) {
				850	printk(KERN_ERR
				851	"EXT3-fs: Cannot change journalled "
				852	"quota options when quota turned on.\n");
				853	return 0;
				854	}
				855	qname = match_strdup(&args[0]);
				856	if (!qname) {
				857	printk(KERN_ERR
				858	"EXT3-fs: not enough memory for "
				859	"storing quotafile name.\n");
				860	return 0;
				861	}
				862	if (sbi->s_qf_names[qtype] &&
				863	strcmp(sbi->s_qf_names[qtype], qname)) {
				864	printk(KERN_ERR
				865	"EXT3-fs: %s quota file already "
				866	"specified.\n", QTYPE2NAME(qtype));
				867	kfree(qname);
				868	return 0;
				869	}
				870	sbi->s_qf_names[qtype] = qname;
				871	if (strchr(sbi->s_qf_names[qtype], '/')) {
				872	printk(KERN_ERR
				873	"EXT3-fs: quotafile must be on "
				874	"filesystem root.\n");
				875	kfree(sbi->s_qf_names[qtype]);
				876	sbi->s_qf_names[qtype] = NULL;
				877	return 0;
				878	}
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	879	set_opt(sbi->s_mount_opt, QUOTA);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	880	break;
				881	case Opt_offusrjquota:
				882	qtype = USRQUOTA;
				883	goto clear_qf_name;
				884	case Opt_offgrpjquota:
				885	qtype = GRPQUOTA;
				886	clear_qf_name:
				887	if (sb_any_quota_enabled(sb)) {
				888	printk(KERN_ERR "EXT3-fs: Cannot change "
				889	"journalled quota options when "
				890	"quota turned on.\n");
				891	return 0;
				892	}
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	893	/*
				894	* The space will be released later when all options
				895	* are confirmed to be correct
				896	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	897	sbi->s_qf_names[qtype] = NULL;
				898	break;
				899	case Opt_jqfmt_vfsold:
				900	sbi->s_jquota_fmt = QFMT_VFS_OLD;
				901	break;
				902	case Opt_jqfmt_vfsv0:
				903	sbi->s_jquota_fmt = QFMT_VFS_V0;
				904	break;
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	905	case Opt_quota:
				906	set_opt(sbi->s_mount_opt, QUOTA);
				907	break;
				908	case Opt_noquota:
				909	if (sb_any_quota_enabled(sb)) {
				910	printk(KERN_ERR "EXT3-fs: Cannot change quota "
				911	"options when quota turned on.\n");
				912	return 0;
				913	}
				914	clear_opt(sbi->s_mount_opt, QUOTA);
				915	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	916	#else
				917	case Opt_usrjquota:
				918	case Opt_grpjquota:
				919	case Opt_offusrjquota:
				920	case Opt_offgrpjquota:
				921	case Opt_jqfmt_vfsold:
				922	case Opt_jqfmt_vfsv0:
				923	printk(KERN_ERR
				924	"EXT3-fs: journalled quota options not "
				925	"supported.\n");
				926	break;
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	927	case Opt_quota:
				928	case Opt_noquota:
				929	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	930	#endif
				931	case Opt_abort:
				932	set_opt(sbi->s_mount_opt, ABORT);
				933	break;
				934	case Opt_barrier:
				935	if (match_int(&args[0], &option))
				936	return 0;
				937	if (option)
				938	set_opt(sbi->s_mount_opt, BARRIER);
				939	else
				940	clear_opt(sbi->s_mount_opt, BARRIER);
				941	break;
				942	case Opt_ignore:
				943	break;
				944	case Opt_resize:
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	945	if (!is_remount) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	946	printk("EXT3-fs: resize option only available "
				947	"for remount\n");
				948	return 0;
				949	}
KAMBAROV, ZAUR	c7f1721	2005-06-28 20:45:11 -0700	[diff] [blame]	950	if (match_int(&args[0], &option) != 0)
				951	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	952	*n_blocks_count = option;
				953	break;
				954	case Opt_nobh:
				955	set_opt(sbi->s_mount_opt, NOBH);
				956	break;
				957	default:
				958	printk (KERN_ERR
				959	"EXT3-fs: Unrecognized mount option \"%s\" "
				960	"or missing value\n", p);
				961	return 0;
				962	}
				963	}
				964	#ifdef CONFIG_QUOTA
				965	if (!sbi->s_jquota_fmt && (sbi->s_qf_names[USRQUOTA] \|\|
				966	sbi->s_qf_names[GRPQUOTA])) {
				967	printk(KERN_ERR
				968	"EXT3-fs: journalled quota format not specified.\n");
				969	return 0;
				970	}
				971	#endif
				972
				973	return 1;
				974	}
				975
				/*
				 * Mount-time superblock checks and bookkeeping.
				 *
				 * The result is set to MS_RDONLY when the on-disk revision level is
				 * above EXT3_MAX_SUPP_REV.  If read_only is non-zero the function returns
				 * right after that test without modifying the superblock.  Otherwise it
				 * prints at most one "running e2fsck is recommended" warning, bumps the
				 * mount count, stamps the mount time, sets the RECOVER incompat feature
				 * and writes the superblock with ext3_commit_super(sb, es, 1).
				 *
				 * Returns 0, or MS_RDONLY if the revision level is too high.
				 */
				976	static int ext3_setup_super(struct super_block sb, struct ext3_super_block es,
				977	int read_only)
				978	{
				979	struct ext3_sb_info *sbi = EXT3_SB(sb);
				980	int res = 0;
				981
				982	if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
				983	printk (KERN_ERR "EXT3-fs warning: revision level too high, "
				984	"forcing read-only mode\n");
				985	res = MS_RDONLY;
				986	}
				987	if (read_only)
				988	return res;
				/* The tests below form one else-if chain: only the first match warns. */
				989	if (!(sbi->s_mount_state & EXT3_VALID_FS))
				990	printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, "
				991	"running e2fsck is recommended\n");
				992	else if ((sbi->s_mount_state & EXT3_ERROR_FS))
				993	printk (KERN_WARNING
				994	"EXT3-fs warning: mounting fs with errors, "
				995	"running e2fsck is recommended\n");
				/* A negative s_max_mnt_count (as __s16) skips the mount-count warning. */
				996	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
				997	le16_to_cpu(es->s_mnt_count) >=
				998	(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
				999	printk (KERN_WARNING
				1000	"EXT3-fs warning: maximal mount count reached, "
				1001	"running e2fsck is recommended\n");
				/* A zero s_checkinterval skips the time-based warning. */
				1002	else if (le32_to_cpu(es->s_checkinterval) &&
				1003	(le32_to_cpu(es->s_lastcheck) +
				1004	le32_to_cpu(es->s_checkinterval) <= get_seconds()))
				1005	printk (KERN_WARNING
				1006	"EXT3-fs warning: checktime reached, "
				1007	"running e2fsck is recommended\n");
				1008	#if 0
				1009	/* @@@ We _will_ want to clear the valid bit if we find
				1010	inconsistencies, to force a fsck at reboot. But for
				1011	a plain journaled filesystem we can keep it set as
				1012	valid forever! :) */
				1013	es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS);
				1014	#endif
				/* A stored maximum mount count of 0 is replaced by the default. */
				1015	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
				1016	es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
				1017	es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
				1018	es->s_mtime = cpu_to_le32(get_seconds());
				1019	ext3_update_dynamic_rev(sb);
				1020	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				1021
				/* Third argument (sync) is 1 for this commit. */
				1022	ext3_commit_super(sb, es, 1);
				1023	if (test_opt(sb, DEBUG))
				1024	printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, "
				1025	"bpg=%lu, ipg=%lu, mo=%04lx]\n",
				1026	sb->s_blocksize,
				1027	sbi->s_groups_count,
				1028	EXT3_BLOCKS_PER_GROUP(sb),
				1029	EXT3_INODES_PER_GROUP(sb),
				1030	sbi->s_mount_opt);
				1031
				/* A journal without a j_inode is reported as an external journal. */
				1032	printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);
				1033	if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
				1034	char b[BDEVNAME_SIZE];
				1035
				1036	printk("external journal on %s\n",
				1037	bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
				1038	} else {
				1039	printk("internal journal\n");
				1040	}
				1041	#ifdef CONFIG_EXT3_CHECK
				1042	if (test_opt (sb, CHECK)) {
				1043	ext3_check_blocks_bitmap (sb);
				1044	ext3_check_inodes_bitmap (sb);
				1045	}
				1046	#endif
				1047	return res;
				1048	}
				1049
				1050	/* Called at mount-time, super-block is locked */
				/*
				 * Walk every group descriptor and verify that the block bitmap, the
				 * inode bitmap and the inode table it names lie inside that group's
				 * block range [block, block + EXT3_BLOCKS_PER_GROUP(sb)).
				 *
				 * The inode table covers s_itb_per_group blocks starting at
				 * bg_inode_table, so bg_inode_table + s_itb_per_group is one past its
				 * last block.  That value may equal the end of the group (the table then
				 * occupies the group's final blocks); only a larger value is an error.
				 *
				 * On the first bad descriptor ext3_error() is called and 0 is returned.
				 * Otherwise the superblock's free block and free inode counts are
				 * refreshed from ext3_count_free_blocks()/ext3_count_free_inodes() and
				 * 1 is returned.
				 */
				1051	static int ext3_check_descriptors (struct super_block * sb)
				1052	{
				1053	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1054	unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
				1055	struct ext3_group_desc * gdp = NULL;
				1056	int desc_block = 0;
				1057	int i;
				1058
				1059	ext3_debug ("Checking group descriptors");
				1060
				1061	for (i = 0; i < sbi->s_groups_count; i++)
				1062	{
				/* Step to the next descriptor buffer every EXT3_DESC_PER_BLOCK groups. */
				1063	if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0)
				1064	gdp = (struct ext3_group_desc *)
				1065	sbi->s_group_desc[desc_block++]->b_data;
				1066	if (le32_to_cpu(gdp->bg_block_bitmap) < block \|\|
				1067	le32_to_cpu(gdp->bg_block_bitmap) >=
				1068	block + EXT3_BLOCKS_PER_GROUP(sb))
				1069	{
				1070	ext3_error (sb, "ext3_check_descriptors",
				1071	"Block bitmap for group %d"
				1072	" not in group (block %lu)!",
				1073	i, (unsigned long)
				1074	le32_to_cpu(gdp->bg_block_bitmap));
				1075	return 0;
				1076	}
				1077	if (le32_to_cpu(gdp->bg_inode_bitmap) < block \|\|
				1078	le32_to_cpu(gdp->bg_inode_bitmap) >=
				1079	block + EXT3_BLOCKS_PER_GROUP(sb))
				1080	{
				1081	ext3_error (sb, "ext3_check_descriptors",
				1082	"Inode bitmap for group %d"
				1083	" not in group (block %lu)!",
				1084	i, (unsigned long)
				1085	le32_to_cpu(gdp->bg_inode_bitmap));
				1086	return 0;
				1087	}
				/* One-past-end of the table may touch the group end, hence '>' not '>='. */
				1088	if (le32_to_cpu(gdp->bg_inode_table) < block \|\|
				1089	le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
				1090	block + EXT3_BLOCKS_PER_GROUP(sb))
				1091	{
				1092	ext3_error (sb, "ext3_check_descriptors",
				1093	"Inode table for group %d"
				1094	" not in group (block %lu)!",
				1095	i, (unsigned long)
				1096	le32_to_cpu(gdp->bg_inode_table));
				1097	return 0;
				1098	}
				1099	block += EXT3_BLOCKS_PER_GROUP(sb);
				1100	gdp++;
				1101	}
				1102
				1103	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
				1104	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
				1105	return 1;
				1106	}
				1107
				1108
				1109	/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
				1110	* the superblock) which were deleted from all directories, but held open by
				1111	* a process at the time of a crash. We walk the list and try to delete these
				1112	* inodes at recovery time (only with a read-write filesystem).
				1113	*
				1114	* In order to keep the orphan inode chain consistent during traversal (in
				1115	* case of crash during recovery), we link each inode into the superblock
				1116	* orphan list_head and handle it the same way as an inode deletion during
				1117	* normal operation (which journals the operations for us).
				1118	*
				1119	* We only do an iget() and an iput() on each inode, which is very safe if we
				1120	* accidentally point at an in-use or already deleted inode. The worst that
				1121	* can happen in this case is that we get a "bit already cleared" message from
				1122	* ext3_free_inode(). The only reason we would point at a wrong inode is if
				1123	* e2fsck was run on this filesystem, and it must have already done the orphan
				1124	* inode cleanup for us, so we can safely abort without any further action.
				1125	*/
				1126	static void ext3_orphan_cleanup (struct super_block * sb,
				1127	struct ext3_super_block * es)
				1128	{
				/* Saved so that sb->s_flags can be restored verbatim before returning. */
				1129	unsigned int s_flags = sb->s_flags;
				1130	int nr_orphans = 0, nr_truncates = 0;
				1131	#ifdef CONFIG_QUOTA
				1132	int i;
				1133	#endif
				1134	if (!es->s_last_orphan) {
				1135	jbd_debug(4, "no orphan inodes to clean up\n");
				1136	return;
				1137	}
				1138
				/*
				 * On a filesystem flagged with errors the list head is simply cleared.
				 * NOTE(review): s_last_orphan is already known to be non-zero here
				 * (checked above), so the inner test is always true.
				 */
				1139	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
				1140	if (es->s_last_orphan)
				1141	jbd_debug(1, "Errors on filesystem, "
				1142	"clearing orphan list.\n");
				1143	es->s_last_orphan = 0;
				1144	jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
				1145	return;
				1146	}
				1147
				/* MS_RDONLY is dropped for the duration of the cleanup. */
				1148	if (s_flags & MS_RDONLY) {
				1149	printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n",
				1150	sb->s_id);
				1151	sb->s_flags &= ~MS_RDONLY;
				1152	}
				1153	#ifdef CONFIG_QUOTA
				1154	/* Needed for iput() to work correctly and not trash data */
				1155	sb->s_flags \|= MS_ACTIVE;
				1156	/* Turn on quotas so that they are updated correctly */
				1157	for (i = 0; i < MAXQUOTAS; i++) {
				1158	if (EXT3_SB(sb)->s_qf_names[i]) {
				1159	int ret = ext3_quota_on_mount(sb, i);
				1160	if (ret < 0)
				1161	printk(KERN_ERR
				1162	"EXT3-fs: Cannot turn on journalled "
				1163	"quota: error %d\n", ret);
				1164	}
				1165	}
				1166	#endif
				1167
				/*
				 * Nothing in this loop writes es->s_last_orphan except the failure
				 * path; presumably the truncate/iput() path unlinks the inode from the
				 * orphan list and advances the head (confirm against ext3_orphan_del).
				 */
				1168	while (es->s_last_orphan) {
				1169	struct inode *inode;
				1170
				/* If the inode cannot be fetched, clear the list head and stop. */
				1171	if (!(inode =
				1172	ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
				1173	es->s_last_orphan = 0;
				1174	break;
				1175	}
				1176
				1177	list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
				1178	DQUOT_INIT(inode);
				/* Inodes that still have links are truncated; unlinked ones are deleted by iput(). */
				1179	if (inode->i_nlink) {
				1180	printk(KERN_DEBUG
				1181	"%s: truncating inode %ld to %Ld bytes\n",
				1182	__FUNCTION__, inode->i_ino, inode->i_size);
				1183	jbd_debug(2, "truncating inode %ld to %Ld bytes\n",
				1184	inode->i_ino, inode->i_size);
				1185	ext3_truncate(inode);
				1186	nr_truncates++;
				1187	} else {
				1188	printk(KERN_DEBUG
				1189	"%s: deleting unreferenced inode %ld\n",
				1190	__FUNCTION__, inode->i_ino);
				1191	jbd_debug(2, "deleting unreferenced inode %ld\n",
				1192	inode->i_ino);
				1193	nr_orphans++;
				1194	}
				1195	iput(inode); /* The delete magic happens here! */
				1196	}
				1197
				/* PLURAL(x) expands to TWO printk arguments: the count and "" or "s". */
				1198	#define PLURAL(x) (x), ((x)==1) ? "" : "s"
				1199
				1200	if (nr_orphans)
				1201	printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n",
				1202	sb->s_id, PLURAL(nr_orphans));
				1203	if (nr_truncates)
				1204	printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n",
				1205	sb->s_id, PLURAL(nr_truncates));
				1206	#ifdef CONFIG_QUOTA
				1207	/* Turn quotas off */
				1208	for (i = 0; i < MAXQUOTAS; i++) {
				1209	if (sb_dqopt(sb)->files[i])
				1210	vfs_quota_off(sb, i);
				1211	}
				1212	#endif
				1213	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
				1214	}
				1215
				/*
				 * log2(n) is ffz() applied to the complement of n.  Presumably ffz()
				 * returns the index of the first zero bit, making this the index of the
				 * lowest set bit of n, i.e. log2(n) for a power of two (confirm ffz).
				 */
				1216	#define log2(n) ffz(~(n))
				1217
				1218	/*
				1219	* Maximal file size. There is a direct, and {,double-,triple-}indirect
				1220	* block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
				1221	* We need to be 1 filesystem block less than the 2^32 sector limit.
				1222	*/
				/*
				 * bits is the log2 of the block size in bytes.
				 *
				 * The block count is EXT3_NDIR_BLOCKS plus 2^(bits-2), 2^(2*(bits-2)) and
				 * 2^(3*(bits-2)) for the three indirection levels; it is converted to
				 * bytes by shifting left by bits and then clamped to upper_limit.
				 *
				 * Returns the resulting byte count.
				 */
				1223	static loff_t ext3_max_size(int bits)
				1224	{
				1225	loff_t res = EXT3_NDIR_BLOCKS;
				1226	/* This constant is calculated to be the largest file size for a
				1227	* dense, 4k-blocksize file such that the total number of
				1228	* sectors in the file, including data and all indirect blocks,
				1229	* does not exceed 2^32. */
				1230	const loff_t upper_limit = 0x1ff7fffd000LL;
				1231
				/* Single, double and triple indirect block counts, in that order. */
				1232	res += 1LL << (bits-2);
				1233	res += 1LL << (2*(bits-2));
				1234	res += 1LL << (3*(bits-2));
				/* Blocks to bytes. */
				1235	res <<= bits;
				1236	if (res > upper_limit)
				1237	res = upper_limit;
				1238	return res;
				1239	}
				1240
				/*
				 * Return the block number holding group-descriptor block number nr.
				 *
				 * Without the META_BG incompat feature, or for nr below the superblock's
				 * s_first_meta_bg, the descriptor blocks follow the superblock directly:
				 * logic_sb_block + nr + 1.
				 *
				 * Otherwise the descriptor block lives in group bg = s_desc_per_block * nr,
				 * at that group's first block, or one block later when
				 * ext3_bg_has_super() reports true for the group.
				 */
				1241	static unsigned long descriptor_loc(struct super_block *sb,
				1242	unsigned long logic_sb_block,
				1243	int nr)
				1244	{
				1245	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1246	unsigned long bg, first_data_block, first_meta_bg;
				1247	int has_super = 0;
				1248
				1249	first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block);
				1250	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
				1251
				/*
				 * NOTE(review): nr is int and first_meta_bg is unsigned long, so nr is
				 * converted to unsigned for this comparison; the visible caller passes
				 * only non-negative values.
				 */
				1252	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) \|\|
				1253	nr < first_meta_bg)
				1254	return (logic_sb_block + nr + 1);
				1255	bg = sbi->s_desc_per_block * nr;
				1256	if (ext3_bg_has_super(sb, bg))
				1257	has_super = 1;
				1258	return (first_data_block + has_super + (bg * sbi->s_blocks_per_group));
				1259	}
				1260
				1261
				/*
				 * Read and validate the on-disk superblock and bring the filesystem up
				 * on sb.
				 *
				 * data is handed to get_sb_block() to obtain the superblock location and
				 * is then parsed as the mount option string by parse_options().  silent
				 * suppresses the "Can't find ext3 filesystem" and "No journal" messages.
				 *
				 * Steps, in order: allocate the ext3_sb_info, read the superblock at the
				 * minimum block size, apply default and user mount options, check the
				 * feature flags, re-read the superblock if the real block size differs,
				 * derive the geometry fields, read and check the group descriptors, load
				 * or create the journal, validate the data journaling mode, read the
				 * root inode, then run ext3_setup_super(), ext3_orphan_cleanup() and
				 * ext3_mark_recovery_complete().
				 *
				 * Returns 0 on success, -ENOMEM if the ext3_sb_info cannot be allocated,
				 * and -EINVAL for every other failure.
				 */
				1262	static int ext3_fill_super (struct super_block sb, void data, int silent)
				1263	{
				1264	struct buffer_head * bh;
				1265	struct ext3_super_block *es = NULL;
				1266	struct ext3_sb_info *sbi;
				1267	unsigned long block;
				1268	unsigned long sb_block = get_sb_block(&data);
				1269	unsigned long logic_sb_block;
				1270	unsigned long offset = 0;
				1271	unsigned long journal_inum = 0;
				1272	unsigned long def_mount_opts;
				1273	struct inode *root;
				1274	int blocksize;
				1275	int hblock;
				1276	int db_count;
				1277	int i;
				1278	int needs_recovery;
				1279	__le32 features;
				1280
				1281	sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
				1282	if (!sbi)
				1283	return -ENOMEM;
				1284	sb->s_fs_info = sbi;
				1285	memset(sbi, 0, sizeof(*sbi));
				1286	sbi->s_mount_opt = 0;
				1287	sbi->s_resuid = EXT3_DEF_RESUID;
				1288	sbi->s_resgid = EXT3_DEF_RESGID;
				1289
				/* Paired with the lock_kernel() calls in front of both later returns. */
				1290	unlock_kernel();
				1291
				1292	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
				1293	if (!blocksize) {
				1294	printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
				1295	goto out_fail;
				1296	}
				1297
				1298	/*
				1299	* The ext3 superblock will not be buffer aligned for other than 1kB
				1300	* block sizes. We need to calculate the offset from buffer start.
				1301	*/
				1302	if (blocksize != EXT3_MIN_BLOCK_SIZE) {
				1303	logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
				1304	offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
				1305	} else {
				1306	logic_sb_block = sb_block;
				1307	}
				1308
				1309	if (!(bh = sb_bread(sb, logic_sb_block))) {
				1310	printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
				1311	goto out_fail;
				1312	}
				1313	/*
				1314	* Note: s_es must be initialized as soon as possible because
				1315	* some ext3 macro-instructions depend on its value
				1316	*/
				1317	es = (struct ext3_super_block ) (((char )bh->b_data) + offset);
				1318	sbi->s_es = es;
				1319	sb->s_magic = le16_to_cpu(es->s_magic);
				1320	if (sb->s_magic != EXT3_SUPER_MAGIC)
				1321	goto cantfind_ext3;
				1322
				1323	/* Set defaults before we parse the mount options */
				1324	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
				1325	if (def_mount_opts & EXT3_DEFM_DEBUG)
				1326	set_opt(sbi->s_mount_opt, DEBUG);
				1327	if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
				1328	set_opt(sbi->s_mount_opt, GRPID);
				1329	if (def_mount_opts & EXT3_DEFM_UID16)
				1330	set_opt(sbi->s_mount_opt, NO_UID32);
				1331	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
				1332	set_opt(sbi->s_mount_opt, XATTR_USER);
				1333	if (def_mount_opts & EXT3_DEFM_ACL)
				1334	set_opt(sbi->s_mount_opt, POSIX_ACL);
				1335	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
				1336	sbi->s_mount_opt \|= EXT3_MOUNT_JOURNAL_DATA;
				1337	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
				1338	sbi->s_mount_opt \|= EXT3_MOUNT_ORDERED_DATA;
				1339	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
				1340	sbi->s_mount_opt \|= EXT3_MOUNT_WRITEBACK_DATA;
				1341
				1342	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
				1343	set_opt(sbi->s_mount_opt, ERRORS_PANIC);
				1344	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
				1345	set_opt(sbi->s_mount_opt, ERRORS_RO);
				1346
				1347	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
				1348	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
				1349
				1350	set_opt(sbi->s_mount_opt, RESERVATION);
				1351
				/* Initial mount: no resize target (NULL) and is_remount == 0. */
				1352	if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
				1353	goto failed_mount;
				1354
				1355	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				1356	((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
				1357
				1358	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
				1359	(EXT3_HAS_COMPAT_FEATURE(sb, ~0U) \|\|
				1360	EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) \|\|
				1361	EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
				1362	printk(KERN_WARNING
				1363	"EXT3-fs warning: feature flags set on rev 0 fs, "
				1364	"running e2fsck is recommended\n");
				1365	/*
				1366	* Check feature flags regardless of the revision level, since we
				1367	* previously didn't change the revision level when setting the flags,
				1368	* so there is a chance incompat flags are set on a rev 0 filesystem.
				1369	*/
				1370	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
				1371	if (features) {
				1372	printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
				1373	"unsupported optional features (%x).\n",
				1374	sb->s_id, le32_to_cpu(features));
				1375	goto failed_mount;
				1376	}
				/* Unsupported RO_COMPAT features only block a read-write mount. */
				1377	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
				1378	if (!(sb->s_flags & MS_RDONLY) && features) {
				1379	printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
				1380	"unsupported optional features (%x).\n",
				1381	sb->s_id, le32_to_cpu(features));
				1382	goto failed_mount;
				1383	}
				/*
				 * NOTE(review): s_log_block_size comes from disk and is used as a shift
				 * count before any range check; only the shifted result is validated
				 * below.
				 */
				1384	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
				1385
				1386	if (blocksize < EXT3_MIN_BLOCK_SIZE \|\|
				1387	blocksize > EXT3_MAX_BLOCK_SIZE) {
				1388	printk(KERN_ERR
				1389	"EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
				1390	blocksize, sb->s_id);
				1391	goto failed_mount;
				1392	}
				1393
				1394	hblock = bdev_hardsect_size(sb->s_bdev);
				/* Real block size differs from the probe size: switch and re-read. */
				1395	if (sb->s_blocksize != blocksize) {
				1396	/*
				1397	* Make sure the blocksize for the filesystem is larger
				1398	* than the hardware sectorsize for the machine.
				1399	*/
				1400	if (blocksize < hblock) {
				1401	printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
				1402	"device blocksize %d.\n", blocksize, hblock);
				1403	goto failed_mount;
				1404	}
				1405
				1406	brelse (bh);
				1407	sb_set_blocksize(sb, blocksize);
				1408	logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
				1409	offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
				1410	bh = sb_bread(sb, logic_sb_block);
				1411	if (!bh) {
				1412	printk(KERN_ERR
				1413	"EXT3-fs: Can't read superblock on 2nd try.\n");
				1414	goto failed_mount;
				1415	}
				1416	es = (struct ext3_super_block )(((char )bh->b_data) + offset);
				1417	sbi->s_es = es;
				1418	if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
				1419	printk (KERN_ERR
				1420	"EXT3-fs: Magic mismatch, very weird !\n");
				1421	goto failed_mount;
				1422	}
				1423	}
				1424
				1425	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
				1426
				1427	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
				1428	sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
				1429	sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
				1430	} else {
				1431	sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
				1432	sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
				/* Inode size must be >= the old size, a power of two, and <= blocksize. */
				1433	if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) \|\|
				1434	(sbi->s_inode_size & (sbi->s_inode_size - 1)) \|\|
				1435	(sbi->s_inode_size > blocksize)) {
				1436	printk (KERN_ERR
				1437	"EXT3-fs: unsupported inode size: %d\n",
				1438	sbi->s_inode_size);
				1439	goto failed_mount;
				1440	}
				1441	}
				/*
				 * NOTE(review): s_log_frag_size is likewise used as a shift count
				 * without a prior range check.
				 */
				1442	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
				1443	le32_to_cpu(es->s_log_frag_size);
				1444	if (blocksize != sbi->s_frag_size) {
				1445	printk(KERN_ERR
				1446	"EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
				1447	sbi->s_frag_size, blocksize);
				1448	goto failed_mount;
				1449	}
				1450	sbi->s_frags_per_block = 1;
				1451	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
				1452	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
				1453	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
				/* The zero tests guard the divisions that follow. */
				1454	if (EXT3_INODE_SIZE(sb) == 0)
				1455	goto cantfind_ext3;
				1456	sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
				1457	if (sbi->s_inodes_per_block == 0)
				1458	goto cantfind_ext3;
				1459	sbi->s_itb_per_group = sbi->s_inodes_per_group /
				1460	sbi->s_inodes_per_block;
				1461	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
				1462	sbi->s_sbh = bh;
				1463	sbi->s_mount_state = le16_to_cpu(es->s_state);
				1464	sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
				1465	sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
				1466	for (i=0; i < 4; i++)
				1467	sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
				1468	sbi->s_def_hash_version = es->s_def_hash_version;
				1469
				/* Per-group counts are capped at blocksize * 8. */
				1470	if (sbi->s_blocks_per_group > blocksize * 8) {
				1471	printk (KERN_ERR
				1472	"EXT3-fs: #blocks per group too big: %lu\n",
				1473	sbi->s_blocks_per_group);
				1474	goto failed_mount;
				1475	}
				1476	if (sbi->s_frags_per_group > blocksize * 8) {
				1477	printk (KERN_ERR
				1478	"EXT3-fs: #fragments per group too big: %lu\n",
				1479	sbi->s_frags_per_group);
				1480	goto failed_mount;
				1481	}
				1482	if (sbi->s_inodes_per_group > blocksize * 8) {
				1483	printk (KERN_ERR
				1484	"EXT3-fs: #inodes per group too big: %lu\n",
				1485	sbi->s_inodes_per_group);
				1486	goto failed_mount;
				1487	}
				1488
				1489	if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
				1490	goto cantfind_ext3;
				/* Both counts below are rounded-up divisions. */
				1491	sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
				1492	le32_to_cpu(es->s_first_data_block) +
				1493	EXT3_BLOCKS_PER_GROUP(sb) - 1) /
				1494	EXT3_BLOCKS_PER_GROUP(sb);
				1495	db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
				1496	EXT3_DESC_PER_BLOCK(sb);
				/*
				 * NOTE(review): db_count is an int derived from on-disk values and the
				 * size multiplication is not overflow-checked.
				 */
				1497	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
				1498	GFP_KERNEL);
				1499	if (sbi->s_group_desc == NULL) {
				1500	printk (KERN_ERR "EXT3-fs: not enough memory\n");
				1501	goto failed_mount;
				1502	}
				1503
				1504	percpu_counter_init(&sbi->s_freeblocks_counter);
				1505	percpu_counter_init(&sbi->s_freeinodes_counter);
				1506	percpu_counter_init(&sbi->s_dirs_counter);
				1507	bgl_lock_init(&sbi->s_blockgroup_lock);
				1508
				1509	for (i = 0; i < db_count; i++) {
				1510	block = descriptor_loc(sb, logic_sb_block, i);
				1511	sbi->s_group_desc[i] = sb_bread(sb, block);
				1512	if (!sbi->s_group_desc[i]) {
				1513	printk (KERN_ERR "EXT3-fs: "
				1514	"can't read group descriptor %d\n", i);
				/* Trim db_count so failed_mount2 releases only the buffers read so far. */
				1515	db_count = i;
				1516	goto failed_mount2;
				1517	}
				1518	}
				1519	if (!ext3_check_descriptors (sb)) {
				1520	printk (KERN_ERR "EXT3-fs: group descriptors corrupted !\n");
				1521	goto failed_mount2;
				1522	}
				1523	sbi->s_gdb_count = db_count;
				1524	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
				1525	spin_lock_init(&sbi->s_next_gen_lock);
				1526	/* per filesystem reservation list head & lock */
				1527	spin_lock_init(&sbi->s_rsv_window_lock);
				1528	sbi->s_rsv_window_root = RB_ROOT;
				1529	/* Add a single, static dummy reservation to the start of the
				1530	* reservation window list --- it gives us a placeholder for
				1531	* append-at-start-of-list which makes the allocation logic
				1532	* _much_ simpler. */
				1533	sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
				1534	sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
				1535	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
				1536	sbi->s_rsv_window_head.rsv_goal_size = 0;
				1537	ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
				1538
				1539	/*
				1540	* set up enough so that it can read an inode
				1541	*/
				1542	sb->s_op = &ext3_sops;
				1543	sb->s_export_op = &ext3_export_ops;
				1544	sb->s_xattr = ext3_xattr_handlers;
				1545	#ifdef CONFIG_QUOTA
				1546	sb->s_qcop = &ext3_qctl_operations;
				1547	sb->dq_op = &ext3_quota_operations;
				1548	#endif
				1549	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
				1550
				1551	sb->s_root = NULL;
				1552
				/* Sampled here; later only selects whether "recovery complete" is printed. */
				1553	needs_recovery = (es->s_last_orphan != 0 \|\|
				1554	EXT3_HAS_INCOMPAT_FEATURE(sb,
				1555	EXT3_FEATURE_INCOMPAT_RECOVER));
				1556
				1557	/*
				1558	* The first inode we look at is the journal inode. Don't try
				1559	* root first: it may be modified in the journal!
				1560	*/
				1561	if (!test_opt(sb, NOLOAD) &&
				1562	EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
				1563	if (ext3_load_journal(sb, es))
				1564	goto failed_mount2;
				1565	} else if (journal_inum) {
				1566	if (ext3_create_journal(sb, es, journal_inum))
				1567	goto failed_mount2;
				1568	} else {
				1569	if (!silent)
				1570	printk (KERN_ERR
				1571	"ext3: No journal on filesystem on %s\n",
				1572	sb->s_id);
				1573	goto failed_mount2;
				1574	}
				1575
				1576	/* We have now updated the journal if required, so we can
				1577	* validate the data journaling mode. */
				1578	switch (test_opt(sb, DATA_FLAGS)) {
				1579	case 0:
				1580	/* No mode set, assume a default based on the journal
				1581	capabilities: ORDERED_DATA if the journal can
				1582	cope, else JOURNAL_DATA */
				1583	if (journal_check_available_features
				1584	(sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
				1585	set_opt(sbi->s_mount_opt, ORDERED_DATA);
				1586	else
				1587	set_opt(sbi->s_mount_opt, JOURNAL_DATA);
				1588	break;
				1589
				1590	case EXT3_MOUNT_ORDERED_DATA:
				1591	case EXT3_MOUNT_WRITEBACK_DATA:
				1592	if (!journal_check_available_features
				1593	(sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
				1594	printk(KERN_ERR "EXT3-fs: Journal does not support "
				1595	"requested data journaling mode\n");
				1596	goto failed_mount3;
				1597	}
				/* No break above: a supported mode falls through to default. */
				1598	default:
				1599	break;
				1600	}
				1601
				/* nobh is kept only with blocksize == page size and writeback data mode. */
				1602	if (test_opt(sb, NOBH)) {
				1603	if (sb->s_blocksize_bits != PAGE_CACHE_SHIFT) {
				1604	printk(KERN_WARNING "EXT3-fs: Ignoring nobh option "
				1605	"since filesystem blocksize doesn't match "
				1606	"pagesize\n");
				1607	clear_opt(sbi->s_mount_opt, NOBH);
				1608	}
				1609	if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
				1610	printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
				1611	"its supported only with writeback mode\n");
				1612	clear_opt(sbi->s_mount_opt, NOBH);
				1613	}
				1614	}
				1615	/*
				1616	* The journal_load will have done any necessary log recovery,
				1617	* so we can safely mount the rest of the filesystem now.
				1618	*/
				1619
				/* The iget() result is not tested directly; a NULL s_root is the failure signal. */
				1620	root = iget(sb, EXT3_ROOT_INO);
				1621	sb->s_root = d_alloc_root(root);
				1622	if (!sb->s_root) {
				1623	printk(KERN_ERR "EXT3-fs: get root inode failed\n");
				1624	iput(root);
				1625	goto failed_mount3;
				1626	}
				1627	if (!S_ISDIR(root->i_mode) \|\| !root->i_blocks \|\| !root->i_size) {
				1628	dput(sb->s_root);
				1629	sb->s_root = NULL;
				1630	printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
				1631	goto failed_mount3;
				1632	}
				1633
				/* NOTE(review): the return value of ext3_setup_super() is not used here. */
				1634	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
				1635	/*
				1636	* akpm: core read_super() calls in here with the superblock locked.
				1637	* That deadlocks, because orphan cleanup needs to lock the superblock
				1638	* in numerous places. Here we just pop the lock - it's relatively
				1639	* harmless, because we are now ready to accept write_super() requests,
				1640	* and aviro says that's the only reason for hanging onto the
				1641	* superblock lock.
				1642	*/
				/* EXT3_ORPHAN_FS is set in s_mount_state only while the cleanup runs. */
				1643	EXT3_SB(sb)->s_mount_state \|= EXT3_ORPHAN_FS;
				1644	ext3_orphan_cleanup(sb, es);
				1645	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
				1646	if (needs_recovery)
				1647	printk (KERN_INFO "EXT3-fs: recovery complete.\n");
				1648	ext3_mark_recovery_complete(sb, es);
				1649	printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
				1650	test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
				1651	test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
				1652	"writeback");
				1653
				1654	percpu_counter_mod(&sbi->s_freeblocks_counter,
				1655	ext3_count_free_blocks(sb));
				1656	percpu_counter_mod(&sbi->s_freeinodes_counter,
				1657	ext3_count_free_inodes(sb));
				1658	percpu_counter_mod(&sbi->s_dirs_counter,
				1659	ext3_count_dirs(sb));
				1660
				1661	lock_kernel();
				1662	return 0;
				1663
				/*
				 * Error unwinding: each label releases what was acquired after the
				 * label below it and then falls through to that label.
				 */
				1664	cantfind_ext3:
				1665	if (!silent)
				1666	printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
				1667	sb->s_id);
				1668	goto failed_mount;
				1669
				1670	failed_mount3:
				1671	journal_destroy(sbi->s_journal);
				1672	failed_mount2:
				1673	for (i = 0; i < db_count; i++)
				1674	brelse(sbi->s_group_desc[i]);
				1675	kfree(sbi->s_group_desc);
				1676	failed_mount:
				1677	#ifdef CONFIG_QUOTA
				1678	for (i = 0; i < MAXQUOTAS; i++)
				1679	kfree(sbi->s_qf_names[i]);
				1680	#endif
				1681	ext3_blkdev_remove(sbi);
				1682	brelse(bh);
				1683	out_fail:
				1684	sb->s_fs_info = NULL;
				1685	kfree(sbi);
				1686	lock_kernel();
				1687	return -EINVAL;
				1688	}
				1689
				1690	/*
				1691	* Setup any per-fs journal parameters now. We'll do this both on
				1692	* initial mount, once the journal has been initialised but before we've
				1693	* done any recovery; and again on any subsequent remount.
				1694	*/
				/*
				 * Copies a non-zero s_commit_interval from the ext3_sb_info into the
				 * journal, then sets or clears JFS_BARRIER in journal->j_flags to match
				 * the BARRIER mount option, under j_state_lock.
				 */
				1695	static void ext3_init_journal_params(struct super_block sb, journal_t journal)
				1696	{
				1697	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1698
				1699	if (sbi->s_commit_interval)
				1700	journal->j_commit_interval = sbi->s_commit_interval;
				1701	/* We could also set up an ext3-specific default for the commit
				1702	* interval here, but for now we'll just fall back to the jbd
				1703	* default. */
				1704
				/* Only the j_flags update is done with j_state_lock held. */
				1705	spin_lock(&journal->j_state_lock);
				1706	if (test_opt(sb, BARRIER))
				1707	journal->j_flags \|= JFS_BARRIER;
				1708	else
				1709	journal->j_flags &= ~JFS_BARRIER;
				1710	spin_unlock(&journal->j_state_lock);
				1711	}
				1712
				/*
				 * Build a journal from the inode numbered journal_inum on sb.
				 *
				 * The inode is fetched with iget() and rejected if it is missing, has a
				 * zero link count, is a bad inode, or is not a regular file.  On success
				 * the journal from journal_init_inode() gets j_private = sb and has its
				 * per-fs parameters applied with ext3_init_journal_params().
				 *
				 * Returns the journal, or NULL after printing an error; every failure
				 * path after a successful iget() drops the inode with iput().
				 */
				1713	static journal_t ext3_get_journal(struct super_block sb, int journal_inum)
				1714	{
				1715	struct inode *journal_inode;
				1716	journal_t *journal;
				1717
				1718	/* First, test for the existence of a valid inode on disk. Bad
				1719	* things happen if we iget() an unused inode, as the subsequent
				1720	* iput() will try to delete it. */
				1721
				1722	journal_inode = iget(sb, journal_inum);
				1723	if (!journal_inode) {
				1724	printk(KERN_ERR "EXT3-fs: no journal found.\n");
				1725	return NULL;
				1726	}
				/* A zero link count inode is marked bad before the iput(). */
				1727	if (!journal_inode->i_nlink) {
				1728	make_bad_inode(journal_inode);
				1729	iput(journal_inode);
				1730	printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n");
				1731	return NULL;
				1732	}
				1733
				1734	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
				1735	journal_inode, journal_inode->i_size);
				1736	if (is_bad_inode(journal_inode) \|\| !S_ISREG(journal_inode->i_mode)) {
				1737	printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
				1738	iput(journal_inode);
				1739	return NULL;
				1740	}
				1741
				1742	journal = journal_init_inode(journal_inode);
				1743	if (!journal) {
				1744	printk(KERN_ERR "EXT3-fs: Could not load journal inode\n");
				1745	iput(journal_inode);
				1746	return NULL;
				1747	}
				1748	journal->j_private = sb;
				1749	ext3_init_journal_params(sb, journal);
				1750	return journal;
				1751	}
				1752
				1753	static journal_t ext3_get_dev_journal(struct super_block sb,
				1754	dev_t j_dev)
				1755	{
				1756	struct buffer_head * bh;
				1757	journal_t *journal;
				1758	int start;
				1759	int len;
				1760	int hblock, blocksize;
				1761	unsigned long sb_block;
				1762	unsigned long offset;
				1763	struct ext3_super_block * es;
				1764	struct block_device *bdev;
				1765
				1766	bdev = ext3_blkdev_get(j_dev);
				1767	if (bdev == NULL)
				1768	return NULL;
				1769
				1770	if (bd_claim(bdev, sb)) {
				1771	printk(KERN_ERR
				1772	"EXT3: failed to claim external journal device.\n");
				1773	blkdev_put(bdev);
				1774	return NULL;
				1775	}
				1776
				1777	blocksize = sb->s_blocksize;
				1778	hblock = bdev_hardsect_size(bdev);
				1779	if (blocksize < hblock) {
				1780	printk(KERN_ERR
				1781	"EXT3-fs: blocksize too small for journal device.\n");
				1782	goto out_bdev;
				1783	}
				1784
				1785	sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
				1786	offset = EXT3_MIN_BLOCK_SIZE % blocksize;
				1787	set_blocksize(bdev, blocksize);
				1788	if (!(bh = __bread(bdev, sb_block, blocksize))) {
				1789	printk(KERN_ERR "EXT3-fs: couldn't read superblock of "
				1790	"external journal\n");
				1791	goto out_bdev;
				1792	}
				1793
				1794	es = (struct ext3_super_block ) (((char )bh->b_data) + offset);
				1795	if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) \|\|
				1796	!(le32_to_cpu(es->s_feature_incompat) &
				1797	EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
				1798	printk(KERN_ERR "EXT3-fs: external journal has "
				1799	"bad superblock\n");
				1800	brelse(bh);
				1801	goto out_bdev;
				1802	}
				1803
				1804	if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
				1805	printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
				1806	brelse(bh);
				1807	goto out_bdev;
				1808	}
				1809
				1810	len = le32_to_cpu(es->s_blocks_count);
				1811	start = sb_block + 1;
				1812	brelse(bh); /* we're done with the superblock */
				1813
				1814	journal = journal_init_dev(bdev, sb->s_bdev,
				1815	start, len, blocksize);
				1816	if (!journal) {
				1817	printk(KERN_ERR "EXT3-fs: failed to create device journal\n");
				1818	goto out_bdev;
				1819	}
				1820	journal->j_private = sb;
				1821	ll_rw_block(READ, 1, &journal->j_sb_buffer);
				1822	wait_on_buffer(journal->j_sb_buffer);
				1823	if (!buffer_uptodate(journal->j_sb_buffer)) {
				1824	printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
				1825	goto out_journal;
				1826	}
				1827	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
				1828	printk(KERN_ERR "EXT3-fs: External journal has more than one "
				1829	"user (unsupported) - %d\n",
				1830	be32_to_cpu(journal->j_superblock->s_nr_users));
				1831	goto out_journal;
				1832	}
				1833	EXT3_SB(sb)->journal_bdev = bdev;
				1834	ext3_init_journal_params(sb, journal);
				1835	return journal;
				1836	out_journal:
				1837	journal_destroy(journal);
				1838	out_bdev:
				1839	ext3_blkdev_put(bdev);
				1840	return NULL;
				1841	}
				1842
				1843	static int ext3_load_journal(struct super_block * sb,
				1844	struct ext3_super_block * es)
				1845	{
				1846	journal_t *journal;
				1847	int journal_inum = le32_to_cpu(es->s_journal_inum);
				1848	dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
				1849	int err = 0;
				1850	int really_read_only;
				1851
				1852	really_read_only = bdev_read_only(sb->s_bdev);
				1853
				1854	/*
				1855	* Are we loading a blank journal or performing recovery after a
				1856	* crash? For recovery, we need to check in advance whether we
				1857	* can get read-write access to the device.
				1858	*/
				1859
				1860	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
				1861	if (sb->s_flags & MS_RDONLY) {
				1862	printk(KERN_INFO "EXT3-fs: INFO: recovery "
				1863	"required on readonly filesystem.\n");
				1864	if (really_read_only) {
				1865	printk(KERN_ERR "EXT3-fs: write access "
				1866	"unavailable, cannot proceed.\n");
				1867	return -EROFS;
				1868	}
				1869	printk (KERN_INFO "EXT3-fs: write access will "
				1870	"be enabled during recovery.\n");
				1871	}
				1872	}
				1873
				1874	if (journal_inum && journal_dev) {
				1875	printk(KERN_ERR "EXT3-fs: filesystem has both journal "
				1876	"and inode journals!\n");
				1877	return -EINVAL;
				1878	}
				1879
				1880	if (journal_inum) {
				1881	if (!(journal = ext3_get_journal(sb, journal_inum)))
				1882	return -EINVAL;
				1883	} else {
				1884	if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
				1885	return -EINVAL;
				1886	}
				1887
				1888	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
				1889	err = journal_update_format(journal);
				1890	if (err) {
				1891	printk(KERN_ERR "EXT3-fs: error updating journal.\n");
				1892	journal_destroy(journal);
				1893	return err;
				1894	}
				1895	}
				1896
				1897	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
				1898	err = journal_wipe(journal, !really_read_only);
				1899	if (!err)
				1900	err = journal_load(journal);
				1901
				1902	if (err) {
				1903	printk(KERN_ERR "EXT3-fs: error loading journal.\n");
				1904	journal_destroy(journal);
				1905	return err;
				1906	}
				1907
				1908	EXT3_SB(sb)->s_journal = journal;
				1909	ext3_clear_journal_err(sb, es);
				1910	return 0;
				1911	}
				1912
				1913	static int ext3_create_journal(struct super_block * sb,
				1914	struct ext3_super_block * es,
				1915	int journal_inum)
				1916	{
				1917	journal_t *journal;
				1918
				1919	if (sb->s_flags & MS_RDONLY) {
				1920	printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
				1921	"create journal.\n");
				1922	return -EROFS;
				1923	}
				1924
				1925	if (!(journal = ext3_get_journal(sb, journal_inum)))
				1926	return -EINVAL;
				1927
				1928	printk(KERN_INFO "EXT3-fs: creating new journal on inode %d\n",
				1929	journal_inum);
				1930
				1931	if (journal_create(journal)) {
				1932	printk(KERN_ERR "EXT3-fs: error creating journal.\n");
				1933	journal_destroy(journal);
				1934	return -EIO;
				1935	}
				1936
				1937	EXT3_SB(sb)->s_journal = journal;
				1938
				1939	ext3_update_dynamic_rev(sb);
				1940	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				1941	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
				1942
				1943	es->s_journal_inum = cpu_to_le32(journal_inum);
				1944	sb->s_dirt = 1;
				1945
				1946	/* Make sure we flush the recovery flag to disk. */
				1947	ext3_commit_super(sb, es, 1);
				1948
				1949	return 0;
				1950	}
				1951
				1952	static void ext3_commit_super (struct super_block * sb,
				1953	struct ext3_super_block * es,
				1954	int sync)
				1955	{
				1956	struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
				1957
				1958	if (!sbh)
				1959	return;
				1960	es->s_wtime = cpu_to_le32(get_seconds());
				1961	es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
				1962	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
				1963	BUFFER_TRACE(sbh, "marking dirty");
				1964	mark_buffer_dirty(sbh);
				1965	if (sync)
				1966	sync_dirty_buffer(sbh);
				1967	}
				1968
				1969
				1970	/*
				1971	* Have we just finished recovery? If so, and if we are mounting (or
				1972	* remounting) the filesystem readonly, then we will end up with a
				1973	* consistent fs on disk. Record that fact.
				1974	*/
				1975	static void ext3_mark_recovery_complete(struct super_block * sb,
				1976	struct ext3_super_block * es)
				1977	{
				1978	journal_t *journal = EXT3_SB(sb)->s_journal;
				1979
				1980	journal_lock_updates(journal);
				1981	journal_flush(journal);
				1982	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
				1983	sb->s_flags & MS_RDONLY) {
				1984	EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				1985	sb->s_dirt = 0;
				1986	ext3_commit_super(sb, es, 1);
				1987	}
				1988	journal_unlock_updates(journal);
				1989	}
				1990
				1991	/*
				1992	* If we are mounting (or read-write remounting) a filesystem whose journal
				1993	* has recorded an error from a previous lifetime, move that error to the
				1994	* main filesystem now.
				1995	*/
				1996	static void ext3_clear_journal_err(struct super_block * sb,
				1997	struct ext3_super_block * es)
				1998	{
				1999	journal_t *journal;
				2000	int j_errno;
				2001	const char *errstr;
				2002
				2003	journal = EXT3_SB(sb)->s_journal;
				2004
				2005	/*
				2006	* Now check for any error status which may have been recorded in the
				2007	* journal by a prior ext3_error() or ext3_abort()
				2008	*/
				2009
				2010	j_errno = journal_errno(journal);
				2011	if (j_errno) {
				2012	char nbuf[16];
				2013
				2014	errstr = ext3_decode_error(sb, j_errno, nbuf);
				2015	ext3_warning(sb, __FUNCTION__, "Filesystem error recorded "
				2016	"from previous mount: %s", errstr);
				2017	ext3_warning(sb, __FUNCTION__, "Marking fs in need of "
				2018	"filesystem check.");
				2019
				2020	EXT3_SB(sb)->s_mount_state \|= EXT3_ERROR_FS;
				2021	es->s_state \|= cpu_to_le16(EXT3_ERROR_FS);
				2022	ext3_commit_super (sb, es, 1);
				2023
				2024	journal_clear_err(journal);
				2025	}
				2026	}
				2027
				2028	/*
				2029	* Force the running and committing transactions to commit,
				2030	* and wait on the commit.
				2031	*/
				2032	int ext3_force_commit(struct super_block *sb)
				2033	{
				2034	journal_t *journal;
				2035	int ret;
				2036
				2037	if (sb->s_flags & MS_RDONLY)
				2038	return 0;
				2039
				2040	journal = EXT3_SB(sb)->s_journal;
				2041	sb->s_dirt = 0;
				2042	ret = ext3_journal_force_commit(journal);
				2043	return ret;
				2044	}
				2045
				2046	/*
				2047	* Ext3 always journals updates to the superblock itself, so we don't
				2048	* have to propagate any other updates to the superblock on disk at this
				2049	* point. Just start an async writeback to get the buffers on their way
				2050	* to the disk.
				2051	*
				2052	* This implicitly triggers the writebehind on sync().
				2053	*/
				2054
				2055	static void ext3_write_super (struct super_block * sb)
				2056	{
				2057	if (down_trylock(&sb->s_lock) == 0)
				2058	BUG();
				2059	sb->s_dirt = 0;
				2060	}
				2061
				2062	static int ext3_sync_fs(struct super_block *sb, int wait)
				2063	{
				2064	tid_t target;
				2065
				2066	sb->s_dirt = 0;
				2067	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
				2068	if (wait)
				2069	log_wait_commit(EXT3_SB(sb)->s_journal, target);
				2070	}
				2071	return 0;
				2072	}
				2073
				2074	/*
				2075	* LVM calls this function before a (read-only) snapshot is created. This
				2076	* gives us a chance to flush the journal completely and mark the fs clean.
				2077	*/
				2078	static void ext3_write_super_lockfs(struct super_block *sb)
				2079	{
				2080	sb->s_dirt = 0;
				2081
				2082	if (!(sb->s_flags & MS_RDONLY)) {
				2083	journal_t *journal = EXT3_SB(sb)->s_journal;
				2084
				2085	/* Now we set up the journal barrier. */
				2086	journal_lock_updates(journal);
				2087	journal_flush(journal);
				2088
				2089	/* Journal blocked and flushed, clear needs_recovery flag. */
				2090	EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2091	ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
				2092	}
				2093	}
				2094
				2095	/*
				2096	* Called by LVM after the snapshot is done. We need to reset the RECOVER
				2097	* flag here, even though the filesystem is not technically dirty yet.
				2098	*/
				2099	static void ext3_unlockfs(struct super_block *sb)
				2100	{
				2101	if (!(sb->s_flags & MS_RDONLY)) {
				2102	lock_super(sb);
				2103	/* Reser the needs_recovery flag before the fs is unlocked. */
				2104	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2105	ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
				2106	unlock_super(sb);
				2107	journal_unlock_updates(EXT3_SB(sb)->s_journal);
				2108	}
				2109	}
				2110
				2111	static int ext3_remount (struct super_block * sb, int * flags, char * data)
				2112	{
				2113	struct ext3_super_block * es;
				2114	struct ext3_sb_info *sbi = EXT3_SB(sb);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2115	unsigned long n_blocks_count = 0;
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2116	unsigned long old_sb_flags;
				2117	struct ext3_mount_options old_opts;
				2118	int err;
				2119	#ifdef CONFIG_QUOTA
				2120	int i;
				2121	#endif
				2122
				2123	/* Store the original options */
				2124	old_sb_flags = sb->s_flags;
				2125	old_opts.s_mount_opt = sbi->s_mount_opt;
				2126	old_opts.s_resuid = sbi->s_resuid;
				2127	old_opts.s_resgid = sbi->s_resgid;
				2128	old_opts.s_commit_interval = sbi->s_commit_interval;
				2129	#ifdef CONFIG_QUOTA
				2130	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
				2131	for (i = 0; i < MAXQUOTAS; i++)
				2132	old_opts.s_qf_names[i] = sbi->s_qf_names[i];
				2133	#endif
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2134
				2135	/*
				2136	* Allow the "check" option to be passed as a remount option.
				2137	*/
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2138	if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) {
				2139	err = -EINVAL;
				2140	goto restore_opts;
				2141	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2142
				2143	if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
				2144	ext3_abort(sb, __FUNCTION__, "Abort forced by user");
				2145
				2146	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				2147	((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
				2148
				2149	es = sbi->s_es;
				2150
				2151	ext3_init_journal_params(sb, sbi->s_journal);
				2152
				2153	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) \|\|
				2154	n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2155	if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) {
				2156	err = -EROFS;
				2157	goto restore_opts;
				2158	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2159
				2160	if (*flags & MS_RDONLY) {
				2161	/*
				2162	* First of all, the unconditional stuff we have to do
				2163	* to disable replay of the journal when we next remount
				2164	*/
				2165	sb->s_flags \|= MS_RDONLY;
				2166
				2167	/*
				2168	* OK, test if we are remounting a valid rw partition
				2169	* readonly, and if so set the rdonly flag and then
				2170	* mark the partition as valid again.
				2171	*/
				2172	if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
				2173	(sbi->s_mount_state & EXT3_VALID_FS))
				2174	es->s_state = cpu_to_le16(sbi->s_mount_state);
				2175
				2176	ext3_mark_recovery_complete(sb, es);
				2177	} else {
				2178	__le32 ret;
				2179	if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
				2180	~EXT3_FEATURE_RO_COMPAT_SUPP))) {
				2181	printk(KERN_WARNING "EXT3-fs: %s: couldn't "
				2182	"remount RDWR because of unsupported "
				2183	"optional features (%x).\n",
				2184	sb->s_id, le32_to_cpu(ret));
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2185	err = -EROFS;
				2186	goto restore_opts;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2187	}
				2188	/*
				2189	* Mounting a RDONLY partition read-write, so reread
				2190	* and store the current valid flag. (It may have
				2191	* been changed by e2fsck since we originally mounted
				2192	* the partition.)
				2193	*/
				2194	ext3_clear_journal_err(sb, es);
				2195	sbi->s_mount_state = le16_to_cpu(es->s_state);
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2196	if ((ret = ext3_group_extend(sb, es, n_blocks_count))) {
				2197	err = ret;
				2198	goto restore_opts;
				2199	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2200	if (!ext3_setup_super (sb, es, 0))
				2201	sb->s_flags &= ~MS_RDONLY;
				2202	}
				2203	}
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2204	#ifdef CONFIG_QUOTA
				2205	/* Release old quota file names */
				2206	for (i = 0; i < MAXQUOTAS; i++)
				2207	if (old_opts.s_qf_names[i] &&
				2208	old_opts.s_qf_names[i] != sbi->s_qf_names[i])
				2209	kfree(old_opts.s_qf_names[i]);
				2210	#endif
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2211	return 0;
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2212	restore_opts:
				2213	sb->s_flags = old_sb_flags;
				2214	sbi->s_mount_opt = old_opts.s_mount_opt;
				2215	sbi->s_resuid = old_opts.s_resuid;
				2216	sbi->s_resgid = old_opts.s_resgid;
				2217	sbi->s_commit_interval = old_opts.s_commit_interval;
				2218	#ifdef CONFIG_QUOTA
				2219	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
				2220	for (i = 0; i < MAXQUOTAS; i++) {
				2221	if (sbi->s_qf_names[i] &&
				2222	old_opts.s_qf_names[i] != sbi->s_qf_names[i])
				2223	kfree(sbi->s_qf_names[i]);
				2224	sbi->s_qf_names[i] = old_opts.s_qf_names[i];
				2225	}
				2226	#endif
				2227	return err;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2228	}
				2229
				2230	static int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
				2231	{
				2232	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				2233	unsigned long overhead;
				2234	int i;
				2235
				2236	if (test_opt (sb, MINIX_DF))
				2237	overhead = 0;
				2238	else {
				2239	unsigned long ngroups;
				2240	ngroups = EXT3_SB(sb)->s_groups_count;
				2241	smp_rmb();
				2242
				2243	/*
				2244	* Compute the overhead (FS structures)
				2245	*/
				2246
				2247	/*
				2248	* All of the blocks before first_data_block are
				2249	* overhead
				2250	*/
				2251	overhead = le32_to_cpu(es->s_first_data_block);
				2252
				2253	/*
				2254	* Add the overhead attributed to the superblock and
				2255	* block group descriptors. If the sparse superblocks
				2256	* feature is turned on, then not all groups have this.
				2257	*/
				2258	for (i = 0; i < ngroups; i++) {
				2259	overhead += ext3_bg_has_super(sb, i) +
				2260	ext3_bg_num_gdb(sb, i);
				2261	cond_resched();
				2262	}
				2263
				2264	/*
				2265	* Every block group has an inode bitmap, a block
				2266	* bitmap, and an inode table.
				2267	*/
				2268	overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group));
				2269	}
				2270
				2271	buf->f_type = EXT3_SUPER_MAGIC;
				2272	buf->f_bsize = sb->s_blocksize;
				2273	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
				2274	buf->f_bfree = ext3_count_free_blocks (sb);
				2275	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
				2276	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
				2277	buf->f_bavail = 0;
				2278	buf->f_files = le32_to_cpu(es->s_inodes_count);
				2279	buf->f_ffree = ext3_count_free_inodes (sb);
				2280	buf->f_namelen = EXT3_NAME_LEN;
				2281	return 0;
				2282	}
				2283
				2284	/* Helper function for writing quotas on sync - we need to start transaction before quota file
				2285	* is locked for write. Otherwise there are possible deadlocks:
				2286	* Process 1                         Process 2
				2287	* ext3_create()                     quota_sync()
				2288	*   journal_start()                   write_dquot()
				2289	*   DQUOT_INIT()                        down(dqio_sem)
				2290	*     down(dqio_sem)                    journal_start()
				2291	*
				2292	*/
				2293
				2294	#ifdef CONFIG_QUOTA
				2295
				2296	static inline struct inode dquot_to_inode(struct dquot dquot)
				2297	{
				2298	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
				2299	}
				2300
				2301	static int ext3_dquot_initialize(struct inode *inode, int type)
				2302	{
				2303	handle_t *handle;
				2304	int ret, err;
				2305
				2306	/* We may create quota structure so we need to reserve enough blocks */
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2307	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2308	if (IS_ERR(handle))
				2309	return PTR_ERR(handle);
				2310	ret = dquot_initialize(inode, type);
				2311	err = ext3_journal_stop(handle);
				2312	if (!ret)
				2313	ret = err;
				2314	return ret;
				2315	}
				2316
				2317	static int ext3_dquot_drop(struct inode *inode)
				2318	{
				2319	handle_t *handle;
				2320	int ret, err;
				2321
				2322	/* We may delete quota structure so we need to reserve enough blocks */
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2323	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2324	if (IS_ERR(handle))
				2325	return PTR_ERR(handle);
				2326	ret = dquot_drop(inode);
				2327	err = ext3_journal_stop(handle);
				2328	if (!ret)
				2329	ret = err;
				2330	return ret;
				2331	}
				2332
				2333	static int ext3_write_dquot(struct dquot *dquot)
				2334	{
				2335	int ret, err;
				2336	handle_t *handle;
				2337	struct inode *inode;
				2338
				2339	inode = dquot_to_inode(dquot);
				2340	handle = ext3_journal_start(inode,
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2341	EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2342	if (IS_ERR(handle))
				2343	return PTR_ERR(handle);
				2344	ret = dquot_commit(dquot);
				2345	err = ext3_journal_stop(handle);
				2346	if (!ret)
				2347	ret = err;
				2348	return ret;
				2349	}
				2350
				2351	static int ext3_acquire_dquot(struct dquot *dquot)
				2352	{
				2353	int ret, err;
				2354	handle_t *handle;
				2355
				2356	handle = ext3_journal_start(dquot_to_inode(dquot),
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2357	EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2358	if (IS_ERR(handle))
				2359	return PTR_ERR(handle);
				2360	ret = dquot_acquire(dquot);
				2361	err = ext3_journal_stop(handle);
				2362	if (!ret)
				2363	ret = err;
				2364	return ret;
				2365	}
				2366
				2367	static int ext3_release_dquot(struct dquot *dquot)
				2368	{
				2369	int ret, err;
				2370	handle_t *handle;
				2371
				2372	handle = ext3_journal_start(dquot_to_inode(dquot),
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2373	EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2374	if (IS_ERR(handle))
				2375	return PTR_ERR(handle);
				2376	ret = dquot_release(dquot);
				2377	err = ext3_journal_stop(handle);
				2378	if (!ret)
				2379	ret = err;
				2380	return ret;
				2381	}
				2382
				2383	static int ext3_mark_dquot_dirty(struct dquot *dquot)
				2384	{
				2385	/* Are we journalling quotas? */
				2386	if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] \|\|
				2387	EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
				2388	dquot_mark_dquot_dirty(dquot);
				2389	return ext3_write_dquot(dquot);
				2390	} else {
				2391	return dquot_mark_dquot_dirty(dquot);
				2392	}
				2393	}
				2394
				2395	static int ext3_write_info(struct super_block *sb, int type)
				2396	{
				2397	int ret, err;
				2398	handle_t *handle;
				2399
				2400	/* Data block + inode block */
				2401	handle = ext3_journal_start(sb->s_root->d_inode, 2);
				2402	if (IS_ERR(handle))
				2403	return PTR_ERR(handle);
				2404	ret = dquot_commit_info(sb, type);
				2405	err = ext3_journal_stop(handle);
				2406	if (!ret)
				2407	ret = err;
				2408	return ret;
				2409	}
				2410
				2411	/*
				2412	* Turn on quotas during mount time - we need to find
				2413	* the quota file and such...
				2414	*/
				2415	static int ext3_quota_on_mount(struct super_block *sb, int type)
				2416	{
Christoph Hellwig	84de856	2005-06-23 00:09:16 -0700	[diff] [blame]	2417	return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
				2418	EXT3_SB(sb)->s_jquota_fmt, type);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2419	}
				2420
				2421	/*
				2422	* Standard function to be called on quota_on
				2423	*/
				2424	static int ext3_quota_on(struct super_block *sb, int type, int format_id,
				2425	char *path)
				2426	{
				2427	int err;
				2428	struct nameidata nd;
				2429
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2430	if (!test_opt(sb, QUOTA))
				2431	return -EINVAL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2432	/* Not journalling quota? */
				2433	if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
				2434	!EXT3_SB(sb)->s_qf_names[GRPQUOTA])
				2435	return vfs_quota_on(sb, type, format_id, path);
				2436	err = path_lookup(path, LOOKUP_FOLLOW, &nd);
				2437	if (err)
				2438	return err;
				2439	/* Quotafile not on the same filesystem? */
				2440	if (nd.mnt->mnt_sb != sb) {
				2441	path_release(&nd);
				2442	return -EXDEV;
				2443	}
				2444	/* Quotafile not of fs root? */
				2445	if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
				2446	printk(KERN_WARNING
				2447	"EXT3-fs: Quota file not on filesystem root. "
				2448	"Journalled quota will not work.\n");
				2449	path_release(&nd);
				2450	return vfs_quota_on(sb, type, format_id, path);
				2451	}
				2452
				2453	/* Read data from quotafile - avoid pagecache and such because we cannot afford
				2454	* acquiring the locks... As quota files are never truncated and quota code
				2455	* itself serializes the operations (and noone else should touch the files)
				2456	* we don't have to be afraid of races */
				2457	static ssize_t ext3_quota_read(struct super_block sb, int type, char data,
				2458	size_t len, loff_t off)
				2459	{
				2460	struct inode *inode = sb_dqopt(sb)->files[type];
				2461	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
				2462	int err = 0;
				2463	int offset = off & (sb->s_blocksize - 1);
				2464	int tocopy;
				2465	size_t toread;
				2466	struct buffer_head *bh;
				2467	loff_t i_size = i_size_read(inode);
				2468
				2469	if (off > i_size)
				2470	return 0;
				2471	if (off+len > i_size)
				2472	len = i_size-off;
				2473	toread = len;
				2474	while (toread > 0) {
				2475	tocopy = sb->s_blocksize - offset < toread ?
				2476	sb->s_blocksize - offset : toread;
				2477	bh = ext3_bread(NULL, inode, blk, 0, &err);
				2478	if (err)
				2479	return err;
				2480	if (!bh) /* A hole? */
				2481	memset(data, 0, tocopy);
				2482	else
				2483	memcpy(data, bh->b_data+offset, tocopy);
				2484	brelse(bh);
				2485	offset = 0;
				2486	toread -= tocopy;
				2487	data += tocopy;
				2488	blk++;
				2489	}
				2490	return len;
				2491	}
				2492
				2493	/* Write to quotafile (we know the transaction is already started and has
				2494	* enough credits) */
				2495	static ssize_t ext3_quota_write(struct super_block *sb, int type,
				2496	const char *data, size_t len, loff_t off)
				2497	{
				2498	struct inode *inode = sb_dqopt(sb)->files[type];
				2499	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
				2500	int err = 0;
				2501	int offset = off & (sb->s_blocksize - 1);
				2502	int tocopy;
				2503	int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
				2504	size_t towrite = len;
				2505	struct buffer_head *bh;
				2506	handle_t *handle = journal_current_handle();
				2507
				2508	down(&inode->i_sem);
				2509	while (towrite > 0) {
				2510	tocopy = sb->s_blocksize - offset < towrite ?
				2511	sb->s_blocksize - offset : towrite;
				2512	bh = ext3_bread(handle, inode, blk, 1, &err);
				2513	if (!bh)
				2514	goto out;
				2515	if (journal_quota) {
				2516	err = ext3_journal_get_write_access(handle, bh);
				2517	if (err) {
				2518	brelse(bh);
				2519	goto out;
				2520	}
				2521	}
				2522	lock_buffer(bh);
				2523	memcpy(bh->b_data+offset, data, tocopy);
				2524	flush_dcache_page(bh->b_page);
				2525	unlock_buffer(bh);
				2526	if (journal_quota)
				2527	err = ext3_journal_dirty_metadata(handle, bh);
				2528	else {
				2529	/* Always do at least ordered writes for quotas */
				2530	err = ext3_journal_dirty_data(handle, bh);
				2531	mark_buffer_dirty(bh);
				2532	}
				2533	brelse(bh);
				2534	if (err)
				2535	goto out;
				2536	offset = 0;
				2537	towrite -= tocopy;
				2538	data += tocopy;
				2539	blk++;
				2540	}
				2541	out:
				2542	if (len == towrite)
				2543	return err;
				2544	if (inode->i_size < off+len-towrite) {
				2545	i_size_write(inode, off+len-towrite);
				2546	EXT3_I(inode)->i_disksize = inode->i_size;
				2547	}
				2548	inode->i_version++;
				2549	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
				2550	ext3_mark_inode_dirty(handle, inode);
				2551	up(&inode->i_sem);
				2552	return len - towrite;
				2553	}
				2554
				2555	#endif
				2556
				2557	static struct super_block ext3_get_sb(struct file_system_type fs_type,
				2558	int flags, const char dev_name, void data)
				2559	{
				2560	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
				2561	}
				2562
				2563	static struct file_system_type ext3_fs_type = {
				2564	.owner = THIS_MODULE,
				2565	.name = "ext3",
				2566	.get_sb = ext3_get_sb,
				2567	.kill_sb = kill_block_super,
				2568	.fs_flags = FS_REQUIRES_DEV,
				2569	};
				2570
				2571	static int __init init_ext3_fs(void)
				2572	{
				2573	int err = init_ext3_xattr();
				2574	if (err)
				2575	return err;
				2576	err = init_inodecache();
				2577	if (err)
				2578	goto out1;
				2579	err = register_filesystem(&ext3_fs_type);
				2580	if (err)
				2581	goto out;
				2582	return 0;
				2583	out:
				2584	destroy_inodecache();
				2585	out1:
				2586	exit_ext3_xattr();
				2587	return err;
				2588	}
				2589
				2590	static void __exit exit_ext3_fs(void)
				2591	{
				2592	unregister_filesystem(&ext3_fs_type);
				2593	destroy_inodecache();
				2594	exit_ext3_xattr();
				2595	}
				2596
				/*
				 * Module metadata, followed by the hooks that run init_ext3_fs() and
				 * exit_ext3_fs() as the module's init and exit functions.
				 */
				2597	MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
				2598	MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
				2599	MODULE_LICENSE("GPL");
				2600	module_init(init_ext3_fs)
				2601	module_exit(exit_ext3_fs)