Blame - fs/ext3/super.c - kernel/msm-4.9

blob: 097383c11154dcfff13f9977f4bb106f396cbe3b [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/fs/ext3/super.c
				3	*
				4	* Copyright (C) 1992, 1993, 1994, 1995
				5	* Remy Card (card@masi.ibp.fr)
				6	* Laboratoire MASI - Institut Blaise Pascal
				7	* Universite Pierre et Marie Curie (Paris VI)
				8	*
				9	* from
				10	*
				11	* linux/fs/minix/inode.c
				12	*
				13	* Copyright (C) 1991, 1992 Linus Torvalds
				14	*
				15	* Big-endian to little-endian byte-swapping/bitmaps by
				16	* David S. Miller (davem@caip.rutgers.edu), 1995
				17	*/
				18
				19	#include <linux/config.h>
				20	#include <linux/module.h>
				21	#include <linux/string.h>
				22	#include <linux/fs.h>
				23	#include <linux/time.h>
				24	#include <linux/jbd.h>
				25	#include <linux/ext3_fs.h>
				26	#include <linux/ext3_jbd.h>
				27	#include <linux/slab.h>
				28	#include <linux/init.h>
				29	#include <linux/blkdev.h>
				30	#include <linux/parser.h>
				31	#include <linux/smp_lock.h>
				32	#include <linux/buffer_head.h>
				33	#include <linux/vfs.h>
				34	#include <linux/random.h>
				35	#include <linux/mount.h>
				36	#include <linux/namei.h>
				37	#include <linux/quotaops.h>
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	38	#include <linux/seq_file.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	39	#include <asm/uaccess.h>
				40	#include "xattr.h"
				41	#include "acl.h"
				42
				43	static int ext3_load_journal(struct super_block , struct ext3_super_block );
				44	static int ext3_create_journal(struct super_block , struct ext3_super_block ,
				45	int);
				46	static void ext3_commit_super (struct super_block * sb,
				47	struct ext3_super_block * es,
				48	int sync);
				49	static void ext3_mark_recovery_complete(struct super_block * sb,
				50	struct ext3_super_block * es);
				51	static void ext3_clear_journal_err(struct super_block * sb,
				52	struct ext3_super_block * es);
				53	static int ext3_sync_fs(struct super_block *sb, int wait);
				54	static const char ext3_decode_error(struct super_block sb, int errno,
				55	char nbuf[16]);
				56	static int ext3_remount (struct super_block * sb, int * flags, char * data);
				57	static int ext3_statfs (struct super_block * sb, struct kstatfs * buf);
				58	static void ext3_unlockfs(struct super_block *sb);
				59	static void ext3_write_super (struct super_block * sb);
				60	static void ext3_write_super_lockfs(struct super_block *sb);
				61
				62	/*
				63	* Wrappers for journal_start/end.
				64	*
				65	* The only special thing we need to do here is to make sure that all
				66	* journal_end calls result in the superblock being marked dirty, so
				67	* that sync() will call the filesystem's write_super callback if
				68	* appropriate.
				69	*/
				70	handle_t ext3_journal_start_sb(struct super_block sb, int nblocks)
				71	{
				72	journal_t *journal;
				73
				74	if (sb->s_flags & MS_RDONLY)
				75	return ERR_PTR(-EROFS);
				76
				77	/* Special case here: if the journal has aborted behind our
				78	* backs (eg. EIO in the commit thread), then we still need to
				79	* take the FS itself readonly cleanly. */
				80	journal = EXT3_SB(sb)->s_journal;
				81	if (is_journal_aborted(journal)) {
				82	ext3_abort(sb, __FUNCTION__,
				83	"Detected aborted journal");
				84	return ERR_PTR(-EROFS);
				85	}
				86
				87	return journal_start(journal, nblocks);
				88	}
				89
				90	/*
				91	* The only special thing we need to do here is to make sure that all
				92	* journal_stop calls result in the superblock being marked dirty, so
				93	* that sync() will call the filesystem's write_super callback if
				94	* appropriate.
				95	*/
				96	int __ext3_journal_stop(const char where, handle_t handle)
				97	{
				98	struct super_block *sb;
				99	int err;
				100	int rc;
				101
				102	sb = handle->h_transaction->t_journal->j_private;
				103	err = handle->h_err;
				104	rc = journal_stop(handle);
				105
				106	if (!err)
				107	err = rc;
				108	if (err)
				109	__ext3_std_error(sb, where, err);
				110	return err;
				111	}
				112
				113	void ext3_journal_abort_handle(const char caller, const char err_fn,
				114	struct buffer_head bh, handle_t handle, int err)
				115	{
				116	char nbuf[16];
				117	const char *errstr = ext3_decode_error(NULL, err, nbuf);
				118
				119	if (bh)
				120	BUFFER_TRACE(bh, "abort");
				121
				122	if (!handle->h_err)
				123	handle->h_err = err;
				124
				125	if (is_handle_aborted(handle))
				126	return;
				127
				128	printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
				129	caller, errstr, err_fn);
				130
				131	journal_abort_handle(handle);
				132	}
				133
				134	/* Deal with the reporting of failure conditions on a filesystem such as
				135	* inconsistencies detected or read IO failures.
				136	*
				137	* On ext2, we can store the error state of the filesystem in the
				138	* superblock. That is not possible on ext3, because we may have other
				139	* write ordering constraints on the superblock which prevent us from
				140	* writing it out straight away; and given that the journal is about to
				141	* be aborted, we can't rely on the current, or future, transactions to
				142	* write out the superblock safely.
				143	*
				144	* We'll just use the journal_abort() error code to record an error in
				145	* the journal instead. On recovery, the journal will compain about
				146	* that error until we've noted it down and cleared it.
				147	*/
				148
				149	static void ext3_handle_error(struct super_block *sb)
				150	{
				151	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				152
				153	EXT3_SB(sb)->s_mount_state \|= EXT3_ERROR_FS;
				154	es->s_state \|= cpu_to_le16(EXT3_ERROR_FS);
				155
				156	if (sb->s_flags & MS_RDONLY)
				157	return;
				158
				159	if (test_opt (sb, ERRORS_RO)) {
				160	printk (KERN_CRIT "Remounting filesystem read-only\n");
				161	sb->s_flags \|= MS_RDONLY;
				162	} else {
				163	journal_t *journal = EXT3_SB(sb)->s_journal;
				164
				165	EXT3_SB(sb)->s_mount_opt \|= EXT3_MOUNT_ABORT;
				166	if (journal)
				167	journal_abort(journal, -EIO);
				168	}
				169	if (test_opt(sb, ERRORS_PANIC))
				170	panic("EXT3-fs (device %s): panic forced after error\n",
				171	sb->s_id);
				172	ext3_commit_super(sb, es, 1);
				173	}
				174
				175	void ext3_error (struct super_block * sb, const char * function,
				176	const char * fmt, ...)
				177	{
				178	va_list args;
				179
				180	va_start(args, fmt);
				181	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
				182	vprintk(fmt, args);
				183	printk("\n");
				184	va_end(args);
				185
				186	ext3_handle_error(sb);
				187	}
				188
				189	static const char ext3_decode_error(struct super_block sb, int errno,
				190	char nbuf[16])
				191	{
				192	char *errstr = NULL;
				193
				194	switch (errno) {
				195	case -EIO:
				196	errstr = "IO failure";
				197	break;
				198	case -ENOMEM:
				199	errstr = "Out of memory";
				200	break;
				201	case -EROFS:
				202	if (!sb \|\| EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
				203	errstr = "Journal has aborted";
				204	else
				205	errstr = "Readonly filesystem";
				206	break;
				207	default:
				208	/* If the caller passed in an extra buffer for unknown
				209	* errors, textualise them now. Else we just return
				210	* NULL. */
				211	if (nbuf) {
				212	/* Check for truncated error codes... */
				213	if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				214	errstr = nbuf;
				215	}
				216	break;
				217	}
				218
				219	return errstr;
				220	}
				221
				222	/* __ext3_std_error decodes expected errors from journaling functions
				223	* automatically and invokes the appropriate error response. */
				224
				225	void __ext3_std_error (struct super_block * sb, const char * function,
				226	int errno)
				227	{
				228	char nbuf[16];
Stephen Tweedie	3012162	2005-05-18 11:47:17 -0400	[diff] [blame]	229	const char *errstr;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	230
Stephen Tweedie	3012162	2005-05-18 11:47:17 -0400	[diff] [blame]	231	/* Special case: if the error is EROFS, and we're not already
				232	* inside a transaction, then there's really no point in logging
				233	* an error. */
				234	if (errno == -EROFS && journal_current_handle() == NULL &&
				235	(sb->s_flags & MS_RDONLY))
				236	return;
				237
				238	errstr = ext3_decode_error(sb, errno, nbuf);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	239	printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
				240	sb->s_id, function, errstr);
				241
				242	ext3_handle_error(sb);
				243	}
				244
				245	/*
				246	* ext3_abort is a much stronger failure handler than ext3_error. The
				247	* abort function may be used to deal with unrecoverable failures such
				248	* as journal IO errors or ENOMEM at a critical moment in log management.
				249	*
				250	* We unconditionally force the filesystem into an ABORT\|READONLY state,
				251	* unless the error response on the fs has been set to panic in which
				252	* case we take the easy way out and panic immediately.
				253	*/
				254
				255	void ext3_abort (struct super_block * sb, const char * function,
				256	const char * fmt, ...)
				257	{
				258	va_list args;
				259
				260	printk (KERN_CRIT "ext3_abort called.\n");
				261
				262	va_start(args, fmt);
				263	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
				264	vprintk(fmt, args);
				265	printk("\n");
				266	va_end(args);
				267
				268	if (test_opt(sb, ERRORS_PANIC))
				269	panic("EXT3-fs panic from previous error\n");
				270
				271	if (sb->s_flags & MS_RDONLY)
				272	return;
				273
				274	printk(KERN_CRIT "Remounting filesystem read-only\n");
				275	EXT3_SB(sb)->s_mount_state \|= EXT3_ERROR_FS;
				276	sb->s_flags \|= MS_RDONLY;
				277	EXT3_SB(sb)->s_mount_opt \|= EXT3_MOUNT_ABORT;
				278	journal_abort(EXT3_SB(sb)->s_journal, -EIO);
				279	}
				280
				281	void ext3_warning (struct super_block * sb, const char * function,
				282	const char * fmt, ...)
				283	{
				284	va_list args;
				285
				286	va_start(args, fmt);
				287	printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ",
				288	sb->s_id, function);
				289	vprintk(fmt, args);
				290	printk("\n");
				291	va_end(args);
				292	}
				293
				294	void ext3_update_dynamic_rev(struct super_block *sb)
				295	{
				296	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				297
				298	if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
				299	return;
				300
				301	ext3_warning(sb, __FUNCTION__,
				302	"updating to rev %d because of new feature flag, "
				303	"running e2fsck is recommended",
				304	EXT3_DYNAMIC_REV);
				305
				306	es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
				307	es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
				308	es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
				309	/* leave es->s_feature_compat flags alone /
				310	/* es->s_uuid will be set by e2fsck if empty */
				311
				312	/*
				313	* The rest of the superblock fields should be zero, and if not it
				314	* means they are likely already in use, so leave them alone. We
				315	* can leave it up to e2fsck to clean up any inconsistencies there.
				316	*/
				317	}
				318
				319	/*
				320	* Open the external journal device
				321	*/
				322	static struct block_device *ext3_blkdev_get(dev_t dev)
				323	{
				324	struct block_device *bdev;
				325	char b[BDEVNAME_SIZE];
				326
				327	bdev = open_by_devnum(dev, FMODE_READ\|FMODE_WRITE);
				328	if (IS_ERR(bdev))
				329	goto fail;
				330	return bdev;
				331
				332	fail:
				333	printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n",
				334	__bdevname(dev, b), PTR_ERR(bdev));
				335	return NULL;
				336	}
				337
				338	/*
				339	* Release the journal device
				340	*/
				341	static int ext3_blkdev_put(struct block_device *bdev)
				342	{
				343	bd_release(bdev);
				344	return blkdev_put(bdev);
				345	}
				346
				347	static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
				348	{
				349	struct block_device *bdev;
				350	int ret = -ENODEV;
				351
				352	bdev = sbi->journal_bdev;
				353	if (bdev) {
				354	ret = ext3_blkdev_put(bdev);
				355	sbi->journal_bdev = NULL;
				356	}
				357	return ret;
				358	}
				359
				360	static inline struct inode orphan_list_entry(struct list_head l)
				361	{
				362	return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
				363	}
				364
				365	static void dump_orphan_list(struct super_block sb, struct ext3_sb_info sbi)
				366	{
				367	struct list_head *l;
				368
				369	printk(KERN_ERR "sb orphan head is %d\n",
				370	le32_to_cpu(sbi->s_es->s_last_orphan));
				371
				372	printk(KERN_ERR "sb_info orphan list:\n");
				373	list_for_each(l, &sbi->s_orphan) {
				374	struct inode *inode = orphan_list_entry(l);
				375	printk(KERN_ERR " "
				376	"inode %s:%ld at %p: mode %o, nlink %d, next %d\n",
				377	inode->i_sb->s_id, inode->i_ino, inode,
				378	inode->i_mode, inode->i_nlink,
				379	NEXT_ORPHAN(inode));
				380	}
				381	}
				382
				383	static void ext3_put_super (struct super_block * sb)
				384	{
				385	struct ext3_sb_info *sbi = EXT3_SB(sb);
				386	struct ext3_super_block *es = sbi->s_es;
				387	int i;
				388
				389	ext3_xattr_put_super(sb);
				390	journal_destroy(sbi->s_journal);
				391	if (!(sb->s_flags & MS_RDONLY)) {
				392	EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				393	es->s_state = cpu_to_le16(sbi->s_mount_state);
				394	BUFFER_TRACE(sbi->s_sbh, "marking dirty");
				395	mark_buffer_dirty(sbi->s_sbh);
				396	ext3_commit_super(sb, es, 1);
				397	}
				398
				399	for (i = 0; i < sbi->s_gdb_count; i++)
				400	brelse(sbi->s_group_desc[i]);
				401	kfree(sbi->s_group_desc);
				402	percpu_counter_destroy(&sbi->s_freeblocks_counter);
				403	percpu_counter_destroy(&sbi->s_freeinodes_counter);
				404	percpu_counter_destroy(&sbi->s_dirs_counter);
				405	brelse(sbi->s_sbh);
				406	#ifdef CONFIG_QUOTA
				407	for (i = 0; i < MAXQUOTAS; i++)
				408	kfree(sbi->s_qf_names[i]);
				409	#endif
				410
				411	/* Debugging code just in case the in-memory inode orphan list
				412	* isn't empty. The on-disk one can be non-empty if we've
				413	* detected an error and taken the fs readonly, but the
				414	* in-memory list had better be clean by this point. */
				415	if (!list_empty(&sbi->s_orphan))
				416	dump_orphan_list(sb, sbi);
				417	J_ASSERT(list_empty(&sbi->s_orphan));
				418
				419	invalidate_bdev(sb->s_bdev, 0);
				420	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
				421	/*
				422	* Invalidate the journal device's buffers. We don't want them
				423	* floating about in memory - the physical journal device may
				424	* hotswapped, and it breaks the `ro-after' testing code.
				425	*/
				426	sync_blockdev(sbi->journal_bdev);
				427	invalidate_bdev(sbi->journal_bdev, 0);
				428	ext3_blkdev_remove(sbi);
				429	}
				430	sb->s_fs_info = NULL;
				431	kfree(sbi);
				432	return;
				433	}
				434
				435	static kmem_cache_t *ext3_inode_cachep;
				436
				437	/*
				438	* Called inside transaction, so use GFP_NOFS
				439	*/
				440	static struct inode ext3_alloc_inode(struct super_block sb)
				441	{
				442	struct ext3_inode_info *ei;
				443
				444	ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS);
				445	if (!ei)
				446	return NULL;
				447	#ifdef CONFIG_EXT3_FS_POSIX_ACL
				448	ei->i_acl = EXT3_ACL_NOT_CACHED;
				449	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
				450	#endif
				451	ei->i_block_alloc_info = NULL;
				452	ei->vfs_inode.i_version = 1;
				453	return &ei->vfs_inode;
				454	}
				455
				456	static void ext3_destroy_inode(struct inode *inode)
				457	{
				458	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
				459	}
				460
				461	static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
				462	{
				463	struct ext3_inode_info ei = (struct ext3_inode_info ) foo;
				464
				465	if ((flags & (SLAB_CTOR_VERIFY\|SLAB_CTOR_CONSTRUCTOR)) ==
				466	SLAB_CTOR_CONSTRUCTOR) {
				467	INIT_LIST_HEAD(&ei->i_orphan);
				468	#ifdef CONFIG_EXT3_FS_XATTR
				469	init_rwsem(&ei->xattr_sem);
				470	#endif
				471	init_MUTEX(&ei->truncate_sem);
				472	inode_init_once(&ei->vfs_inode);
				473	}
				474	}
				475
				476	static int init_inodecache(void)
				477	{
				478	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
				479	sizeof(struct ext3_inode_info),
				480	0, SLAB_RECLAIM_ACCOUNT,
				481	init_once, NULL);
				482	if (ext3_inode_cachep == NULL)
				483	return -ENOMEM;
				484	return 0;
				485	}
				486
				487	static void destroy_inodecache(void)
				488	{
				489	if (kmem_cache_destroy(ext3_inode_cachep))
				490	printk(KERN_INFO "ext3_inode_cache: not all structures were freed\n");
				491	}
				492
				493	static void ext3_clear_inode(struct inode *inode)
				494	{
				495	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
				496	#ifdef CONFIG_EXT3_FS_POSIX_ACL
				497	if (EXT3_I(inode)->i_acl &&
				498	EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
				499	posix_acl_release(EXT3_I(inode)->i_acl);
				500	EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
				501	}
				502	if (EXT3_I(inode)->i_default_acl &&
				503	EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
				504	posix_acl_release(EXT3_I(inode)->i_default_acl);
				505	EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
				506	}
				507	#endif
				508	ext3_discard_reservation(inode);
				509	EXT3_I(inode)->i_block_alloc_info = NULL;
				510	kfree(rsv);
				511	}
				512
/*
 * Append the quota related mount options of @sb to @seq.
 *
 * Emits ",jqfmt=", ",usrjquota=", ",grpjquota=", ",usrquota" and
 * ",grpquota" for whichever of them is set.  Compiles to an empty
 * function when CONFIG_QUOTA is not defined.
 */
static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext3_sb_info *sbi = EXT3_SB(sb);

	if (sbi->s_jquota_fmt)
		seq_printf(seq, ",jqfmt=%s",
		(sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");

	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

	if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA)
		seq_puts(seq, ",usrquota");

	if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)
		seq_puts(seq, ",grpquota");
#endif
}
				535
				536	static int ext3_show_options(struct seq_file seq, struct vfsmount vfs)
				537	{
				538	struct super_block *sb = vfs->mnt_sb;
				539
				540	if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
				541	seq_puts(seq, ",data=journal");
				542	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
				543	seq_puts(seq, ",data=ordered");
				544	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
				545	seq_puts(seq, ",data=writeback");
				546
				547	ext3_show_quota_options(seq, sb);
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	548
				549	return 0;
				550	}
				551
				552	#ifdef CONFIG_QUOTA
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	553	#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
				554	#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
				555
				556	static int ext3_dquot_initialize(struct inode *inode, int type);
				557	static int ext3_dquot_drop(struct inode *inode);
				558	static int ext3_write_dquot(struct dquot *dquot);
				559	static int ext3_acquire_dquot(struct dquot *dquot);
				560	static int ext3_release_dquot(struct dquot *dquot);
				561	static int ext3_mark_dquot_dirty(struct dquot *dquot);
				562	static int ext3_write_info(struct super_block *sb, int type);
				563	static int ext3_quota_on(struct super_block sb, int type, int format_id, char path);
				564	static int ext3_quota_on_mount(struct super_block *sb, int type);
				565	static ssize_t ext3_quota_read(struct super_block sb, int type, char data,
				566	size_t len, loff_t off);
				567	static ssize_t ext3_quota_write(struct super_block *sb, int type,
				568	const char *data, size_t len, loff_t off);
				569
				570	static struct dquot_operations ext3_quota_operations = {
				571	.initialize = ext3_dquot_initialize,
				572	.drop = ext3_dquot_drop,
				573	.alloc_space = dquot_alloc_space,
				574	.alloc_inode = dquot_alloc_inode,
				575	.free_space = dquot_free_space,
				576	.free_inode = dquot_free_inode,
				577	.transfer = dquot_transfer,
				578	.write_dquot = ext3_write_dquot,
				579	.acquire_dquot = ext3_acquire_dquot,
				580	.release_dquot = ext3_release_dquot,
				581	.mark_dirty = ext3_mark_dquot_dirty,
				582	.write_info = ext3_write_info
				583	};
				584
				585	static struct quotactl_ops ext3_qctl_operations = {
				586	.quota_on = ext3_quota_on,
				587	.quota_off = vfs_quota_off,
				588	.quota_sync = vfs_quota_sync,
				589	.get_info = vfs_get_dqinfo,
				590	.set_info = vfs_set_dqinfo,
				591	.get_dqblk = vfs_get_dqblk,
				592	.set_dqblk = vfs_set_dqblk
				593	};
				594	#endif
				595
				596	static struct super_operations ext3_sops = {
				597	.alloc_inode = ext3_alloc_inode,
				598	.destroy_inode = ext3_destroy_inode,
				599	.read_inode = ext3_read_inode,
				600	.write_inode = ext3_write_inode,
				601	.dirty_inode = ext3_dirty_inode,
				602	.delete_inode = ext3_delete_inode,
				603	.put_super = ext3_put_super,
				604	.write_super = ext3_write_super,
				605	.sync_fs = ext3_sync_fs,
				606	.write_super_lockfs = ext3_write_super_lockfs,
				607	.unlockfs = ext3_unlockfs,
				608	.statfs = ext3_statfs,
				609	.remount_fs = ext3_remount,
				610	.clear_inode = ext3_clear_inode,
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	611	.show_options = ext3_show_options,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	612	#ifdef CONFIG_QUOTA
				613	.quota_read = ext3_quota_read,
				614	.quota_write = ext3_quota_write,
				615	#endif
				616	};
				617
				618	struct dentry ext3_get_parent(struct dentry child);
				619	static struct export_operations ext3_export_ops = {
				620	.get_parent = ext3_get_parent,
				621	};
				622
				623	enum {
				624	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
				625	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
				626	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
				627	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
				628	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
				629	Opt_commit, Opt_journal_update, Opt_journal_inum,
				630	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
				631	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	632	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	633	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
				634	Opt_grpquota
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	635	};
				636
				637	static match_table_t tokens = {
				638	{Opt_bsd_df, "bsddf"},
				639	{Opt_minix_df, "minixdf"},
				640	{Opt_grpid, "grpid"},
				641	{Opt_grpid, "bsdgroups"},
				642	{Opt_nogrpid, "nogrpid"},
				643	{Opt_nogrpid, "sysvgroups"},
				644	{Opt_resgid, "resgid=%u"},
				645	{Opt_resuid, "resuid=%u"},
				646	{Opt_sb, "sb=%u"},
				647	{Opt_err_cont, "errors=continue"},
				648	{Opt_err_panic, "errors=panic"},
				649	{Opt_err_ro, "errors=remount-ro"},
				650	{Opt_nouid32, "nouid32"},
				651	{Opt_nocheck, "nocheck"},
				652	{Opt_nocheck, "check=none"},
				653	{Opt_check, "check"},
				654	{Opt_debug, "debug"},
				655	{Opt_oldalloc, "oldalloc"},
				656	{Opt_orlov, "orlov"},
				657	{Opt_user_xattr, "user_xattr"},
				658	{Opt_nouser_xattr, "nouser_xattr"},
				659	{Opt_acl, "acl"},
				660	{Opt_noacl, "noacl"},
				661	{Opt_reservation, "reservation"},
				662	{Opt_noreservation, "noreservation"},
				663	{Opt_noload, "noload"},
				664	{Opt_nobh, "nobh"},
				665	{Opt_commit, "commit=%u"},
				666	{Opt_journal_update, "journal=update"},
				667	{Opt_journal_inum, "journal=%u"},
				668	{Opt_abort, "abort"},
				669	{Opt_data_journal, "data=journal"},
				670	{Opt_data_ordered, "data=ordered"},
				671	{Opt_data_writeback, "data=writeback"},
				672	{Opt_offusrjquota, "usrjquota="},
				673	{Opt_usrjquota, "usrjquota=%s"},
				674	{Opt_offgrpjquota, "grpjquota="},
				675	{Opt_grpjquota, "grpjquota=%s"},
				676	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
				677	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	678	{Opt_grpquota, "grpquota"},
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	679	{Opt_noquota, "noquota"},
				680	{Opt_quota, "quota"},
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	681	{Opt_usrquota, "usrquota"},
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	682	{Opt_barrier, "barrier=%u"},
				683	{Opt_err, NULL},
				684	{Opt_resize, "resize"},
				685	};
				686
				687	static unsigned long get_sb_block(void **data)
				688	{
				689	unsigned long sb_block;
				690	char options = (char ) *data;
				691
				692	if (!options \|\| strncmp(options, "sb=", 3) != 0)
				693	return 1; /* Default location */
				694	options += 3;
				695	sb_block = simple_strtoul(options, &options, 0);
				696	if (options && options != ',') {
				697	printk("EXT3-fs: Invalid sb specification: %s\n",
				698	(char ) data);
				699	return 1;
				700	}
				701	if (*options == ',')
				702	options++;
				703	data = (void ) options;
				704	return sb_block;
				705	}
				706
				707	static int parse_options (char * options, struct super_block *sb,
				708	unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
				709	{
				710	struct ext3_sb_info *sbi = EXT3_SB(sb);
				711	char * p;
				712	substring_t args[MAX_OPT_ARGS];
				713	int data_opt = 0;
				714	int option;
				715	#ifdef CONFIG_QUOTA
				716	int qtype;
				717	char *qname;
				718	#endif
				719
				720	if (!options)
				721	return 1;
				722
				723	while ((p = strsep (&options, ",")) != NULL) {
				724	int token;
				725	if (!*p)
				726	continue;
				727
				728	token = match_token(p, tokens, args);
				729	switch (token) {
				730	case Opt_bsd_df:
				731	clear_opt (sbi->s_mount_opt, MINIX_DF);
				732	break;
				733	case Opt_minix_df:
				734	set_opt (sbi->s_mount_opt, MINIX_DF);
				735	break;
				736	case Opt_grpid:
				737	set_opt (sbi->s_mount_opt, GRPID);
				738	break;
				739	case Opt_nogrpid:
				740	clear_opt (sbi->s_mount_opt, GRPID);
				741	break;
				742	case Opt_resuid:
				743	if (match_int(&args[0], &option))
				744	return 0;
				745	sbi->s_resuid = option;
				746	break;
				747	case Opt_resgid:
				748	if (match_int(&args[0], &option))
				749	return 0;
				750	sbi->s_resgid = option;
				751	break;
				752	case Opt_sb:
				753	/* handled by get_sb_block() instead of here */
				754	/* sb_block = match_int(&args[0]); /
				755	break;
				756	case Opt_err_panic:
				757	clear_opt (sbi->s_mount_opt, ERRORS_CONT);
				758	clear_opt (sbi->s_mount_opt, ERRORS_RO);
				759	set_opt (sbi->s_mount_opt, ERRORS_PANIC);
				760	break;
				761	case Opt_err_ro:
				762	clear_opt (sbi->s_mount_opt, ERRORS_CONT);
				763	clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
				764	set_opt (sbi->s_mount_opt, ERRORS_RO);
				765	break;
				766	case Opt_err_cont:
				767	clear_opt (sbi->s_mount_opt, ERRORS_RO);
				768	clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
				769	set_opt (sbi->s_mount_opt, ERRORS_CONT);
				770	break;
				771	case Opt_nouid32:
				772	set_opt (sbi->s_mount_opt, NO_UID32);
				773	break;
				774	case Opt_check:
				775	#ifdef CONFIG_EXT3_CHECK
				776	set_opt (sbi->s_mount_opt, CHECK);
				777	#else
				778	printk(KERN_ERR
				779	"EXT3 Check option not supported\n");
				780	#endif
				781	break;
				782	case Opt_nocheck:
				783	clear_opt (sbi->s_mount_opt, CHECK);
				784	break;
				785	case Opt_debug:
				786	set_opt (sbi->s_mount_opt, DEBUG);
				787	break;
				788	case Opt_oldalloc:
				789	set_opt (sbi->s_mount_opt, OLDALLOC);
				790	break;
				791	case Opt_orlov:
				792	clear_opt (sbi->s_mount_opt, OLDALLOC);
				793	break;
				794	#ifdef CONFIG_EXT3_FS_XATTR
				795	case Opt_user_xattr:
				796	set_opt (sbi->s_mount_opt, XATTR_USER);
				797	break;
				798	case Opt_nouser_xattr:
				799	clear_opt (sbi->s_mount_opt, XATTR_USER);
				800	break;
				801	#else
				802	case Opt_user_xattr:
				803	case Opt_nouser_xattr:
				804	printk("EXT3 (no)user_xattr options not supported\n");
				805	break;
				806	#endif
				807	#ifdef CONFIG_EXT3_FS_POSIX_ACL
				808	case Opt_acl:
				809	set_opt(sbi->s_mount_opt, POSIX_ACL);
				810	break;
				811	case Opt_noacl:
				812	clear_opt(sbi->s_mount_opt, POSIX_ACL);
				813	break;
				814	#else
				815	case Opt_acl:
				816	case Opt_noacl:
				817	printk("EXT3 (no)acl options not supported\n");
				818	break;
				819	#endif
				820	case Opt_reservation:
				821	set_opt(sbi->s_mount_opt, RESERVATION);
				822	break;
				823	case Opt_noreservation:
				824	clear_opt(sbi->s_mount_opt, RESERVATION);
				825	break;
				826	case Opt_journal_update:
				827	/* @@@ FIXME */
				828	/* Eventually we will want to be able to create
				829	a journal file here. For now, only allow the
				830	user to specify an existing inode to be the
				831	journal file. */
				832	if (is_remount) {
				833	printk(KERN_ERR "EXT3-fs: cannot specify "
				834	"journal on remount\n");
				835	return 0;
				836	}
				837	set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
				838	break;
				839	case Opt_journal_inum:
				840	if (is_remount) {
				841	printk(KERN_ERR "EXT3-fs: cannot specify "
				842	"journal on remount\n");
				843	return 0;
				844	}
				845	if (match_int(&args[0], &option))
				846	return 0;
				847	*inum = option;
				848	break;
				849	case Opt_noload:
				850	set_opt (sbi->s_mount_opt, NOLOAD);
				851	break;
				852	case Opt_commit:
				853	if (match_int(&args[0], &option))
				854	return 0;
				855	if (option < 0)
				856	return 0;
				857	if (option == 0)
				858	option = JBD_DEFAULT_MAX_COMMIT_AGE;
				859	sbi->s_commit_interval = HZ * option;
				860	break;
				861	case Opt_data_journal:
				862	data_opt = EXT3_MOUNT_JOURNAL_DATA;
				863	goto datacheck;
				864	case Opt_data_ordered:
				865	data_opt = EXT3_MOUNT_ORDERED_DATA;
				866	goto datacheck;
				867	case Opt_data_writeback:
				868	data_opt = EXT3_MOUNT_WRITEBACK_DATA;
				869	datacheck:
				870	if (is_remount) {
				871	if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
				872	!= data_opt) {
				873	printk(KERN_ERR
				874	"EXT3-fs: cannot change data "
				875	"mode on remount\n");
				876	return 0;
				877	}
				878	} else {
				879	sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
				880	sbi->s_mount_opt \|= data_opt;
				881	}
				882	break;
				883	#ifdef CONFIG_QUOTA
				884	case Opt_usrjquota:
				885	qtype = USRQUOTA;
				886	goto set_qf_name;
				887	case Opt_grpjquota:
				888	qtype = GRPQUOTA;
				889	set_qf_name:
				890	if (sb_any_quota_enabled(sb)) {
				891	printk(KERN_ERR
				892	"EXT3-fs: Cannot change journalled "
				893	"quota options when quota turned on.\n");
				894	return 0;
				895	}
				896	qname = match_strdup(&args[0]);
				897	if (!qname) {
				898	printk(KERN_ERR
				899	"EXT3-fs: not enough memory for "
				900	"storing quotafile name.\n");
				901	return 0;
				902	}
				903	if (sbi->s_qf_names[qtype] &&
				904	strcmp(sbi->s_qf_names[qtype], qname)) {
				905	printk(KERN_ERR
				906	"EXT3-fs: %s quota file already "
				907	"specified.\n", QTYPE2NAME(qtype));
				908	kfree(qname);
				909	return 0;
				910	}
				911	sbi->s_qf_names[qtype] = qname;
				912	if (strchr(sbi->s_qf_names[qtype], '/')) {
				913	printk(KERN_ERR
				914	"EXT3-fs: quotafile must be on "
				915	"filesystem root.\n");
				916	kfree(sbi->s_qf_names[qtype]);
				917	sbi->s_qf_names[qtype] = NULL;
				918	return 0;
				919	}
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	920	set_opt(sbi->s_mount_opt, QUOTA);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	921	break;
				922	case Opt_offusrjquota:
				923	qtype = USRQUOTA;
				924	goto clear_qf_name;
				925	case Opt_offgrpjquota:
				926	qtype = GRPQUOTA;
				927	clear_qf_name:
				928	if (sb_any_quota_enabled(sb)) {
				929	printk(KERN_ERR "EXT3-fs: Cannot change "
				930	"journalled quota options when "
				931	"quota turned on.\n");
				932	return 0;
				933	}
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	934	/*
				935	* The space will be released later when all options
				936	* are confirmed to be correct
				937	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	938	sbi->s_qf_names[qtype] = NULL;
				939	break;
				940	case Opt_jqfmt_vfsold:
				941	sbi->s_jquota_fmt = QFMT_VFS_OLD;
				942	break;
				943	case Opt_jqfmt_vfsv0:
				944	sbi->s_jquota_fmt = QFMT_VFS_V0;
				945	break;
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	946	case Opt_quota:
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	947	case Opt_usrquota:
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	948	set_opt(sbi->s_mount_opt, QUOTA);
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	949	set_opt(sbi->s_mount_opt, USRQUOTA);
				950	break;
				951	case Opt_grpquota:
				952	set_opt(sbi->s_mount_opt, QUOTA);
				953	set_opt(sbi->s_mount_opt, GRPQUOTA);
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	954	break;
				955	case Opt_noquota:
				956	if (sb_any_quota_enabled(sb)) {
				957	printk(KERN_ERR "EXT3-fs: Cannot change quota "
				958	"options when quota turned on.\n");
				959	return 0;
				960	}
				961	clear_opt(sbi->s_mount_opt, QUOTA);
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	962	clear_opt(sbi->s_mount_opt, USRQUOTA);
				963	clear_opt(sbi->s_mount_opt, GRPQUOTA);
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	964	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	965	#else
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	966	case Opt_quota:
				967	case Opt_usrquota:
				968	case Opt_grpquota:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	969	case Opt_usrjquota:
				970	case Opt_grpjquota:
				971	case Opt_offusrjquota:
				972	case Opt_offgrpjquota:
				973	case Opt_jqfmt_vfsold:
				974	case Opt_jqfmt_vfsv0:
				975	printk(KERN_ERR
				976	"EXT3-fs: journalled quota options not "
				977	"supported.\n");
				978	break;
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	979	case Opt_noquota:
				980	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	981	#endif
				982	case Opt_abort:
				983	set_opt(sbi->s_mount_opt, ABORT);
				984	break;
				985	case Opt_barrier:
				986	if (match_int(&args[0], &option))
				987	return 0;
				988	if (option)
				989	set_opt(sbi->s_mount_opt, BARRIER);
				990	else
				991	clear_opt(sbi->s_mount_opt, BARRIER);
				992	break;
				993	case Opt_ignore:
				994	break;
				995	case Opt_resize:
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	996	if (!is_remount) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	997	printk("EXT3-fs: resize option only available "
				998	"for remount\n");
				999	return 0;
				1000	}
KAMBAROV, ZAUR	c7f1721	2005-06-28 20:45:11 -0700	[diff] [blame]	1001	if (match_int(&args[0], &option) != 0)
				1002	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1003	*n_blocks_count = option;
				1004	break;
				1005	case Opt_nobh:
				1006	set_opt(sbi->s_mount_opt, NOBH);
				1007	break;
				1008	default:
				1009	printk (KERN_ERR
				1010	"EXT3-fs: Unrecognized mount option \"%s\" "
				1011	"or missing value\n", p);
				1012	return 0;
				1013	}
				1014	}
				1015	#ifdef CONFIG_QUOTA
Mark Bellon	8fc2751	2005-09-06 15:16:54 -0700	[diff] [blame]	1016	if (sbi->s_qf_names[USRQUOTA] \|\| sbi->s_qf_names[GRPQUOTA]) {
				1017	if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
				1018	sbi->s_qf_names[USRQUOTA])
				1019	clear_opt(sbi->s_mount_opt, USRQUOTA);
				1020
				1021	if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
				1022	sbi->s_qf_names[GRPQUOTA])
				1023	clear_opt(sbi->s_mount_opt, GRPQUOTA);
				1024
				1025	if ((sbi->s_qf_names[USRQUOTA] &&
				1026	(sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) \|\|
				1027	(sbi->s_qf_names[GRPQUOTA] &&
				1028	(sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
				1029	printk(KERN_ERR "EXT3-fs: old and new quota "
				1030	"format mixing.\n");
				1031	return 0;
				1032	}
				1033
				1034	if (!sbi->s_jquota_fmt) {
				1035	printk(KERN_ERR "EXT3-fs: journalled quota format "
				1036	"not specified.\n");
				1037	return 0;
				1038	}
				1039	} else {
				1040	if (sbi->s_jquota_fmt) {
				1041	printk(KERN_ERR "EXT3-fs: journalled quota format "
				1042	"specified with no journalling "
				1043	"enabled.\n");
				1044	return 0;
				1045	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1046	}
				1047	#endif
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1048	return 1;
				1049	}
				1050
				1051	static int ext3_setup_super(struct super_block sb, struct ext3_super_block es,
				1052	int read_only)
				1053	{
				1054	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1055	int res = 0;
				1056
				1057	if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
				1058	printk (KERN_ERR "EXT3-fs warning: revision level too high, "
				1059	"forcing read-only mode\n");
				1060	res = MS_RDONLY;
				1061	}
				1062	if (read_only)
				1063	return res;
				1064	if (!(sbi->s_mount_state & EXT3_VALID_FS))
				1065	printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, "
				1066	"running e2fsck is recommended\n");
				1067	else if ((sbi->s_mount_state & EXT3_ERROR_FS))
				1068	printk (KERN_WARNING
				1069	"EXT3-fs warning: mounting fs with errors, "
				1070	"running e2fsck is recommended\n");
				1071	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
				1072	le16_to_cpu(es->s_mnt_count) >=
				1073	(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
				1074	printk (KERN_WARNING
				1075	"EXT3-fs warning: maximal mount count reached, "
				1076	"running e2fsck is recommended\n");
				1077	else if (le32_to_cpu(es->s_checkinterval) &&
				1078	(le32_to_cpu(es->s_lastcheck) +
				1079	le32_to_cpu(es->s_checkinterval) <= get_seconds()))
				1080	printk (KERN_WARNING
				1081	"EXT3-fs warning: checktime reached, "
				1082	"running e2fsck is recommended\n");
				1083	#if 0
				1084	/* @@@ We _will_ want to clear the valid bit if we find
				1085	inconsistencies, to force a fsck at reboot. But for
				1086	a plain journaled filesystem we can keep it set as
				1087	valid forever! :) */
				1088	es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS);
				1089	#endif
				1090	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
				1091	es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
				1092	es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
				1093	es->s_mtime = cpu_to_le32(get_seconds());
				1094	ext3_update_dynamic_rev(sb);
				1095	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				1096
				1097	ext3_commit_super(sb, es, 1);
				1098	if (test_opt(sb, DEBUG))
				1099	printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, "
				1100	"bpg=%lu, ipg=%lu, mo=%04lx]\n",
				1101	sb->s_blocksize,
				1102	sbi->s_groups_count,
				1103	EXT3_BLOCKS_PER_GROUP(sb),
				1104	EXT3_INODES_PER_GROUP(sb),
				1105	sbi->s_mount_opt);
				1106
				1107	printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);
				1108	if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
				1109	char b[BDEVNAME_SIZE];
				1110
				1111	printk("external journal on %s\n",
				1112	bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
				1113	} else {
				1114	printk("internal journal\n");
				1115	}
				1116	#ifdef CONFIG_EXT3_CHECK
				1117	if (test_opt (sb, CHECK)) {
				1118	ext3_check_blocks_bitmap (sb);
				1119	ext3_check_inodes_bitmap (sb);
				1120	}
				1121	#endif
				1122	return res;
				1123	}
				1124
				1125	/* Called at mount-time, super-block is locked */
				1126	static int ext3_check_descriptors (struct super_block * sb)
				1127	{
				1128	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1129	unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
				1130	struct ext3_group_desc * gdp = NULL;
				1131	int desc_block = 0;
				1132	int i;
				1133
				1134	ext3_debug ("Checking group descriptors");
				1135
				1136	for (i = 0; i < sbi->s_groups_count; i++)
				1137	{
				1138	if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0)
				1139	gdp = (struct ext3_group_desc *)
				1140	sbi->s_group_desc[desc_block++]->b_data;
				1141	if (le32_to_cpu(gdp->bg_block_bitmap) < block \|\|
				1142	le32_to_cpu(gdp->bg_block_bitmap) >=
				1143	block + EXT3_BLOCKS_PER_GROUP(sb))
				1144	{
				1145	ext3_error (sb, "ext3_check_descriptors",
				1146	"Block bitmap for group %d"
				1147	" not in group (block %lu)!",
				1148	i, (unsigned long)
				1149	le32_to_cpu(gdp->bg_block_bitmap));
				1150	return 0;
				1151	}
				1152	if (le32_to_cpu(gdp->bg_inode_bitmap) < block \|\|
				1153	le32_to_cpu(gdp->bg_inode_bitmap) >=
				1154	block + EXT3_BLOCKS_PER_GROUP(sb))
				1155	{
				1156	ext3_error (sb, "ext3_check_descriptors",
				1157	"Inode bitmap for group %d"
				1158	" not in group (block %lu)!",
				1159	i, (unsigned long)
				1160	le32_to_cpu(gdp->bg_inode_bitmap));
				1161	return 0;
				1162	}
				1163	if (le32_to_cpu(gdp->bg_inode_table) < block \|\|
				1164	le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >=
				1165	block + EXT3_BLOCKS_PER_GROUP(sb))
				1166	{
				1167	ext3_error (sb, "ext3_check_descriptors",
				1168	"Inode table for group %d"
				1169	" not in group (block %lu)!",
				1170	i, (unsigned long)
				1171	le32_to_cpu(gdp->bg_inode_table));
				1172	return 0;
				1173	}
				1174	block += EXT3_BLOCKS_PER_GROUP(sb);
				1175	gdp++;
				1176	}
				1177
				1178	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
				1179	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
				1180	return 1;
				1181	}
				1182
				1183
				1184	/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
				1185	* the superblock) which were deleted from all directories, but held open by
				1186	* a process at the time of a crash. We walk the list and try to delete these
				1187	* inodes at recovery time (only with a read-write filesystem).
				1188	*
				1189	* In order to keep the orphan inode chain consistent during traversal (in
				1190	* case of crash during recovery), we link each inode into the superblock
				1191	* orphan list_head and handle it the same way as an inode deletion during
				1192	* normal operation (which journals the operations for us).
				1193	*
				1194	* We only do an iget() and an iput() on each inode, which is very safe if we
				1195	* accidentally point at an in-use or already deleted inode. The worst that
				1196	* can happen in this case is that we get a "bit already cleared" message from
				1197	* ext3_free_inode(). The only reason we would point at a wrong inode is if
				1198	* e2fsck was run on this filesystem, and it must have already done the orphan
				1199	* inode cleanup for us, so we can safely abort without any further action.
				1200	*/
				1201	static void ext3_orphan_cleanup (struct super_block * sb,
				1202	struct ext3_super_block * es)
				1203	{
				1204	unsigned int s_flags = sb->s_flags;
				1205	int nr_orphans = 0, nr_truncates = 0;
				1206	#ifdef CONFIG_QUOTA
				1207	int i;
				1208	#endif
				1209	if (!es->s_last_orphan) {
				1210	jbd_debug(4, "no orphan inodes to clean up\n");
				1211	return;
				1212	}
				1213
				1214	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
				1215	if (es->s_last_orphan)
				1216	jbd_debug(1, "Errors on filesystem, "
				1217	"clearing orphan list.\n");
				1218	es->s_last_orphan = 0;
				1219	jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
				1220	return;
				1221	}
				1222
				1223	if (s_flags & MS_RDONLY) {
				1224	printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n",
				1225	sb->s_id);
				1226	sb->s_flags &= ~MS_RDONLY;
				1227	}
				1228	#ifdef CONFIG_QUOTA
				1229	/* Needed for iput() to work correctly and not trash data */
				1230	sb->s_flags \|= MS_ACTIVE;
				1231	/* Turn on quotas so that they are updated correctly */
				1232	for (i = 0; i < MAXQUOTAS; i++) {
				1233	if (EXT3_SB(sb)->s_qf_names[i]) {
				1234	int ret = ext3_quota_on_mount(sb, i);
				1235	if (ret < 0)
				1236	printk(KERN_ERR
				1237	"EXT3-fs: Cannot turn on journalled "
				1238	"quota: error %d\n", ret);
				1239	}
				1240	}
				1241	#endif
				1242
				1243	while (es->s_last_orphan) {
				1244	struct inode *inode;
				1245
				1246	if (!(inode =
				1247	ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
				1248	es->s_last_orphan = 0;
				1249	break;
				1250	}
				1251
				1252	list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
				1253	DQUOT_INIT(inode);
				1254	if (inode->i_nlink) {
				1255	printk(KERN_DEBUG
				1256	"%s: truncating inode %ld to %Ld bytes\n",
				1257	__FUNCTION__, inode->i_ino, inode->i_size);
				1258	jbd_debug(2, "truncating inode %ld to %Ld bytes\n",
				1259	inode->i_ino, inode->i_size);
				1260	ext3_truncate(inode);
				1261	nr_truncates++;
				1262	} else {
				1263	printk(KERN_DEBUG
				1264	"%s: deleting unreferenced inode %ld\n",
				1265	__FUNCTION__, inode->i_ino);
				1266	jbd_debug(2, "deleting unreferenced inode %ld\n",
				1267	inode->i_ino);
				1268	nr_orphans++;
				1269	}
				1270	iput(inode); /* The delete magic happens here! */
				1271	}
				1272
				1273	#define PLURAL(x) (x), ((x)==1) ? "" : "s"
				1274
				1275	if (nr_orphans)
				1276	printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n",
				1277	sb->s_id, PLURAL(nr_orphans));
				1278	if (nr_truncates)
				1279	printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n",
				1280	sb->s_id, PLURAL(nr_truncates));
				1281	#ifdef CONFIG_QUOTA
				1282	/* Turn quotas off */
				1283	for (i = 0; i < MAXQUOTAS; i++) {
				1284	if (sb_dqopt(sb)->files[i])
				1285	vfs_quota_off(sb, i);
				1286	}
				1287	#endif
				1288	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
				1289	}
				1290
/*
 * log2(n) expands to ffz(~(n)).
 * NOTE(review): ffz() is not defined in this file; presumably it returns
 * the index of the first zero bit, which would make this the index of the
 * lowest set bit of n and an exact log2 only for powers of two - confirm.
 */
#define log2(n) ffz(~(n))
				1292
				1293	/*
				1294	* Maximal file size. There is a direct, and {,double-,triple-}indirect
				1295	* block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
				1296	* We need to be 1 filesystem block less than the 2^32 sector limit.
				1297	*/
				1298	static loff_t ext3_max_size(int bits)
				1299	{
				1300	loff_t res = EXT3_NDIR_BLOCKS;
				1301	/* This constant is calculated to be the largest file size for a
				1302	* dense, 4k-blocksize file such that the total number of
				1303	* sectors in the file, including data and all indirect blocks,
				1304	* does not exceed 2^32. */
				1305	const loff_t upper_limit = 0x1ff7fffd000LL;
				1306
				1307	res += 1LL << (bits-2);
				1308	res += 1LL << (2*(bits-2));
				1309	res += 1LL << (3*(bits-2));
				1310	res <<= bits;
				1311	if (res > upper_limit)
				1312	res = upper_limit;
				1313	return res;
				1314	}
				1315
				1316	static unsigned long descriptor_loc(struct super_block *sb,
				1317	unsigned long logic_sb_block,
				1318	int nr)
				1319	{
				1320	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1321	unsigned long bg, first_data_block, first_meta_bg;
				1322	int has_super = 0;
				1323
				1324	first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block);
				1325	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
				1326
				1327	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) \|\|
				1328	nr < first_meta_bg)
				1329	return (logic_sb_block + nr + 1);
				1330	bg = sbi->s_desc_per_block * nr;
				1331	if (ext3_bg_has_super(sb, bg))
				1332	has_super = 1;
				1333	return (first_data_block + has_super + (bg * sbi->s_blocks_per_group));
				1334	}
				1335
				1336
				1337	static int ext3_fill_super (struct super_block sb, void data, int silent)
				1338	{
				1339	struct buffer_head * bh;
				1340	struct ext3_super_block *es = NULL;
				1341	struct ext3_sb_info *sbi;
				1342	unsigned long block;
				1343	unsigned long sb_block = get_sb_block(&data);
				1344	unsigned long logic_sb_block;
				1345	unsigned long offset = 0;
				1346	unsigned long journal_inum = 0;
				1347	unsigned long def_mount_opts;
				1348	struct inode *root;
				1349	int blocksize;
				1350	int hblock;
				1351	int db_count;
				1352	int i;
				1353	int needs_recovery;
				1354	__le32 features;
				1355
				1356	sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
				1357	if (!sbi)
				1358	return -ENOMEM;
				1359	sb->s_fs_info = sbi;
				1360	memset(sbi, 0, sizeof(*sbi));
				1361	sbi->s_mount_opt = 0;
				1362	sbi->s_resuid = EXT3_DEF_RESUID;
				1363	sbi->s_resgid = EXT3_DEF_RESGID;
				1364
				1365	unlock_kernel();
				1366
				1367	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
				1368	if (!blocksize) {
				1369	printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
				1370	goto out_fail;
				1371	}
				1372
				1373	/*
				1374	* The ext3 superblock will not be buffer aligned for other than 1kB
				1375	* block sizes. We need to calculate the offset from buffer start.
				1376	*/
				1377	if (blocksize != EXT3_MIN_BLOCK_SIZE) {
				1378	logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
				1379	offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
				1380	} else {
				1381	logic_sb_block = sb_block;
				1382	}
				1383
				1384	if (!(bh = sb_bread(sb, logic_sb_block))) {
				1385	printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
				1386	goto out_fail;
				1387	}
				1388	/*
				1389	* Note: s_es must be initialized as soon as possible because
				1390	* some ext3 macro-instructions depend on its value
				1391	*/
				1392	es = (struct ext3_super_block ) (((char )bh->b_data) + offset);
				1393	sbi->s_es = es;
				1394	sb->s_magic = le16_to_cpu(es->s_magic);
				1395	if (sb->s_magic != EXT3_SUPER_MAGIC)
				1396	goto cantfind_ext3;
				1397
				1398	/* Set defaults before we parse the mount options */
				1399	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
				1400	if (def_mount_opts & EXT3_DEFM_DEBUG)
				1401	set_opt(sbi->s_mount_opt, DEBUG);
				1402	if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
				1403	set_opt(sbi->s_mount_opt, GRPID);
				1404	if (def_mount_opts & EXT3_DEFM_UID16)
				1405	set_opt(sbi->s_mount_opt, NO_UID32);
				1406	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
				1407	set_opt(sbi->s_mount_opt, XATTR_USER);
				1408	if (def_mount_opts & EXT3_DEFM_ACL)
				1409	set_opt(sbi->s_mount_opt, POSIX_ACL);
				1410	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
				1411	sbi->s_mount_opt \|= EXT3_MOUNT_JOURNAL_DATA;
				1412	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
				1413	sbi->s_mount_opt \|= EXT3_MOUNT_ORDERED_DATA;
				1414	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
				1415	sbi->s_mount_opt \|= EXT3_MOUNT_WRITEBACK_DATA;
				1416
				1417	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
				1418	set_opt(sbi->s_mount_opt, ERRORS_PANIC);
				1419	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
				1420	set_opt(sbi->s_mount_opt, ERRORS_RO);
				1421
				1422	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
				1423	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
				1424
				1425	set_opt(sbi->s_mount_opt, RESERVATION);
				1426
				1427	if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
				1428	goto failed_mount;
				1429
				1430	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				1431	((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
				1432
				1433	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
				1434	(EXT3_HAS_COMPAT_FEATURE(sb, ~0U) \|\|
				1435	EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) \|\|
				1436	EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
				1437	printk(KERN_WARNING
				1438	"EXT3-fs warning: feature flags set on rev 0 fs, "
				1439	"running e2fsck is recommended\n");
				1440	/*
				1441	* Check feature flags regardless of the revision level, since we
				1442	* previously didn't change the revision level when setting the flags,
				1443	* so there is a chance incompat flags are set on a rev 0 filesystem.
				1444	*/
				1445	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
				1446	if (features) {
				1447	printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
				1448	"unsupported optional features (%x).\n",
				1449	sb->s_id, le32_to_cpu(features));
				1450	goto failed_mount;
				1451	}
				1452	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
				1453	if (!(sb->s_flags & MS_RDONLY) && features) {
				1454	printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
				1455	"unsupported optional features (%x).\n",
				1456	sb->s_id, le32_to_cpu(features));
				1457	goto failed_mount;
				1458	}
				1459	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
				1460
				1461	if (blocksize < EXT3_MIN_BLOCK_SIZE \|\|
				1462	blocksize > EXT3_MAX_BLOCK_SIZE) {
				1463	printk(KERN_ERR
				1464	"EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
				1465	blocksize, sb->s_id);
				1466	goto failed_mount;
				1467	}
				1468
				1469	hblock = bdev_hardsect_size(sb->s_bdev);
				1470	if (sb->s_blocksize != blocksize) {
				1471	/*
				1472	* Make sure the blocksize for the filesystem is larger
				1473	* than the hardware sectorsize for the machine.
				1474	*/
				1475	if (blocksize < hblock) {
				1476	printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
				1477	"device blocksize %d.\n", blocksize, hblock);
				1478	goto failed_mount;
				1479	}
				1480
				1481	brelse (bh);
				1482	sb_set_blocksize(sb, blocksize);
				1483	logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
				1484	offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
				1485	bh = sb_bread(sb, logic_sb_block);
				1486	if (!bh) {
				1487	printk(KERN_ERR
				1488	"EXT3-fs: Can't read superblock on 2nd try.\n");
				1489	goto failed_mount;
				1490	}
				1491	es = (struct ext3_super_block )(((char )bh->b_data) + offset);
				1492	sbi->s_es = es;
				1493	if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
				1494	printk (KERN_ERR
				1495	"EXT3-fs: Magic mismatch, very weird !\n");
				1496	goto failed_mount;
				1497	}
				1498	}
				1499
				1500	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
				1501
				1502	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
				1503	sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
				1504	sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
				1505	} else {
				1506	sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
				1507	sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
				1508	if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) \|\|
				1509	(sbi->s_inode_size & (sbi->s_inode_size - 1)) \|\|
				1510	(sbi->s_inode_size > blocksize)) {
				1511	printk (KERN_ERR
				1512	"EXT3-fs: unsupported inode size: %d\n",
				1513	sbi->s_inode_size);
				1514	goto failed_mount;
				1515	}
				1516	}
				1517	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
				1518	le32_to_cpu(es->s_log_frag_size);
				1519	if (blocksize != sbi->s_frag_size) {
				1520	printk(KERN_ERR
				1521	"EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
				1522	sbi->s_frag_size, blocksize);
				1523	goto failed_mount;
				1524	}
				1525	sbi->s_frags_per_block = 1;
				1526	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
				1527	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
				1528	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
				1529	if (EXT3_INODE_SIZE(sb) == 0)
				1530	goto cantfind_ext3;
				1531	sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
				1532	if (sbi->s_inodes_per_block == 0)
				1533	goto cantfind_ext3;
				1534	sbi->s_itb_per_group = sbi->s_inodes_per_group /
				1535	sbi->s_inodes_per_block;
				1536	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
				1537	sbi->s_sbh = bh;
				1538	sbi->s_mount_state = le16_to_cpu(es->s_state);
				1539	sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
				1540	sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
				1541	for (i=0; i < 4; i++)
				1542	sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
				1543	sbi->s_def_hash_version = es->s_def_hash_version;
				1544
				1545	if (sbi->s_blocks_per_group > blocksize * 8) {
				1546	printk (KERN_ERR
				1547	"EXT3-fs: #blocks per group too big: %lu\n",
				1548	sbi->s_blocks_per_group);
				1549	goto failed_mount;
				1550	}
				1551	if (sbi->s_frags_per_group > blocksize * 8) {
				1552	printk (KERN_ERR
				1553	"EXT3-fs: #fragments per group too big: %lu\n",
				1554	sbi->s_frags_per_group);
				1555	goto failed_mount;
				1556	}
				1557	if (sbi->s_inodes_per_group > blocksize * 8) {
				1558	printk (KERN_ERR
				1559	"EXT3-fs: #inodes per group too big: %lu\n",
				1560	sbi->s_inodes_per_group);
				1561	goto failed_mount;
				1562	}
				1563
				1564	if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
				1565	goto cantfind_ext3;
				1566	sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
				1567	le32_to_cpu(es->s_first_data_block) +
				1568	EXT3_BLOCKS_PER_GROUP(sb) - 1) /
				1569	EXT3_BLOCKS_PER_GROUP(sb);
				1570	db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
				1571	EXT3_DESC_PER_BLOCK(sb);
				1572	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
				1573	GFP_KERNEL);
				1574	if (sbi->s_group_desc == NULL) {
				1575	printk (KERN_ERR "EXT3-fs: not enough memory\n");
				1576	goto failed_mount;
				1577	}
				1578
				1579	percpu_counter_init(&sbi->s_freeblocks_counter);
				1580	percpu_counter_init(&sbi->s_freeinodes_counter);
				1581	percpu_counter_init(&sbi->s_dirs_counter);
				1582	bgl_lock_init(&sbi->s_blockgroup_lock);
				1583
				1584	for (i = 0; i < db_count; i++) {
				1585	block = descriptor_loc(sb, logic_sb_block, i);
				1586	sbi->s_group_desc[i] = sb_bread(sb, block);
				1587	if (!sbi->s_group_desc[i]) {
				1588	printk (KERN_ERR "EXT3-fs: "
				1589	"can't read group descriptor %d\n", i);
				1590	db_count = i;
				1591	goto failed_mount2;
				1592	}
				1593	}
				1594	if (!ext3_check_descriptors (sb)) {
				1595	printk (KERN_ERR "EXT3-fs: group descriptors corrupted !\n");
				1596	goto failed_mount2;
				1597	}
				1598	sbi->s_gdb_count = db_count;
				1599	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
				1600	spin_lock_init(&sbi->s_next_gen_lock);
				1601	/* per fileystem reservation list head & lock */
				1602	spin_lock_init(&sbi->s_rsv_window_lock);
				1603	sbi->s_rsv_window_root = RB_ROOT;
				1604	/* Add a single, static dummy reservation to the start of the
				1605	* reservation window list --- it gives us a placeholder for
				1606	* append-at-start-of-list which makes the allocation logic
				1607	* _much_ simpler. */
				1608	sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
				1609	sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
				1610	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
				1611	sbi->s_rsv_window_head.rsv_goal_size = 0;
				1612	ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
				1613
				1614	/*
				1615	* set up enough so that it can read an inode
				1616	*/
				1617	sb->s_op = &ext3_sops;
				1618	sb->s_export_op = &ext3_export_ops;
				1619	sb->s_xattr = ext3_xattr_handlers;
				1620	#ifdef CONFIG_QUOTA
				1621	sb->s_qcop = &ext3_qctl_operations;
				1622	sb->dq_op = &ext3_quota_operations;
				1623	#endif
				1624	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
				1625
				1626	sb->s_root = NULL;
				1627
				1628	needs_recovery = (es->s_last_orphan != 0 \|\|
				1629	EXT3_HAS_INCOMPAT_FEATURE(sb,
				1630	EXT3_FEATURE_INCOMPAT_RECOVER));
				1631
				1632	/*
				1633	* The first inode we look at is the journal inode. Don't try
				1634	* root first: it may be modified in the journal!
				1635	*/
				1636	if (!test_opt(sb, NOLOAD) &&
				1637	EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
				1638	if (ext3_load_journal(sb, es))
				1639	goto failed_mount2;
				1640	} else if (journal_inum) {
				1641	if (ext3_create_journal(sb, es, journal_inum))
				1642	goto failed_mount2;
				1643	} else {
				1644	if (!silent)
				1645	printk (KERN_ERR
				1646	"ext3: No journal on filesystem on %s\n",
				1647	sb->s_id);
				1648	goto failed_mount2;
				1649	}
				1650
				1651	/* We have now updated the journal if required, so we can
				1652	* validate the data journaling mode. */
				1653	switch (test_opt(sb, DATA_FLAGS)) {
				1654	case 0:
				1655	/* No mode set, assume a default based on the journal
				1656	capabilities: ORDERED_DATA if the journal can
				1657	cope, else JOURNAL_DATA */
				1658	if (journal_check_available_features
				1659	(sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
				1660	set_opt(sbi->s_mount_opt, ORDERED_DATA);
				1661	else
				1662	set_opt(sbi->s_mount_opt, JOURNAL_DATA);
				1663	break;
				1664
				1665	case EXT3_MOUNT_ORDERED_DATA:
				1666	case EXT3_MOUNT_WRITEBACK_DATA:
				1667	if (!journal_check_available_features
				1668	(sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
				1669	printk(KERN_ERR "EXT3-fs: Journal does not support "
				1670	"requested data journaling mode\n");
				1671	goto failed_mount3;
				1672	}
				1673	default:
				1674	break;
				1675	}
				1676
				1677	if (test_opt(sb, NOBH)) {
				1678	if (sb->s_blocksize_bits != PAGE_CACHE_SHIFT) {
				1679	printk(KERN_WARNING "EXT3-fs: Ignoring nobh option "
				1680	"since filesystem blocksize doesn't match "
				1681	"pagesize\n");
				1682	clear_opt(sbi->s_mount_opt, NOBH);
				1683	}
				1684	if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
				1685	printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
				1686	"its supported only with writeback mode\n");
				1687	clear_opt(sbi->s_mount_opt, NOBH);
				1688	}
				1689	}
				1690	/*
				1691	* The journal_load will have done any necessary log recovery,
				1692	* so we can safely mount the rest of the filesystem now.
				1693	*/
				1694
				1695	root = iget(sb, EXT3_ROOT_INO);
				1696	sb->s_root = d_alloc_root(root);
				1697	if (!sb->s_root) {
				1698	printk(KERN_ERR "EXT3-fs: get root inode failed\n");
				1699	iput(root);
				1700	goto failed_mount3;
				1701	}
				1702	if (!S_ISDIR(root->i_mode) \|\| !root->i_blocks \|\| !root->i_size) {
				1703	dput(sb->s_root);
				1704	sb->s_root = NULL;
				1705	printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
				1706	goto failed_mount3;
				1707	}
				1708
				1709	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
				1710	/*
				1711	* akpm: core read_super() calls in here with the superblock locked.
				1712	* That deadlocks, because orphan cleanup needs to lock the superblock
				1713	* in numerous places. Here we just pop the lock - it's relatively
				1714	* harmless, because we are now ready to accept write_super() requests,
				1715	* and aviro says that's the only reason for hanging onto the
				1716	* superblock lock.
				1717	*/
				1718	EXT3_SB(sb)->s_mount_state \|= EXT3_ORPHAN_FS;
				1719	ext3_orphan_cleanup(sb, es);
				1720	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
				1721	if (needs_recovery)
				1722	printk (KERN_INFO "EXT3-fs: recovery complete.\n");
				1723	ext3_mark_recovery_complete(sb, es);
				1724	printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
				1725	test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
				1726	test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
				1727	"writeback");
				1728
				1729	percpu_counter_mod(&sbi->s_freeblocks_counter,
				1730	ext3_count_free_blocks(sb));
				1731	percpu_counter_mod(&sbi->s_freeinodes_counter,
				1732	ext3_count_free_inodes(sb));
				1733	percpu_counter_mod(&sbi->s_dirs_counter,
				1734	ext3_count_dirs(sb));
				1735
				1736	lock_kernel();
				1737	return 0;
				1738
				1739	cantfind_ext3:
				1740	if (!silent)
				1741	printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
				1742	sb->s_id);
				1743	goto failed_mount;
				1744
				1745	failed_mount3:
				1746	journal_destroy(sbi->s_journal);
				1747	failed_mount2:
				1748	for (i = 0; i < db_count; i++)
				1749	brelse(sbi->s_group_desc[i]);
				1750	kfree(sbi->s_group_desc);
				1751	failed_mount:
				1752	#ifdef CONFIG_QUOTA
				1753	for (i = 0; i < MAXQUOTAS; i++)
				1754	kfree(sbi->s_qf_names[i]);
				1755	#endif
				1756	ext3_blkdev_remove(sbi);
				1757	brelse(bh);
				1758	out_fail:
				1759	sb->s_fs_info = NULL;
				1760	kfree(sbi);
				1761	lock_kernel();
				1762	return -EINVAL;
				1763	}
				1764
				1765	/*
				1766	* Setup any per-fs journal parameters now. We'll do this both on
				1767	* initial mount, once the journal has been initialised but before we've
				1768	* done any recovery; and again on any subsequent remount.
				1769	*/
				1770	static void ext3_init_journal_params(struct super_block sb, journal_t journal)
				1771	{
				1772	struct ext3_sb_info *sbi = EXT3_SB(sb);
				1773
				1774	if (sbi->s_commit_interval)
				1775	journal->j_commit_interval = sbi->s_commit_interval;
				1776	/* We could also set up an ext3-specific default for the commit
				1777	* interval here, but for now we'll just fall back to the jbd
				1778	* default. */
				1779
				1780	spin_lock(&journal->j_state_lock);
				1781	if (test_opt(sb, BARRIER))
				1782	journal->j_flags \|= JFS_BARRIER;
				1783	else
				1784	journal->j_flags &= ~JFS_BARRIER;
				1785	spin_unlock(&journal->j_state_lock);
				1786	}
				1787
				1788	static journal_t ext3_get_journal(struct super_block sb, int journal_inum)
				1789	{
				1790	struct inode *journal_inode;
				1791	journal_t *journal;
				1792
				1793	/* First, test for the existence of a valid inode on disk. Bad
				1794	* things happen if we iget() an unused inode, as the subsequent
				1795	* iput() will try to delete it. */
				1796
				1797	journal_inode = iget(sb, journal_inum);
				1798	if (!journal_inode) {
				1799	printk(KERN_ERR "EXT3-fs: no journal found.\n");
				1800	return NULL;
				1801	}
				1802	if (!journal_inode->i_nlink) {
				1803	make_bad_inode(journal_inode);
				1804	iput(journal_inode);
				1805	printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n");
				1806	return NULL;
				1807	}
				1808
				1809	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
				1810	journal_inode, journal_inode->i_size);
				1811	if (is_bad_inode(journal_inode) \|\| !S_ISREG(journal_inode->i_mode)) {
				1812	printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
				1813	iput(journal_inode);
				1814	return NULL;
				1815	}
				1816
				1817	journal = journal_init_inode(journal_inode);
				1818	if (!journal) {
				1819	printk(KERN_ERR "EXT3-fs: Could not load journal inode\n");
				1820	iput(journal_inode);
				1821	return NULL;
				1822	}
				1823	journal->j_private = sb;
				1824	ext3_init_journal_params(sb, journal);
				1825	return journal;
				1826	}
				1827
				1828	static journal_t ext3_get_dev_journal(struct super_block sb,
				1829	dev_t j_dev)
				1830	{
				1831	struct buffer_head * bh;
				1832	journal_t *journal;
				1833	int start;
				1834	int len;
				1835	int hblock, blocksize;
				1836	unsigned long sb_block;
				1837	unsigned long offset;
				1838	struct ext3_super_block * es;
				1839	struct block_device *bdev;
				1840
				1841	bdev = ext3_blkdev_get(j_dev);
				1842	if (bdev == NULL)
				1843	return NULL;
				1844
				1845	if (bd_claim(bdev, sb)) {
				1846	printk(KERN_ERR
				1847	"EXT3: failed to claim external journal device.\n");
				1848	blkdev_put(bdev);
				1849	return NULL;
				1850	}
				1851
				1852	blocksize = sb->s_blocksize;
				1853	hblock = bdev_hardsect_size(bdev);
				1854	if (blocksize < hblock) {
				1855	printk(KERN_ERR
				1856	"EXT3-fs: blocksize too small for journal device.\n");
				1857	goto out_bdev;
				1858	}
				1859
				1860	sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
				1861	offset = EXT3_MIN_BLOCK_SIZE % blocksize;
				1862	set_blocksize(bdev, blocksize);
				1863	if (!(bh = __bread(bdev, sb_block, blocksize))) {
				1864	printk(KERN_ERR "EXT3-fs: couldn't read superblock of "
				1865	"external journal\n");
				1866	goto out_bdev;
				1867	}
				1868
				1869	es = (struct ext3_super_block ) (((char )bh->b_data) + offset);
				1870	if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) \|\|
				1871	!(le32_to_cpu(es->s_feature_incompat) &
				1872	EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
				1873	printk(KERN_ERR "EXT3-fs: external journal has "
				1874	"bad superblock\n");
				1875	brelse(bh);
				1876	goto out_bdev;
				1877	}
				1878
				1879	if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
				1880	printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
				1881	brelse(bh);
				1882	goto out_bdev;
				1883	}
				1884
				1885	len = le32_to_cpu(es->s_blocks_count);
				1886	start = sb_block + 1;
				1887	brelse(bh); /* we're done with the superblock */
				1888
				1889	journal = journal_init_dev(bdev, sb->s_bdev,
				1890	start, len, blocksize);
				1891	if (!journal) {
				1892	printk(KERN_ERR "EXT3-fs: failed to create device journal\n");
				1893	goto out_bdev;
				1894	}
				1895	journal->j_private = sb;
				1896	ll_rw_block(READ, 1, &journal->j_sb_buffer);
				1897	wait_on_buffer(journal->j_sb_buffer);
				1898	if (!buffer_uptodate(journal->j_sb_buffer)) {
				1899	printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
				1900	goto out_journal;
				1901	}
				1902	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
				1903	printk(KERN_ERR "EXT3-fs: External journal has more than one "
				1904	"user (unsupported) - %d\n",
				1905	be32_to_cpu(journal->j_superblock->s_nr_users));
				1906	goto out_journal;
				1907	}
				1908	EXT3_SB(sb)->journal_bdev = bdev;
				1909	ext3_init_journal_params(sb, journal);
				1910	return journal;
				1911	out_journal:
				1912	journal_destroy(journal);
				1913	out_bdev:
				1914	ext3_blkdev_put(bdev);
				1915	return NULL;
				1916	}
				1917
				1918	static int ext3_load_journal(struct super_block * sb,
				1919	struct ext3_super_block * es)
				1920	{
				1921	journal_t *journal;
				1922	int journal_inum = le32_to_cpu(es->s_journal_inum);
				1923	dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
				1924	int err = 0;
				1925	int really_read_only;
				1926
				1927	really_read_only = bdev_read_only(sb->s_bdev);
				1928
				1929	/*
				1930	* Are we loading a blank journal or performing recovery after a
				1931	* crash? For recovery, we need to check in advance whether we
				1932	* can get read-write access to the device.
				1933	*/
				1934
				1935	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
				1936	if (sb->s_flags & MS_RDONLY) {
				1937	printk(KERN_INFO "EXT3-fs: INFO: recovery "
				1938	"required on readonly filesystem.\n");
				1939	if (really_read_only) {
				1940	printk(KERN_ERR "EXT3-fs: write access "
				1941	"unavailable, cannot proceed.\n");
				1942	return -EROFS;
				1943	}
				1944	printk (KERN_INFO "EXT3-fs: write access will "
				1945	"be enabled during recovery.\n");
				1946	}
				1947	}
				1948
				1949	if (journal_inum && journal_dev) {
				1950	printk(KERN_ERR "EXT3-fs: filesystem has both journal "
				1951	"and inode journals!\n");
				1952	return -EINVAL;
				1953	}
				1954
				1955	if (journal_inum) {
				1956	if (!(journal = ext3_get_journal(sb, journal_inum)))
				1957	return -EINVAL;
				1958	} else {
				1959	if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
				1960	return -EINVAL;
				1961	}
				1962
				1963	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
				1964	err = journal_update_format(journal);
				1965	if (err) {
				1966	printk(KERN_ERR "EXT3-fs: error updating journal.\n");
				1967	journal_destroy(journal);
				1968	return err;
				1969	}
				1970	}
				1971
				1972	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
				1973	err = journal_wipe(journal, !really_read_only);
				1974	if (!err)
				1975	err = journal_load(journal);
				1976
				1977	if (err) {
				1978	printk(KERN_ERR "EXT3-fs: error loading journal.\n");
				1979	journal_destroy(journal);
				1980	return err;
				1981	}
				1982
				1983	EXT3_SB(sb)->s_journal = journal;
				1984	ext3_clear_journal_err(sb, es);
				1985	return 0;
				1986	}
				1987
				1988	static int ext3_create_journal(struct super_block * sb,
				1989	struct ext3_super_block * es,
				1990	int journal_inum)
				1991	{
				1992	journal_t *journal;
				1993
				1994	if (sb->s_flags & MS_RDONLY) {
				1995	printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
				1996	"create journal.\n");
				1997	return -EROFS;
				1998	}
				1999
				2000	if (!(journal = ext3_get_journal(sb, journal_inum)))
				2001	return -EINVAL;
				2002
				2003	printk(KERN_INFO "EXT3-fs: creating new journal on inode %d\n",
				2004	journal_inum);
				2005
				2006	if (journal_create(journal)) {
				2007	printk(KERN_ERR "EXT3-fs: error creating journal.\n");
				2008	journal_destroy(journal);
				2009	return -EIO;
				2010	}
				2011
				2012	EXT3_SB(sb)->s_journal = journal;
				2013
				2014	ext3_update_dynamic_rev(sb);
				2015	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2016	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
				2017
				2018	es->s_journal_inum = cpu_to_le32(journal_inum);
				2019	sb->s_dirt = 1;
				2020
				2021	/* Make sure we flush the recovery flag to disk. */
				2022	ext3_commit_super(sb, es, 1);
				2023
				2024	return 0;
				2025	}
				2026
				2027	static void ext3_commit_super (struct super_block * sb,
				2028	struct ext3_super_block * es,
				2029	int sync)
				2030	{
				2031	struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
				2032
				2033	if (!sbh)
				2034	return;
				2035	es->s_wtime = cpu_to_le32(get_seconds());
				2036	es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
				2037	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
				2038	BUFFER_TRACE(sbh, "marking dirty");
				2039	mark_buffer_dirty(sbh);
				2040	if (sync)
				2041	sync_dirty_buffer(sbh);
				2042	}
				2043
				2044
				2045	/*
				2046	* Have we just finished recovery? If so, and if we are mounting (or
				2047	* remounting) the filesystem readonly, then we will end up with a
				2048	* consistent fs on disk. Record that fact.
				2049	*/
				2050	static void ext3_mark_recovery_complete(struct super_block * sb,
				2051	struct ext3_super_block * es)
				2052	{
				2053	journal_t *journal = EXT3_SB(sb)->s_journal;
				2054
				2055	journal_lock_updates(journal);
				2056	journal_flush(journal);
				2057	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
				2058	sb->s_flags & MS_RDONLY) {
				2059	EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2060	sb->s_dirt = 0;
				2061	ext3_commit_super(sb, es, 1);
				2062	}
				2063	journal_unlock_updates(journal);
				2064	}
				2065
				2066	/*
				2067	* If we are mounting (or read-write remounting) a filesystem whose journal
				2068	* has recorded an error from a previous lifetime, move that error to the
				2069	* main filesystem now.
				2070	*/
				2071	static void ext3_clear_journal_err(struct super_block * sb,
				2072	struct ext3_super_block * es)
				2073	{
				2074	journal_t *journal;
				2075	int j_errno;
				2076	const char *errstr;
				2077
				2078	journal = EXT3_SB(sb)->s_journal;
				2079
				2080	/*
				2081	* Now check for any error status which may have been recorded in the
				2082	* journal by a prior ext3_error() or ext3_abort()
				2083	*/
				2084
				2085	j_errno = journal_errno(journal);
				2086	if (j_errno) {
				2087	char nbuf[16];
				2088
				2089	errstr = ext3_decode_error(sb, j_errno, nbuf);
				2090	ext3_warning(sb, __FUNCTION__, "Filesystem error recorded "
				2091	"from previous mount: %s", errstr);
				2092	ext3_warning(sb, __FUNCTION__, "Marking fs in need of "
				2093	"filesystem check.");
				2094
				2095	EXT3_SB(sb)->s_mount_state \|= EXT3_ERROR_FS;
				2096	es->s_state \|= cpu_to_le16(EXT3_ERROR_FS);
				2097	ext3_commit_super (sb, es, 1);
				2098
				2099	journal_clear_err(journal);
				2100	}
				2101	}
				2102
				2103	/*
				2104	* Force the running and committing transactions to commit,
				2105	* and wait on the commit.
				2106	*/
				2107	int ext3_force_commit(struct super_block *sb)
				2108	{
				2109	journal_t *journal;
				2110	int ret;
				2111
				2112	if (sb->s_flags & MS_RDONLY)
				2113	return 0;
				2114
				2115	journal = EXT3_SB(sb)->s_journal;
				2116	sb->s_dirt = 0;
				2117	ret = ext3_journal_force_commit(journal);
				2118	return ret;
				2119	}
				2120
				2121	/*
				2122	* Ext3 always journals updates to the superblock itself, so we don't
				2123	* have to propagate any other updates to the superblock on disk at this
				2124	* point. Just start an async writeback to get the buffers on their way
				2125	* to the disk.
				2126	*
				2127	* This implicitly triggers the writebehind on sync().
				2128	*/
				2129
				2130	static void ext3_write_super (struct super_block * sb)
				2131	{
				2132	if (down_trylock(&sb->s_lock) == 0)
				2133	BUG();
				2134	sb->s_dirt = 0;
				2135	}
				2136
				2137	static int ext3_sync_fs(struct super_block *sb, int wait)
				2138	{
				2139	tid_t target;
				2140
				2141	sb->s_dirt = 0;
				2142	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
				2143	if (wait)
				2144	log_wait_commit(EXT3_SB(sb)->s_journal, target);
				2145	}
				2146	return 0;
				2147	}
				2148
				2149	/*
				2150	* LVM calls this function before a (read-only) snapshot is created. This
				2151	* gives us a chance to flush the journal completely and mark the fs clean.
				2152	*/
				2153	static void ext3_write_super_lockfs(struct super_block *sb)
				2154	{
				2155	sb->s_dirt = 0;
				2156
				2157	if (!(sb->s_flags & MS_RDONLY)) {
				2158	journal_t *journal = EXT3_SB(sb)->s_journal;
				2159
				2160	/* Now we set up the journal barrier. */
				2161	journal_lock_updates(journal);
				2162	journal_flush(journal);
				2163
				2164	/* Journal blocked and flushed, clear needs_recovery flag. */
				2165	EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2166	ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
				2167	}
				2168	}
				2169
				2170	/*
				2171	* Called by LVM after the snapshot is done. We need to reset the RECOVER
				2172	* flag here, even though the filesystem is not technically dirty yet.
				2173	*/
				2174	static void ext3_unlockfs(struct super_block *sb)
				2175	{
				2176	if (!(sb->s_flags & MS_RDONLY)) {
				2177	lock_super(sb);
				2178	/* Reser the needs_recovery flag before the fs is unlocked. */
				2179	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
				2180	ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
				2181	unlock_super(sb);
				2182	journal_unlock_updates(EXT3_SB(sb)->s_journal);
				2183	}
				2184	}
				2185
				2186	static int ext3_remount (struct super_block * sb, int * flags, char * data)
				2187	{
				2188	struct ext3_super_block * es;
				2189	struct ext3_sb_info *sbi = EXT3_SB(sb);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2190	unsigned long n_blocks_count = 0;
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2191	unsigned long old_sb_flags;
				2192	struct ext3_mount_options old_opts;
				2193	int err;
				2194	#ifdef CONFIG_QUOTA
				2195	int i;
				2196	#endif
				2197
				2198	/* Store the original options */
				2199	old_sb_flags = sb->s_flags;
				2200	old_opts.s_mount_opt = sbi->s_mount_opt;
				2201	old_opts.s_resuid = sbi->s_resuid;
				2202	old_opts.s_resgid = sbi->s_resgid;
				2203	old_opts.s_commit_interval = sbi->s_commit_interval;
				2204	#ifdef CONFIG_QUOTA
				2205	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
				2206	for (i = 0; i < MAXQUOTAS; i++)
				2207	old_opts.s_qf_names[i] = sbi->s_qf_names[i];
				2208	#endif
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2209
				2210	/*
				2211	* Allow the "check" option to be passed as a remount option.
				2212	*/
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2213	if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) {
				2214	err = -EINVAL;
				2215	goto restore_opts;
				2216	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2217
				2218	if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
				2219	ext3_abort(sb, __FUNCTION__, "Abort forced by user");
				2220
				2221	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) \|
				2222	((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
				2223
				2224	es = sbi->s_es;
				2225
				2226	ext3_init_journal_params(sb, sbi->s_journal);
				2227
				2228	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) \|\|
				2229	n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2230	if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) {
				2231	err = -EROFS;
				2232	goto restore_opts;
				2233	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2234
				2235	if (*flags & MS_RDONLY) {
				2236	/*
				2237	* First of all, the unconditional stuff we have to do
				2238	* to disable replay of the journal when we next remount
				2239	*/
				2240	sb->s_flags \|= MS_RDONLY;
				2241
				2242	/*
				2243	* OK, test if we are remounting a valid rw partition
				2244	* readonly, and if so set the rdonly flag and then
				2245	* mark the partition as valid again.
				2246	*/
				2247	if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
				2248	(sbi->s_mount_state & EXT3_VALID_FS))
				2249	es->s_state = cpu_to_le16(sbi->s_mount_state);
				2250
				2251	ext3_mark_recovery_complete(sb, es);
				2252	} else {
				2253	__le32 ret;
				2254	if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
				2255	~EXT3_FEATURE_RO_COMPAT_SUPP))) {
				2256	printk(KERN_WARNING "EXT3-fs: %s: couldn't "
				2257	"remount RDWR because of unsupported "
				2258	"optional features (%x).\n",
				2259	sb->s_id, le32_to_cpu(ret));
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2260	err = -EROFS;
				2261	goto restore_opts;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2262	}
				2263	/*
				2264	* Mounting a RDONLY partition read-write, so reread
				2265	* and store the current valid flag. (It may have
				2266	* been changed by e2fsck since we originally mounted
				2267	* the partition.)
				2268	*/
				2269	ext3_clear_journal_err(sb, es);
				2270	sbi->s_mount_state = le16_to_cpu(es->s_state);
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2271	if ((ret = ext3_group_extend(sb, es, n_blocks_count))) {
				2272	err = ret;
				2273	goto restore_opts;
				2274	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2275	if (!ext3_setup_super (sb, es, 0))
				2276	sb->s_flags &= ~MS_RDONLY;
				2277	}
				2278	}
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2279	#ifdef CONFIG_QUOTA
				2280	/* Release old quota file names */
				2281	for (i = 0; i < MAXQUOTAS; i++)
				2282	if (old_opts.s_qf_names[i] &&
				2283	old_opts.s_qf_names[i] != sbi->s_qf_names[i])
				2284	kfree(old_opts.s_qf_names[i]);
				2285	#endif
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2286	return 0;
Jan Kara	08c6a96	2005-07-12 13:58:28 -0700	[diff] [blame]	2287	restore_opts:
				2288	sb->s_flags = old_sb_flags;
				2289	sbi->s_mount_opt = old_opts.s_mount_opt;
				2290	sbi->s_resuid = old_opts.s_resuid;
				2291	sbi->s_resgid = old_opts.s_resgid;
				2292	sbi->s_commit_interval = old_opts.s_commit_interval;
				2293	#ifdef CONFIG_QUOTA
				2294	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
				2295	for (i = 0; i < MAXQUOTAS; i++) {
				2296	if (sbi->s_qf_names[i] &&
				2297	old_opts.s_qf_names[i] != sbi->s_qf_names[i])
				2298	kfree(sbi->s_qf_names[i]);
				2299	sbi->s_qf_names[i] = old_opts.s_qf_names[i];
				2300	}
				2301	#endif
				2302	return err;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2303	}
				2304
				2305	static int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
				2306	{
				2307	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				2308	unsigned long overhead;
				2309	int i;
				2310
				2311	if (test_opt (sb, MINIX_DF))
				2312	overhead = 0;
				2313	else {
				2314	unsigned long ngroups;
				2315	ngroups = EXT3_SB(sb)->s_groups_count;
				2316	smp_rmb();
				2317
				2318	/*
				2319	* Compute the overhead (FS structures)
				2320	*/
				2321
				2322	/*
				2323	* All of the blocks before first_data_block are
				2324	* overhead
				2325	*/
				2326	overhead = le32_to_cpu(es->s_first_data_block);
				2327
				2328	/*
				2329	* Add the overhead attributed to the superblock and
				2330	* block group descriptors. If the sparse superblocks
				2331	* feature is turned on, then not all groups have this.
				2332	*/
				2333	for (i = 0; i < ngroups; i++) {
				2334	overhead += ext3_bg_has_super(sb, i) +
				2335	ext3_bg_num_gdb(sb, i);
				2336	cond_resched();
				2337	}
				2338
				2339	/*
				2340	* Every block group has an inode bitmap, a block
				2341	* bitmap, and an inode table.
				2342	*/
				2343	overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group));
				2344	}
				2345
				2346	buf->f_type = EXT3_SUPER_MAGIC;
				2347	buf->f_bsize = sb->s_blocksize;
				2348	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
				2349	buf->f_bfree = ext3_count_free_blocks (sb);
				2350	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
				2351	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
				2352	buf->f_bavail = 0;
				2353	buf->f_files = le32_to_cpu(es->s_inodes_count);
				2354	buf->f_ffree = ext3_count_free_inodes (sb);
				2355	buf->f_namelen = EXT3_NAME_LEN;
				2356	return 0;
				2357	}
				2358
				2359	/* Helper function for writing quotas on sync - we need to start transaction before quota file
				2360	* is locked for write. Otherwise there are possible deadlocks:
				2361	* Process 1 Process 2
				2362	* ext3_create() quota_sync()
				2363	* journal_start() write_dquot()
				2364	* DQUOT_INIT() down(dqio_sem)
				2365	* down(dqio_sem) journal_start()
				2366	*
				2367	*/
				2368
				2369	#ifdef CONFIG_QUOTA
				2370
				2371	static inline struct inode dquot_to_inode(struct dquot dquot)
				2372	{
				2373	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
				2374	}
				2375
				2376	static int ext3_dquot_initialize(struct inode *inode, int type)
				2377	{
				2378	handle_t *handle;
				2379	int ret, err;
				2380
				2381	/* We may create quota structure so we need to reserve enough blocks */
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2382	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2383	if (IS_ERR(handle))
				2384	return PTR_ERR(handle);
				2385	ret = dquot_initialize(inode, type);
				2386	err = ext3_journal_stop(handle);
				2387	if (!ret)
				2388	ret = err;
				2389	return ret;
				2390	}
				2391
				2392	static int ext3_dquot_drop(struct inode *inode)
				2393	{
				2394	handle_t *handle;
				2395	int ret, err;
				2396
				2397	/* We may delete quota structure so we need to reserve enough blocks */
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2398	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2399	if (IS_ERR(handle))
				2400	return PTR_ERR(handle);
				2401	ret = dquot_drop(inode);
				2402	err = ext3_journal_stop(handle);
				2403	if (!ret)
				2404	ret = err;
				2405	return ret;
				2406	}
				2407
				2408	static int ext3_write_dquot(struct dquot *dquot)
				2409	{
				2410	int ret, err;
				2411	handle_t *handle;
				2412	struct inode *inode;
				2413
				2414	inode = dquot_to_inode(dquot);
				2415	handle = ext3_journal_start(inode,
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2416	EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2417	if (IS_ERR(handle))
				2418	return PTR_ERR(handle);
				2419	ret = dquot_commit(dquot);
				2420	err = ext3_journal_stop(handle);
				2421	if (!ret)
				2422	ret = err;
				2423	return ret;
				2424	}
				2425
				2426	static int ext3_acquire_dquot(struct dquot *dquot)
				2427	{
				2428	int ret, err;
				2429	handle_t *handle;
				2430
				2431	handle = ext3_journal_start(dquot_to_inode(dquot),
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2432	EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2433	if (IS_ERR(handle))
				2434	return PTR_ERR(handle);
				2435	ret = dquot_acquire(dquot);
				2436	err = ext3_journal_stop(handle);
				2437	if (!ret)
				2438	ret = err;
				2439	return ret;
				2440	}
				2441
				2442	static int ext3_release_dquot(struct dquot *dquot)
				2443	{
				2444	int ret, err;
				2445	handle_t *handle;
				2446
				2447	handle = ext3_journal_start(dquot_to_inode(dquot),
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2448	EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2449	if (IS_ERR(handle))
				2450	return PTR_ERR(handle);
				2451	ret = dquot_release(dquot);
				2452	err = ext3_journal_stop(handle);
				2453	if (!ret)
				2454	ret = err;
				2455	return ret;
				2456	}
				2457
				2458	static int ext3_mark_dquot_dirty(struct dquot *dquot)
				2459	{
				2460	/* Are we journalling quotas? */
				2461	if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] \|\|
				2462	EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
				2463	dquot_mark_dquot_dirty(dquot);
				2464	return ext3_write_dquot(dquot);
				2465	} else {
				2466	return dquot_mark_dquot_dirty(dquot);
				2467	}
				2468	}
				2469
				2470	static int ext3_write_info(struct super_block *sb, int type)
				2471	{
				2472	int ret, err;
				2473	handle_t *handle;
				2474
				2475	/* Data block + inode block */
				2476	handle = ext3_journal_start(sb->s_root->d_inode, 2);
				2477	if (IS_ERR(handle))
				2478	return PTR_ERR(handle);
				2479	ret = dquot_commit_info(sb, type);
				2480	err = ext3_journal_stop(handle);
				2481	if (!ret)
				2482	ret = err;
				2483	return ret;
				2484	}
				2485
				2486	/*
				2487	* Turn on quotas during mount time - we need to find
				2488	* the quota file and such...
				2489	*/
				2490	static int ext3_quota_on_mount(struct super_block *sb, int type)
				2491	{
Christoph Hellwig	84de856	2005-06-23 00:09:16 -0700	[diff] [blame]	2492	return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
				2493	EXT3_SB(sb)->s_jquota_fmt, type);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2494	}
				2495
				2496	/*
				2497	* Standard function to be called on quota_on
				2498	*/
				2499	static int ext3_quota_on(struct super_block *sb, int type, int format_id,
				2500	char *path)
				2501	{
				2502	int err;
				2503	struct nameidata nd;
				2504
Jan Kara	1f54587	2005-06-23 22:01:04 -0700	[diff] [blame]	2505	if (!test_opt(sb, QUOTA))
				2506	return -EINVAL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2507	/* Not journalling quota? */
				2508	if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
				2509	!EXT3_SB(sb)->s_qf_names[GRPQUOTA])
				2510	return vfs_quota_on(sb, type, format_id, path);
				2511	err = path_lookup(path, LOOKUP_FOLLOW, &nd);
				2512	if (err)
				2513	return err;
				2514	/* Quotafile not on the same filesystem? */
				2515	if (nd.mnt->mnt_sb != sb) {
				2516	path_release(&nd);
				2517	return -EXDEV;
				2518	}
				2519	/* Quotafile not of fs root? */
				2520	if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
				2521	printk(KERN_WARNING
				2522	"EXT3-fs: Quota file not on filesystem root. "
				2523	"Journalled quota will not work.\n");
				2524	path_release(&nd);
				2525	return vfs_quota_on(sb, type, format_id, path);
				2526	}
				2527
				2528	/* Read data from quotafile - avoid pagecache and such because we cannot afford
				2529	* acquiring the locks... As quota files are never truncated and quota code
				2530	* itself serializes the operations (and noone else should touch the files)
				2531	* we don't have to be afraid of races */
				2532	static ssize_t ext3_quota_read(struct super_block sb, int type, char data,
				2533	size_t len, loff_t off)
				2534	{
				2535	struct inode *inode = sb_dqopt(sb)->files[type];
				2536	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
				2537	int err = 0;
				2538	int offset = off & (sb->s_blocksize - 1);
				2539	int tocopy;
				2540	size_t toread;
				2541	struct buffer_head *bh;
				2542	loff_t i_size = i_size_read(inode);
				2543
				2544	if (off > i_size)
				2545	return 0;
				2546	if (off+len > i_size)
				2547	len = i_size-off;
				2548	toread = len;
				2549	while (toread > 0) {
				2550	tocopy = sb->s_blocksize - offset < toread ?
				2551	sb->s_blocksize - offset : toread;
				2552	bh = ext3_bread(NULL, inode, blk, 0, &err);
				2553	if (err)
				2554	return err;
				2555	if (!bh) /* A hole? */
				2556	memset(data, 0, tocopy);
				2557	else
				2558	memcpy(data, bh->b_data+offset, tocopy);
				2559	brelse(bh);
				2560	offset = 0;
				2561	toread -= tocopy;
				2562	data += tocopy;
				2563	blk++;
				2564	}
				2565	return len;
				2566	}
				2567
				2568	/* Write to quotafile (we know the transaction is already started and has
				2569	* enough credits) */
				2570	static ssize_t ext3_quota_write(struct super_block *sb, int type,
				2571	const char *data, size_t len, loff_t off)
				2572	{
				2573	struct inode *inode = sb_dqopt(sb)->files[type];
				2574	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
				2575	int err = 0;
				2576	int offset = off & (sb->s_blocksize - 1);
				2577	int tocopy;
				2578	int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
				2579	size_t towrite = len;
				2580	struct buffer_head *bh;
				2581	handle_t *handle = journal_current_handle();
				2582
				2583	down(&inode->i_sem);
				2584	while (towrite > 0) {
				2585	tocopy = sb->s_blocksize - offset < towrite ?
				2586	sb->s_blocksize - offset : towrite;
				2587	bh = ext3_bread(handle, inode, blk, 1, &err);
				2588	if (!bh)
				2589	goto out;
				2590	if (journal_quota) {
				2591	err = ext3_journal_get_write_access(handle, bh);
				2592	if (err) {
				2593	brelse(bh);
				2594	goto out;
				2595	}
				2596	}
				2597	lock_buffer(bh);
				2598	memcpy(bh->b_data+offset, data, tocopy);
				2599	flush_dcache_page(bh->b_page);
				2600	unlock_buffer(bh);
				2601	if (journal_quota)
				2602	err = ext3_journal_dirty_metadata(handle, bh);
				2603	else {
				2604	/* Always do at least ordered writes for quotas */
				2605	err = ext3_journal_dirty_data(handle, bh);
				2606	mark_buffer_dirty(bh);
				2607	}
				2608	brelse(bh);
				2609	if (err)
				2610	goto out;
				2611	offset = 0;
				2612	towrite -= tocopy;
				2613	data += tocopy;
				2614	blk++;
				2615	}
				2616	out:
				2617	if (len == towrite)
				2618	return err;
				2619	if (inode->i_size < off+len-towrite) {
				2620	i_size_write(inode, off+len-towrite);
				2621	EXT3_I(inode)->i_disksize = inode->i_size;
				2622	}
				2623	inode->i_version++;
				2624	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
				2625	ext3_mark_inode_dirty(handle, inode);
				2626	up(&inode->i_sem);
				2627	return len - towrite;
				2628	}
				2629
				2630	#endif
				2631
				2632	static struct super_block ext3_get_sb(struct file_system_type fs_type,
				2633	int flags, const char dev_name, void data)
				2634	{
				2635	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
				2636	}
				2637
				2638	static struct file_system_type ext3_fs_type = {
				2639	.owner = THIS_MODULE,
				2640	.name = "ext3",
				2641	.get_sb = ext3_get_sb,
				2642	.kill_sb = kill_block_super,
				2643	.fs_flags = FS_REQUIRES_DEV,
				2644	};
				2645
				2646	static int __init init_ext3_fs(void)
				2647	{
				2648	int err = init_ext3_xattr();
				2649	if (err)
				2650	return err;
				2651	err = init_inodecache();
				2652	if (err)
				2653	goto out1;
				2654	err = register_filesystem(&ext3_fs_type);
				2655	if (err)
				2656	goto out;
				2657	return 0;
				2658	out:
				2659	destroy_inodecache();
				2660	out1:
				2661	exit_ext3_xattr();
				2662	return err;
				2663	}
				2664
				2665	static void __exit exit_ext3_fs(void)
				2666	{
				2667	unregister_filesystem(&ext3_fs_type);
				2668	destroy_inodecache();
				2669	exit_ext3_xattr();
				2670	}
				2671
				2672	MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
				2673	MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
				2674	MODULE_LICENSE("GPL");
				2675	module_init(init_ext3_fs)
				2676	module_exit(exit_ext3_fs)