Blame - fs/xfs/xfs_mount.c - kernel/msm

blob: 541d5dd474be9a2fa464c1966db37cbaa0d760fd [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	2	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				3	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	* published by the Free Software Foundation.
				8	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	18	#include "xfs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	19	#include "xfs_fs.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include "xfs_types.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	21	#include "xfs_bit.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	22	#include "xfs_log.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	23	#include "xfs_inum.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include "xfs_trans.h"
				25	#include "xfs_sb.h"
				26	#include "xfs_ag.h"
				27	#include "xfs_dir.h"
				28	#include "xfs_dir2.h"
				29	#include "xfs_dmapi.h"
				30	#include "xfs_mount.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31	#include "xfs_bmap_btree.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	32	#include "xfs_alloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	33	#include "xfs_ialloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	34	#include "xfs_dir_sf.h"
				35	#include "xfs_dir2_sf.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	36	#include "xfs_attr_sf.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37	#include "xfs_dinode.h"
				38	#include "xfs_inode.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	39	#include "xfs_btree.h"
				40	#include "xfs_ialloc.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41	#include "xfs_alloc.h"
				42	#include "xfs_rtalloc.h"
				43	#include "xfs_bmap.h"
				44	#include "xfs_error.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	45	#include "xfs_rw.h"
				46	#include "xfs_quota.h"
				47	#include "xfs_fsops.h"
				48
				49	STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
				50	STATIC int xfs_uuid_mount(xfs_mount_t *);
				51	STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	52	STATIC void xfs_unmountfs_wait(xfs_mount_t *);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53
				54	static struct {
				55	short offset;
				56	short type; /* 0 = integer
				57	* 1 = binary / string (no translation)
				58	*/
				59	} xfs_sb_info[] = {
				60	{ offsetof(xfs_sb_t, sb_magicnum), 0 },
				61	{ offsetof(xfs_sb_t, sb_blocksize), 0 },
				62	{ offsetof(xfs_sb_t, sb_dblocks), 0 },
				63	{ offsetof(xfs_sb_t, sb_rblocks), 0 },
				64	{ offsetof(xfs_sb_t, sb_rextents), 0 },
				65	{ offsetof(xfs_sb_t, sb_uuid), 1 },
				66	{ offsetof(xfs_sb_t, sb_logstart), 0 },
				67	{ offsetof(xfs_sb_t, sb_rootino), 0 },
				68	{ offsetof(xfs_sb_t, sb_rbmino), 0 },
				69	{ offsetof(xfs_sb_t, sb_rsumino), 0 },
				70	{ offsetof(xfs_sb_t, sb_rextsize), 0 },
				71	{ offsetof(xfs_sb_t, sb_agblocks), 0 },
				72	{ offsetof(xfs_sb_t, sb_agcount), 0 },
				73	{ offsetof(xfs_sb_t, sb_rbmblocks), 0 },
				74	{ offsetof(xfs_sb_t, sb_logblocks), 0 },
				75	{ offsetof(xfs_sb_t, sb_versionnum), 0 },
				76	{ offsetof(xfs_sb_t, sb_sectsize), 0 },
				77	{ offsetof(xfs_sb_t, sb_inodesize), 0 },
				78	{ offsetof(xfs_sb_t, sb_inopblock), 0 },
				79	{ offsetof(xfs_sb_t, sb_fname[0]), 1 },
				80	{ offsetof(xfs_sb_t, sb_blocklog), 0 },
				81	{ offsetof(xfs_sb_t, sb_sectlog), 0 },
				82	{ offsetof(xfs_sb_t, sb_inodelog), 0 },
				83	{ offsetof(xfs_sb_t, sb_inopblog), 0 },
				84	{ offsetof(xfs_sb_t, sb_agblklog), 0 },
				85	{ offsetof(xfs_sb_t, sb_rextslog), 0 },
				86	{ offsetof(xfs_sb_t, sb_inprogress), 0 },
				87	{ offsetof(xfs_sb_t, sb_imax_pct), 0 },
				88	{ offsetof(xfs_sb_t, sb_icount), 0 },
				89	{ offsetof(xfs_sb_t, sb_ifree), 0 },
				90	{ offsetof(xfs_sb_t, sb_fdblocks), 0 },
				91	{ offsetof(xfs_sb_t, sb_frextents), 0 },
				92	{ offsetof(xfs_sb_t, sb_uquotino), 0 },
				93	{ offsetof(xfs_sb_t, sb_gquotino), 0 },
				94	{ offsetof(xfs_sb_t, sb_qflags), 0 },
				95	{ offsetof(xfs_sb_t, sb_flags), 0 },
				96	{ offsetof(xfs_sb_t, sb_shared_vn), 0 },
				97	{ offsetof(xfs_sb_t, sb_inoalignmt), 0 },
				98	{ offsetof(xfs_sb_t, sb_unit), 0 },
				99	{ offsetof(xfs_sb_t, sb_width), 0 },
				100	{ offsetof(xfs_sb_t, sb_dirblklog), 0 },
				101	{ offsetof(xfs_sb_t, sb_logsectlog), 0 },
				102	{ offsetof(xfs_sb_t, sb_logsectsize),0 },
				103	{ offsetof(xfs_sb_t, sb_logsunit), 0 },
				104	{ offsetof(xfs_sb_t, sb_features2), 0 },
				105	{ sizeof(xfs_sb_t), 0 }
				106	};
				107
				108	/*
				109	* Return a pointer to an initialized xfs_mount structure.
				110	*/
				111	xfs_mount_t *
				112	xfs_mount_init(void)
				113	{
				114	xfs_mount_t *mp;
				115
				116	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
				117
				118	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
				119	spinlock_init(&mp->m_sb_lock, "xfs_sb");
				120	mutex_init(&mp->m_ilock, MUTEX_DEFAULT, "xfs_ilock");
				121	initnsema(&mp->m_growlock, 1, "xfs_grow");
				122	/*
				123	* Initialize the AIL.
				124	*/
				125	xfs_trans_ail_init(mp);
				126
				127	atomic_set(&mp->m_active_trans, 0);
				128
				129	return mp;
				130	}
				131
				132	/*
				133	* Free up the resources associated with a mount structure. Assume that
				134	* the structure was initially zeroed, so we can tell which fields got
				135	* initialized.
				136	*/
				137	void
				138	xfs_mount_free(
				139	xfs_mount_t *mp,
				140	int remove_bhv)
				141	{
				142	if (mp->m_ihash)
				143	xfs_ihash_free(mp);
				144	if (mp->m_chash)
				145	xfs_chash_free(mp);
				146
				147	if (mp->m_perag) {
				148	int agno;
				149
				150	for (agno = 0; agno < mp->m_maxagi; agno++)
				151	if (mp->m_perag[agno].pagb_list)
				152	kmem_free(mp->m_perag[agno].pagb_list,
				153	sizeof(xfs_perag_busy_t) *
				154	XFS_PAGB_NUM_SLOTS);
				155	kmem_free(mp->m_perag,
				156	sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
				157	}
				158
				159	AIL_LOCK_DESTROY(&mp->m_ail_lock);
				160	spinlock_destroy(&mp->m_sb_lock);
				161	mutex_destroy(&mp->m_ilock);
				162	freesema(&mp->m_growlock);
				163	if (mp->m_quotainfo)
				164	XFS_QM_DONE(mp);
				165
				166	if (mp->m_fsname != NULL)
				167	kmem_free(mp->m_fsname, mp->m_fsname_len);
Nathan Scott	fc1f8c1	2005-11-02 11:44:33 +1100	[diff] [blame]	168	if (mp->m_rtname != NULL)
				169	kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
				170	if (mp->m_logname != NULL)
				171	kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	172
				173	if (remove_bhv) {
				174	struct vfs *vfsp = XFS_MTOVFS(mp);
				175
				176	bhv_remove_all_vfsops(vfsp, 0);
				177	VFS_REMOVEBHV(vfsp, &mp->m_bhv);
				178	}
				179
				180	kmem_free(mp, sizeof(xfs_mount_t));
				181	}
				182
				183
				184	/*
				185	* Check the validity of the SB found.
				186	*/
				187	STATIC int
				188	xfs_mount_validate_sb(
				189	xfs_mount_t *mp,
				190	xfs_sb_t *sbp)
				191	{
				192	/*
				193	* If the log device and data device have the
				194	* same device number, the log is internal.
				195	* Consequently, the sb_logstart should be non-zero. If
				196	* we have a zero sb_logstart in this case, we may be trying to mount
				197	* a volume filesystem in a non-volume manner.
				198	*/
				199	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
				200	cmn_err(CE_WARN, "XFS: bad magic number");
				201	return XFS_ERROR(EWRONGFS);
				202	}
				203
				204	if (!XFS_SB_GOOD_VERSION(sbp)) {
				205	cmn_err(CE_WARN, "XFS: bad version");
				206	return XFS_ERROR(EWRONGFS);
				207	}
				208
				209	if (unlikely(
				210	sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
				211	cmn_err(CE_WARN,
				212	"XFS: filesystem is marked as having an external log; "
				213	"specify logdev on the\nmount command line.");
				214	XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(1)",
				215	XFS_ERRLEVEL_HIGH, mp, sbp);
				216	return XFS_ERROR(EFSCORRUPTED);
				217	}
				218
				219	if (unlikely(
				220	sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
				221	cmn_err(CE_WARN,
				222	"XFS: filesystem is marked as having an internal log; "
				223	"don't specify logdev on\nthe mount command line.");
				224	XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(2)",
				225	XFS_ERRLEVEL_HIGH, mp, sbp);
				226	return XFS_ERROR(EFSCORRUPTED);
				227	}
				228
				229	/*
				230	* More sanity checking. These were stolen directly from
				231	* xfs_repair.
				232	*/
				233	if (unlikely(
				234	sbp->sb_agcount <= 0 \|\|
				235	sbp->sb_sectsize < XFS_MIN_SECTORSIZE \|\|
				236	sbp->sb_sectsize > XFS_MAX_SECTORSIZE \|\|
				237	sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG \|\|
				238	sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG \|\|
				239	sbp->sb_blocksize < XFS_MIN_BLOCKSIZE \|\|
				240	sbp->sb_blocksize > XFS_MAX_BLOCKSIZE \|\|
				241	sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG \|\|
				242	sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG \|\|
				243	sbp->sb_inodesize < XFS_DINODE_MIN_SIZE \|\|
				244	sbp->sb_inodesize > XFS_DINODE_MAX_SIZE \|\|
				245	(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) \|\|
				246	(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) \|\|
				247	sbp->sb_imax_pct > 100)) {
				248	cmn_err(CE_WARN, "XFS: SB sanity check 1 failed");
				249	XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)",
				250	XFS_ERRLEVEL_LOW, mp, sbp);
				251	return XFS_ERROR(EFSCORRUPTED);
				252	}
				253
				254	/*
				255	* Sanity check AG count, size fields against data size field
				256	*/
				257	if (unlikely(
				258	sbp->sb_dblocks == 0 \|\|
				259	sbp->sb_dblocks >
				260	(xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks \|\|
				261	sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
				262	sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
				263	cmn_err(CE_WARN, "XFS: SB sanity check 2 failed");
				264	XFS_ERROR_REPORT("xfs_mount_validate_sb(4)",
				265	XFS_ERRLEVEL_LOW, mp);
				266	return XFS_ERROR(EFSCORRUPTED);
				267	}
				268
				269	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
				270	ASSERT(sbp->sb_blocklog >= BBSHIFT);
				271
				272	#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
				273	if (unlikely(
				274	(sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX \|\|
				275	(sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
				276	#else /* Limited by UINT_MAX of sectors */
				277	if (unlikely(
				278	(sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX \|\|
				279	(sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
				280	#endif
				281	cmn_err(CE_WARN,
				282	"XFS: File system is too large to be mounted on this system.");
				283	return XFS_ERROR(E2BIG);
				284	}
				285
				286	if (unlikely(sbp->sb_inprogress)) {
				287	cmn_err(CE_WARN, "XFS: file system busy");
				288	XFS_ERROR_REPORT("xfs_mount_validate_sb(5)",
				289	XFS_ERRLEVEL_LOW, mp);
				290	return XFS_ERROR(EFSCORRUPTED);
				291	}
				292
				293	/*
Nathan Scott	de20614	2005-05-05 13:24:13 -0700	[diff] [blame]	294	* Version 1 directory format has never worked on Linux.
				295	*/
				296	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
				297	cmn_err(CE_WARN,
				298	"XFS: Attempted to mount file system using version 1 directory format");
				299	return XFS_ERROR(ENOSYS);
				300	}
				301
				302	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	303	* Until this is fixed only page-sized or smaller data blocks work.
				304	*/
				305	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
				306	cmn_err(CE_WARN,
				307	"XFS: Attempted to mount file system with blocksize %d bytes",
				308	sbp->sb_blocksize);
				309	cmn_err(CE_WARN,
Christoph Hellwig	da1650a	2005-11-02 10:21:35 +1100	[diff] [blame]	310	"XFS: Only page-sized (%ld) or less blocksizes currently work.",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	311	PAGE_SIZE);
				312	return XFS_ERROR(ENOSYS);
				313	}
				314
				315	return 0;
				316	}
				317
				318	xfs_agnumber_t
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	319	xfs_initialize_perag(
				320	struct vfs *vfs,
				321	xfs_mount_t *mp,
				322	xfs_agnumber_t agcount)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	323	{
				324	xfs_agnumber_t index, max_metadata;
				325	xfs_perag_t *pag;
				326	xfs_agino_t agino;
				327	xfs_ino_t ino;
				328	xfs_sb_t *sbp = &mp->m_sb;
				329	xfs_ino_t max_inum = XFS_MAXINUMBER_32;
				330
				331	/* Check to see if the filesystem can overflow 32 bit inodes */
				332	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
				333	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
				334
				335	/* Clear the mount flag if no inode can overflow 32 bits
				336	* on this filesystem, or if specifically requested..
				337	*/
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	338	if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	339	mp->m_flags \|= XFS_MOUNT_32BITINODES;
				340	} else {
				341	mp->m_flags &= ~XFS_MOUNT_32BITINODES;
				342	}
				343
				344	/* If we can overflow then setup the ag headers accordingly */
				345	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
				346	/* Calculate how much should be reserved for inodes to
				347	* meet the max inode percentage.
				348	*/
				349	if (mp->m_maxicount) {
				350	__uint64_t icount;
				351
				352	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				353	do_div(icount, 100);
				354	icount += sbp->sb_agblocks - 1;
Eric Sandeen	a749ee8	2005-11-02 15:13:42 +1100	[diff] [blame^]	355	do_div(icount, sbp->sb_agblocks);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	356	max_metadata = icount;
				357	} else {
				358	max_metadata = agcount;
				359	}
				360	for (index = 0; index < agcount; index++) {
				361	ino = XFS_AGINO_TO_INO(mp, index, agino);
				362	if (ino > max_inum) {
				363	index++;
				364	break;
				365	}
				366
				367	/* This ag is prefered for inodes */
				368	pag = &mp->m_perag[index];
				369	pag->pagi_inodeok = 1;
				370	if (index < max_metadata)
				371	pag->pagf_metadata = 1;
				372	}
				373	} else {
				374	/* Setup default behavior for smaller filesystems */
				375	for (index = 0; index < agcount; index++) {
				376	pag = &mp->m_perag[index];
				377	pag->pagi_inodeok = 1;
				378	}
				379	}
				380	return index;
				381	}
				382
				383	/*
				384	* xfs_xlatesb
				385	*
				386	* data - on disk version of sb
				387	* sb - a superblock
				388	* dir - conversion direction: <0 - convert sb to buf
				389	* >0 - convert buf to sb
				390	* fields - which fields to copy (bitmask)
				391	*/
				392	void
				393	xfs_xlatesb(
				394	void *data,
				395	xfs_sb_t *sb,
				396	int dir,
				397	__int64_t fields)
				398	{
				399	xfs_caddr_t buf_ptr;
				400	xfs_caddr_t mem_ptr;
				401	xfs_sb_field_t f;
				402	int first;
				403	int size;
				404
				405	ASSERT(dir);
				406	ASSERT(fields);
				407
				408	if (!fields)
				409	return;
				410
				411	buf_ptr = (xfs_caddr_t)data;
				412	mem_ptr = (xfs_caddr_t)sb;
				413
				414	while (fields) {
				415	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				416	first = xfs_sb_info[f].offset;
				417	size = xfs_sb_info[f + 1].offset - first;
				418
				419	ASSERT(xfs_sb_info[f].type == 0 \|\| xfs_sb_info[f].type == 1);
				420
				421	if (size == 1 \|\| xfs_sb_info[f].type == 1) {
				422	if (dir > 0) {
				423	memcpy(mem_ptr + first, buf_ptr + first, size);
				424	} else {
				425	memcpy(buf_ptr + first, mem_ptr + first, size);
				426	}
				427	} else {
				428	switch (size) {
				429	case 2:
				430	INT_XLATE((__uint16_t)(buf_ptr+first),
				431	(__uint16_t)(mem_ptr+first),
				432	dir, ARCH_CONVERT);
				433	break;
				434	case 4:
				435	INT_XLATE((__uint32_t)(buf_ptr+first),
				436	(__uint32_t)(mem_ptr+first),
				437	dir, ARCH_CONVERT);
				438	break;
				439	case 8:
				440	INT_XLATE((__uint64_t)(buf_ptr+first),
				441	(__uint64_t)(mem_ptr+first), dir, ARCH_CONVERT);
				442	break;
				443	default:
				444	ASSERT(0);
				445	}
				446	}
				447
				448	fields &= ~(1LL << f);
				449	}
				450	}
				451
				452	/*
				453	* xfs_readsb
				454	*
				455	* Does the initial read of the superblock.
				456	*/
				457	int
				458	xfs_readsb(xfs_mount_t *mp)
				459	{
				460	unsigned int sector_size;
				461	unsigned int extra_flags;
				462	xfs_buf_t *bp;
				463	xfs_sb_t *sbp;
				464	int error;
				465
				466	ASSERT(mp->m_sb_bp == NULL);
				467	ASSERT(mp->m_ddev_targp != NULL);
				468
				469	/*
				470	* Allocate a (locked) buffer to hold the superblock.
				471	* This will be kept around at all times to optimize
				472	* access to the superblock.
				473	*/
				474	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
				475	extra_flags = XFS_BUF_LOCK \| XFS_BUF_MANAGE \| XFS_BUF_MAPPED;
				476
				477	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				478	BTOBB(sector_size), extra_flags);
				479	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
				480	cmn_err(CE_WARN, "XFS: SB read failed");
				481	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				482	goto fail;
				483	}
				484	ASSERT(XFS_BUF_ISBUSY(bp));
				485	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				486
				487	/*
				488	* Initialize the mount structure from the superblock.
				489	* But first do some basic consistency checking.
				490	*/
				491	sbp = XFS_BUF_TO_SBP(bp);
				492	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
				493
				494	error = xfs_mount_validate_sb(mp, &(mp->m_sb));
				495	if (error) {
				496	cmn_err(CE_WARN, "XFS: SB validate failed");
				497	goto fail;
				498	}
				499
				500	/*
				501	* We must be able to do sector-sized and sector-aligned IO.
				502	*/
				503	if (sector_size > mp->m_sb.sb_sectsize) {
				504	cmn_err(CE_WARN,
				505	"XFS: device supports only %u byte sectors (not %u)",
				506	sector_size, mp->m_sb.sb_sectsize);
				507	error = ENOSYS;
				508	goto fail;
				509	}
				510
				511	/*
				512	* If device sector size is smaller than the superblock size,
				513	* re-read the superblock so the buffer is correctly sized.
				514	*/
				515	if (sector_size < mp->m_sb.sb_sectsize) {
				516	XFS_BUF_UNMANAGE(bp);
				517	xfs_buf_relse(bp);
				518	sector_size = mp->m_sb.sb_sectsize;
				519	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				520	BTOBB(sector_size), extra_flags);
				521	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
				522	cmn_err(CE_WARN, "XFS: SB re-read failed");
				523	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				524	goto fail;
				525	}
				526	ASSERT(XFS_BUF_ISBUSY(bp));
				527	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				528	}
				529
				530	mp->m_sb_bp = bp;
				531	xfs_buf_relse(bp);
				532	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
				533	return 0;
				534
				535	fail:
				536	if (bp) {
				537	XFS_BUF_UNMANAGE(bp);
				538	xfs_buf_relse(bp);
				539	}
				540	return error;
				541	}
				542
				543
				544	/*
				545	* xfs_mount_common
				546	*
				547	* Mount initialization code establishing various mount
				548	* fields from the superblock associated with the given
				549	* mount structure
				550	*/
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	551	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	552	xfs_mount_common(xfs_mount_t mp, xfs_sb_t sbp)
				553	{
				554	int i;
				555
				556	mp->m_agfrotor = mp->m_agirotor = 0;
				557	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
				558	mp->m_maxagi = mp->m_sb.sb_agcount;
				559	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
				560	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
				561	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
				562	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
				563	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
				564	mp->m_litino = sbp->sb_inodesize -
				565	((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
				566	mp->m_blockmask = sbp->sb_blocksize - 1;
				567	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
				568	mp->m_blockwmask = mp->m_blockwsize - 1;
				569	INIT_LIST_HEAD(&mp->m_del_inodes);
				570
				571	/*
				572	* Setup for attributes, in case they get created.
				573	* This value is for inodes getting attributes for the first time,
				574	* the per-inode value is for old attribute values.
				575	*/
				576	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
				577	switch (sbp->sb_inodesize) {
				578	case 256:
Nathan Scott	d8cc890	2005-11-02 10:34:53 +1100	[diff] [blame]	579	mp->m_attroffset = XFS_LITINO(mp) -
				580	XFS_BMDR_SPACE_CALC(MINABTPTRS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	581	break;
				582	case 512:
				583	case 1024:
				584	case 2048:
Nathan Scott	d8cc890	2005-11-02 10:34:53 +1100	[diff] [blame]	585	mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	586	break;
				587	default:
				588	ASSERT(0);
				589	}
				590	ASSERT(mp->m_attroffset < XFS_LITINO(mp));
				591
				592	for (i = 0; i < 2; i++) {
				593	mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				594	xfs_alloc, i == 0);
				595	mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				596	xfs_alloc, i == 0);
				597	}
				598	for (i = 0; i < 2; i++) {
				599	mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				600	xfs_bmbt, i == 0);
				601	mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				602	xfs_bmbt, i == 0);
				603	}
				604	for (i = 0; i < 2; i++) {
				605	mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				606	xfs_inobt, i == 0);
				607	mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				608	xfs_inobt, i == 0);
				609	}
				610
				611	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
				612	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
				613	sbp->sb_inopblock);
				614	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
				615	}
				616	/*
				617	* xfs_mountfs
				618	*
				619	* This function does the following on an initial mount of a file system:
				620	* - reads the superblock from disk and init the mount struct
				621	* - if we're a 32-bit kernel, do a size check on the superblock
				622	* so we don't mount terabyte filesystems
				623	* - init mount struct realtime fields
				624	* - allocate inode hash table for fs
				625	* - init directory manager
				626	* - perform recovery and init the log manager
				627	*/
				628	int
				629	xfs_mountfs(
				630	vfs_t *vfsp,
				631	xfs_mount_t *mp,
				632	int mfsi_flags)
				633	{
				634	xfs_buf_t *bp;
				635	xfs_sb_t *sbp = &(mp->m_sb);
				636	xfs_inode_t *rip;
				637	vnode_t *rvp = NULL;
				638	int readio_log, writeio_log;
				639	xfs_daddr_t d;
				640	__uint64_t ret64;
				641	__int64_t update_flags;
				642	uint quotamount, quotaflags;
				643	int agno;
				644	int uuid_mounted = 0;
				645	int error = 0;
				646
				647	if (mp->m_sb_bp == NULL) {
				648	if ((error = xfs_readsb(mp))) {
				649	return (error);
				650	}
				651	}
				652	xfs_mount_common(mp, sbp);
				653
				654	/*
				655	* Check if sb_agblocks is aligned at stripe boundary
				656	* If sb_agblocks is NOT aligned turn off m_dalign since
				657	* allocator alignment is within an ag, therefore ag has
				658	* to be aligned at stripe boundary.
				659	*/
				660	update_flags = 0LL;
				661	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
				662	/*
				663	* If stripe unit and stripe width are not multiples
				664	* of the fs blocksize turn off alignment.
				665	*/
				666	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) \|\|
				667	(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
				668	if (mp->m_flags & XFS_MOUNT_RETERR) {
				669	cmn_err(CE_WARN,
				670	"XFS: alignment check 1 failed");
				671	error = XFS_ERROR(EINVAL);
				672	goto error1;
				673	}
				674	mp->m_dalign = mp->m_swidth = 0;
				675	} else {
				676	/*
				677	* Convert the stripe unit and width to FSBs.
				678	*/
				679	mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
				680	if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				681	if (mp->m_flags & XFS_MOUNT_RETERR) {
				682	error = XFS_ERROR(EINVAL);
				683	goto error1;
				684	}
				685	xfs_fs_cmn_err(CE_WARN, mp,
				686	"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
				687	mp->m_dalign, mp->m_swidth,
				688	sbp->sb_agblocks);
				689
				690	mp->m_dalign = 0;
				691	mp->m_swidth = 0;
				692	} else if (mp->m_dalign) {
				693	mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
				694	} else {
				695	if (mp->m_flags & XFS_MOUNT_RETERR) {
				696	xfs_fs_cmn_err(CE_WARN, mp,
				697	"stripe alignment turned off: sunit(%d) less than bsize(%d)",
				698	mp->m_dalign,
				699	mp->m_blockmask +1);
				700	error = XFS_ERROR(EINVAL);
				701	goto error1;
				702	}
				703	mp->m_swidth = 0;
				704	}
				705	}
				706
				707	/*
				708	* Update superblock with new values
				709	* and log changes
				710	*/
				711	if (XFS_SB_VERSION_HASDALIGN(sbp)) {
				712	if (sbp->sb_unit != mp->m_dalign) {
				713	sbp->sb_unit = mp->m_dalign;
				714	update_flags \|= XFS_SB_UNIT;
				715	}
				716	if (sbp->sb_width != mp->m_swidth) {
				717	sbp->sb_width = mp->m_swidth;
				718	update_flags \|= XFS_SB_WIDTH;
				719	}
				720	}
				721	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
				722	XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
				723	mp->m_dalign = sbp->sb_unit;
				724	mp->m_swidth = sbp->sb_width;
				725	}
				726
				727	xfs_alloc_compute_maxlevels(mp);
				728	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
				729	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
				730	xfs_ialloc_compute_maxlevels(mp);
				731
				732	if (sbp->sb_imax_pct) {
				733	__uint64_t icount;
				734
				735	/* Make sure the maximum inode count is a multiple of the
				736	* units we allocate inodes in.
				737	*/
				738
				739	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				740	do_div(icount, 100);
				741	do_div(icount, mp->m_ialloc_blks);
				742	mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
				743	sbp->sb_inopblog;
				744	} else
				745	mp->m_maxicount = 0;
				746
				747	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
				748
				749	/*
				750	* XFS uses the uuid from the superblock as the unique
				751	* identifier for fsid. We can not use the uuid from the volume
				752	* since a single partition filesystem is identical to a single
				753	* partition volume/filesystem.
				754	*/
				755	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
				756	(mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
				757	if (xfs_uuid_mount(mp)) {
				758	error = XFS_ERROR(EINVAL);
				759	goto error1;
				760	}
				761	uuid_mounted=1;
				762	ret64 = uuid_hash64(&sbp->sb_uuid);
				763	memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
				764	}
				765
				766	/*
				767	* Set the default minimum read and write sizes unless
				768	* already specified in a mount option.
				769	* We use smaller I/O sizes when the file system
				770	* is being used for NFS service (wsync mount option).
				771	*/
				772	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
				773	if (mp->m_flags & XFS_MOUNT_WSYNC) {
				774	readio_log = XFS_WSYNC_READIO_LOG;
				775	writeio_log = XFS_WSYNC_WRITEIO_LOG;
				776	} else {
				777	readio_log = XFS_READIO_LOG_LARGE;
				778	writeio_log = XFS_WRITEIO_LOG_LARGE;
				779	}
				780	} else {
				781	readio_log = mp->m_readio_log;
				782	writeio_log = mp->m_writeio_log;
				783	}
				784
				785	/*
				786	* Set the number of readahead buffers to use based on
				787	* physical memory size.
				788	*/
				789	if (xfs_physmem <= 4096) /* <= 16MB */
				790	mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB;
				791	else if (xfs_physmem <= 8192) /* <= 32MB */
				792	mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB;
				793	else
				794	mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32;
				795	if (sbp->sb_blocklog > readio_log) {
				796	mp->m_readio_log = sbp->sb_blocklog;
				797	} else {
				798	mp->m_readio_log = readio_log;
				799	}
				800	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
				801	if (sbp->sb_blocklog > writeio_log) {
				802	mp->m_writeio_log = sbp->sb_blocklog;
				803	} else {
				804	mp->m_writeio_log = writeio_log;
				805	}
				806	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
				807
				808	/*
				809	* Set the inode cluster size based on the physical memory
				810	* size. This may still be overridden by the file system
				811	* block size if it is larger than the chosen cluster size.
				812	*/
				813	if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */
				814	mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE;
				815	} else {
				816	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
				817	}
				818	/*
				819	* Set whether we're using inode alignment.
				820	*/
				821	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
				822	mp->m_sb.sb_inoalignmt >=
				823	XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
				824	mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
				825	else
				826	mp->m_inoalign_mask = 0;
				827	/*
				828	* If we are using stripe alignment, check whether
				829	* the stripe unit is a multiple of the inode alignment
				830	*/
				831	if (mp->m_dalign && mp->m_inoalign_mask &&
				832	!(mp->m_dalign & mp->m_inoalign_mask))
				833	mp->m_sinoalign = mp->m_dalign;
				834	else
				835	mp->m_sinoalign = 0;
				836	/*
				837	* Check that the data (and log if separate) are an ok size.
				838	*/
				839	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
				840	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
				841	cmn_err(CE_WARN, "XFS: size check 1 failed");
				842	error = XFS_ERROR(E2BIG);
				843	goto error1;
				844	}
				845	error = xfs_read_buf(mp, mp->m_ddev_targp,
				846	d - XFS_FSS_TO_BB(mp, 1),
				847	XFS_FSS_TO_BB(mp, 1), 0, &bp);
				848	if (!error) {
				849	xfs_buf_relse(bp);
				850	} else {
				851	cmn_err(CE_WARN, "XFS: size check 2 failed");
				852	if (error == ENOSPC) {
				853	error = XFS_ERROR(E2BIG);
				854	}
				855	goto error1;
				856	}
				857
				858	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
				859	mp->m_logdev_targp != mp->m_ddev_targp) {
				860	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
				861	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
				862	cmn_err(CE_WARN, "XFS: size check 3 failed");
				863	error = XFS_ERROR(E2BIG);
				864	goto error1;
				865	}
				866	error = xfs_read_buf(mp, mp->m_logdev_targp,
				867	d - XFS_FSB_TO_BB(mp, 1),
				868	XFS_FSB_TO_BB(mp, 1), 0, &bp);
				869	if (!error) {
				870	xfs_buf_relse(bp);
				871	} else {
				872	cmn_err(CE_WARN, "XFS: size check 3 failed");
				873	if (error == ENOSPC) {
				874	error = XFS_ERROR(E2BIG);
				875	}
				876	goto error1;
				877	}
				878	}
				879
				880	/*
				881	* Initialize realtime fields in the mount structure
				882	*/
				883	if ((error = xfs_rtmount_init(mp))) {
				884	cmn_err(CE_WARN, "XFS: RT mount failed");
				885	goto error1;
				886	}
				887
				888	/*
				889	* For client case we are done now
				890	*/
				891	if (mfsi_flags & XFS_MFSI_CLIENT) {
				892	return(0);
				893	}
				894
				895	/*
				896	* Copies the low order bits of the timestamp and the randomly
				897	* set "sequence" number out of a UUID.
				898	*/
				899	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
				900
				901	/*
				902	* The vfs structure needs to have a file system independent
				903	* way of checking for the invariant file system ID. Since it
				904	* can't look at mount structures it has a pointer to the data
				905	* in the mount structure.
				906	*
				907	* File systems that don't support user level file handles (i.e.
				908	* all of them except for XFS) will leave vfs_altfsid as NULL.
				909	*/
				910	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
				911	mp->m_dmevmask = 0; /* not persistent; set after each mount */
				912
				913	/*
				914	* Select the right directory manager.
				915	*/
				916	mp->m_dirops =
				917	XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
				918	xfsv2_dirops :
				919	xfsv1_dirops;
				920
				921	/*
				922	* Initialize directory manager's entries.
				923	*/
				924	XFS_DIR_MOUNT(mp);
				925
				926	/*
				927	* Initialize the attribute manager's entries.
				928	*/
				929	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;
				930
				931	/*
				932	* Initialize the precomputed transaction reservations values.
				933	*/
				934	xfs_trans_init(mp);
				935
				936	/*
				937	* Allocate and initialize the inode hash table for this
				938	* file system.
				939	*/
				940	xfs_ihash_init(mp);
				941	xfs_chash_init(mp);
				942
				943	/*
				944	* Allocate and initialize the per-ag data.
				945	*/
				946	init_rwsem(&mp->m_peraglock);
				947	mp->m_perag =
				948	kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);
				949
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	950	mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	951
				952	/*
				953	* log's mount-time initialization. Perform 1st part recovery if needed
				954	*/
				955	if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */
				956	error = xfs_log_mount(mp, mp->m_logdev_targp,
				957	XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				958	XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
				959	if (error) {
				960	cmn_err(CE_WARN, "XFS: log mount failed");
				961	goto error2;
				962	}
				963	} else { /* No log has been defined */
				964	cmn_err(CE_WARN, "XFS: no log defined");
				965	XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
				966	error = XFS_ERROR(EFSCORRUPTED);
				967	goto error2;
				968	}
				969
				970	/*
				971	* Get and sanity-check the root inode.
				972	* Save the pointer to it in the mount structure.
				973	*/
				974	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
				975	if (error) {
				976	cmn_err(CE_WARN, "XFS: failed to read root inode");
				977	goto error3;
				978	}
				979
				980	ASSERT(rip != NULL);
				981	rvp = XFS_ITOV(rip);
				982
				983	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
				984	cmn_err(CE_WARN, "XFS: corrupted root inode");
				985	prdev("Root inode %llu is not a directory",
				986	mp->m_ddev_targp, (unsigned long long)rip->i_ino);
				987	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				988	XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				989	mp);
				990	error = XFS_ERROR(EFSCORRUPTED);
				991	goto error4;
				992	}
				993	mp->m_rootip = rip; /* save it */
				994
				995	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				996
				997	/*
				998	* Initialize realtime inode pointers in the mount structure
				999	*/
				1000	if ((error = xfs_rtmount_inodes(mp))) {
				1001	/*
				1002	* Free up the root inode.
				1003	*/
				1004	cmn_err(CE_WARN, "XFS: failed to read RT inodes");
				1005	goto error4;
				1006	}
				1007
				1008	/*
				1009	* If fs is not mounted readonly, then update the superblock
				1010	* unit and width changes.
				1011	*/
				1012	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
				1013	xfs_mount_log_sbunit(mp, update_flags);
				1014
				1015	/*
				1016	* Initialise the XFS quota management subsystem for this mount
				1017	*/
				1018	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
				1019	goto error4;
				1020
				1021	/*
				1022	* Finish recovering the file system. This part needed to be
				1023	* delayed until after the root and real-time bitmap inodes
				1024	* were consistently read in.
				1025	*/
				1026	error = xfs_log_mount_finish(mp, mfsi_flags);
				1027	if (error) {
				1028	cmn_err(CE_WARN, "XFS: log mount finish failed");
				1029	goto error4;
				1030	}
				1031
				1032	/*
				1033	* Complete the quota initialisation, post-log-replay component.
				1034	*/
				1035	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
				1036	goto error4;
				1037
				1038	return 0;
				1039
				1040	error4:
				1041	/*
				1042	* Free up the root inode.
				1043	*/
				1044	VN_RELE(rvp);
				1045	error3:
				1046	xfs_log_unmount_dealloc(mp);
				1047	error2:
				1048	xfs_ihash_free(mp);
				1049	xfs_chash_free(mp);
				1050	for (agno = 0; agno < sbp->sb_agcount; agno++)
				1051	if (mp->m_perag[agno].pagb_list)
				1052	kmem_free(mp->m_perag[agno].pagb_list,
				1053	sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
				1054	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
				1055	mp->m_perag = NULL;
				1056	/* FALLTHROUGH */
				1057	error1:
				1058	if (uuid_mounted)
				1059	xfs_uuid_unmount(mp);
				1060	xfs_freesb(mp);
				1061	return error;
				1062	}
				1063
				1064	/*
				1065	* xfs_unmountfs
				1066	*
				1067	* This flushes out the inodes,dquots and the superblock, unmounts the
				1068	* log and makes sure that incore structures are freed.
				1069	*/
				1070	int
				1071	xfs_unmountfs(xfs_mount_t mp, struct cred cr)
				1072	{
				1073	struct vfs *vfsp = XFS_MTOVFS(mp);
				1074	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1075	int64_t fsid;
				1076	#endif
				1077
Christoph Hellwig	efa8027	2005-06-21 15:37:17 +1000	[diff] [blame]	1078	xfs_iflush_all(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1079
				1080	XFS_QM_DQPURGEALL(mp,
				1081	XFS_QMOPT_UQUOTA \| XFS_QMOPT_GQUOTA \| XFS_QMOPT_UMOUNTING);
				1082
				1083	/*
				1084	* Flush out the log synchronously so that we know for sure
				1085	* that nothing is pinned. This is important because bflush()
				1086	* will skip pinned buffers.
				1087	*/
				1088	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE \| XFS_LOG_SYNC);
				1089
				1090	xfs_binval(mp->m_ddev_targp);
				1091	if (mp->m_rtdev_targp) {
				1092	xfs_binval(mp->m_rtdev_targp);
				1093	}
				1094
				1095	xfs_unmountfs_writesb(mp);
				1096
				1097	xfs_unmountfs_wait(mp); /* wait for async bufs */
				1098
				1099	xfs_log_unmount(mp); /* Done! No more fs ops. */
				1100
				1101	xfs_freesb(mp);
				1102
				1103	/*
				1104	* All inodes from this mount point should be freed.
				1105	*/
				1106	ASSERT(mp->m_inodes == NULL);
				1107
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1108	xfs_unmountfs_close(mp, cr);
				1109	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
				1110	xfs_uuid_unmount(mp);
				1111
				1112	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1113	/*
				1114	* clear all error tags on this filesystem
				1115	*/
				1116	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
				1117	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
				1118	#endif
				1119	XFS_IODONE(vfsp);
				1120	xfs_mount_free(mp, 1);
				1121	return 0;
				1122	}
				1123
				1124	void
				1125	xfs_unmountfs_close(xfs_mount_t mp, struct cred cr)
				1126	{
				1127	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1128	xfs_free_buftarg(mp->m_logdev_targp, 1);
				1129	if (mp->m_rtdev_targp)
				1130	xfs_free_buftarg(mp->m_rtdev_targp, 1);
				1131	xfs_free_buftarg(mp->m_ddev_targp, 0);
				1132	}
				1133
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	1134	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1135	xfs_unmountfs_wait(xfs_mount_t *mp)
				1136	{
				1137	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1138	xfs_wait_buftarg(mp->m_logdev_targp);
				1139	if (mp->m_rtdev_targp)
				1140	xfs_wait_buftarg(mp->m_rtdev_targp);
				1141	xfs_wait_buftarg(mp->m_ddev_targp);
				1142	}
				1143
				1144	int
				1145	xfs_unmountfs_writesb(xfs_mount_t *mp)
				1146	{
				1147	xfs_buf_t *sbp;
				1148	xfs_sb_t *sb;
				1149	int error = 0;
				1150
				1151	/*
				1152	* skip superblock write if fs is read-only, or
				1153	* if we are doing a forced umount.
				1154	*/
				1155	sbp = xfs_getsb(mp, 0);
				1156	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY \|\|
				1157	XFS_FORCED_SHUTDOWN(mp))) {
				1158	/*
				1159	* mark shared-readonly if desired
				1160	*/
				1161	sb = XFS_BUF_TO_SBP(sbp);
				1162	if (mp->m_mk_sharedro) {
				1163	if (!(sb->sb_flags & XFS_SBF_READONLY))
				1164	sb->sb_flags \|= XFS_SBF_READONLY;
				1165	if (!XFS_SB_VERSION_HASSHARED(sb))
				1166	XFS_SB_VERSION_ADDSHARED(sb);
				1167	xfs_fs_cmn_err(CE_NOTE, mp,
				1168	"Unmounting, marking shared read-only");
				1169	}
				1170	XFS_BUF_UNDONE(sbp);
				1171	XFS_BUF_UNREAD(sbp);
				1172	XFS_BUF_UNDELAYWRITE(sbp);
				1173	XFS_BUF_WRITE(sbp);
				1174	XFS_BUF_UNASYNC(sbp);
				1175	ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
				1176	xfsbdstrat(mp, sbp);
				1177	/* Nevermind errors we might get here. */
				1178	error = xfs_iowait(sbp);
				1179	if (error)
				1180	xfs_ioerror_alert("xfs_unmountfs_writesb",
				1181	mp, sbp, XFS_BUF_ADDR(sbp));
				1182	if (error && mp->m_mk_sharedro)
				1183	xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
				1184	}
				1185	xfs_buf_relse(sbp);
				1186	return (error);
				1187	}
				1188
				1189	/*
				1190	* xfs_mod_sb() can be used to copy arbitrary changes to the
				1191	* in-core superblock into the superblock buffer to be logged.
				1192	* It does not provide the higher level of locking that is
				1193	* needed to protect the in-core superblock from concurrent
				1194	* access.
				1195	*/
				1196	void
				1197	xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
				1198	{
				1199	xfs_buf_t *bp;
				1200	int first;
				1201	int last;
				1202	xfs_mount_t *mp;
				1203	xfs_sb_t *sbp;
				1204	xfs_sb_field_t f;
				1205
				1206	ASSERT(fields);
				1207	if (!fields)
				1208	return;
				1209	mp = tp->t_mountp;
				1210	bp = xfs_trans_getsb(tp, mp, 0);
				1211	sbp = XFS_BUF_TO_SBP(bp);
				1212	first = sizeof(xfs_sb_t);
				1213	last = 0;
				1214
				1215	/* translate/copy */
				1216
				1217	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields);
				1218
				1219	/* find modified range */
				1220
				1221	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				1222	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1223	first = xfs_sb_info[f].offset;
				1224
				1225	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
				1226	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1227	last = xfs_sb_info[f + 1].offset - 1;
				1228
				1229	xfs_trans_log_buf(tp, bp, first, last);
				1230	}
				1231
				1232	/*
				1233	* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
				1234	* a delta to a specified field in the in-core superblock. Simply
				1235	* switch on the field indicated and apply the delta to that field.
				1236	* Fields are not allowed to dip below zero, so if the delta would
				1237	* do this do not apply it and return EINVAL.
				1238	*
				1239	* The SB_LOCK must be held when this routine is called.
				1240	*/
				1241	STATIC int
				1242	xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
				1243	int delta, int rsvd)
				1244	{
				1245	int scounter; /* short counter for 32 bit fields */
				1246	long long lcounter; /* long counter for 64 bit fields */
				1247	long long res_used, rem;
				1248
				1249	/*
				1250	* With the in-core superblock spin lock held, switch
				1251	* on the indicated field. Apply the delta to the
				1252	* proper field. If the fields value would dip below
				1253	* 0, then do not apply the delta and return EINVAL.
				1254	*/
				1255	switch (field) {
				1256	case XFS_SBS_ICOUNT:
				1257	lcounter = (long long)mp->m_sb.sb_icount;
				1258	lcounter += delta;
				1259	if (lcounter < 0) {
				1260	ASSERT(0);
				1261	return (XFS_ERROR(EINVAL));
				1262	}
				1263	mp->m_sb.sb_icount = lcounter;
				1264	return (0);
				1265	case XFS_SBS_IFREE:
				1266	lcounter = (long long)mp->m_sb.sb_ifree;
				1267	lcounter += delta;
				1268	if (lcounter < 0) {
				1269	ASSERT(0);
				1270	return (XFS_ERROR(EINVAL));
				1271	}
				1272	mp->m_sb.sb_ifree = lcounter;
				1273	return (0);
				1274	case XFS_SBS_FDBLOCKS:
				1275
				1276	lcounter = (long long)mp->m_sb.sb_fdblocks;
				1277	res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
				1278
				1279	if (delta > 0) { /* Putting blocks back */
				1280	if (res_used > delta) {
				1281	mp->m_resblks_avail += delta;
				1282	} else {
				1283	rem = delta - res_used;
				1284	mp->m_resblks_avail = mp->m_resblks;
				1285	lcounter += rem;
				1286	}
				1287	} else { /* Taking blocks away */
				1288
				1289	lcounter += delta;
				1290
				1291	/*
				1292	* If were out of blocks, use any available reserved blocks if
				1293	* were allowed to.
				1294	*/
				1295
				1296	if (lcounter < 0) {
				1297	if (rsvd) {
				1298	lcounter = (long long)mp->m_resblks_avail + delta;
				1299	if (lcounter < 0) {
				1300	return (XFS_ERROR(ENOSPC));
				1301	}
				1302	mp->m_resblks_avail = lcounter;
				1303	return (0);
				1304	} else { /* not reserved */
				1305	return (XFS_ERROR(ENOSPC));
				1306	}
				1307	}
				1308	}
				1309
				1310	mp->m_sb.sb_fdblocks = lcounter;
				1311	return (0);
				1312	case XFS_SBS_FREXTENTS:
				1313	lcounter = (long long)mp->m_sb.sb_frextents;
				1314	lcounter += delta;
				1315	if (lcounter < 0) {
				1316	return (XFS_ERROR(ENOSPC));
				1317	}
				1318	mp->m_sb.sb_frextents = lcounter;
				1319	return (0);
				1320	case XFS_SBS_DBLOCKS:
				1321	lcounter = (long long)mp->m_sb.sb_dblocks;
				1322	lcounter += delta;
				1323	if (lcounter < 0) {
				1324	ASSERT(0);
				1325	return (XFS_ERROR(EINVAL));
				1326	}
				1327	mp->m_sb.sb_dblocks = lcounter;
				1328	return (0);
				1329	case XFS_SBS_AGCOUNT:
				1330	scounter = mp->m_sb.sb_agcount;
				1331	scounter += delta;
				1332	if (scounter < 0) {
				1333	ASSERT(0);
				1334	return (XFS_ERROR(EINVAL));
				1335	}
				1336	mp->m_sb.sb_agcount = scounter;
				1337	return (0);
				1338	case XFS_SBS_IMAX_PCT:
				1339	scounter = mp->m_sb.sb_imax_pct;
				1340	scounter += delta;
				1341	if (scounter < 0) {
				1342	ASSERT(0);
				1343	return (XFS_ERROR(EINVAL));
				1344	}
				1345	mp->m_sb.sb_imax_pct = scounter;
				1346	return (0);
				1347	case XFS_SBS_REXTSIZE:
				1348	scounter = mp->m_sb.sb_rextsize;
				1349	scounter += delta;
				1350	if (scounter < 0) {
				1351	ASSERT(0);
				1352	return (XFS_ERROR(EINVAL));
				1353	}
				1354	mp->m_sb.sb_rextsize = scounter;
				1355	return (0);
				1356	case XFS_SBS_RBMBLOCKS:
				1357	scounter = mp->m_sb.sb_rbmblocks;
				1358	scounter += delta;
				1359	if (scounter < 0) {
				1360	ASSERT(0);
				1361	return (XFS_ERROR(EINVAL));
				1362	}
				1363	mp->m_sb.sb_rbmblocks = scounter;
				1364	return (0);
				1365	case XFS_SBS_RBLOCKS:
				1366	lcounter = (long long)mp->m_sb.sb_rblocks;
				1367	lcounter += delta;
				1368	if (lcounter < 0) {
				1369	ASSERT(0);
				1370	return (XFS_ERROR(EINVAL));
				1371	}
				1372	mp->m_sb.sb_rblocks = lcounter;
				1373	return (0);
				1374	case XFS_SBS_REXTENTS:
				1375	lcounter = (long long)mp->m_sb.sb_rextents;
				1376	lcounter += delta;
				1377	if (lcounter < 0) {
				1378	ASSERT(0);
				1379	return (XFS_ERROR(EINVAL));
				1380	}
				1381	mp->m_sb.sb_rextents = lcounter;
				1382	return (0);
				1383	case XFS_SBS_REXTSLOG:
				1384	scounter = mp->m_sb.sb_rextslog;
				1385	scounter += delta;
				1386	if (scounter < 0) {
				1387	ASSERT(0);
				1388	return (XFS_ERROR(EINVAL));
				1389	}
				1390	mp->m_sb.sb_rextslog = scounter;
				1391	return (0);
				1392	default:
				1393	ASSERT(0);
				1394	return (XFS_ERROR(EINVAL));
				1395	}
				1396	}
				1397
				1398	/*
				1399	* xfs_mod_incore_sb() is used to change a field in the in-core
				1400	* superblock structure by the specified delta. This modification
				1401	* is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked()
				1402	* routine to do the work.
				1403	*/
				1404	int
				1405	xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
				1406	{
				1407	unsigned long s;
				1408	int status;
				1409
				1410	s = XFS_SB_LOCK(mp);
				1411	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
				1412	XFS_SB_UNLOCK(mp, s);
				1413	return (status);
				1414	}
				1415
				1416	/*
				1417	* xfs_mod_incore_sb_batch() is used to change more than one field
				1418	* in the in-core superblock structure at a time. This modification
				1419	* is protected by a lock internal to this module. The fields and
				1420	* changes to those fields are specified in the array of xfs_mod_sb
				1421	* structures passed in.
				1422	*
				1423	* Either all of the specified deltas will be applied or none of
				1424	* them will. If any modified field dips below 0, then all modifications
				1425	* will be backed out and EINVAL will be returned.
				1426	*/
				1427	int
				1428	xfs_mod_incore_sb_batch(xfs_mount_t mp, xfs_mod_sb_t msb, uint nmsb, int rsvd)
				1429	{
				1430	unsigned long s;
				1431	int status=0;
				1432	xfs_mod_sb_t *msbp;
				1433
				1434	/*
				1435	* Loop through the array of mod structures and apply each
				1436	* individually. If any fail, then back out all those
				1437	* which have already been applied. Do all of this within
				1438	* the scope of the SB_LOCK so that all of the changes will
				1439	* be atomic.
				1440	*/
				1441	s = XFS_SB_LOCK(mp);
				1442	msbp = &msb[0];
				1443	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
				1444	/*
				1445	* Apply the delta at index n. If it fails, break
				1446	* from the loop so we'll fall into the undo loop
				1447	* below.
				1448	*/
				1449	status = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
				1450	msbp->msb_delta, rsvd);
				1451	if (status != 0) {
				1452	break;
				1453	}
				1454	}
				1455
				1456	/*
				1457	* If we didn't complete the loop above, then back out
				1458	* any changes made to the superblock. If you add code
				1459	* between the loop above and here, make sure that you
				1460	* preserve the value of status. Loop back until
				1461	* we step below the beginning of the array. Make sure
				1462	* we don't touch anything back there.
				1463	*/
				1464	if (status != 0) {
				1465	msbp--;
				1466	while (msbp >= msb) {
				1467	status = xfs_mod_incore_sb_unlocked(mp,
				1468	msbp->msb_field, -(msbp->msb_delta), rsvd);
				1469	ASSERT(status == 0);
				1470	msbp--;
				1471	}
				1472	}
				1473	XFS_SB_UNLOCK(mp, s);
				1474	return (status);
				1475	}
				1476
				1477	/*
				1478	* xfs_getsb() is called to obtain the buffer for the superblock.
				1479	* The buffer is returned locked and read in from disk.
				1480	* The buffer should be released with a call to xfs_brelse().
				1481	*
				1482	* If the flags parameter is BUF_TRYLOCK, then we'll only return
				1483	* the superblock buffer if it can be locked without sleeping.
				1484	* If it can't then we'll return NULL.
				1485	*/
				1486	xfs_buf_t *
				1487	xfs_getsb(
				1488	xfs_mount_t *mp,
				1489	int flags)
				1490	{
				1491	xfs_buf_t *bp;
				1492
				1493	ASSERT(mp->m_sb_bp != NULL);
				1494	bp = mp->m_sb_bp;
				1495	if (flags & XFS_BUF_TRYLOCK) {
				1496	if (!XFS_BUF_CPSEMA(bp)) {
				1497	return NULL;
				1498	}
				1499	} else {
				1500	XFS_BUF_PSEMA(bp, PRIBIO);
				1501	}
				1502	XFS_BUF_HOLD(bp);
				1503	ASSERT(XFS_BUF_ISDONE(bp));
				1504	return (bp);
				1505	}
				1506
				1507	/*
				1508	* Used to free the superblock along various error paths.
				1509	*/
				1510	void
				1511	xfs_freesb(
				1512	xfs_mount_t *mp)
				1513	{
				1514	xfs_buf_t *bp;
				1515
				1516	/*
				1517	* Use xfs_getsb() so that the buffer will be locked
				1518	* when we call xfs_buf_relse().
				1519	*/
				1520	bp = xfs_getsb(mp, 0);
				1521	XFS_BUF_UNMANAGE(bp);
				1522	xfs_buf_relse(bp);
				1523	mp->m_sb_bp = NULL;
				1524	}
				1525
				1526	/*
				1527	* See if the UUID is unique among mounted XFS filesystems.
				1528	* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
				1529	*/
				1530	STATIC int
				1531	xfs_uuid_mount(
				1532	xfs_mount_t *mp)
				1533	{
				1534	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
				1535	cmn_err(CE_WARN,
				1536	"XFS: Filesystem %s has nil UUID - can't mount",
				1537	mp->m_fsname);
				1538	return -1;
				1539	}
				1540	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
				1541	cmn_err(CE_WARN,
				1542	"XFS: Filesystem %s has duplicate UUID - can't mount",
				1543	mp->m_fsname);
				1544	return -1;
				1545	}
				1546	return 0;
				1547	}
				1548
				1549	/*
				1550	* Remove filesystem from the UUID table.
				1551	*/
				1552	STATIC void
				1553	xfs_uuid_unmount(
				1554	xfs_mount_t *mp)
				1555	{
				1556	uuid_table_remove(&mp->m_sb.sb_uuid);
				1557	}
				1558
				1559	/*
				1560	* Used to log changes to the superblock unit and width fields which could
				1561	* be altered by the mount options. Only the first superblock is updated.
				1562	*/
				1563	STATIC void
				1564	xfs_mount_log_sbunit(
				1565	xfs_mount_t *mp,
				1566	__int64_t fields)
				1567	{
				1568	xfs_trans_t *tp;
				1569
				1570	ASSERT(fields & (XFS_SB_UNIT\|XFS_SB_WIDTH\|XFS_SB_UUID));
				1571
				1572	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
				1573	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				1574	XFS_DEFAULT_LOG_COUNT)) {
				1575	xfs_trans_cancel(tp, 0);
				1576	return;
				1577	}
				1578	xfs_mod_sb(tp, fields);
				1579	xfs_trans_commit(tp, 0, NULL);
				1580	}