Blame - fs/xfs/xfs_mount.c - fp2-dev/kernel/msm

blob: f618f6d6381c7293e318ece36b34ca89538c2def [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
				3	*
				4	* This program is free software; you can redistribute it and/or modify it
				5	* under the terms of version 2 of the GNU General Public License as
				6	* published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it would be useful, but
				9	* WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
				11	*
				12	* Further, this software is distributed without any warranty that it is
				13	* free of the rightful claim of any third person regarding infringement
				14	* or the like. Any license provided herein, whether implied or
				15	* otherwise, applies only to this software file. Patent licenses, if
				16	* any, provided herein do not apply to combinations of this program with
				17	* other software, or any other product whatsoever.
				18	*
				19	* You should have received a copy of the GNU General Public License along
				20	* with this program; if not, write the Free Software Foundation, Inc., 59
				21	* Temple Place - Suite 330, Boston MA 02111-1307, USA.
				22	*
				23	* Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
				24	* Mountain View, CA 94043, or:
				25	*
				26	* http://www.sgi.com
				27	*
				28	* For further information regarding this notice, see:
				29	*
				30	* http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
				31	*/
				32
				33	#include "xfs.h"
				34	#include "xfs_macros.h"
				35	#include "xfs_types.h"
				36	#include "xfs_inum.h"
				37	#include "xfs_log.h"
				38	#include "xfs_trans.h"
				39	#include "xfs_sb.h"
				40	#include "xfs_ag.h"
				41	#include "xfs_dir.h"
				42	#include "xfs_dir2.h"
				43	#include "xfs_dmapi.h"
				44	#include "xfs_mount.h"
				45	#include "xfs_alloc_btree.h"
				46	#include "xfs_bmap_btree.h"
				47	#include "xfs_ialloc_btree.h"
				48	#include "xfs_btree.h"
				49	#include "xfs_ialloc.h"
				50	#include "xfs_attr_sf.h"
				51	#include "xfs_dir_sf.h"
				52	#include "xfs_dir2_sf.h"
				53	#include "xfs_dinode.h"
				54	#include "xfs_inode.h"
				55	#include "xfs_alloc.h"
				56	#include "xfs_rtalloc.h"
				57	#include "xfs_bmap.h"
				58	#include "xfs_error.h"
				59	#include "xfs_bit.h"
				60	#include "xfs_rw.h"
				61	#include "xfs_quota.h"
				62	#include "xfs_fsops.h"
				63
				64	STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
				65	STATIC int xfs_uuid_mount(xfs_mount_t *);
				66	STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame^]	67	STATIC void xfs_unmountfs_wait(xfs_mount_t *);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	68
				69	static struct {
				70	short offset;
				71	short type; /* 0 = integer
				72	* 1 = binary / string (no translation)
				73	*/
				74	} xfs_sb_info[] = {
				75	{ offsetof(xfs_sb_t, sb_magicnum), 0 },
				76	{ offsetof(xfs_sb_t, sb_blocksize), 0 },
				77	{ offsetof(xfs_sb_t, sb_dblocks), 0 },
				78	{ offsetof(xfs_sb_t, sb_rblocks), 0 },
				79	{ offsetof(xfs_sb_t, sb_rextents), 0 },
				80	{ offsetof(xfs_sb_t, sb_uuid), 1 },
				81	{ offsetof(xfs_sb_t, sb_logstart), 0 },
				82	{ offsetof(xfs_sb_t, sb_rootino), 0 },
				83	{ offsetof(xfs_sb_t, sb_rbmino), 0 },
				84	{ offsetof(xfs_sb_t, sb_rsumino), 0 },
				85	{ offsetof(xfs_sb_t, sb_rextsize), 0 },
				86	{ offsetof(xfs_sb_t, sb_agblocks), 0 },
				87	{ offsetof(xfs_sb_t, sb_agcount), 0 },
				88	{ offsetof(xfs_sb_t, sb_rbmblocks), 0 },
				89	{ offsetof(xfs_sb_t, sb_logblocks), 0 },
				90	{ offsetof(xfs_sb_t, sb_versionnum), 0 },
				91	{ offsetof(xfs_sb_t, sb_sectsize), 0 },
				92	{ offsetof(xfs_sb_t, sb_inodesize), 0 },
				93	{ offsetof(xfs_sb_t, sb_inopblock), 0 },
				94	{ offsetof(xfs_sb_t, sb_fname[0]), 1 },
				95	{ offsetof(xfs_sb_t, sb_blocklog), 0 },
				96	{ offsetof(xfs_sb_t, sb_sectlog), 0 },
				97	{ offsetof(xfs_sb_t, sb_inodelog), 0 },
				98	{ offsetof(xfs_sb_t, sb_inopblog), 0 },
				99	{ offsetof(xfs_sb_t, sb_agblklog), 0 },
				100	{ offsetof(xfs_sb_t, sb_rextslog), 0 },
				101	{ offsetof(xfs_sb_t, sb_inprogress), 0 },
				102	{ offsetof(xfs_sb_t, sb_imax_pct), 0 },
				103	{ offsetof(xfs_sb_t, sb_icount), 0 },
				104	{ offsetof(xfs_sb_t, sb_ifree), 0 },
				105	{ offsetof(xfs_sb_t, sb_fdblocks), 0 },
				106	{ offsetof(xfs_sb_t, sb_frextents), 0 },
				107	{ offsetof(xfs_sb_t, sb_uquotino), 0 },
				108	{ offsetof(xfs_sb_t, sb_gquotino), 0 },
				109	{ offsetof(xfs_sb_t, sb_qflags), 0 },
				110	{ offsetof(xfs_sb_t, sb_flags), 0 },
				111	{ offsetof(xfs_sb_t, sb_shared_vn), 0 },
				112	{ offsetof(xfs_sb_t, sb_inoalignmt), 0 },
				113	{ offsetof(xfs_sb_t, sb_unit), 0 },
				114	{ offsetof(xfs_sb_t, sb_width), 0 },
				115	{ offsetof(xfs_sb_t, sb_dirblklog), 0 },
				116	{ offsetof(xfs_sb_t, sb_logsectlog), 0 },
				117	{ offsetof(xfs_sb_t, sb_logsectsize),0 },
				118	{ offsetof(xfs_sb_t, sb_logsunit), 0 },
				119	{ offsetof(xfs_sb_t, sb_features2), 0 },
				120	{ sizeof(xfs_sb_t), 0 }
				121	};
				122
				123	/*
				124	* Return a pointer to an initialized xfs_mount structure.
				125	*/
				126	xfs_mount_t *
				127	xfs_mount_init(void)
				128	{
				129	xfs_mount_t *mp;
				130
				131	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
				132
				133	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
				134	spinlock_init(&mp->m_sb_lock, "xfs_sb");
				135	mutex_init(&mp->m_ilock, MUTEX_DEFAULT, "xfs_ilock");
				136	initnsema(&mp->m_growlock, 1, "xfs_grow");
				137	/*
				138	* Initialize the AIL.
				139	*/
				140	xfs_trans_ail_init(mp);
				141
				142	atomic_set(&mp->m_active_trans, 0);
				143
				144	return mp;
				145	}
				146
				147	/*
				148	* Free up the resources associated with a mount structure. Assume that
				149	* the structure was initially zeroed, so we can tell which fields got
				150	* initialized.
				151	*/
				152	void
				153	xfs_mount_free(
				154	xfs_mount_t *mp,
				155	int remove_bhv)
				156	{
				157	if (mp->m_ihash)
				158	xfs_ihash_free(mp);
				159	if (mp->m_chash)
				160	xfs_chash_free(mp);
				161
				162	if (mp->m_perag) {
				163	int agno;
				164
				165	for (agno = 0; agno < mp->m_maxagi; agno++)
				166	if (mp->m_perag[agno].pagb_list)
				167	kmem_free(mp->m_perag[agno].pagb_list,
				168	sizeof(xfs_perag_busy_t) *
				169	XFS_PAGB_NUM_SLOTS);
				170	kmem_free(mp->m_perag,
				171	sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
				172	}
				173
				174	AIL_LOCK_DESTROY(&mp->m_ail_lock);
				175	spinlock_destroy(&mp->m_sb_lock);
				176	mutex_destroy(&mp->m_ilock);
				177	freesema(&mp->m_growlock);
				178	if (mp->m_quotainfo)
				179	XFS_QM_DONE(mp);
				180
				181	if (mp->m_fsname != NULL)
				182	kmem_free(mp->m_fsname, mp->m_fsname_len);
				183
				184	if (remove_bhv) {
				185	struct vfs *vfsp = XFS_MTOVFS(mp);
				186
				187	bhv_remove_all_vfsops(vfsp, 0);
				188	VFS_REMOVEBHV(vfsp, &mp->m_bhv);
				189	}
				190
				191	kmem_free(mp, sizeof(xfs_mount_t));
				192	}
				193
				194
				195	/*
				196	* Check the validity of the SB found.
				197	*/
				198	STATIC int
				199	xfs_mount_validate_sb(
				200	xfs_mount_t *mp,
				201	xfs_sb_t *sbp)
				202	{
				203	/*
				204	* If the log device and data device have the
				205	* same device number, the log is internal.
				206	* Consequently, the sb_logstart should be non-zero. If
				207	* we have a zero sb_logstart in this case, we may be trying to mount
				208	* a volume filesystem in a non-volume manner.
				209	*/
				210	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
				211	cmn_err(CE_WARN, "XFS: bad magic number");
				212	return XFS_ERROR(EWRONGFS);
				213	}
				214
				215	if (!XFS_SB_GOOD_VERSION(sbp)) {
				216	cmn_err(CE_WARN, "XFS: bad version");
				217	return XFS_ERROR(EWRONGFS);
				218	}
				219
				220	if (unlikely(
				221	sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
				222	cmn_err(CE_WARN,
				223	"XFS: filesystem is marked as having an external log; "
				224	"specify logdev on the\nmount command line.");
				225	XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(1)",
				226	XFS_ERRLEVEL_HIGH, mp, sbp);
				227	return XFS_ERROR(EFSCORRUPTED);
				228	}
				229
				230	if (unlikely(
				231	sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
				232	cmn_err(CE_WARN,
				233	"XFS: filesystem is marked as having an internal log; "
				234	"don't specify logdev on\nthe mount command line.");
				235	XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(2)",
				236	XFS_ERRLEVEL_HIGH, mp, sbp);
				237	return XFS_ERROR(EFSCORRUPTED);
				238	}
				239
				240	/*
				241	* More sanity checking. These were stolen directly from
				242	* xfs_repair.
				243	*/
				244	if (unlikely(
				245	sbp->sb_agcount <= 0 \|\|
				246	sbp->sb_sectsize < XFS_MIN_SECTORSIZE \|\|
				247	sbp->sb_sectsize > XFS_MAX_SECTORSIZE \|\|
				248	sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG \|\|
				249	sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG \|\|
				250	sbp->sb_blocksize < XFS_MIN_BLOCKSIZE \|\|
				251	sbp->sb_blocksize > XFS_MAX_BLOCKSIZE \|\|
				252	sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG \|\|
				253	sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG \|\|
				254	sbp->sb_inodesize < XFS_DINODE_MIN_SIZE \|\|
				255	sbp->sb_inodesize > XFS_DINODE_MAX_SIZE \|\|
				256	(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) \|\|
				257	(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) \|\|
				258	sbp->sb_imax_pct > 100)) {
				259	cmn_err(CE_WARN, "XFS: SB sanity check 1 failed");
				260	XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)",
				261	XFS_ERRLEVEL_LOW, mp, sbp);
				262	return XFS_ERROR(EFSCORRUPTED);
				263	}
				264
				265	/*
				266	* Sanity check AG count, size fields against data size field
				267	*/
				268	if (unlikely(
				269	sbp->sb_dblocks == 0 \|\|
				270	sbp->sb_dblocks >
				271	(xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks \|\|
				272	sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
				273	sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
				274	cmn_err(CE_WARN, "XFS: SB sanity check 2 failed");
				275	XFS_ERROR_REPORT("xfs_mount_validate_sb(4)",
				276	XFS_ERRLEVEL_LOW, mp);
				277	return XFS_ERROR(EFSCORRUPTED);
				278	}
				279
				280	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
				281	ASSERT(sbp->sb_blocklog >= BBSHIFT);
				282
				283	#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
				284	if (unlikely(
				285	(sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX \|\|
				286	(sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
				287	#else /* Limited by UINT_MAX of sectors */
				288	if (unlikely(
				289	(sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX \|\|
				290	(sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
				291	#endif
				292	cmn_err(CE_WARN,
				293	"XFS: File system is too large to be mounted on this system.");
				294	return XFS_ERROR(E2BIG);
				295	}
				296
				297	if (unlikely(sbp->sb_inprogress)) {
				298	cmn_err(CE_WARN, "XFS: file system busy");
				299	XFS_ERROR_REPORT("xfs_mount_validate_sb(5)",
				300	XFS_ERRLEVEL_LOW, mp);
				301	return XFS_ERROR(EFSCORRUPTED);
				302	}
				303
				304	/*
Nathan Scott	de20614	2005-05-05 13:24:13 -0700	[diff] [blame]	305	* Version 1 directory format has never worked on Linux.
				306	*/
				307	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
				308	cmn_err(CE_WARN,
				309	"XFS: Attempted to mount file system using version 1 directory format");
				310	return XFS_ERROR(ENOSYS);
				311	}
				312
				313	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	314	* Until this is fixed only page-sized or smaller data blocks work.
				315	*/
				316	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
				317	cmn_err(CE_WARN,
				318	"XFS: Attempted to mount file system with blocksize %d bytes",
				319	sbp->sb_blocksize);
				320	cmn_err(CE_WARN,
				321	"XFS: Only page-sized (%d) or less blocksizes currently work.",
				322	PAGE_SIZE);
				323	return XFS_ERROR(ENOSYS);
				324	}
				325
				326	return 0;
				327	}
				328
				329	xfs_agnumber_t
				330	xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount)
				331	{
				332	xfs_agnumber_t index, max_metadata;
				333	xfs_perag_t *pag;
				334	xfs_agino_t agino;
				335	xfs_ino_t ino;
				336	xfs_sb_t *sbp = &mp->m_sb;
				337	xfs_ino_t max_inum = XFS_MAXINUMBER_32;
				338
				339	/* Check to see if the filesystem can overflow 32 bit inodes */
				340	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
				341	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
				342
				343	/* Clear the mount flag if no inode can overflow 32 bits
				344	* on this filesystem, or if specifically requested..
				345	*/
				346	if ((mp->m_flags & XFS_MOUNT_32BITINOOPT) && ino > max_inum) {
				347	mp->m_flags \|= XFS_MOUNT_32BITINODES;
				348	} else {
				349	mp->m_flags &= ~XFS_MOUNT_32BITINODES;
				350	}
				351
				352	/* If we can overflow then setup the ag headers accordingly */
				353	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
				354	/* Calculate how much should be reserved for inodes to
				355	* meet the max inode percentage.
				356	*/
				357	if (mp->m_maxicount) {
				358	__uint64_t icount;
				359
				360	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				361	do_div(icount, 100);
				362	icount += sbp->sb_agblocks - 1;
				363	do_div(icount, mp->m_ialloc_blks);
				364	max_metadata = icount;
				365	} else {
				366	max_metadata = agcount;
				367	}
				368	for (index = 0; index < agcount; index++) {
				369	ino = XFS_AGINO_TO_INO(mp, index, agino);
				370	if (ino > max_inum) {
				371	index++;
				372	break;
				373	}
				374
				375	/* This ag is prefered for inodes */
				376	pag = &mp->m_perag[index];
				377	pag->pagi_inodeok = 1;
				378	if (index < max_metadata)
				379	pag->pagf_metadata = 1;
				380	}
				381	} else {
				382	/* Setup default behavior for smaller filesystems */
				383	for (index = 0; index < agcount; index++) {
				384	pag = &mp->m_perag[index];
				385	pag->pagi_inodeok = 1;
				386	}
				387	}
				388	return index;
				389	}
				390
				391	/*
				392	* xfs_xlatesb
				393	*
				394	* data - on disk version of sb
				395	* sb - a superblock
				396	* dir - conversion direction: <0 - convert sb to buf
				397	* >0 - convert buf to sb
				398	* fields - which fields to copy (bitmask)
				399	*/
				400	void
				401	xfs_xlatesb(
				402	void *data,
				403	xfs_sb_t *sb,
				404	int dir,
				405	__int64_t fields)
				406	{
				407	xfs_caddr_t buf_ptr;
				408	xfs_caddr_t mem_ptr;
				409	xfs_sb_field_t f;
				410	int first;
				411	int size;
				412
				413	ASSERT(dir);
				414	ASSERT(fields);
				415
				416	if (!fields)
				417	return;
				418
				419	buf_ptr = (xfs_caddr_t)data;
				420	mem_ptr = (xfs_caddr_t)sb;
				421
				422	while (fields) {
				423	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				424	first = xfs_sb_info[f].offset;
				425	size = xfs_sb_info[f + 1].offset - first;
				426
				427	ASSERT(xfs_sb_info[f].type == 0 \|\| xfs_sb_info[f].type == 1);
				428
				429	if (size == 1 \|\| xfs_sb_info[f].type == 1) {
				430	if (dir > 0) {
				431	memcpy(mem_ptr + first, buf_ptr + first, size);
				432	} else {
				433	memcpy(buf_ptr + first, mem_ptr + first, size);
				434	}
				435	} else {
				436	switch (size) {
				437	case 2:
				438	INT_XLATE((__uint16_t)(buf_ptr+first),
				439	(__uint16_t)(mem_ptr+first),
				440	dir, ARCH_CONVERT);
				441	break;
				442	case 4:
				443	INT_XLATE((__uint32_t)(buf_ptr+first),
				444	(__uint32_t)(mem_ptr+first),
				445	dir, ARCH_CONVERT);
				446	break;
				447	case 8:
				448	INT_XLATE((__uint64_t)(buf_ptr+first),
				449	(__uint64_t)(mem_ptr+first), dir, ARCH_CONVERT);
				450	break;
				451	default:
				452	ASSERT(0);
				453	}
				454	}
				455
				456	fields &= ~(1LL << f);
				457	}
				458	}
				459
				460	/*
				461	* xfs_readsb
				462	*
				463	* Does the initial read of the superblock.
				464	*/
				465	int
				466	xfs_readsb(xfs_mount_t *mp)
				467	{
				468	unsigned int sector_size;
				469	unsigned int extra_flags;
				470	xfs_buf_t *bp;
				471	xfs_sb_t *sbp;
				472	int error;
				473
				474	ASSERT(mp->m_sb_bp == NULL);
				475	ASSERT(mp->m_ddev_targp != NULL);
				476
				477	/*
				478	* Allocate a (locked) buffer to hold the superblock.
				479	* This will be kept around at all times to optimize
				480	* access to the superblock.
				481	*/
				482	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
				483	extra_flags = XFS_BUF_LOCK \| XFS_BUF_MANAGE \| XFS_BUF_MAPPED;
				484
				485	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				486	BTOBB(sector_size), extra_flags);
				487	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
				488	cmn_err(CE_WARN, "XFS: SB read failed");
				489	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				490	goto fail;
				491	}
				492	ASSERT(XFS_BUF_ISBUSY(bp));
				493	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				494
				495	/*
				496	* Initialize the mount structure from the superblock.
				497	* But first do some basic consistency checking.
				498	*/
				499	sbp = XFS_BUF_TO_SBP(bp);
				500	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
				501
				502	error = xfs_mount_validate_sb(mp, &(mp->m_sb));
				503	if (error) {
				504	cmn_err(CE_WARN, "XFS: SB validate failed");
				505	goto fail;
				506	}
				507
				508	/*
				509	* We must be able to do sector-sized and sector-aligned IO.
				510	*/
				511	if (sector_size > mp->m_sb.sb_sectsize) {
				512	cmn_err(CE_WARN,
				513	"XFS: device supports only %u byte sectors (not %u)",
				514	sector_size, mp->m_sb.sb_sectsize);
				515	error = ENOSYS;
				516	goto fail;
				517	}
				518
				519	/*
				520	* If device sector size is smaller than the superblock size,
				521	* re-read the superblock so the buffer is correctly sized.
				522	*/
				523	if (sector_size < mp->m_sb.sb_sectsize) {
				524	XFS_BUF_UNMANAGE(bp);
				525	xfs_buf_relse(bp);
				526	sector_size = mp->m_sb.sb_sectsize;
				527	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				528	BTOBB(sector_size), extra_flags);
				529	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
				530	cmn_err(CE_WARN, "XFS: SB re-read failed");
				531	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				532	goto fail;
				533	}
				534	ASSERT(XFS_BUF_ISBUSY(bp));
				535	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				536	}
				537
				538	mp->m_sb_bp = bp;
				539	xfs_buf_relse(bp);
				540	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
				541	return 0;
				542
				543	fail:
				544	if (bp) {
				545	XFS_BUF_UNMANAGE(bp);
				546	xfs_buf_relse(bp);
				547	}
				548	return error;
				549	}
				550
				551
				552	/*
				553	* xfs_mount_common
				554	*
				555	* Mount initialization code establishing various mount
				556	* fields from the superblock associated with the given
				557	* mount structure
				558	*/
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame^]	559	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	560	xfs_mount_common(xfs_mount_t mp, xfs_sb_t sbp)
				561	{
				562	int i;
				563
				564	mp->m_agfrotor = mp->m_agirotor = 0;
				565	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
				566	mp->m_maxagi = mp->m_sb.sb_agcount;
				567	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
				568	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
				569	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
				570	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
				571	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
				572	mp->m_litino = sbp->sb_inodesize -
				573	((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
				574	mp->m_blockmask = sbp->sb_blocksize - 1;
				575	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
				576	mp->m_blockwmask = mp->m_blockwsize - 1;
				577	INIT_LIST_HEAD(&mp->m_del_inodes);
				578
				579	/*
				580	* Setup for attributes, in case they get created.
				581	* This value is for inodes getting attributes for the first time,
				582	* the per-inode value is for old attribute values.
				583	*/
				584	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
				585	switch (sbp->sb_inodesize) {
				586	case 256:
				587	mp->m_attroffset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(2);
				588	break;
				589	case 512:
				590	case 1024:
				591	case 2048:
				592	mp->m_attroffset = XFS_BMDR_SPACE_CALC(12);
				593	break;
				594	default:
				595	ASSERT(0);
				596	}
				597	ASSERT(mp->m_attroffset < XFS_LITINO(mp));
				598
				599	for (i = 0; i < 2; i++) {
				600	mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				601	xfs_alloc, i == 0);
				602	mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				603	xfs_alloc, i == 0);
				604	}
				605	for (i = 0; i < 2; i++) {
				606	mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				607	xfs_bmbt, i == 0);
				608	mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				609	xfs_bmbt, i == 0);
				610	}
				611	for (i = 0; i < 2; i++) {
				612	mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				613	xfs_inobt, i == 0);
				614	mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				615	xfs_inobt, i == 0);
				616	}
				617
				618	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
				619	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
				620	sbp->sb_inopblock);
				621	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
				622	}
				623	/*
				624	* xfs_mountfs
				625	*
				626	* This function does the following on an initial mount of a file system:
				627	* - reads the superblock from disk and init the mount struct
				628	* - if we're a 32-bit kernel, do a size check on the superblock
				629	* so we don't mount terabyte filesystems
				630	* - init mount struct realtime fields
				631	* - allocate inode hash table for fs
				632	* - init directory manager
				633	* - perform recovery and init the log manager
				634	*/
				635	int
				636	xfs_mountfs(
				637	vfs_t *vfsp,
				638	xfs_mount_t *mp,
				639	int mfsi_flags)
				640	{
				641	xfs_buf_t *bp;
				642	xfs_sb_t *sbp = &(mp->m_sb);
				643	xfs_inode_t *rip;
				644	vnode_t *rvp = NULL;
				645	int readio_log, writeio_log;
				646	xfs_daddr_t d;
				647	__uint64_t ret64;
				648	__int64_t update_flags;
				649	uint quotamount, quotaflags;
				650	int agno;
				651	int uuid_mounted = 0;
				652	int error = 0;
				653
				654	if (mp->m_sb_bp == NULL) {
				655	if ((error = xfs_readsb(mp))) {
				656	return (error);
				657	}
				658	}
				659	xfs_mount_common(mp, sbp);
				660
				661	/*
				662	* Check if sb_agblocks is aligned at stripe boundary
				663	* If sb_agblocks is NOT aligned turn off m_dalign since
				664	* allocator alignment is within an ag, therefore ag has
				665	* to be aligned at stripe boundary.
				666	*/
				667	update_flags = 0LL;
				668	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
				669	/*
				670	* If stripe unit and stripe width are not multiples
				671	* of the fs blocksize turn off alignment.
				672	*/
				673	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) \|\|
				674	(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
				675	if (mp->m_flags & XFS_MOUNT_RETERR) {
				676	cmn_err(CE_WARN,
				677	"XFS: alignment check 1 failed");
				678	error = XFS_ERROR(EINVAL);
				679	goto error1;
				680	}
				681	mp->m_dalign = mp->m_swidth = 0;
				682	} else {
				683	/*
				684	* Convert the stripe unit and width to FSBs.
				685	*/
				686	mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
				687	if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				688	if (mp->m_flags & XFS_MOUNT_RETERR) {
				689	error = XFS_ERROR(EINVAL);
				690	goto error1;
				691	}
				692	xfs_fs_cmn_err(CE_WARN, mp,
				693	"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
				694	mp->m_dalign, mp->m_swidth,
				695	sbp->sb_agblocks);
				696
				697	mp->m_dalign = 0;
				698	mp->m_swidth = 0;
				699	} else if (mp->m_dalign) {
				700	mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
				701	} else {
				702	if (mp->m_flags & XFS_MOUNT_RETERR) {
				703	xfs_fs_cmn_err(CE_WARN, mp,
				704	"stripe alignment turned off: sunit(%d) less than bsize(%d)",
				705	mp->m_dalign,
				706	mp->m_blockmask +1);
				707	error = XFS_ERROR(EINVAL);
				708	goto error1;
				709	}
				710	mp->m_swidth = 0;
				711	}
				712	}
				713
				714	/*
				715	* Update superblock with new values
				716	* and log changes
				717	*/
				718	if (XFS_SB_VERSION_HASDALIGN(sbp)) {
				719	if (sbp->sb_unit != mp->m_dalign) {
				720	sbp->sb_unit = mp->m_dalign;
				721	update_flags \|= XFS_SB_UNIT;
				722	}
				723	if (sbp->sb_width != mp->m_swidth) {
				724	sbp->sb_width = mp->m_swidth;
				725	update_flags \|= XFS_SB_WIDTH;
				726	}
				727	}
				728	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
				729	XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
				730	mp->m_dalign = sbp->sb_unit;
				731	mp->m_swidth = sbp->sb_width;
				732	}
				733
				734	xfs_alloc_compute_maxlevels(mp);
				735	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
				736	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
				737	xfs_ialloc_compute_maxlevels(mp);
				738
				739	if (sbp->sb_imax_pct) {
				740	__uint64_t icount;
				741
				742	/* Make sure the maximum inode count is a multiple of the
				743	* units we allocate inodes in.
				744	*/
				745
				746	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				747	do_div(icount, 100);
				748	do_div(icount, mp->m_ialloc_blks);
				749	mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
				750	sbp->sb_inopblog;
				751	} else
				752	mp->m_maxicount = 0;
				753
				754	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
				755
				756	/*
				757	* XFS uses the uuid from the superblock as the unique
				758	* identifier for fsid. We can not use the uuid from the volume
				759	* since a single partition filesystem is identical to a single
				760	* partition volume/filesystem.
				761	*/
				762	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
				763	(mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
				764	if (xfs_uuid_mount(mp)) {
				765	error = XFS_ERROR(EINVAL);
				766	goto error1;
				767	}
				768	uuid_mounted=1;
				769	ret64 = uuid_hash64(&sbp->sb_uuid);
				770	memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
				771	}
				772
				773	/*
				774	* Set the default minimum read and write sizes unless
				775	* already specified in a mount option.
				776	* We use smaller I/O sizes when the file system
				777	* is being used for NFS service (wsync mount option).
				778	*/
				779	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
				780	if (mp->m_flags & XFS_MOUNT_WSYNC) {
				781	readio_log = XFS_WSYNC_READIO_LOG;
				782	writeio_log = XFS_WSYNC_WRITEIO_LOG;
				783	} else {
				784	readio_log = XFS_READIO_LOG_LARGE;
				785	writeio_log = XFS_WRITEIO_LOG_LARGE;
				786	}
				787	} else {
				788	readio_log = mp->m_readio_log;
				789	writeio_log = mp->m_writeio_log;
				790	}
				791
				792	/*
				793	* Set the number of readahead buffers to use based on
				794	* physical memory size.
				795	*/
				796	if (xfs_physmem <= 4096) /* <= 16MB */
				797	mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB;
				798	else if (xfs_physmem <= 8192) /* <= 32MB */
				799	mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB;
				800	else
				801	mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32;
				802	if (sbp->sb_blocklog > readio_log) {
				803	mp->m_readio_log = sbp->sb_blocklog;
				804	} else {
				805	mp->m_readio_log = readio_log;
				806	}
				807	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
				808	if (sbp->sb_blocklog > writeio_log) {
				809	mp->m_writeio_log = sbp->sb_blocklog;
				810	} else {
				811	mp->m_writeio_log = writeio_log;
				812	}
				813	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
				814
				815	/*
				816	* Set the inode cluster size based on the physical memory
				817	* size. This may still be overridden by the file system
				818	* block size if it is larger than the chosen cluster size.
				819	*/
				820	if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */
				821	mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE;
				822	} else {
				823	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
				824	}
				825	/*
				826	* Set whether we're using inode alignment.
				827	*/
				828	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
				829	mp->m_sb.sb_inoalignmt >=
				830	XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
				831	mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
				832	else
				833	mp->m_inoalign_mask = 0;
				834	/*
				835	* If we are using stripe alignment, check whether
				836	* the stripe unit is a multiple of the inode alignment
				837	*/
				838	if (mp->m_dalign && mp->m_inoalign_mask &&
				839	!(mp->m_dalign & mp->m_inoalign_mask))
				840	mp->m_sinoalign = mp->m_dalign;
				841	else
				842	mp->m_sinoalign = 0;
				843	/*
				844	* Check that the data (and log if separate) are an ok size.
				845	*/
				846	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
				847	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
				848	cmn_err(CE_WARN, "XFS: size check 1 failed");
				849	error = XFS_ERROR(E2BIG);
				850	goto error1;
				851	}
				852	error = xfs_read_buf(mp, mp->m_ddev_targp,
				853	d - XFS_FSS_TO_BB(mp, 1),
				854	XFS_FSS_TO_BB(mp, 1), 0, &bp);
				855	if (!error) {
				856	xfs_buf_relse(bp);
				857	} else {
				858	cmn_err(CE_WARN, "XFS: size check 2 failed");
				859	if (error == ENOSPC) {
				860	error = XFS_ERROR(E2BIG);
				861	}
				862	goto error1;
				863	}
				864
				865	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
				866	mp->m_logdev_targp != mp->m_ddev_targp) {
				867	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
				868	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
				869	cmn_err(CE_WARN, "XFS: size check 3 failed");
				870	error = XFS_ERROR(E2BIG);
				871	goto error1;
				872	}
				873	error = xfs_read_buf(mp, mp->m_logdev_targp,
				874	d - XFS_FSB_TO_BB(mp, 1),
				875	XFS_FSB_TO_BB(mp, 1), 0, &bp);
				876	if (!error) {
				877	xfs_buf_relse(bp);
				878	} else {
				879	cmn_err(CE_WARN, "XFS: size check 3 failed");
				880	if (error == ENOSPC) {
				881	error = XFS_ERROR(E2BIG);
				882	}
				883	goto error1;
				884	}
				885	}
				886
				887	/*
				888	* Initialize realtime fields in the mount structure
				889	*/
				890	if ((error = xfs_rtmount_init(mp))) {
				891	cmn_err(CE_WARN, "XFS: RT mount failed");
				892	goto error1;
				893	}
				894
				895	/*
				896	* For client case we are done now
				897	*/
				898	if (mfsi_flags & XFS_MFSI_CLIENT) {
				899	return(0);
				900	}
				901
				902	/*
				903	* Copies the low order bits of the timestamp and the randomly
				904	* set "sequence" number out of a UUID.
				905	*/
				906	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
				907
				908	/*
				909	* The vfs structure needs to have a file system independent
				910	* way of checking for the invariant file system ID. Since it
				911	* can't look at mount structures it has a pointer to the data
				912	* in the mount structure.
				913	*
				914	* File systems that don't support user level file handles (i.e.
				915	* all of them except for XFS) will leave vfs_altfsid as NULL.
				916	*/
				917	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
				918	mp->m_dmevmask = 0; /* not persistent; set after each mount */
				919
				920	/*
				921	* Select the right directory manager.
				922	*/
				923	mp->m_dirops =
				924	XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
				925	xfsv2_dirops :
				926	xfsv1_dirops;
				927
				928	/*
				929	* Initialize directory manager's entries.
				930	*/
				931	XFS_DIR_MOUNT(mp);
				932
				933	/*
				934	* Initialize the attribute manager's entries.
				935	*/
				936	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;
				937
				938	/*
				939	* Initialize the precomputed transaction reservations values.
				940	*/
				941	xfs_trans_init(mp);
				942
				943	/*
				944	* Allocate and initialize the inode hash table for this
				945	* file system.
				946	*/
				947	xfs_ihash_init(mp);
				948	xfs_chash_init(mp);
				949
				950	/*
				951	* Allocate and initialize the per-ag data.
				952	*/
				953	init_rwsem(&mp->m_peraglock);
				954	mp->m_perag =
				955	kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);
				956
				957	mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
				958
				959	/*
				960	* log's mount-time initialization. Perform 1st part recovery if needed
				961	*/
				962	if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */
				963	error = xfs_log_mount(mp, mp->m_logdev_targp,
				964	XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				965	XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
				966	if (error) {
				967	cmn_err(CE_WARN, "XFS: log mount failed");
				968	goto error2;
				969	}
				970	} else { /* No log has been defined */
				971	cmn_err(CE_WARN, "XFS: no log defined");
				972	XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
				973	error = XFS_ERROR(EFSCORRUPTED);
				974	goto error2;
				975	}
				976
				977	/*
				978	* Get and sanity-check the root inode.
				979	* Save the pointer to it in the mount structure.
				980	*/
				981	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
				982	if (error) {
				983	cmn_err(CE_WARN, "XFS: failed to read root inode");
				984	goto error3;
				985	}
				986
				987	ASSERT(rip != NULL);
				988	rvp = XFS_ITOV(rip);
				989
				990	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
				991	cmn_err(CE_WARN, "XFS: corrupted root inode");
				992	prdev("Root inode %llu is not a directory",
				993	mp->m_ddev_targp, (unsigned long long)rip->i_ino);
				994	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				995	XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				996	mp);
				997	error = XFS_ERROR(EFSCORRUPTED);
				998	goto error4;
				999	}
				1000	mp->m_rootip = rip; /* save it */
				1001
				1002	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				1003
				1004	/*
				1005	* Initialize realtime inode pointers in the mount structure
				1006	*/
				1007	if ((error = xfs_rtmount_inodes(mp))) {
				1008	/*
				1009	* Free up the root inode.
				1010	*/
				1011	cmn_err(CE_WARN, "XFS: failed to read RT inodes");
				1012	goto error4;
				1013	}
				1014
				1015	/*
				1016	* If fs is not mounted readonly, then update the superblock
				1017	* unit and width changes.
				1018	*/
				1019	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
				1020	xfs_mount_log_sbunit(mp, update_flags);
				1021
				1022	/*
				1023	* Initialise the XFS quota management subsystem for this mount
				1024	*/
				1025	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
				1026	goto error4;
				1027
				1028	/*
				1029	* Finish recovering the file system. This part needed to be
				1030	* delayed until after the root and real-time bitmap inodes
				1031	* were consistently read in.
				1032	*/
				1033	error = xfs_log_mount_finish(mp, mfsi_flags);
				1034	if (error) {
				1035	cmn_err(CE_WARN, "XFS: log mount finish failed");
				1036	goto error4;
				1037	}
				1038
				1039	/*
				1040	* Complete the quota initialisation, post-log-replay component.
				1041	*/
				1042	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
				1043	goto error4;
				1044
				1045	return 0;
				1046
				1047	error4:
				1048	/*
				1049	* Free up the root inode.
				1050	*/
				1051	VN_RELE(rvp);
				1052	error3:
				1053	xfs_log_unmount_dealloc(mp);
				1054	error2:
				1055	xfs_ihash_free(mp);
				1056	xfs_chash_free(mp);
				1057	for (agno = 0; agno < sbp->sb_agcount; agno++)
				1058	if (mp->m_perag[agno].pagb_list)
				1059	kmem_free(mp->m_perag[agno].pagb_list,
				1060	sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
				1061	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
				1062	mp->m_perag = NULL;
				1063	/* FALLTHROUGH */
				1064	error1:
				1065	if (uuid_mounted)
				1066	xfs_uuid_unmount(mp);
				1067	xfs_freesb(mp);
				1068	return error;
				1069	}
				1070
				1071	/*
				1072	* xfs_unmountfs
				1073	*
				1074	* This flushes out the inodes,dquots and the superblock, unmounts the
				1075	* log and makes sure that incore structures are freed.
				1076	*/
				1077	int
				1078	xfs_unmountfs(xfs_mount_t mp, struct cred cr)
				1079	{
				1080	struct vfs *vfsp = XFS_MTOVFS(mp);
				1081	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1082	int64_t fsid;
				1083	#endif
				1084
				1085	xfs_iflush_all(mp, XFS_FLUSH_ALL);
				1086
				1087	XFS_QM_DQPURGEALL(mp,
				1088	XFS_QMOPT_UQUOTA \| XFS_QMOPT_GQUOTA \| XFS_QMOPT_UMOUNTING);
				1089
				1090	/*
				1091	* Flush out the log synchronously so that we know for sure
				1092	* that nothing is pinned. This is important because bflush()
				1093	* will skip pinned buffers.
				1094	*/
				1095	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE \| XFS_LOG_SYNC);
				1096
				1097	xfs_binval(mp->m_ddev_targp);
				1098	if (mp->m_rtdev_targp) {
				1099	xfs_binval(mp->m_rtdev_targp);
				1100	}
				1101
				1102	xfs_unmountfs_writesb(mp);
				1103
				1104	xfs_unmountfs_wait(mp); /* wait for async bufs */
				1105
				1106	xfs_log_unmount(mp); /* Done! No more fs ops. */
				1107
				1108	xfs_freesb(mp);
				1109
				1110	/*
				1111	* All inodes from this mount point should be freed.
				1112	*/
				1113	ASSERT(mp->m_inodes == NULL);
				1114
				1115	/*
				1116	* We may have bufs that are in the process of getting written still.
				1117	* We must wait for the I/O completion of those. The sync flag here
				1118	* does a two pass iteration thru the bufcache.
				1119	*/
				1120	if (XFS_FORCED_SHUTDOWN(mp)) {
				1121	xfs_incore_relse(mp->m_ddev_targp, 0, 1); /* synchronous */
				1122	}
				1123
				1124	xfs_unmountfs_close(mp, cr);
				1125	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
				1126	xfs_uuid_unmount(mp);
				1127
				1128	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1129	/*
				1130	* clear all error tags on this filesystem
				1131	*/
				1132	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
				1133	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
				1134	#endif
				1135	XFS_IODONE(vfsp);
				1136	xfs_mount_free(mp, 1);
				1137	return 0;
				1138	}
				1139
				1140	void
				1141	xfs_unmountfs_close(xfs_mount_t mp, struct cred cr)
				1142	{
				1143	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1144	xfs_free_buftarg(mp->m_logdev_targp, 1);
				1145	if (mp->m_rtdev_targp)
				1146	xfs_free_buftarg(mp->m_rtdev_targp, 1);
				1147	xfs_free_buftarg(mp->m_ddev_targp, 0);
				1148	}
				1149
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame^]	1150	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1151	xfs_unmountfs_wait(xfs_mount_t *mp)
				1152	{
				1153	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1154	xfs_wait_buftarg(mp->m_logdev_targp);
				1155	if (mp->m_rtdev_targp)
				1156	xfs_wait_buftarg(mp->m_rtdev_targp);
				1157	xfs_wait_buftarg(mp->m_ddev_targp);
				1158	}
				1159
				1160	int
				1161	xfs_unmountfs_writesb(xfs_mount_t *mp)
				1162	{
				1163	xfs_buf_t *sbp;
				1164	xfs_sb_t *sb;
				1165	int error = 0;
				1166
				1167	/*
				1168	* skip superblock write if fs is read-only, or
				1169	* if we are doing a forced umount.
				1170	*/
				1171	sbp = xfs_getsb(mp, 0);
				1172	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY \|\|
				1173	XFS_FORCED_SHUTDOWN(mp))) {
				1174	/*
				1175	* mark shared-readonly if desired
				1176	*/
				1177	sb = XFS_BUF_TO_SBP(sbp);
				1178	if (mp->m_mk_sharedro) {
				1179	if (!(sb->sb_flags & XFS_SBF_READONLY))
				1180	sb->sb_flags \|= XFS_SBF_READONLY;
				1181	if (!XFS_SB_VERSION_HASSHARED(sb))
				1182	XFS_SB_VERSION_ADDSHARED(sb);
				1183	xfs_fs_cmn_err(CE_NOTE, mp,
				1184	"Unmounting, marking shared read-only");
				1185	}
				1186	XFS_BUF_UNDONE(sbp);
				1187	XFS_BUF_UNREAD(sbp);
				1188	XFS_BUF_UNDELAYWRITE(sbp);
				1189	XFS_BUF_WRITE(sbp);
				1190	XFS_BUF_UNASYNC(sbp);
				1191	ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
				1192	xfsbdstrat(mp, sbp);
				1193	/* Nevermind errors we might get here. */
				1194	error = xfs_iowait(sbp);
				1195	if (error)
				1196	xfs_ioerror_alert("xfs_unmountfs_writesb",
				1197	mp, sbp, XFS_BUF_ADDR(sbp));
				1198	if (error && mp->m_mk_sharedro)
				1199	xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
				1200	}
				1201	xfs_buf_relse(sbp);
				1202	return (error);
				1203	}
				1204
				1205	/*
				1206	* xfs_mod_sb() can be used to copy arbitrary changes to the
				1207	* in-core superblock into the superblock buffer to be logged.
				1208	* It does not provide the higher level of locking that is
				1209	* needed to protect the in-core superblock from concurrent
				1210	* access.
				1211	*/
				1212	void
				1213	xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
				1214	{
				1215	xfs_buf_t *bp;
				1216	int first;
				1217	int last;
				1218	xfs_mount_t *mp;
				1219	xfs_sb_t *sbp;
				1220	xfs_sb_field_t f;
				1221
				1222	ASSERT(fields);
				1223	if (!fields)
				1224	return;
				1225	mp = tp->t_mountp;
				1226	bp = xfs_trans_getsb(tp, mp, 0);
				1227	sbp = XFS_BUF_TO_SBP(bp);
				1228	first = sizeof(xfs_sb_t);
				1229	last = 0;
				1230
				1231	/* translate/copy */
				1232
				1233	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields);
				1234
				1235	/* find modified range */
				1236
				1237	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				1238	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1239	first = xfs_sb_info[f].offset;
				1240
				1241	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
				1242	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1243	last = xfs_sb_info[f + 1].offset - 1;
				1244
				1245	xfs_trans_log_buf(tp, bp, first, last);
				1246	}
				1247
				1248	/*
				1249	* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
				1250	* a delta to a specified field in the in-core superblock. Simply
				1251	* switch on the field indicated and apply the delta to that field.
				1252	* Fields are not allowed to dip below zero, so if the delta would
				1253	* do this do not apply it and return EINVAL.
				1254	*
				1255	* The SB_LOCK must be held when this routine is called.
				1256	*/
				1257	STATIC int
				1258	xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
				1259	int delta, int rsvd)
				1260	{
				1261	int scounter; /* short counter for 32 bit fields */
				1262	long long lcounter; /* long counter for 64 bit fields */
				1263	long long res_used, rem;
				1264
				1265	/*
				1266	* With the in-core superblock spin lock held, switch
				1267	* on the indicated field. Apply the delta to the
				1268	* proper field. If the fields value would dip below
				1269	* 0, then do not apply the delta and return EINVAL.
				1270	*/
				1271	switch (field) {
				1272	case XFS_SBS_ICOUNT:
				1273	lcounter = (long long)mp->m_sb.sb_icount;
				1274	lcounter += delta;
				1275	if (lcounter < 0) {
				1276	ASSERT(0);
				1277	return (XFS_ERROR(EINVAL));
				1278	}
				1279	mp->m_sb.sb_icount = lcounter;
				1280	return (0);
				1281	case XFS_SBS_IFREE:
				1282	lcounter = (long long)mp->m_sb.sb_ifree;
				1283	lcounter += delta;
				1284	if (lcounter < 0) {
				1285	ASSERT(0);
				1286	return (XFS_ERROR(EINVAL));
				1287	}
				1288	mp->m_sb.sb_ifree = lcounter;
				1289	return (0);
				1290	case XFS_SBS_FDBLOCKS:
				1291
				1292	lcounter = (long long)mp->m_sb.sb_fdblocks;
				1293	res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
				1294
				1295	if (delta > 0) { /* Putting blocks back */
				1296	if (res_used > delta) {
				1297	mp->m_resblks_avail += delta;
				1298	} else {
				1299	rem = delta - res_used;
				1300	mp->m_resblks_avail = mp->m_resblks;
				1301	lcounter += rem;
				1302	}
				1303	} else { /* Taking blocks away */
				1304
				1305	lcounter += delta;
				1306
				1307	/*
				1308	* If were out of blocks, use any available reserved blocks if
				1309	* were allowed to.
				1310	*/
				1311
				1312	if (lcounter < 0) {
				1313	if (rsvd) {
				1314	lcounter = (long long)mp->m_resblks_avail + delta;
				1315	if (lcounter < 0) {
				1316	return (XFS_ERROR(ENOSPC));
				1317	}
				1318	mp->m_resblks_avail = lcounter;
				1319	return (0);
				1320	} else { /* not reserved */
				1321	return (XFS_ERROR(ENOSPC));
				1322	}
				1323	}
				1324	}
				1325
				1326	mp->m_sb.sb_fdblocks = lcounter;
				1327	return (0);
				1328	case XFS_SBS_FREXTENTS:
				1329	lcounter = (long long)mp->m_sb.sb_frextents;
				1330	lcounter += delta;
				1331	if (lcounter < 0) {
				1332	return (XFS_ERROR(ENOSPC));
				1333	}
				1334	mp->m_sb.sb_frextents = lcounter;
				1335	return (0);
				1336	case XFS_SBS_DBLOCKS:
				1337	lcounter = (long long)mp->m_sb.sb_dblocks;
				1338	lcounter += delta;
				1339	if (lcounter < 0) {
				1340	ASSERT(0);
				1341	return (XFS_ERROR(EINVAL));
				1342	}
				1343	mp->m_sb.sb_dblocks = lcounter;
				1344	return (0);
				1345	case XFS_SBS_AGCOUNT:
				1346	scounter = mp->m_sb.sb_agcount;
				1347	scounter += delta;
				1348	if (scounter < 0) {
				1349	ASSERT(0);
				1350	return (XFS_ERROR(EINVAL));
				1351	}
				1352	mp->m_sb.sb_agcount = scounter;
				1353	return (0);
				1354	case XFS_SBS_IMAX_PCT:
				1355	scounter = mp->m_sb.sb_imax_pct;
				1356	scounter += delta;
				1357	if (scounter < 0) {
				1358	ASSERT(0);
				1359	return (XFS_ERROR(EINVAL));
				1360	}
				1361	mp->m_sb.sb_imax_pct = scounter;
				1362	return (0);
				1363	case XFS_SBS_REXTSIZE:
				1364	scounter = mp->m_sb.sb_rextsize;
				1365	scounter += delta;
				1366	if (scounter < 0) {
				1367	ASSERT(0);
				1368	return (XFS_ERROR(EINVAL));
				1369	}
				1370	mp->m_sb.sb_rextsize = scounter;
				1371	return (0);
				1372	case XFS_SBS_RBMBLOCKS:
				1373	scounter = mp->m_sb.sb_rbmblocks;
				1374	scounter += delta;
				1375	if (scounter < 0) {
				1376	ASSERT(0);
				1377	return (XFS_ERROR(EINVAL));
				1378	}
				1379	mp->m_sb.sb_rbmblocks = scounter;
				1380	return (0);
				1381	case XFS_SBS_RBLOCKS:
				1382	lcounter = (long long)mp->m_sb.sb_rblocks;
				1383	lcounter += delta;
				1384	if (lcounter < 0) {
				1385	ASSERT(0);
				1386	return (XFS_ERROR(EINVAL));
				1387	}
				1388	mp->m_sb.sb_rblocks = lcounter;
				1389	return (0);
				1390	case XFS_SBS_REXTENTS:
				1391	lcounter = (long long)mp->m_sb.sb_rextents;
				1392	lcounter += delta;
				1393	if (lcounter < 0) {
				1394	ASSERT(0);
				1395	return (XFS_ERROR(EINVAL));
				1396	}
				1397	mp->m_sb.sb_rextents = lcounter;
				1398	return (0);
				1399	case XFS_SBS_REXTSLOG:
				1400	scounter = mp->m_sb.sb_rextslog;
				1401	scounter += delta;
				1402	if (scounter < 0) {
				1403	ASSERT(0);
				1404	return (XFS_ERROR(EINVAL));
				1405	}
				1406	mp->m_sb.sb_rextslog = scounter;
				1407	return (0);
				1408	default:
				1409	ASSERT(0);
				1410	return (XFS_ERROR(EINVAL));
				1411	}
				1412	}
				1413
				1414	/*
				1415	* xfs_mod_incore_sb() is used to change a field in the in-core
				1416	* superblock structure by the specified delta. This modification
				1417	* is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked()
				1418	* routine to do the work.
				1419	*/
				1420	int
				1421	xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
				1422	{
				1423	unsigned long s;
				1424	int status;
				1425
				1426	s = XFS_SB_LOCK(mp);
				1427	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
				1428	XFS_SB_UNLOCK(mp, s);
				1429	return (status);
				1430	}
				1431
				1432	/*
				1433	* xfs_mod_incore_sb_batch() is used to change more than one field
				1434	* in the in-core superblock structure at a time. This modification
				1435	* is protected by a lock internal to this module. The fields and
				1436	* changes to those fields are specified in the array of xfs_mod_sb
				1437	* structures passed in.
				1438	*
				1439	* Either all of the specified deltas will be applied or none of
				1440	* them will. If any modified field dips below 0, then all modifications
				1441	* will be backed out and EINVAL will be returned.
				1442	*/
				1443	int
				1444	xfs_mod_incore_sb_batch(xfs_mount_t mp, xfs_mod_sb_t msb, uint nmsb, int rsvd)
				1445	{
				1446	unsigned long s;
				1447	int status=0;
				1448	xfs_mod_sb_t *msbp;
				1449
				1450	/*
				1451	* Loop through the array of mod structures and apply each
				1452	* individually. If any fail, then back out all those
				1453	* which have already been applied. Do all of this within
				1454	* the scope of the SB_LOCK so that all of the changes will
				1455	* be atomic.
				1456	*/
				1457	s = XFS_SB_LOCK(mp);
				1458	msbp = &msb[0];
				1459	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
				1460	/*
				1461	* Apply the delta at index n. If it fails, break
				1462	* from the loop so we'll fall into the undo loop
				1463	* below.
				1464	*/
				1465	status = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
				1466	msbp->msb_delta, rsvd);
				1467	if (status != 0) {
				1468	break;
				1469	}
				1470	}
				1471
				1472	/*
				1473	* If we didn't complete the loop above, then back out
				1474	* any changes made to the superblock. If you add code
				1475	* between the loop above and here, make sure that you
				1476	* preserve the value of status. Loop back until
				1477	* we step below the beginning of the array. Make sure
				1478	* we don't touch anything back there.
				1479	*/
				1480	if (status != 0) {
				1481	msbp--;
				1482	while (msbp >= msb) {
				1483	status = xfs_mod_incore_sb_unlocked(mp,
				1484	msbp->msb_field, -(msbp->msb_delta), rsvd);
				1485	ASSERT(status == 0);
				1486	msbp--;
				1487	}
				1488	}
				1489	XFS_SB_UNLOCK(mp, s);
				1490	return (status);
				1491	}
				1492
				1493	/*
				1494	* xfs_getsb() is called to obtain the buffer for the superblock.
				1495	* The buffer is returned locked and read in from disk.
				1496	* The buffer should be released with a call to xfs_brelse().
				1497	*
				1498	* If the flags parameter is BUF_TRYLOCK, then we'll only return
				1499	* the superblock buffer if it can be locked without sleeping.
				1500	* If it can't then we'll return NULL.
				1501	*/
				1502	xfs_buf_t *
				1503	xfs_getsb(
				1504	xfs_mount_t *mp,
				1505	int flags)
				1506	{
				1507	xfs_buf_t *bp;
				1508
				1509	ASSERT(mp->m_sb_bp != NULL);
				1510	bp = mp->m_sb_bp;
				1511	if (flags & XFS_BUF_TRYLOCK) {
				1512	if (!XFS_BUF_CPSEMA(bp)) {
				1513	return NULL;
				1514	}
				1515	} else {
				1516	XFS_BUF_PSEMA(bp, PRIBIO);
				1517	}
				1518	XFS_BUF_HOLD(bp);
				1519	ASSERT(XFS_BUF_ISDONE(bp));
				1520	return (bp);
				1521	}
				1522
				1523	/*
				1524	* Used to free the superblock along various error paths.
				1525	*/
				1526	void
				1527	xfs_freesb(
				1528	xfs_mount_t *mp)
				1529	{
				1530	xfs_buf_t *bp;
				1531
				1532	/*
				1533	* Use xfs_getsb() so that the buffer will be locked
				1534	* when we call xfs_buf_relse().
				1535	*/
				1536	bp = xfs_getsb(mp, 0);
				1537	XFS_BUF_UNMANAGE(bp);
				1538	xfs_buf_relse(bp);
				1539	mp->m_sb_bp = NULL;
				1540	}
				1541
				1542	/*
				1543	* See if the UUID is unique among mounted XFS filesystems.
				1544	* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
				1545	*/
				1546	STATIC int
				1547	xfs_uuid_mount(
				1548	xfs_mount_t *mp)
				1549	{
				1550	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
				1551	cmn_err(CE_WARN,
				1552	"XFS: Filesystem %s has nil UUID - can't mount",
				1553	mp->m_fsname);
				1554	return -1;
				1555	}
				1556	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
				1557	cmn_err(CE_WARN,
				1558	"XFS: Filesystem %s has duplicate UUID - can't mount",
				1559	mp->m_fsname);
				1560	return -1;
				1561	}
				1562	return 0;
				1563	}
				1564
				1565	/*
				1566	* Remove filesystem from the UUID table.
				1567	*/
				1568	STATIC void
				1569	xfs_uuid_unmount(
				1570	xfs_mount_t *mp)
				1571	{
				1572	uuid_table_remove(&mp->m_sb.sb_uuid);
				1573	}
				1574
				1575	/*
				1576	* Used to log changes to the superblock unit and width fields which could
				1577	* be altered by the mount options. Only the first superblock is updated.
				1578	*/
				1579	STATIC void
				1580	xfs_mount_log_sbunit(
				1581	xfs_mount_t *mp,
				1582	__int64_t fields)
				1583	{
				1584	xfs_trans_t *tp;
				1585
				1586	ASSERT(fields & (XFS_SB_UNIT\|XFS_SB_WIDTH\|XFS_SB_UUID));
				1587
				1588	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
				1589	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				1590	XFS_DEFAULT_LOG_COUNT)) {
				1591	xfs_trans_cancel(tp, 0);
				1592	return;
				1593	}
				1594	xfs_mod_sb(tp, fields);
				1595	xfs_trans_commit(tp, 0, NULL);
				1596	}