Blame - fs/xfs/xfs_mount.c - kernel/msm

blob: a66b398051760f3dd2695777bbb4e637e2280279 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	2	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				3	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	* published by the Free Software Foundation.
				8	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	18	#include "xfs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	19	#include "xfs_fs.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include "xfs_types.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	21	#include "xfs_bit.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	22	#include "xfs_log.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	23	#include "xfs_inum.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include "xfs_trans.h"
				25	#include "xfs_sb.h"
				26	#include "xfs_ag.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	27	#include "xfs_dir2.h"
				28	#include "xfs_dmapi.h"
				29	#include "xfs_mount.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	30	#include "xfs_bmap_btree.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	31	#include "xfs_alloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	32	#include "xfs_ialloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	33	#include "xfs_dir2_sf.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	34	#include "xfs_attr_sf.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	35	#include "xfs_dinode.h"
				36	#include "xfs_inode.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	37	#include "xfs_btree.h"
				38	#include "xfs_ialloc.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	39	#include "xfs_alloc.h"
				40	#include "xfs_rtalloc.h"
				41	#include "xfs_bmap.h"
				42	#include "xfs_error.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	43	#include "xfs_rw.h"
				44	#include "xfs_quota.h"
				45	#include "xfs_fsops.h"
				46
				47	STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
				48	STATIC int xfs_uuid_mount(xfs_mount_t *);
				49	STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	50	STATIC void xfs_unmountfs_wait(xfs_mount_t *);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	51
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	52
				53	#ifdef HAVE_PERCPU_SB
				54	STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
David Chinner	20f4ebf	2007-02-10 18:36:10 +1100	[diff] [blame]	55	STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
				56	int, int);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	57	STATIC void xfs_icsb_sync_counters(xfs_mount_t *);
				58	STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
David Chinner	20f4ebf	2007-02-10 18:36:10 +1100	[diff] [blame]	59	int64_t, int);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	60	STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	61
				62	#else
				63
				64	#define xfs_icsb_destroy_counters(mp) do { } while (0)
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	65	#define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0)
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	66	#define xfs_icsb_sync_counters(mp) do { } while (0)
				67	#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	68
				69	#endif
				70
Christoph Hellwig	1df84c9	2006-01-11 15:29:52 +1100	[diff] [blame]	71	static const struct {
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	72	short offset;
				73	short type; /* 0 = integer
				74	* 1 = binary / string (no translation)
				75	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	76	} xfs_sb_info[] = {
				77	{ offsetof(xfs_sb_t, sb_magicnum), 0 },
				78	{ offsetof(xfs_sb_t, sb_blocksize), 0 },
				79	{ offsetof(xfs_sb_t, sb_dblocks), 0 },
				80	{ offsetof(xfs_sb_t, sb_rblocks), 0 },
				81	{ offsetof(xfs_sb_t, sb_rextents), 0 },
				82	{ offsetof(xfs_sb_t, sb_uuid), 1 },
				83	{ offsetof(xfs_sb_t, sb_logstart), 0 },
				84	{ offsetof(xfs_sb_t, sb_rootino), 0 },
				85	{ offsetof(xfs_sb_t, sb_rbmino), 0 },
				86	{ offsetof(xfs_sb_t, sb_rsumino), 0 },
				87	{ offsetof(xfs_sb_t, sb_rextsize), 0 },
				88	{ offsetof(xfs_sb_t, sb_agblocks), 0 },
				89	{ offsetof(xfs_sb_t, sb_agcount), 0 },
				90	{ offsetof(xfs_sb_t, sb_rbmblocks), 0 },
				91	{ offsetof(xfs_sb_t, sb_logblocks), 0 },
				92	{ offsetof(xfs_sb_t, sb_versionnum), 0 },
				93	{ offsetof(xfs_sb_t, sb_sectsize), 0 },
				94	{ offsetof(xfs_sb_t, sb_inodesize), 0 },
				95	{ offsetof(xfs_sb_t, sb_inopblock), 0 },
				96	{ offsetof(xfs_sb_t, sb_fname[0]), 1 },
				97	{ offsetof(xfs_sb_t, sb_blocklog), 0 },
				98	{ offsetof(xfs_sb_t, sb_sectlog), 0 },
				99	{ offsetof(xfs_sb_t, sb_inodelog), 0 },
				100	{ offsetof(xfs_sb_t, sb_inopblog), 0 },
				101	{ offsetof(xfs_sb_t, sb_agblklog), 0 },
				102	{ offsetof(xfs_sb_t, sb_rextslog), 0 },
				103	{ offsetof(xfs_sb_t, sb_inprogress), 0 },
				104	{ offsetof(xfs_sb_t, sb_imax_pct), 0 },
				105	{ offsetof(xfs_sb_t, sb_icount), 0 },
				106	{ offsetof(xfs_sb_t, sb_ifree), 0 },
				107	{ offsetof(xfs_sb_t, sb_fdblocks), 0 },
				108	{ offsetof(xfs_sb_t, sb_frextents), 0 },
				109	{ offsetof(xfs_sb_t, sb_uquotino), 0 },
				110	{ offsetof(xfs_sb_t, sb_gquotino), 0 },
				111	{ offsetof(xfs_sb_t, sb_qflags), 0 },
				112	{ offsetof(xfs_sb_t, sb_flags), 0 },
				113	{ offsetof(xfs_sb_t, sb_shared_vn), 0 },
				114	{ offsetof(xfs_sb_t, sb_inoalignmt), 0 },
				115	{ offsetof(xfs_sb_t, sb_unit), 0 },
				116	{ offsetof(xfs_sb_t, sb_width), 0 },
				117	{ offsetof(xfs_sb_t, sb_dirblklog), 0 },
				118	{ offsetof(xfs_sb_t, sb_logsectlog), 0 },
				119	{ offsetof(xfs_sb_t, sb_logsectsize),0 },
				120	{ offsetof(xfs_sb_t, sb_logsunit), 0 },
				121	{ offsetof(xfs_sb_t, sb_features2), 0 },
				122	{ sizeof(xfs_sb_t), 0 }
				123	};
				124
				125	/*
				126	* Return a pointer to an initialized xfs_mount structure.
				127	*/
				128	xfs_mount_t *
				129	xfs_mount_init(void)
				130	{
				131	xfs_mount_t *mp;
				132
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	133	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
				134
				135	if (xfs_icsb_init_counters(mp)) {
				136	mp->m_flags \|= XFS_MOUNT_NO_PERCPU_SB;
				137	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	138
				139	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
				140	spinlock_init(&mp->m_sb_lock, "xfs_sb");
Jes Sorensen	794ee1b	2006-01-09 15:59:21 -0800	[diff] [blame]	141	mutex_init(&mp->m_ilock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	142	initnsema(&mp->m_growlock, 1, "xfs_grow");
				143	/*
				144	* Initialize the AIL.
				145	*/
				146	xfs_trans_ail_init(mp);
				147
				148	atomic_set(&mp->m_active_trans, 0);
				149
				150	return mp;
				151	}
				152
				153	/*
				154	* Free up the resources associated with a mount structure. Assume that
				155	* the structure was initially zeroed, so we can tell which fields got
				156	* initialized.
				157	*/
				158	void
				159	xfs_mount_free(
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	160	xfs_mount_t *mp,
				161	int remove_bhv)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	162	{
				163	if (mp->m_ihash)
				164	xfs_ihash_free(mp);
				165	if (mp->m_chash)
				166	xfs_chash_free(mp);
				167
				168	if (mp->m_perag) {
				169	int agno;
				170
				171	for (agno = 0; agno < mp->m_maxagi; agno++)
				172	if (mp->m_perag[agno].pagb_list)
				173	kmem_free(mp->m_perag[agno].pagb_list,
				174	sizeof(xfs_perag_busy_t) *
				175	XFS_PAGB_NUM_SLOTS);
				176	kmem_free(mp->m_perag,
				177	sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
				178	}
				179
				180	AIL_LOCK_DESTROY(&mp->m_ail_lock);
				181	spinlock_destroy(&mp->m_sb_lock);
				182	mutex_destroy(&mp->m_ilock);
				183	freesema(&mp->m_growlock);
				184	if (mp->m_quotainfo)
				185	XFS_QM_DONE(mp);
				186
				187	if (mp->m_fsname != NULL)
				188	kmem_free(mp->m_fsname, mp->m_fsname_len);
Nathan Scott	fc1f8c1	2005-11-02 11:44:33 +1100	[diff] [blame]	189	if (mp->m_rtname != NULL)
				190	kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
				191	if (mp->m_logname != NULL)
				192	kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	193
				194	if (remove_bhv) {
Nathan Scott	b83bd13	2006-06-09 16:48:30 +1000	[diff] [blame]	195	struct bhv_vfs *vfsp = XFS_MTOVFS(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	196
				197	bhv_remove_all_vfsops(vfsp, 0);
				198	VFS_REMOVEBHV(vfsp, &mp->m_bhv);
				199	}
				200
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	201	xfs_icsb_destroy_counters(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	202	kmem_free(mp, sizeof(xfs_mount_t));
				203	}
				204
Nathan Scott	4cc929e	2007-05-14 18:24:02 +1000	[diff] [blame]	205	/*
				206	* Check size of device based on the (data/realtime) block count.
				207	* Note: this check is used by the growfs code as well as mount.
				208	*/
				209	int
				210	xfs_sb_validate_fsb_count(
				211	xfs_sb_t *sbp,
				212	__uint64_t nblocks)
				213	{
				214	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
				215	ASSERT(sbp->sb_blocklog >= BBSHIFT);
				216
				217	#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
				218	if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
				219	return E2BIG;
				220	#else /* Limited by UINT_MAX of sectors */
				221	if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
				222	return E2BIG;
				223	#endif
				224	return 0;
				225	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	226
				227	/*
				228	* Check the validity of the SB found.
				229	*/
				230	STATIC int
				231	xfs_mount_validate_sb(
				232	xfs_mount_t *mp,
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	233	xfs_sb_t *sbp,
				234	int flags)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	235	{
				236	/*
				237	* If the log device and data device have the
				238	* same device number, the log is internal.
				239	* Consequently, the sb_logstart should be non-zero. If
				240	* we have a zero sb_logstart in this case, we may be trying to mount
				241	* a volume filesystem in a non-volume manner.
				242	*/
				243	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	244	xfs_fs_mount_cmn_err(flags, "bad magic number");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	245	return XFS_ERROR(EWRONGFS);
				246	}
				247
				248	if (!XFS_SB_GOOD_VERSION(sbp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	249	xfs_fs_mount_cmn_err(flags, "bad version");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	250	return XFS_ERROR(EWRONGFS);
				251	}
				252
				253	if (unlikely(
				254	sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	255	xfs_fs_mount_cmn_err(flags,
				256	"filesystem is marked as having an external log; "
				257	"specify logdev on the\nmount command line.");
				258	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	259	}
				260
				261	if (unlikely(
				262	sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	263	xfs_fs_mount_cmn_err(flags,
				264	"filesystem is marked as having an internal log; "
				265	"do not specify logdev on\nthe mount command line.");
				266	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	267	}
				268
				269	/*
				270	* More sanity checking. These were stolen directly from
				271	* xfs_repair.
				272	*/
				273	if (unlikely(
				274	sbp->sb_agcount <= 0 \|\|
				275	sbp->sb_sectsize < XFS_MIN_SECTORSIZE \|\|
				276	sbp->sb_sectsize > XFS_MAX_SECTORSIZE \|\|
				277	sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG \|\|
				278	sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG \|\|
				279	sbp->sb_blocksize < XFS_MIN_BLOCKSIZE \|\|
				280	sbp->sb_blocksize > XFS_MAX_BLOCKSIZE \|\|
				281	sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG \|\|
				282	sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG \|\|
				283	sbp->sb_inodesize < XFS_DINODE_MIN_SIZE \|\|
				284	sbp->sb_inodesize > XFS_DINODE_MAX_SIZE \|\|
Nathan Scott	9f989c9	2006-03-14 13:29:32 +1100	[diff] [blame]	285	sbp->sb_inodelog < XFS_DINODE_MIN_LOG \|\|
				286	sbp->sb_inodelog > XFS_DINODE_MAX_LOG \|\|
				287	(sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) \|\|
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	288	(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) \|\|
				289	(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) \|\|
Nathan Scott	e50bd16	2006-04-11 15:10:45 +1000	[diff] [blame]	290	(sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	291	xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	292	return XFS_ERROR(EFSCORRUPTED);
				293	}
				294
				295	/*
				296	* Sanity check AG count, size fields against data size field
				297	*/
				298	if (unlikely(
				299	sbp->sb_dblocks == 0 \|\|
				300	sbp->sb_dblocks >
				301	(xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks \|\|
				302	sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
				303	sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	304	xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	305	return XFS_ERROR(EFSCORRUPTED);
				306	}
				307
Nathan Scott	4cc929e	2007-05-14 18:24:02 +1000	[diff] [blame]	308	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) \|\|
				309	xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	310	xfs_fs_mount_cmn_err(flags,
				311	"file system too large to be mounted on this system.");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	312	return XFS_ERROR(E2BIG);
				313	}
				314
				315	if (unlikely(sbp->sb_inprogress)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	316	xfs_fs_mount_cmn_err(flags, "file system busy");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	317	return XFS_ERROR(EFSCORRUPTED);
				318	}
				319
				320	/*
Nathan Scott	de20614	2005-05-05 13:24:13 -0700	[diff] [blame]	321	* Version 1 directory format has never worked on Linux.
				322	*/
				323	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	324	xfs_fs_mount_cmn_err(flags,
				325	"file system using version 1 directory format");
Nathan Scott	de20614	2005-05-05 13:24:13 -0700	[diff] [blame]	326	return XFS_ERROR(ENOSYS);
				327	}
				328
				329	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	330	* Until this is fixed only page-sized or smaller data blocks work.
				331	*/
				332	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	333	xfs_fs_mount_cmn_err(flags,
				334	"file system with blocksize %d bytes",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	335	sbp->sb_blocksize);
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	336	xfs_fs_mount_cmn_err(flags,
				337	"only pagesize (%ld) or less will currently work.",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	338	PAGE_SIZE);
				339	return XFS_ERROR(ENOSYS);
				340	}
				341
				342	return 0;
				343	}
				344
				345	xfs_agnumber_t
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	346	xfs_initialize_perag(
Nathan Scott	b83bd13	2006-06-09 16:48:30 +1000	[diff] [blame]	347	bhv_vfs_t *vfs,
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	348	xfs_mount_t *mp,
				349	xfs_agnumber_t agcount)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	350	{
				351	xfs_agnumber_t index, max_metadata;
				352	xfs_perag_t *pag;
				353	xfs_agino_t agino;
				354	xfs_ino_t ino;
				355	xfs_sb_t *sbp = &mp->m_sb;
				356	xfs_ino_t max_inum = XFS_MAXINUMBER_32;
				357
				358	/* Check to see if the filesystem can overflow 32 bit inodes */
				359	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
				360	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
				361
				362	/* Clear the mount flag if no inode can overflow 32 bits
				363	* on this filesystem, or if specifically requested..
				364	*/
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	365	if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	366	mp->m_flags \|= XFS_MOUNT_32BITINODES;
				367	} else {
				368	mp->m_flags &= ~XFS_MOUNT_32BITINODES;
				369	}
				370
				371	/* If we can overflow then setup the ag headers accordingly */
				372	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
				373	/* Calculate how much should be reserved for inodes to
				374	* meet the max inode percentage.
				375	*/
				376	if (mp->m_maxicount) {
				377	__uint64_t icount;
				378
				379	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				380	do_div(icount, 100);
				381	icount += sbp->sb_agblocks - 1;
Eric Sandeen	a749ee8	2005-11-02 15:13:42 +1100	[diff] [blame]	382	do_div(icount, sbp->sb_agblocks);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	383	max_metadata = icount;
				384	} else {
				385	max_metadata = agcount;
				386	}
				387	for (index = 0; index < agcount; index++) {
				388	ino = XFS_AGINO_TO_INO(mp, index, agino);
				389	if (ino > max_inum) {
				390	index++;
				391	break;
				392	}
				393
Nathan Scott	c41564b	2006-03-29 08:55:14 +1000	[diff] [blame]	394	/* This ag is preferred for inodes */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	395	pag = &mp->m_perag[index];
				396	pag->pagi_inodeok = 1;
				397	if (index < max_metadata)
				398	pag->pagf_metadata = 1;
				399	}
				400	} else {
				401	/* Setup default behavior for smaller filesystems */
				402	for (index = 0; index < agcount; index++) {
				403	pag = &mp->m_perag[index];
				404	pag->pagi_inodeok = 1;
				405	}
				406	}
				407	return index;
				408	}
				409
				410	/*
				411	* xfs_xlatesb
				412	*
				413	* data - on disk version of sb
				414	* sb - a superblock
				415	* dir - conversion direction: <0 - convert sb to buf
				416	* >0 - convert buf to sb
				417	* fields - which fields to copy (bitmask)
				418	*/
				419	void
				420	xfs_xlatesb(
				421	void *data,
				422	xfs_sb_t *sb,
				423	int dir,
				424	__int64_t fields)
				425	{
				426	xfs_caddr_t buf_ptr;
				427	xfs_caddr_t mem_ptr;
				428	xfs_sb_field_t f;
				429	int first;
				430	int size;
				431
				432	ASSERT(dir);
				433	ASSERT(fields);
				434
				435	if (!fields)
				436	return;
				437
				438	buf_ptr = (xfs_caddr_t)data;
				439	mem_ptr = (xfs_caddr_t)sb;
				440
				441	while (fields) {
				442	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				443	first = xfs_sb_info[f].offset;
				444	size = xfs_sb_info[f + 1].offset - first;
				445
				446	ASSERT(xfs_sb_info[f].type == 0 \|\| xfs_sb_info[f].type == 1);
				447
				448	if (size == 1 \|\| xfs_sb_info[f].type == 1) {
				449	if (dir > 0) {
				450	memcpy(mem_ptr + first, buf_ptr + first, size);
				451	} else {
				452	memcpy(buf_ptr + first, mem_ptr + first, size);
				453	}
				454	} else {
				455	switch (size) {
				456	case 2:
				457	INT_XLATE((__uint16_t)(buf_ptr+first),
				458	(__uint16_t)(mem_ptr+first),
				459	dir, ARCH_CONVERT);
				460	break;
				461	case 4:
				462	INT_XLATE((__uint32_t)(buf_ptr+first),
				463	(__uint32_t)(mem_ptr+first),
				464	dir, ARCH_CONVERT);
				465	break;
				466	case 8:
				467	INT_XLATE((__uint64_t)(buf_ptr+first),
				468	(__uint64_t)(mem_ptr+first), dir, ARCH_CONVERT);
				469	break;
				470	default:
				471	ASSERT(0);
				472	}
				473	}
				474
				475	fields &= ~(1LL << f);
				476	}
				477	}
				478
				479	/*
				480	* xfs_readsb
				481	*
				482	* Does the initial read of the superblock.
				483	*/
				484	int
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	485	xfs_readsb(xfs_mount_t *mp, int flags)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	486	{
				487	unsigned int sector_size;
				488	unsigned int extra_flags;
				489	xfs_buf_t *bp;
				490	xfs_sb_t *sbp;
				491	int error;
				492
				493	ASSERT(mp->m_sb_bp == NULL);
				494	ASSERT(mp->m_ddev_targp != NULL);
				495
				496	/*
				497	* Allocate a (locked) buffer to hold the superblock.
				498	* This will be kept around at all times to optimize
				499	* access to the superblock.
				500	*/
				501	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
				502	extra_flags = XFS_BUF_LOCK \| XFS_BUF_MANAGE \| XFS_BUF_MAPPED;
				503
				504	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				505	BTOBB(sector_size), extra_flags);
				506	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	507	xfs_fs_mount_cmn_err(flags, "SB read failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	508	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				509	goto fail;
				510	}
				511	ASSERT(XFS_BUF_ISBUSY(bp));
				512	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				513
				514	/*
				515	* Initialize the mount structure from the superblock.
				516	* But first do some basic consistency checking.
				517	*/
				518	sbp = XFS_BUF_TO_SBP(bp);
				519	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
				520
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	521	error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	522	if (error) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	523	xfs_fs_mount_cmn_err(flags, "SB validate failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	524	goto fail;
				525	}
				526
				527	/*
				528	* We must be able to do sector-sized and sector-aligned IO.
				529	*/
				530	if (sector_size > mp->m_sb.sb_sectsize) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	531	xfs_fs_mount_cmn_err(flags,
				532	"device supports only %u byte sectors (not %u)",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	533	sector_size, mp->m_sb.sb_sectsize);
				534	error = ENOSYS;
				535	goto fail;
				536	}
				537
				538	/*
				539	* If device sector size is smaller than the superblock size,
				540	* re-read the superblock so the buffer is correctly sized.
				541	*/
				542	if (sector_size < mp->m_sb.sb_sectsize) {
				543	XFS_BUF_UNMANAGE(bp);
				544	xfs_buf_relse(bp);
				545	sector_size = mp->m_sb.sb_sectsize;
				546	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				547	BTOBB(sector_size), extra_flags);
				548	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	549	xfs_fs_mount_cmn_err(flags, "SB re-read failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	550	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				551	goto fail;
				552	}
				553	ASSERT(XFS_BUF_ISBUSY(bp));
				554	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				555	}
				556
Lachlan McIlroy	5478eea	2007-02-10 18:36:29 +1100	[diff] [blame]	557	/* Initialize per-cpu counters */
				558	xfs_icsb_reinit_counters(mp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	559
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	560	mp->m_sb_bp = bp;
				561	xfs_buf_relse(bp);
				562	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
				563	return 0;
				564
				565	fail:
				566	if (bp) {
				567	XFS_BUF_UNMANAGE(bp);
				568	xfs_buf_relse(bp);
				569	}
				570	return error;
				571	}
				572
				573
				574	/*
				575	* xfs_mount_common
				576	*
				577	* Mount initialization code establishing various mount
				578	* fields from the superblock associated with the given
				579	* mount structure
				580	*/
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	581	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	582	xfs_mount_common(xfs_mount_t mp, xfs_sb_t sbp)
				583	{
				584	int i;
				585
				586	mp->m_agfrotor = mp->m_agirotor = 0;
				587	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
				588	mp->m_maxagi = mp->m_sb.sb_agcount;
				589	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
				590	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
				591	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
				592	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
				593	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
				594	mp->m_litino = sbp->sb_inodesize -
				595	((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
				596	mp->m_blockmask = sbp->sb_blocksize - 1;
				597	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
				598	mp->m_blockwmask = mp->m_blockwsize - 1;
				599	INIT_LIST_HEAD(&mp->m_del_inodes);
				600
				601	/*
				602	* Setup for attributes, in case they get created.
				603	* This value is for inodes getting attributes for the first time,
				604	* the per-inode value is for old attribute values.
				605	*/
				606	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
				607	switch (sbp->sb_inodesize) {
				608	case 256:
Nathan Scott	d8cc890	2005-11-02 10:34:53 +1100	[diff] [blame]	609	mp->m_attroffset = XFS_LITINO(mp) -
				610	XFS_BMDR_SPACE_CALC(MINABTPTRS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	611	break;
				612	case 512:
				613	case 1024:
				614	case 2048:
Nathan Scott	d8cc890	2005-11-02 10:34:53 +1100	[diff] [blame]	615	mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	616	break;
				617	default:
				618	ASSERT(0);
				619	}
				620	ASSERT(mp->m_attroffset < XFS_LITINO(mp));
				621
				622	for (i = 0; i < 2; i++) {
				623	mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				624	xfs_alloc, i == 0);
				625	mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				626	xfs_alloc, i == 0);
				627	}
				628	for (i = 0; i < 2; i++) {
				629	mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				630	xfs_bmbt, i == 0);
				631	mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				632	xfs_bmbt, i == 0);
				633	}
				634	for (i = 0; i < 2; i++) {
				635	mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				636	xfs_inobt, i == 0);
				637	mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				638	xfs_inobt, i == 0);
				639	}
				640
				641	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
				642	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
				643	sbp->sb_inopblock);
				644	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
				645	}
David Chinner	92821e2	2007-05-24 15:26:31 +1000	[diff] [blame]	646
				647	/*
				648	* xfs_initialize_perag_data
				649	*
				650	* Read in each per-ag structure so we can count up the number of
				651	* allocated inodes, free inodes and used filesystem blocks as this
				652	* information is no longer persistent in the superblock. Once we have
				653	* this information, write it into the in-core superblock structure.
				654	*/
				655	STATIC int
				656	xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
				657	{
				658	xfs_agnumber_t index;
				659	xfs_perag_t *pag;
				660	xfs_sb_t *sbp = &mp->m_sb;
				661	uint64_t ifree = 0;
				662	uint64_t ialloc = 0;
				663	uint64_t bfree = 0;
				664	uint64_t bfreelst = 0;
				665	uint64_t btree = 0;
				666	int error;
				667	int s;
				668
				669	for (index = 0; index < agcount; index++) {
				670	/*
				671	* read the agf, then the agi. This gets us
				672	* all the inforamtion we need and populates the
				673	* per-ag structures for us.
				674	*/
				675	error = xfs_alloc_pagf_init(mp, NULL, index, 0);
				676	if (error)
				677	return error;
				678
				679	error = xfs_ialloc_pagi_init(mp, NULL, index);
				680	if (error)
				681	return error;
				682	pag = &mp->m_perag[index];
				683	ifree += pag->pagi_freecount;
				684	ialloc += pag->pagi_count;
				685	bfree += pag->pagf_freeblks;
				686	bfreelst += pag->pagf_flcount;
				687	btree += pag->pagf_btreeblks;
				688	}
				689	/*
				690	* Overwrite incore superblock counters with just-read data
				691	*/
				692	s = XFS_SB_LOCK(mp);
				693	sbp->sb_ifree = ifree;
				694	sbp->sb_icount = ialloc;
				695	sbp->sb_fdblocks = bfree + bfreelst + btree;
				696	XFS_SB_UNLOCK(mp, s);
				697
				698	/* Fixup the per-cpu counters as well. */
				699	xfs_icsb_reinit_counters(mp);
				700
				701	return 0;
				702	}
				703
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	704	/*
				705	* xfs_mountfs
				706	*
				707	* This function does the following on an initial mount of a file system:
				708	* - reads the superblock from disk and init the mount struct
				709	* - if we're a 32-bit kernel, do a size check on the superblock
				710	* so we don't mount terabyte filesystems
				711	* - init mount struct realtime fields
				712	* - allocate inode hash table for fs
				713	* - init directory manager
				714	* - perform recovery and init the log manager
				715	*/
				716	int
				717	xfs_mountfs(
Nathan Scott	b83bd13	2006-06-09 16:48:30 +1000	[diff] [blame]	718	bhv_vfs_t *vfsp,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	719	xfs_mount_t *mp,
				720	int mfsi_flags)
				721	{
				722	xfs_buf_t *bp;
				723	xfs_sb_t *sbp = &(mp->m_sb);
				724	xfs_inode_t *rip;
Nathan Scott	67fcaa7	2006-06-09 17:00:52 +1000	[diff] [blame]	725	bhv_vnode_t *rvp = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	726	int readio_log, writeio_log;
				727	xfs_daddr_t d;
David Chinner	84e1e99	2007-06-18 16:50:27 +1000	[diff] [blame]	728	__uint64_t resblks;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	729	__int64_t update_flags;
				730	uint quotamount, quotaflags;
				731	int agno;
				732	int uuid_mounted = 0;
				733	int error = 0;
				734
				735	if (mp->m_sb_bp == NULL) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	736	if ((error = xfs_readsb(mp, mfsi_flags))) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	737	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	738	}
				739	}
				740	xfs_mount_common(mp, sbp);
				741
				742	/*
				743	* Check if sb_agblocks is aligned at stripe boundary
				744	* If sb_agblocks is NOT aligned turn off m_dalign since
				745	* allocator alignment is within an ag, therefore ag has
				746	* to be aligned at stripe boundary.
				747	*/
				748	update_flags = 0LL;
				749	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
				750	/*
				751	* If stripe unit and stripe width are not multiples
				752	* of the fs blocksize turn off alignment.
				753	*/
				754	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) \|\|
				755	(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
				756	if (mp->m_flags & XFS_MOUNT_RETERR) {
				757	cmn_err(CE_WARN,
				758	"XFS: alignment check 1 failed");
				759	error = XFS_ERROR(EINVAL);
				760	goto error1;
				761	}
				762	mp->m_dalign = mp->m_swidth = 0;
				763	} else {
				764	/*
				765	* Convert the stripe unit and width to FSBs.
				766	*/
				767	mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
				768	if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				769	if (mp->m_flags & XFS_MOUNT_RETERR) {
				770	error = XFS_ERROR(EINVAL);
				771	goto error1;
				772	}
				773	xfs_fs_cmn_err(CE_WARN, mp,
				774	"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
				775	mp->m_dalign, mp->m_swidth,
				776	sbp->sb_agblocks);
				777
				778	mp->m_dalign = 0;
				779	mp->m_swidth = 0;
				780	} else if (mp->m_dalign) {
				781	mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
				782	} else {
				783	if (mp->m_flags & XFS_MOUNT_RETERR) {
				784	xfs_fs_cmn_err(CE_WARN, mp,
				785	"stripe alignment turned off: sunit(%d) less than bsize(%d)",
				786	mp->m_dalign,
				787	mp->m_blockmask +1);
				788	error = XFS_ERROR(EINVAL);
				789	goto error1;
				790	}
				791	mp->m_swidth = 0;
				792	}
				793	}
				794
				795	/*
				796	* Update superblock with new values
				797	* and log changes
				798	*/
				799	if (XFS_SB_VERSION_HASDALIGN(sbp)) {
				800	if (sbp->sb_unit != mp->m_dalign) {
				801	sbp->sb_unit = mp->m_dalign;
				802	update_flags \|= XFS_SB_UNIT;
				803	}
				804	if (sbp->sb_width != mp->m_swidth) {
				805	sbp->sb_width = mp->m_swidth;
				806	update_flags \|= XFS_SB_WIDTH;
				807	}
				808	}
				809	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
				810	XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
				811	mp->m_dalign = sbp->sb_unit;
				812	mp->m_swidth = sbp->sb_width;
				813	}
				814
				815	xfs_alloc_compute_maxlevels(mp);
				816	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
				817	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
				818	xfs_ialloc_compute_maxlevels(mp);
				819
				820	if (sbp->sb_imax_pct) {
				821	__uint64_t icount;
				822
				823	/* Make sure the maximum inode count is a multiple of the
				824	* units we allocate inodes in.
				825	*/
				826
				827	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				828	do_div(icount, 100);
				829	do_div(icount, mp->m_ialloc_blks);
				830	mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
				831	sbp->sb_inopblog;
				832	} else
				833	mp->m_maxicount = 0;
				834
				835	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
				836
				837	/*
				838	* XFS uses the uuid from the superblock as the unique
				839	* identifier for fsid. We can not use the uuid from the volume
				840	* since a single partition filesystem is identical to a single
				841	* partition volume/filesystem.
				842	*/
				843	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
				844	(mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
David Chinner	84e1e99	2007-06-18 16:50:27 +1000	[diff] [blame]	845	__uint64_t ret64;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	846	if (xfs_uuid_mount(mp)) {
				847	error = XFS_ERROR(EINVAL);
				848	goto error1;
				849	}
				850	uuid_mounted=1;
				851	ret64 = uuid_hash64(&sbp->sb_uuid);
				852	memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
				853	}
				854
				855	/*
				856	* Set the default minimum read and write sizes unless
				857	* already specified in a mount option.
				858	* We use smaller I/O sizes when the file system
				859	* is being used for NFS service (wsync mount option).
				860	*/
				861	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
				862	if (mp->m_flags & XFS_MOUNT_WSYNC) {
				863	readio_log = XFS_WSYNC_READIO_LOG;
				864	writeio_log = XFS_WSYNC_WRITEIO_LOG;
				865	} else {
				866	readio_log = XFS_READIO_LOG_LARGE;
				867	writeio_log = XFS_WRITEIO_LOG_LARGE;
				868	}
				869	} else {
				870	readio_log = mp->m_readio_log;
				871	writeio_log = mp->m_writeio_log;
				872	}
				873
				874	/*
				875	* Set the number of readahead buffers to use based on
				876	* physical memory size.
				877	*/
				878	if (xfs_physmem <= 4096) /* <= 16MB */
				879	mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB;
				880	else if (xfs_physmem <= 8192) /* <= 32MB */
				881	mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB;
				882	else
				883	mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32;
				884	if (sbp->sb_blocklog > readio_log) {
				885	mp->m_readio_log = sbp->sb_blocklog;
				886	} else {
				887	mp->m_readio_log = readio_log;
				888	}
				889	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
				890	if (sbp->sb_blocklog > writeio_log) {
				891	mp->m_writeio_log = sbp->sb_blocklog;
				892	} else {
				893	mp->m_writeio_log = writeio_log;
				894	}
				895	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
				896
				897	/*
				898	* Set the inode cluster size based on the physical memory
				899	* size. This may still be overridden by the file system
				900	* block size if it is larger than the chosen cluster size.
				901	*/
				902	if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */
				903	mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE;
				904	} else {
				905	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
				906	}
				907	/*
				908	* Set whether we're using inode alignment.
				909	*/
				910	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
				911	mp->m_sb.sb_inoalignmt >=
				912	XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
				913	mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
				914	else
				915	mp->m_inoalign_mask = 0;
				916	/*
				917	* If we are using stripe alignment, check whether
				918	* the stripe unit is a multiple of the inode alignment
				919	*/
				920	if (mp->m_dalign && mp->m_inoalign_mask &&
				921	!(mp->m_dalign & mp->m_inoalign_mask))
				922	mp->m_sinoalign = mp->m_dalign;
				923	else
				924	mp->m_sinoalign = 0;
				925	/*
				926	* Check that the data (and log if separate) are an ok size.
				927	*/
				928	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
				929	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
				930	cmn_err(CE_WARN, "XFS: size check 1 failed");
				931	error = XFS_ERROR(E2BIG);
				932	goto error1;
				933	}
				934	error = xfs_read_buf(mp, mp->m_ddev_targp,
				935	d - XFS_FSS_TO_BB(mp, 1),
				936	XFS_FSS_TO_BB(mp, 1), 0, &bp);
				937	if (!error) {
				938	xfs_buf_relse(bp);
				939	} else {
				940	cmn_err(CE_WARN, "XFS: size check 2 failed");
				941	if (error == ENOSPC) {
				942	error = XFS_ERROR(E2BIG);
				943	}
				944	goto error1;
				945	}
				946
				947	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
				948	mp->m_logdev_targp != mp->m_ddev_targp) {
				949	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
				950	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
				951	cmn_err(CE_WARN, "XFS: size check 3 failed");
				952	error = XFS_ERROR(E2BIG);
				953	goto error1;
				954	}
				955	error = xfs_read_buf(mp, mp->m_logdev_targp,
				956	d - XFS_FSB_TO_BB(mp, 1),
				957	XFS_FSB_TO_BB(mp, 1), 0, &bp);
				958	if (!error) {
				959	xfs_buf_relse(bp);
				960	} else {
				961	cmn_err(CE_WARN, "XFS: size check 3 failed");
				962	if (error == ENOSPC) {
				963	error = XFS_ERROR(E2BIG);
				964	}
				965	goto error1;
				966	}
				967	}
				968
				969	/*
				970	* Initialize realtime fields in the mount structure
				971	*/
				972	if ((error = xfs_rtmount_init(mp))) {
				973	cmn_err(CE_WARN, "XFS: RT mount failed");
				974	goto error1;
				975	}
				976
				977	/*
				978	* For client case we are done now
				979	*/
				980	if (mfsi_flags & XFS_MFSI_CLIENT) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	981	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	982	}
				983
				984	/*
				985	* Copies the low order bits of the timestamp and the randomly
				986	* set "sequence" number out of a UUID.
				987	*/
				988	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
				989
				990	/*
				991	* The vfs structure needs to have a file system independent
				992	* way of checking for the invariant file system ID. Since it
				993	* can't look at mount structures it has a pointer to the data
				994	* in the mount structure.
				995	*
				996	* File systems that don't support user level file handles (i.e.
				997	* all of them except for XFS) will leave vfs_altfsid as NULL.
				998	*/
				999	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
				1000	mp->m_dmevmask = 0; /* not persistent; set after each mount */
				1001
Nathan Scott	f6c2d1f	2006-06-20 13:04:51 +1000	[diff] [blame]	1002	xfs_dir_mount(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1003
				1004	/*
				1005	* Initialize the attribute manager's entries.
				1006	*/
				1007	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;
				1008
				1009	/*
				1010	* Initialize the precomputed transaction reservations values.
				1011	*/
				1012	xfs_trans_init(mp);
				1013
				1014	/*
				1015	* Allocate and initialize the inode hash table for this
				1016	* file system.
				1017	*/
				1018	xfs_ihash_init(mp);
				1019	xfs_chash_init(mp);
				1020
				1021	/*
				1022	* Allocate and initialize the per-ag data.
				1023	*/
				1024	init_rwsem(&mp->m_peraglock);
				1025	mp->m_perag =
				1026	kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);
				1027
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	1028	mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1029
				1030	/*
				1031	* log's mount-time initialization. Perform 1st part recovery if needed
				1032	*/
				1033	if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */
				1034	error = xfs_log_mount(mp, mp->m_logdev_targp,
				1035	XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				1036	XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
				1037	if (error) {
				1038	cmn_err(CE_WARN, "XFS: log mount failed");
				1039	goto error2;
				1040	}
				1041	} else { /* No log has been defined */
				1042	cmn_err(CE_WARN, "XFS: no log defined");
				1043	XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
				1044	error = XFS_ERROR(EFSCORRUPTED);
				1045	goto error2;
				1046	}
				1047
				1048	/*
David Chinner	92821e2	2007-05-24 15:26:31 +1000	[diff] [blame]	1049	* Now the log is mounted, we know if it was an unclean shutdown or
				1050	* not. If it was, with the first phase of recovery has completed, we
				1051	* have consistent AG blocks on disk. We have not recovered EFIs yet,
				1052	* but they are recovered transactionally in the second recovery phase
				1053	* later.
				1054	*
				1055	* Hence we can safely re-initialise incore superblock counters from
				1056	* the per-ag data. These may not be correct if the filesystem was not
				1057	* cleanly unmounted, so we need to wait for recovery to finish before
				1058	* doing this.
				1059	*
				1060	* If the filesystem was cleanly unmounted, then we can trust the
				1061	* values in the superblock to be correct and we don't need to do
				1062	* anything here.
				1063	*
				1064	* If we are currently making the filesystem, the initialisation will
				1065	* fail as the perag data is in an undefined state.
				1066	*/
				1067
				1068	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
				1069	!XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
				1070	!mp->m_sb.sb_inprogress) {
				1071	error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
				1072	if (error) {
				1073	goto error2;
				1074	}
				1075	}
				1076	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1077	* Get and sanity-check the root inode.
				1078	* Save the pointer to it in the mount structure.
				1079	*/
				1080	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
				1081	if (error) {
				1082	cmn_err(CE_WARN, "XFS: failed to read root inode");
				1083	goto error3;
				1084	}
				1085
				1086	ASSERT(rip != NULL);
				1087	rvp = XFS_ITOV(rip);
				1088
				1089	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
				1090	cmn_err(CE_WARN, "XFS: corrupted root inode");
Nathan Scott	b657452	2006-06-09 15:29:40 +1000	[diff] [blame]	1091	cmn_err(CE_WARN, "Device %s - root %llu is not a directory",
				1092	XFS_BUFTARG_NAME(mp->m_ddev_targp),
				1093	(unsigned long long)rip->i_ino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1094	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				1095	XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				1096	mp);
				1097	error = XFS_ERROR(EFSCORRUPTED);
				1098	goto error4;
				1099	}
				1100	mp->m_rootip = rip; /* save it */
				1101
				1102	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				1103
				1104	/*
				1105	* Initialize realtime inode pointers in the mount structure
				1106	*/
				1107	if ((error = xfs_rtmount_inodes(mp))) {
				1108	/*
				1109	* Free up the root inode.
				1110	*/
				1111	cmn_err(CE_WARN, "XFS: failed to read RT inodes");
				1112	goto error4;
				1113	}
				1114
				1115	/*
				1116	* If fs is not mounted readonly, then update the superblock
				1117	* unit and width changes.
				1118	*/
				1119	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
				1120	xfs_mount_log_sbunit(mp, update_flags);
				1121
				1122	/*
				1123	* Initialise the XFS quota management subsystem for this mount
				1124	*/
				1125	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
				1126	goto error4;
				1127
				1128	/*
				1129	* Finish recovering the file system. This part needed to be
				1130	* delayed until after the root and real-time bitmap inodes
				1131	* were consistently read in.
				1132	*/
				1133	error = xfs_log_mount_finish(mp, mfsi_flags);
				1134	if (error) {
				1135	cmn_err(CE_WARN, "XFS: log mount finish failed");
				1136	goto error4;
				1137	}
				1138
				1139	/*
				1140	* Complete the quota initialisation, post-log-replay component.
				1141	*/
				1142	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
				1143	goto error4;
				1144
David Chinner	84e1e99	2007-06-18 16:50:27 +1000	[diff] [blame]	1145	/*
				1146	* Now we are mounted, reserve a small amount of unused space for
				1147	* privileged transactions. This is needed so that transaction
				1148	* space required for critical operations can dip into this pool
				1149	* when at ENOSPC. This is needed for operations like create with
				1150	* attr, unwritten extent conversion at ENOSPC, etc. Data allocations
				1151	* are not allowed to use this reserved space.
				1152	*
				1153	* We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
				1154	* This may drive us straight to ENOSPC on mount, but that implies
				1155	* we were already there on the last unmount.
				1156	*/
Christoph Hellwig	39726be	2007-06-18 17:57:45 +1000	[diff] [blame]	1157	resblks = mp->m_sb.sb_dblocks;
				1158	do_div(resblks, 20);
				1159	resblks = min_t(__uint64_t, resblks, 1024);
David Chinner	84e1e99	2007-06-18 16:50:27 +1000	[diff] [blame]	1160	xfs_reserve_blocks(mp, &resblks, NULL);
				1161
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1162	return 0;
				1163
				1164	error4:
				1165	/*
				1166	* Free up the root inode.
				1167	*/
				1168	VN_RELE(rvp);
				1169	error3:
				1170	xfs_log_unmount_dealloc(mp);
				1171	error2:
				1172	xfs_ihash_free(mp);
				1173	xfs_chash_free(mp);
				1174	for (agno = 0; agno < sbp->sb_agcount; agno++)
				1175	if (mp->m_perag[agno].pagb_list)
				1176	kmem_free(mp->m_perag[agno].pagb_list,
				1177	sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
				1178	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
				1179	mp->m_perag = NULL;
				1180	/* FALLTHROUGH */
				1181	error1:
				1182	if (uuid_mounted)
				1183	xfs_uuid_unmount(mp);
				1184	xfs_freesb(mp);
				1185	return error;
				1186	}
				1187
				1188	/*
				1189	* xfs_unmountfs
				1190	*
				1191	* This flushes out the inodes,dquots and the superblock, unmounts the
				1192	* log and makes sure that incore structures are freed.
				1193	*/
				1194	int
				1195	xfs_unmountfs(xfs_mount_t mp, struct cred cr)
				1196	{
Nathan Scott	b83bd13	2006-06-09 16:48:30 +1000	[diff] [blame]	1197	struct bhv_vfs *vfsp = XFS_MTOVFS(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1198	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1199	int64_t fsid;
				1200	#endif
David Chinner	84e1e99	2007-06-18 16:50:27 +1000	[diff] [blame]	1201	__uint64_t resblks;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1202
David Chinner	641c56f	2007-06-18 16:50:17 +1000	[diff] [blame]	1203	/*
				1204	* We can potentially deadlock here if we have an inode cluster
				1205	* that has been freed has it's buffer still pinned in memory because
				1206	* the transaction is still sitting in a iclog. The stale inodes
				1207	* on that buffer will have their flush locks held until the
				1208	* transaction hits the disk and the callbacks run. the inode
				1209	* flush takes the flush lock unconditionally and with nothing to
				1210	* push out the iclog we will never get that unlocked. hence we
				1211	* need to force the log first.
				1212	*/
				1213	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE \| XFS_LOG_SYNC);
Christoph Hellwig	efa8027	2005-06-21 15:37:17 +1000	[diff] [blame]	1214	xfs_iflush_all(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1215
Nathan Scott	ee2a4f7	2006-01-11 15:33:36 +1100	[diff] [blame]	1216	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL \| XFS_QMOPT_UMOUNTING);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1217
				1218	/*
				1219	* Flush out the log synchronously so that we know for sure
				1220	* that nothing is pinned. This is important because bflush()
				1221	* will skip pinned buffers.
				1222	*/
				1223	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE \| XFS_LOG_SYNC);
				1224
				1225	xfs_binval(mp->m_ddev_targp);
				1226	if (mp->m_rtdev_targp) {
				1227	xfs_binval(mp->m_rtdev_targp);
				1228	}
				1229
David Chinner	84e1e99	2007-06-18 16:50:27 +1000	[diff] [blame]	1230	/*
				1231	* Unreserve any blocks we have so that when we unmount we don't account
				1232	* the reserved free space as used. This is really only necessary for
				1233	* lazy superblock counting because it trusts the incore superblock
				1234	* counters to be aboslutely correct on clean unmount.
				1235	*
				1236	* We don't bother correcting this elsewhere for lazy superblock
				1237	* counting because on mount of an unclean filesystem we reconstruct the
				1238	* correct counter value and this is irrelevant.
				1239	*
				1240	* For non-lazy counter filesystems, this doesn't matter at all because
				1241	* we only every apply deltas to the superblock and hence the incore
				1242	* value does not matter....
				1243	*/
				1244	resblks = 0;
				1245	xfs_reserve_blocks(mp, &resblks, NULL);
				1246
David Chinner	92821e2	2007-05-24 15:26:31 +1000	[diff] [blame]	1247	xfs_log_sbcount(mp, 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1248	xfs_unmountfs_writesb(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1249	xfs_unmountfs_wait(mp); /* wait for async bufs */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1250	xfs_log_unmount(mp); /* Done! No more fs ops. */
				1251
				1252	xfs_freesb(mp);
				1253
				1254	/*
				1255	* All inodes from this mount point should be freed.
				1256	*/
				1257	ASSERT(mp->m_inodes == NULL);
				1258
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1259	xfs_unmountfs_close(mp, cr);
				1260	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
				1261	xfs_uuid_unmount(mp);
				1262
				1263	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1264	/*
				1265	* clear all error tags on this filesystem
				1266	*/
				1267	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
				1268	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
				1269	#endif
				1270	XFS_IODONE(vfsp);
				1271	xfs_mount_free(mp, 1);
				1272	return 0;
				1273	}
				1274
				1275	void
				1276	xfs_unmountfs_close(xfs_mount_t mp, struct cred cr)
				1277	{
				1278	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1279	xfs_free_buftarg(mp->m_logdev_targp, 1);
				1280	if (mp->m_rtdev_targp)
				1281	xfs_free_buftarg(mp->m_rtdev_targp, 1);
				1282	xfs_free_buftarg(mp->m_ddev_targp, 0);
				1283	}
				1284
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	1285	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1286	xfs_unmountfs_wait(xfs_mount_t *mp)
				1287	{
				1288	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1289	xfs_wait_buftarg(mp->m_logdev_targp);
				1290	if (mp->m_rtdev_targp)
				1291	xfs_wait_buftarg(mp->m_rtdev_targp);
				1292	xfs_wait_buftarg(mp->m_ddev_targp);
				1293	}
				1294
				1295	int
David Chinner	92821e2	2007-05-24 15:26:31 +1000	[diff] [blame]	1296	xfs_fs_writable(xfs_mount_t *mp)
				1297	{
				1298	bhv_vfs_t *vfsp = XFS_MTOVFS(mp);
				1299
				1300	return !(vfs_test_for_freeze(vfsp) \|\| XFS_FORCED_SHUTDOWN(mp) \|\|
				1301	(vfsp->vfs_flag & VFS_RDONLY));
				1302	}
				1303
				1304	/*
				1305	* xfs_log_sbcount
				1306	*
				1307	* Called either periodically to keep the on disk superblock values
				1308	* roughly up to date or from unmount to make sure the values are
				1309	* correct on a clean unmount.
				1310	*
				1311	* Note this code can be called during the process of freezing, so
				1312	* we may need to use the transaction allocator which does not not
				1313	* block when the transaction subsystem is in its frozen state.
				1314	*/
				1315	int
				1316	xfs_log_sbcount(
				1317	xfs_mount_t *mp,
				1318	uint sync)
				1319	{
				1320	xfs_trans_t *tp;
				1321	int error;
				1322
				1323	if (!xfs_fs_writable(mp))
				1324	return 0;
				1325
				1326	xfs_icsb_sync_counters(mp);
				1327
				1328	/*
				1329	* we don't need to do this if we are updating the superblock
				1330	* counters on every modification.
				1331	*/
				1332	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
				1333	return 0;
				1334
				1335	tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
				1336	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				1337	XFS_DEFAULT_LOG_COUNT);
				1338	if (error) {
				1339	xfs_trans_cancel(tp, 0);
				1340	return error;
				1341	}
				1342
				1343	xfs_mod_sb(tp, XFS_SB_IFREE \| XFS_SB_ICOUNT \| XFS_SB_FDBLOCKS);
				1344	if (sync)
				1345	xfs_trans_set_sync(tp);
				1346	xfs_trans_commit(tp, 0);
				1347
				1348	return 0;
				1349	}
				1350
				1351	int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1352	xfs_unmountfs_writesb(xfs_mount_t *mp)
				1353	{
				1354	xfs_buf_t *sbp;
				1355	xfs_sb_t *sb;
				1356	int error = 0;
				1357
				1358	/*
				1359	* skip superblock write if fs is read-only, or
				1360	* if we are doing a forced umount.
				1361	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1362	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY \|\|
				1363	XFS_FORCED_SHUTDOWN(mp))) {
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1364
David Chinner	92821e2	2007-05-24 15:26:31 +1000	[diff] [blame]	1365	sbp = xfs_getsb(mp, 0);
				1366	sb = XFS_BUF_TO_SBP(sbp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1367
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1368	/*
				1369	* mark shared-readonly if desired
				1370	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1371	if (mp->m_mk_sharedro) {
				1372	if (!(sb->sb_flags & XFS_SBF_READONLY))
				1373	sb->sb_flags \|= XFS_SBF_READONLY;
				1374	if (!XFS_SB_VERSION_HASSHARED(sb))
				1375	XFS_SB_VERSION_ADDSHARED(sb);
				1376	xfs_fs_cmn_err(CE_NOTE, mp,
				1377	"Unmounting, marking shared read-only");
				1378	}
David Chinner	92821e2	2007-05-24 15:26:31 +1000	[diff] [blame]	1379
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1380	XFS_BUF_UNDONE(sbp);
				1381	XFS_BUF_UNREAD(sbp);
				1382	XFS_BUF_UNDELAYWRITE(sbp);
				1383	XFS_BUF_WRITE(sbp);
				1384	XFS_BUF_UNASYNC(sbp);
				1385	ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
				1386	xfsbdstrat(mp, sbp);
				1387	/* Nevermind errors we might get here. */
				1388	error = xfs_iowait(sbp);
				1389	if (error)
				1390	xfs_ioerror_alert("xfs_unmountfs_writesb",
				1391	mp, sbp, XFS_BUF_ADDR(sbp));
				1392	if (error && mp->m_mk_sharedro)
				1393	xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
David Chinner	92821e2	2007-05-24 15:26:31 +1000	[diff] [blame]	1394	xfs_buf_relse(sbp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1395	}
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1396	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1397	}
				1398
				1399	/*
				1400	* xfs_mod_sb() can be used to copy arbitrary changes to the
				1401	* in-core superblock into the superblock buffer to be logged.
				1402	* It does not provide the higher level of locking that is
				1403	* needed to protect the in-core superblock from concurrent
				1404	* access.
				1405	*/
				1406	void
				1407	xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
				1408	{
				1409	xfs_buf_t *bp;
				1410	int first;
				1411	int last;
				1412	xfs_mount_t *mp;
				1413	xfs_sb_t *sbp;
				1414	xfs_sb_field_t f;
				1415
				1416	ASSERT(fields);
				1417	if (!fields)
				1418	return;
				1419	mp = tp->t_mountp;
				1420	bp = xfs_trans_getsb(tp, mp, 0);
				1421	sbp = XFS_BUF_TO_SBP(bp);
				1422	first = sizeof(xfs_sb_t);
				1423	last = 0;
				1424
				1425	/* translate/copy */
				1426
				1427	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields);
				1428
				1429	/* find modified range */
				1430
				1431	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				1432	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1433	first = xfs_sb_info[f].offset;
				1434
				1435	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
				1436	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1437	last = xfs_sb_info[f + 1].offset - 1;
				1438
				1439	xfs_trans_log_buf(tp, bp, first, last);
				1440	}
Yingping Lu	d210a28	2006-06-09 14:55:18 +1000	[diff] [blame]	1441
Yingping Lu	d210a28	2006-06-09 14:55:18 +1000	[diff] [blame]	1442
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1443	/*
				1444	* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
				1445	* a delta to a specified field in the in-core superblock. Simply
				1446	* switch on the field indicated and apply the delta to that field.
				1447	* Fields are not allowed to dip below zero, so if the delta would
				1448	* do this do not apply it and return EINVAL.
				1449	*
				1450	* The SB_LOCK must be held when this routine is called.
				1451	*/
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1452	int
David Chinner	20f4ebf	2007-02-10 18:36:10 +1100	[diff] [blame]	1453	xfs_mod_incore_sb_unlocked(
				1454	xfs_mount_t *mp,
				1455	xfs_sb_field_t field,
				1456	int64_t delta,
				1457	int rsvd)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1458	{
				1459	int scounter; /* short counter for 32 bit fields */
				1460	long long lcounter; /* long counter for 64 bit fields */
				1461	long long res_used, rem;
				1462
				1463	/*
				1464	* With the in-core superblock spin lock held, switch
				1465	* on the indicated field. Apply the delta to the
				1466	* proper field. If the fields value would dip below
				1467	* 0, then do not apply the delta and return EINVAL.
				1468	*/
				1469	switch (field) {
				1470	case XFS_SBS_ICOUNT:
				1471	lcounter = (long long)mp->m_sb.sb_icount;
				1472	lcounter += delta;
				1473	if (lcounter < 0) {
				1474	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1475	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1476	}
				1477	mp->m_sb.sb_icount = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1478	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1479	case XFS_SBS_IFREE:
				1480	lcounter = (long long)mp->m_sb.sb_ifree;
				1481	lcounter += delta;
				1482	if (lcounter < 0) {
				1483	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1484	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1485	}
				1486	mp->m_sb.sb_ifree = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1487	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1488	case XFS_SBS_FDBLOCKS:
David Chinner	4be536d	2006-09-07 14:26:50 +1000	[diff] [blame]	1489	lcounter = (long long)
				1490	mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1491	res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
				1492
				1493	if (delta > 0) { /* Putting blocks back */
				1494	if (res_used > delta) {
				1495	mp->m_resblks_avail += delta;
				1496	} else {
				1497	rem = delta - res_used;
				1498	mp->m_resblks_avail = mp->m_resblks;
				1499	lcounter += rem;
				1500	}
				1501	} else { /* Taking blocks away */
				1502
				1503	lcounter += delta;
				1504
				1505	/*
				1506	* If were out of blocks, use any available reserved blocks if
				1507	* were allowed to.
				1508	*/
				1509
				1510	if (lcounter < 0) {
				1511	if (rsvd) {
				1512	lcounter = (long long)mp->m_resblks_avail + delta;
				1513	if (lcounter < 0) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1514	return XFS_ERROR(ENOSPC);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1515	}
				1516	mp->m_resblks_avail = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1517	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1518	} else { /* not reserved */
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1519	return XFS_ERROR(ENOSPC);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1520	}
				1521	}
				1522	}
				1523
David Chinner	4be536d	2006-09-07 14:26:50 +1000	[diff] [blame]	1524	mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1525	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1526	case XFS_SBS_FREXTENTS:
				1527	lcounter = (long long)mp->m_sb.sb_frextents;
				1528	lcounter += delta;
				1529	if (lcounter < 0) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1530	return XFS_ERROR(ENOSPC);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1531	}
				1532	mp->m_sb.sb_frextents = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1533	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1534	case XFS_SBS_DBLOCKS:
				1535	lcounter = (long long)mp->m_sb.sb_dblocks;
				1536	lcounter += delta;
				1537	if (lcounter < 0) {
				1538	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1539	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1540	}
				1541	mp->m_sb.sb_dblocks = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1542	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1543	case XFS_SBS_AGCOUNT:
				1544	scounter = mp->m_sb.sb_agcount;
				1545	scounter += delta;
				1546	if (scounter < 0) {
				1547	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1548	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1549	}
				1550	mp->m_sb.sb_agcount = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1551	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1552	case XFS_SBS_IMAX_PCT:
				1553	scounter = mp->m_sb.sb_imax_pct;
				1554	scounter += delta;
				1555	if (scounter < 0) {
				1556	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1557	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1558	}
				1559	mp->m_sb.sb_imax_pct = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1560	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1561	case XFS_SBS_REXTSIZE:
				1562	scounter = mp->m_sb.sb_rextsize;
				1563	scounter += delta;
				1564	if (scounter < 0) {
				1565	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1566	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1567	}
				1568	mp->m_sb.sb_rextsize = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1569	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1570	case XFS_SBS_RBMBLOCKS:
				1571	scounter = mp->m_sb.sb_rbmblocks;
				1572	scounter += delta;
				1573	if (scounter < 0) {
				1574	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1575	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1576	}
				1577	mp->m_sb.sb_rbmblocks = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1578	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1579	case XFS_SBS_RBLOCKS:
				1580	lcounter = (long long)mp->m_sb.sb_rblocks;
				1581	lcounter += delta;
				1582	if (lcounter < 0) {
				1583	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1584	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1585	}
				1586	mp->m_sb.sb_rblocks = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1587	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1588	case XFS_SBS_REXTENTS:
				1589	lcounter = (long long)mp->m_sb.sb_rextents;
				1590	lcounter += delta;
				1591	if (lcounter < 0) {
				1592	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1593	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1594	}
				1595	mp->m_sb.sb_rextents = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1596	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1597	case XFS_SBS_REXTSLOG:
				1598	scounter = mp->m_sb.sb_rextslog;
				1599	scounter += delta;
				1600	if (scounter < 0) {
				1601	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1602	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1603	}
				1604	mp->m_sb.sb_rextslog = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1605	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1606	default:
				1607	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1608	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1609	}
				1610	}
				1611
				1612	/*
				1613	* xfs_mod_incore_sb() is used to change a field in the in-core
				1614	* superblock structure by the specified delta. This modification
				1615	* is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked()
				1616	* routine to do the work.
				1617	*/
				1618	int
David Chinner	20f4ebf	2007-02-10 18:36:10 +1100	[diff] [blame]	1619	xfs_mod_incore_sb(
				1620	xfs_mount_t *mp,
				1621	xfs_sb_field_t field,
				1622	int64_t delta,
				1623	int rsvd)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1624	{
				1625	unsigned long s;
				1626	int status;
				1627
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1628	/* check for per-cpu counters */
				1629	switch (field) {
				1630	#ifdef HAVE_PERCPU_SB
				1631	case XFS_SBS_ICOUNT:
				1632	case XFS_SBS_IFREE:
				1633	case XFS_SBS_FDBLOCKS:
				1634	if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				1635	status = xfs_icsb_modify_counters(mp, field,
				1636	delta, rsvd);
				1637	break;
				1638	}
				1639	/* FALLTHROUGH */
				1640	#endif
				1641	default:
				1642	s = XFS_SB_LOCK(mp);
				1643	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
				1644	XFS_SB_UNLOCK(mp, s);
				1645	break;
				1646	}
				1647
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1648	return status;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1649	}
				1650
				1651	/*
				1652	* xfs_mod_incore_sb_batch() is used to change more than one field
				1653	* in the in-core superblock structure at a time. This modification
				1654	* is protected by a lock internal to this module. The fields and
				1655	* changes to those fields are specified in the array of xfs_mod_sb
				1656	* structures passed in.
				1657	*
				1658	* Either all of the specified deltas will be applied or none of
				1659	* them will. If any modified field dips below 0, then all modifications
				1660	* will be backed out and EINVAL will be returned.
				1661	*/
				1662	int
				1663	xfs_mod_incore_sb_batch(xfs_mount_t mp, xfs_mod_sb_t msb, uint nmsb, int rsvd)
				1664	{
				1665	unsigned long s;
				1666	int status=0;
				1667	xfs_mod_sb_t *msbp;
				1668
				1669	/*
				1670	* Loop through the array of mod structures and apply each
				1671	* individually. If any fail, then back out all those
				1672	* which have already been applied. Do all of this within
				1673	* the scope of the SB_LOCK so that all of the changes will
				1674	* be atomic.
				1675	*/
				1676	s = XFS_SB_LOCK(mp);
				1677	msbp = &msb[0];
				1678	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
				1679	/*
				1680	* Apply the delta at index n. If it fails, break
				1681	* from the loop so we'll fall into the undo loop
				1682	* below.
				1683	*/
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1684	switch (msbp->msb_field) {
				1685	#ifdef HAVE_PERCPU_SB
				1686	case XFS_SBS_ICOUNT:
				1687	case XFS_SBS_IFREE:
				1688	case XFS_SBS_FDBLOCKS:
				1689	if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	1690	XFS_SB_UNLOCK(mp, s);
				1691	status = xfs_icsb_modify_counters(mp,
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1692	msbp->msb_field,
				1693	msbp->msb_delta, rsvd);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	1694	s = XFS_SB_LOCK(mp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1695	break;
				1696	}
				1697	/* FALLTHROUGH */
				1698	#endif
				1699	default:
				1700	status = xfs_mod_incore_sb_unlocked(mp,
				1701	msbp->msb_field,
				1702	msbp->msb_delta, rsvd);
				1703	break;
				1704	}
				1705
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1706	if (status != 0) {
				1707	break;
				1708	}
				1709	}
				1710
				1711	/*
				1712	* If we didn't complete the loop above, then back out
				1713	* any changes made to the superblock. If you add code
				1714	* between the loop above and here, make sure that you
				1715	* preserve the value of status. Loop back until
				1716	* we step below the beginning of the array. Make sure
				1717	* we don't touch anything back there.
				1718	*/
				1719	if (status != 0) {
				1720	msbp--;
				1721	while (msbp >= msb) {
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1722	switch (msbp->msb_field) {
				1723	#ifdef HAVE_PERCPU_SB
				1724	case XFS_SBS_ICOUNT:
				1725	case XFS_SBS_IFREE:
				1726	case XFS_SBS_FDBLOCKS:
				1727	if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	1728	XFS_SB_UNLOCK(mp, s);
				1729	status = xfs_icsb_modify_counters(mp,
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1730	msbp->msb_field,
				1731	-(msbp->msb_delta),
				1732	rsvd);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	1733	s = XFS_SB_LOCK(mp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1734	break;
				1735	}
				1736	/* FALLTHROUGH */
				1737	#endif
				1738	default:
				1739	status = xfs_mod_incore_sb_unlocked(mp,
				1740	msbp->msb_field,
				1741	-(msbp->msb_delta),
				1742	rsvd);
				1743	break;
				1744	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1745	ASSERT(status == 0);
				1746	msbp--;
				1747	}
				1748	}
				1749	XFS_SB_UNLOCK(mp, s);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1750	return status;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1751	}
				1752
				1753	/*
				1754	* xfs_getsb() is called to obtain the buffer for the superblock.
				1755	* The buffer is returned locked and read in from disk.
				1756	* The buffer should be released with a call to xfs_brelse().
				1757	*
				1758	* If the flags parameter is BUF_TRYLOCK, then we'll only return
				1759	* the superblock buffer if it can be locked without sleeping.
				1760	* If it can't then we'll return NULL.
				1761	*/
				1762	xfs_buf_t *
				1763	xfs_getsb(
				1764	xfs_mount_t *mp,
				1765	int flags)
				1766	{
				1767	xfs_buf_t *bp;
				1768
				1769	ASSERT(mp->m_sb_bp != NULL);
				1770	bp = mp->m_sb_bp;
				1771	if (flags & XFS_BUF_TRYLOCK) {
				1772	if (!XFS_BUF_CPSEMA(bp)) {
				1773	return NULL;
				1774	}
				1775	} else {
				1776	XFS_BUF_PSEMA(bp, PRIBIO);
				1777	}
				1778	XFS_BUF_HOLD(bp);
				1779	ASSERT(XFS_BUF_ISDONE(bp));
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1780	return bp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1781	}
				1782
				1783	/*
				1784	* Used to free the superblock along various error paths.
				1785	*/
				1786	void
				1787	xfs_freesb(
				1788	xfs_mount_t *mp)
				1789	{
				1790	xfs_buf_t *bp;
				1791
				1792	/*
				1793	* Use xfs_getsb() so that the buffer will be locked
				1794	* when we call xfs_buf_relse().
				1795	*/
				1796	bp = xfs_getsb(mp, 0);
				1797	XFS_BUF_UNMANAGE(bp);
				1798	xfs_buf_relse(bp);
				1799	mp->m_sb_bp = NULL;
				1800	}
				1801
				1802	/*
				1803	* See if the UUID is unique among mounted XFS filesystems.
				1804	* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
				1805	*/
				1806	STATIC int
				1807	xfs_uuid_mount(
				1808	xfs_mount_t *mp)
				1809	{
				1810	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
				1811	cmn_err(CE_WARN,
				1812	"XFS: Filesystem %s has nil UUID - can't mount",
				1813	mp->m_fsname);
				1814	return -1;
				1815	}
				1816	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
				1817	cmn_err(CE_WARN,
				1818	"XFS: Filesystem %s has duplicate UUID - can't mount",
				1819	mp->m_fsname);
				1820	return -1;
				1821	}
				1822	return 0;
				1823	}
				1824
				1825	/*
				1826	* Remove filesystem from the UUID table.
				1827	*/
				1828	STATIC void
				1829	xfs_uuid_unmount(
				1830	xfs_mount_t *mp)
				1831	{
				1832	uuid_table_remove(&mp->m_sb.sb_uuid);
				1833	}
				1834
				1835	/*
				1836	* Used to log changes to the superblock unit and width fields which could
				1837	* be altered by the mount options. Only the first superblock is updated.
				1838	*/
				1839	STATIC void
				1840	xfs_mount_log_sbunit(
				1841	xfs_mount_t *mp,
				1842	__int64_t fields)
				1843	{
				1844	xfs_trans_t *tp;
				1845
				1846	ASSERT(fields & (XFS_SB_UNIT\|XFS_SB_WIDTH\|XFS_SB_UUID));
				1847
				1848	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
				1849	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				1850	XFS_DEFAULT_LOG_COUNT)) {
				1851	xfs_trans_cancel(tp, 0);
				1852	return;
				1853	}
				1854	xfs_mod_sb(tp, fields);
Eric Sandeen	1c72bf9	2007-05-08 13:48:42 +1000	[diff] [blame]	1855	xfs_trans_commit(tp, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1856	}
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1857
				1858
				1859	#ifdef HAVE_PERCPU_SB
				1860	/*
				1861	* Per-cpu incore superblock counters
				1862	*
				1863	* Simple concept, difficult implementation
				1864	*
				1865	* Basically, replace the incore superblock counters with a distributed per cpu
				1866	* counter for contended fields (e.g. free block count).
				1867	*
				1868	* Difficulties arise in that the incore sb is used for ENOSPC checking, and
				1869	* hence needs to be accurately read when we are running low on space. Hence
				1870	* there is a method to enable and disable the per-cpu counters based on how
				1871	* much "stuff" is available in them.
				1872	*
				1873	* Basically, a counter is enabled if there is enough free resource to justify
				1874	* running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
				1875	* ENOSPC), then we disable the counters to synchronise all callers and
				1876	* re-distribute the available resources.
				1877	*
				1878	* If, once we redistributed the available resources, we still get a failure,
				1879	* we disable the per-cpu counter and go through the slow path.
				1880	*
				1881	* The slow path is the current xfs_mod_incore_sb() function. This means that
				1882	* when we disable a per-cpu counter, we need to drain its resources back to
				1883	* the global superblock. We do this after disabling the counter to prevent
				1884	* more threads from queueing up on the counter.
				1885	*
				1886	* Essentially, this means that we still need a lock in the fast path to enable
				1887	* synchronisation between the global counters and the per-cpu counters. This
				1888	* is not a problem because the lock will be local to a CPU almost all the time
				1889	* and have little contention except when we get to ENOSPC conditions.
				1890	*
				1891	* Basically, this lock becomes a barrier that enables us to lock out the fast
				1892	* path while we do things like enabling and disabling counters and
				1893	* synchronising the counters.
				1894	*
				1895	* Locking rules:
				1896	*
				1897	* 1. XFS_SB_LOCK() before picking up per-cpu locks
				1898	* 2. per-cpu locks always picked up via for_each_online_cpu() order
				1899	* 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
				1900	* 4. modifying per-cpu counters requires holding per-cpu lock
				1901	* 5. modifying global counters requires holding XFS_SB_LOCK
				1902	* 6. enabling or disabling a counter requires holding the XFS_SB_LOCK
				1903	* and _none_ of the per-cpu locks.
				1904	*
				1905	* Disabled counters are only ever re-enabled by a balance operation
				1906	* that results in more free resources per CPU than a given threshold.
				1907	* To ensure counters don't remain disabled, they are rebalanced when
				1908	* the global resource goes above a higher threshold (i.e. some hysteresis
				1909	* is present to prevent thrashing).
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1910	*/
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1911
Chandra Seetharaman	5a67e4c	2006-06-27 02:54:11 -0700	[diff] [blame]	1912	#ifdef CONFIG_HOTPLUG_CPU
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1913	/*
				1914	* hot-plug CPU notifier support.
				1915	*
Chandra Seetharaman	5a67e4c	2006-06-27 02:54:11 -0700	[diff] [blame]	1916	* We need a notifier per filesystem as we need to be able to identify
				1917	* the filesystem to balance the counters out. This is achieved by
				1918	* having a notifier block embedded in the xfs_mount_t and doing pointer
				1919	* magic to get the mount pointer from the notifier block address.
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1920	*/
				1921	STATIC int
				1922	xfs_icsb_cpu_notify(
				1923	struct notifier_block *nfb,
				1924	unsigned long action,
				1925	void *hcpu)
				1926	{
				1927	xfs_icsb_cnts_t *cntp;
				1928	xfs_mount_t *mp;
				1929	int s;
				1930
				1931	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
				1932	cntp = (xfs_icsb_cnts_t *)
				1933	per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
				1934	switch (action) {
				1935	case CPU_UP_PREPARE:
Rafael J. Wysocki	8bb7844	2007-05-09 02:35:10 -0700	[diff] [blame]	1936	case CPU_UP_PREPARE_FROZEN:
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1937	/* Easy Case - initialize the area and locks, and
				1938	* then rebalance when online does everything else for us. */
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	1939	memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1940	break;
				1941	case CPU_ONLINE:
Rafael J. Wysocki	8bb7844	2007-05-09 02:35:10 -0700	[diff] [blame]	1942	case CPU_ONLINE_FROZEN:
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	1943	xfs_icsb_lock(mp);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	1944	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
				1945	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
				1946	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	1947	xfs_icsb_unlock(mp);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1948	break;
				1949	case CPU_DEAD:
Rafael J. Wysocki	8bb7844	2007-05-09 02:35:10 -0700	[diff] [blame]	1950	case CPU_DEAD_FROZEN:
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1951	/* Disable all the counters, then fold the dead cpu's
				1952	* count into the total on the global superblock and
				1953	* re-enable the counters. */
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	1954	xfs_icsb_lock(mp);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1955	s = XFS_SB_LOCK(mp);
				1956	xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
				1957	xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
				1958	xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
				1959
				1960	mp->m_sb.sb_icount += cntp->icsb_icount;
				1961	mp->m_sb.sb_ifree += cntp->icsb_ifree;
				1962	mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
				1963
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	1964	memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1965
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	1966	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT,
				1967	XFS_ICSB_SB_LOCKED, 0);
				1968	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE,
				1969	XFS_ICSB_SB_LOCKED, 0);
				1970	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
				1971	XFS_ICSB_SB_LOCKED, 0);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1972	XFS_SB_UNLOCK(mp, s);
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	1973	xfs_icsb_unlock(mp);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1974	break;
				1975	}
				1976
				1977	return NOTIFY_OK;
				1978	}
Chandra Seetharaman	5a67e4c	2006-06-27 02:54:11 -0700	[diff] [blame]	1979	#endif /* CONFIG_HOTPLUG_CPU */
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1980
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1981	int
				1982	xfs_icsb_init_counters(
				1983	xfs_mount_t *mp)
				1984	{
				1985	xfs_icsb_cnts_t *cntp;
				1986	int i;
				1987
				1988	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
				1989	if (mp->m_sb_cnts == NULL)
				1990	return -ENOMEM;
				1991
Chandra Seetharaman	5a67e4c	2006-06-27 02:54:11 -0700	[diff] [blame]	1992	#ifdef CONFIG_HOTPLUG_CPU
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1993	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
				1994	mp->m_icsb_notifier.priority = 0;
Chandra Seetharaman	5a67e4c	2006-06-27 02:54:11 -0700	[diff] [blame]	1995	register_hotcpu_notifier(&mp->m_icsb_notifier);
				1996	#endif /* CONFIG_HOTPLUG_CPU */
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1997
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1998	for_each_online_cpu(i) {
				1999	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2000	memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2001	}
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2002
				2003	mutex_init(&mp->m_icsb_mutex);
				2004
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2005	/*
				2006	* start with all counters disabled so that the
				2007	* initial balance kicks us off correctly
				2008	*/
				2009	mp->m_icsb_counters = -1;
				2010	return 0;
				2011	}
				2012
Lachlan McIlroy	5478eea	2007-02-10 18:36:29 +1100	[diff] [blame]	2013	void
				2014	xfs_icsb_reinit_counters(
				2015	xfs_mount_t *mp)
				2016	{
				2017	xfs_icsb_lock(mp);
				2018	/*
				2019	* start with all counters disabled so that the
				2020	* initial balance kicks us off correctly
				2021	*/
				2022	mp->m_icsb_counters = -1;
				2023	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
				2024	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
				2025	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
				2026	xfs_icsb_unlock(mp);
				2027	}
				2028
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2029	STATIC void
				2030	xfs_icsb_destroy_counters(
				2031	xfs_mount_t *mp)
				2032	{
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	2033	if (mp->m_sb_cnts) {
Chandra Seetharaman	5a67e4c	2006-06-27 02:54:11 -0700	[diff] [blame]	2034	unregister_hotcpu_notifier(&mp->m_icsb_notifier);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2035	free_percpu(mp->m_sb_cnts);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	2036	}
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	2037	mutex_destroy(&mp->m_icsb_mutex);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2038	}
				2039
David Chinner	7989cb8	2007-02-10 18:34:56 +1100	[diff] [blame]	2040	STATIC_INLINE void
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2041	xfs_icsb_lock_cntr(
				2042	xfs_icsb_cnts_t *icsbp)
				2043	{
				2044	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
				2045	ndelay(1000);
				2046	}
				2047	}
				2048
David Chinner	7989cb8	2007-02-10 18:34:56 +1100	[diff] [blame]	2049	STATIC_INLINE void
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2050	xfs_icsb_unlock_cntr(
				2051	xfs_icsb_cnts_t *icsbp)
				2052	{
				2053	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
				2054	}
				2055
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2056
David Chinner	7989cb8	2007-02-10 18:34:56 +1100	[diff] [blame]	2057	STATIC_INLINE void
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2058	xfs_icsb_lock_all_counters(
				2059	xfs_mount_t *mp)
				2060	{
				2061	xfs_icsb_cnts_t *cntp;
				2062	int i;
				2063
				2064	for_each_online_cpu(i) {
				2065	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2066	xfs_icsb_lock_cntr(cntp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2067	}
				2068	}
				2069
David Chinner	7989cb8	2007-02-10 18:34:56 +1100	[diff] [blame]	2070	STATIC_INLINE void
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2071	xfs_icsb_unlock_all_counters(
				2072	xfs_mount_t *mp)
				2073	{
				2074	xfs_icsb_cnts_t *cntp;
				2075	int i;
				2076
				2077	for_each_online_cpu(i) {
				2078	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2079	xfs_icsb_unlock_cntr(cntp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2080	}
				2081	}
				2082
				2083	STATIC void
				2084	xfs_icsb_count(
				2085	xfs_mount_t *mp,
				2086	xfs_icsb_cnts_t *cnt,
				2087	int flags)
				2088	{
				2089	xfs_icsb_cnts_t *cntp;
				2090	int i;
				2091
				2092	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
				2093
				2094	if (!(flags & XFS_ICSB_LAZY_COUNT))
				2095	xfs_icsb_lock_all_counters(mp);
				2096
				2097	for_each_online_cpu(i) {
				2098	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
				2099	cnt->icsb_icount += cntp->icsb_icount;
				2100	cnt->icsb_ifree += cntp->icsb_ifree;
				2101	cnt->icsb_fdblocks += cntp->icsb_fdblocks;
				2102	}
				2103
				2104	if (!(flags & XFS_ICSB_LAZY_COUNT))
				2105	xfs_icsb_unlock_all_counters(mp);
				2106	}
				2107
				2108	STATIC int
				2109	xfs_icsb_counter_disabled(
				2110	xfs_mount_t *mp,
				2111	xfs_sb_field_t field)
				2112	{
				2113	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
				2114	return test_bit(field, &mp->m_icsb_counters);
				2115	}
				2116
				2117	STATIC int
				2118	xfs_icsb_disable_counter(
				2119	xfs_mount_t *mp,
				2120	xfs_sb_field_t field)
				2121	{
				2122	xfs_icsb_cnts_t cnt;
				2123
				2124	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
				2125
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2126	/*
				2127	* If we are already disabled, then there is nothing to do
				2128	* here. We check before locking all the counters to avoid
				2129	* the expensive lock operation when being called in the
				2130	* slow path and the counter is already disabled. This is
				2131	* safe because the only time we set or clear this state is under
				2132	* the m_icsb_mutex.
				2133	*/
				2134	if (xfs_icsb_counter_disabled(mp, field))
				2135	return 0;
				2136
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2137	xfs_icsb_lock_all_counters(mp);
				2138	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
				2139	/* drain back to superblock */
				2140
				2141	xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED\|XFS_ICSB_LAZY_COUNT);
				2142	switch(field) {
				2143	case XFS_SBS_ICOUNT:
				2144	mp->m_sb.sb_icount = cnt.icsb_icount;
				2145	break;
				2146	case XFS_SBS_IFREE:
				2147	mp->m_sb.sb_ifree = cnt.icsb_ifree;
				2148	break;
				2149	case XFS_SBS_FDBLOCKS:
				2150	mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
				2151	break;
				2152	default:
				2153	BUG();
				2154	}
				2155	}
				2156
				2157	xfs_icsb_unlock_all_counters(mp);
				2158
				2159	return 0;
				2160	}
				2161
				2162	STATIC void
				2163	xfs_icsb_enable_counter(
				2164	xfs_mount_t *mp,
				2165	xfs_sb_field_t field,
				2166	uint64_t count,
				2167	uint64_t resid)
				2168	{
				2169	xfs_icsb_cnts_t *cntp;
				2170	int i;
				2171
				2172	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
				2173
				2174	xfs_icsb_lock_all_counters(mp);
				2175	for_each_online_cpu(i) {
				2176	cntp = per_cpu_ptr(mp->m_sb_cnts, i);
				2177	switch (field) {
				2178	case XFS_SBS_ICOUNT:
				2179	cntp->icsb_icount = count + resid;
				2180	break;
				2181	case XFS_SBS_IFREE:
				2182	cntp->icsb_ifree = count + resid;
				2183	break;
				2184	case XFS_SBS_FDBLOCKS:
				2185	cntp->icsb_fdblocks = count + resid;
				2186	break;
				2187	default:
				2188	BUG();
				2189	break;
				2190	}
				2191	resid = 0;
				2192	}
				2193	clear_bit(field, &mp->m_icsb_counters);
				2194	xfs_icsb_unlock_all_counters(mp);
				2195	}
				2196
David Chinner	dbcabad	2007-02-10 18:36:17 +1100	[diff] [blame]	2197	void
				2198	xfs_icsb_sync_counters_flags(
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2199	xfs_mount_t *mp,
				2200	int flags)
				2201	{
				2202	xfs_icsb_cnts_t cnt;
				2203	int s;
				2204
				2205	/* Pass 1: lock all counters */
				2206	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
				2207	s = XFS_SB_LOCK(mp);
				2208
				2209	xfs_icsb_count(mp, &cnt, flags);
				2210
				2211	/* Step 3: update mp->m_sb fields */
				2212	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
				2213	mp->m_sb.sb_icount = cnt.icsb_icount;
				2214	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
				2215	mp->m_sb.sb_ifree = cnt.icsb_ifree;
				2216	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
				2217	mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
				2218
				2219	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
				2220	XFS_SB_UNLOCK(mp, s);
				2221	}
				2222
				2223	/*
				2224	* Accurate update of per-cpu counters to incore superblock
				2225	*/
				2226	STATIC void
				2227	xfs_icsb_sync_counters(
				2228	xfs_mount_t *mp)
				2229	{
David Chinner	dbcabad	2007-02-10 18:36:17 +1100	[diff] [blame]	2230	xfs_icsb_sync_counters_flags(mp, 0);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2231	}
				2232
				2233	/*
				2234	* Balance and enable/disable counters as necessary.
				2235	*
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2236	* Thresholds for re-enabling counters are somewhat magic. inode counts are
				2237	* chosen to be the same number as single on disk allocation chunk per CPU, and
				2238	* free blocks is something far enough zero that we aren't going thrash when we
				2239	* get near ENOSPC. We also need to supply a minimum we require per cpu to
				2240	* prevent looping endlessly when xfs_alloc_space asks for more than will
				2241	* be distributed to a single CPU but each CPU has enough blocks to be
				2242	* reenabled.
				2243	*
				2244	* Note that we can be called when counters are already disabled.
				2245	* xfs_icsb_disable_counter() optimises the counter locking in this case to
				2246	* prevent locking every per-cpu counter needlessly.
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2247	*/
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2248
				2249	#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64
David Chinner	4be536d	2006-09-07 14:26:50 +1000	[diff] [blame]	2250	#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2251	(uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2252	STATIC void
				2253	xfs_icsb_balance_counter(
				2254	xfs_mount_t *mp,
				2255	xfs_sb_field_t field,
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2256	int flags,
				2257	int min_per_cpu)
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2258	{
Nathan Scott	6fdf8cc	2006-06-28 10:13:52 +1000	[diff] [blame]	2259	uint64_t count, resid;
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2260	int weight = num_online_cpus();
				2261	int s;
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2262	uint64_t min = (uint64_t)min_per_cpu;
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2263
				2264	if (!(flags & XFS_ICSB_SB_LOCKED))
				2265	s = XFS_SB_LOCK(mp);
				2266
				2267	/* disable counter and sync counter */
				2268	xfs_icsb_disable_counter(mp, field);
				2269
				2270	/* update counters - first CPU gets residual*/
				2271	switch (field) {
				2272	case XFS_SBS_ICOUNT:
				2273	count = mp->m_sb.sb_icount;
				2274	resid = do_div(count, weight);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2275	if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2276	goto out;
				2277	break;
				2278	case XFS_SBS_IFREE:
				2279	count = mp->m_sb.sb_ifree;
				2280	resid = do_div(count, weight);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2281	if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2282	goto out;
				2283	break;
				2284	case XFS_SBS_FDBLOCKS:
				2285	count = mp->m_sb.sb_fdblocks;
				2286	resid = do_div(count, weight);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2287	if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2288	goto out;
				2289	break;
				2290	default:
				2291	BUG();
Nathan Scott	6fdf8cc	2006-06-28 10:13:52 +1000	[diff] [blame]	2292	count = resid = 0; /* quiet, gcc */
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2293	break;
				2294	}
				2295
				2296	xfs_icsb_enable_counter(mp, field, count, resid);
				2297	out:
				2298	if (!(flags & XFS_ICSB_SB_LOCKED))
				2299	XFS_SB_UNLOCK(mp, s);
				2300	}
				2301
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2302	int
				2303	xfs_icsb_modify_counters(
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2304	xfs_mount_t *mp,
				2305	xfs_sb_field_t field,
David Chinner	20f4ebf	2007-02-10 18:36:10 +1100	[diff] [blame]	2306	int64_t delta,
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2307	int rsvd)
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2308	{
				2309	xfs_icsb_cnts_t *icsbp;
				2310	long long lcounter; /* long counter for 64 bit fields */
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2311	int cpu, ret = 0, s;
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2312
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2313	might_sleep();
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2314	again:
				2315	cpu = get_cpu();
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2316	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);
				2317
				2318	/*
				2319	* if the counter is disabled, go to slow path
				2320	*/
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2321	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
				2322	goto slow_path;
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2323	xfs_icsb_lock_cntr(icsbp);
				2324	if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
				2325	xfs_icsb_unlock_cntr(icsbp);
				2326	goto slow_path;
				2327	}
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2328
				2329	switch (field) {
				2330	case XFS_SBS_ICOUNT:
				2331	lcounter = icsbp->icsb_icount;
				2332	lcounter += delta;
				2333	if (unlikely(lcounter < 0))
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2334	goto balance_counter;
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2335	icsbp->icsb_icount = lcounter;
				2336	break;
				2337
				2338	case XFS_SBS_IFREE:
				2339	lcounter = icsbp->icsb_ifree;
				2340	lcounter += delta;
				2341	if (unlikely(lcounter < 0))
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2342	goto balance_counter;
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2343	icsbp->icsb_ifree = lcounter;
				2344	break;
				2345
				2346	case XFS_SBS_FDBLOCKS:
				2347	BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
				2348
David Chinner	4be536d	2006-09-07 14:26:50 +1000	[diff] [blame]	2349	lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2350	lcounter += delta;
				2351	if (unlikely(lcounter < 0))
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2352	goto balance_counter;
David Chinner	4be536d	2006-09-07 14:26:50 +1000	[diff] [blame]	2353	icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2354	break;
				2355	default:
				2356	BUG();
				2357	break;
				2358	}
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2359	xfs_icsb_unlock_cntr(icsbp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2360	put_cpu();
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2361	return 0;
				2362
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2363	slow_path:
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2364	put_cpu();
				2365
				2366	/*
				2367	* serialise with a mutex so we don't burn lots of cpu on
				2368	* the superblock lock. We still need to hold the superblock
				2369	* lock, however, when we modify the global structures.
				2370	*/
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	2371	xfs_icsb_lock(mp);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2372
				2373	/*
				2374	* Now running atomically.
				2375	*
				2376	* If the counter is enabled, someone has beaten us to rebalancing.
				2377	* Drop the lock and try again in the fast path....
				2378	*/
				2379	if (!(xfs_icsb_counter_disabled(mp, field))) {
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	2380	xfs_icsb_unlock(mp);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2381	goto again;
				2382	}
				2383
				2384	/*
				2385	* The counter is currently disabled. Because we are
				2386	* running atomically here, we know a rebalance cannot
				2387	* be in progress. Hence we can go straight to operating
				2388	* on the global superblock. We do not call xfs_mod_incore_sb()
				2389	* here even though we need to get the SB_LOCK. Doing so
				2390	* will cause us to re-enter this function and deadlock.
				2391	* Hence we get the SB_LOCK ourselves and then call
				2392	* xfs_mod_incore_sb_unlocked() as the unlocked path operates
				2393	* directly on the global counters.
				2394	*/
				2395	s = XFS_SB_LOCK(mp);
				2396	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
				2397	XFS_SB_UNLOCK(mp, s);
				2398
				2399	/*
				2400	* Now that we've modified the global superblock, we
				2401	* may be able to re-enable the distributed counters
				2402	* (e.g. lots of space just got freed). After that
				2403	* we are done.
				2404	*/
				2405	if (ret != ENOSPC)
				2406	xfs_icsb_balance_counter(mp, field, 0, 0);
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	2407	xfs_icsb_unlock(mp);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2408	return ret;
				2409
				2410	balance_counter:
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2411	xfs_icsb_unlock_cntr(icsbp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2412	put_cpu();
				2413
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2414	/*
				2415	* We may have multiple threads here if multiple per-cpu
				2416	* counters run dry at the same time. This will mean we can
				2417	* do more balances than strictly necessary but it is not
				2418	* the common slowpath case.
				2419	*/
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	2420	xfs_icsb_lock(mp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2421
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2422	/*
				2423	* running atomically.
				2424	*
				2425	* This will leave the counter in the correct state for future
				2426	* accesses. After the rebalance, we simply try again and our retry
				2427	* will either succeed through the fast path or slow path without
				2428	* another balance operation being required.
				2429	*/
				2430	xfs_icsb_balance_counter(mp, field, 0, delta);
David Chinner	03135cf	2007-02-10 18:35:15 +1100	[diff] [blame]	2431	xfs_icsb_unlock(mp);
David Chinner	20b6428	2007-02-10 18:35:09 +1100	[diff] [blame]	2432	goto again;
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2433	}
				2434
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2435	#endif