Blame - fs/xfs/xfs_mount.c - kernel/msm

blob: 4b7be49cc4de3a81066cf133c30e367f697489ff [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	2	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				3	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	* published by the Free Software Foundation.
				8	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	18	#include "xfs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	19	#include "xfs_fs.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include "xfs_types.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	21	#include "xfs_bit.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	22	#include "xfs_log.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	23	#include "xfs_inum.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include "xfs_trans.h"
				25	#include "xfs_sb.h"
				26	#include "xfs_ag.h"
				27	#include "xfs_dir.h"
				28	#include "xfs_dir2.h"
				29	#include "xfs_dmapi.h"
				30	#include "xfs_mount.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31	#include "xfs_bmap_btree.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	32	#include "xfs_alloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	33	#include "xfs_ialloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	34	#include "xfs_dir_sf.h"
				35	#include "xfs_dir2_sf.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	36	#include "xfs_attr_sf.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37	#include "xfs_dinode.h"
				38	#include "xfs_inode.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	39	#include "xfs_btree.h"
				40	#include "xfs_ialloc.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41	#include "xfs_alloc.h"
				42	#include "xfs_rtalloc.h"
				43	#include "xfs_bmap.h"
				44	#include "xfs_error.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	45	#include "xfs_rw.h"
				46	#include "xfs_quota.h"
				47	#include "xfs_fsops.h"
				48
				49	STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
				50	STATIC int xfs_uuid_mount(xfs_mount_t *);
				51	STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	52	STATIC void xfs_unmountfs_wait(xfs_mount_t *);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	54
				55	#ifdef HAVE_PERCPU_SB
				56	STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
				57	STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int);
				58	STATIC void xfs_icsb_sync_counters(xfs_mount_t *);
				59	STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
				60	int, int);
				61	STATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
				62	int, int);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	63	STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	64
				65	#else
				66
				67	#define xfs_icsb_destroy_counters(mp) do { } while (0)
				68	#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
				69	#define xfs_icsb_sync_counters(mp) do { } while (0)
				70	#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
				71	#define xfs_icsb_modify_counters_locked(mp, a, b, c) do { } while (0)
				72
				73	#endif
				74
Christoph Hellwig	1df84c9	2006-01-11 15:29:52 +1100	[diff] [blame]	75	static const struct {
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	76	short offset;
				77	short type; /* 0 = integer
				78	* 1 = binary / string (no translation)
				79	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	80	} xfs_sb_info[] = {
				81	{ offsetof(xfs_sb_t, sb_magicnum), 0 },
				82	{ offsetof(xfs_sb_t, sb_blocksize), 0 },
				83	{ offsetof(xfs_sb_t, sb_dblocks), 0 },
				84	{ offsetof(xfs_sb_t, sb_rblocks), 0 },
				85	{ offsetof(xfs_sb_t, sb_rextents), 0 },
				86	{ offsetof(xfs_sb_t, sb_uuid), 1 },
				87	{ offsetof(xfs_sb_t, sb_logstart), 0 },
				88	{ offsetof(xfs_sb_t, sb_rootino), 0 },
				89	{ offsetof(xfs_sb_t, sb_rbmino), 0 },
				90	{ offsetof(xfs_sb_t, sb_rsumino), 0 },
				91	{ offsetof(xfs_sb_t, sb_rextsize), 0 },
				92	{ offsetof(xfs_sb_t, sb_agblocks), 0 },
				93	{ offsetof(xfs_sb_t, sb_agcount), 0 },
				94	{ offsetof(xfs_sb_t, sb_rbmblocks), 0 },
				95	{ offsetof(xfs_sb_t, sb_logblocks), 0 },
				96	{ offsetof(xfs_sb_t, sb_versionnum), 0 },
				97	{ offsetof(xfs_sb_t, sb_sectsize), 0 },
				98	{ offsetof(xfs_sb_t, sb_inodesize), 0 },
				99	{ offsetof(xfs_sb_t, sb_inopblock), 0 },
				100	{ offsetof(xfs_sb_t, sb_fname[0]), 1 },
				101	{ offsetof(xfs_sb_t, sb_blocklog), 0 },
				102	{ offsetof(xfs_sb_t, sb_sectlog), 0 },
				103	{ offsetof(xfs_sb_t, sb_inodelog), 0 },
				104	{ offsetof(xfs_sb_t, sb_inopblog), 0 },
				105	{ offsetof(xfs_sb_t, sb_agblklog), 0 },
				106	{ offsetof(xfs_sb_t, sb_rextslog), 0 },
				107	{ offsetof(xfs_sb_t, sb_inprogress), 0 },
				108	{ offsetof(xfs_sb_t, sb_imax_pct), 0 },
				109	{ offsetof(xfs_sb_t, sb_icount), 0 },
				110	{ offsetof(xfs_sb_t, sb_ifree), 0 },
				111	{ offsetof(xfs_sb_t, sb_fdblocks), 0 },
				112	{ offsetof(xfs_sb_t, sb_frextents), 0 },
				113	{ offsetof(xfs_sb_t, sb_uquotino), 0 },
				114	{ offsetof(xfs_sb_t, sb_gquotino), 0 },
				115	{ offsetof(xfs_sb_t, sb_qflags), 0 },
				116	{ offsetof(xfs_sb_t, sb_flags), 0 },
				117	{ offsetof(xfs_sb_t, sb_shared_vn), 0 },
				118	{ offsetof(xfs_sb_t, sb_inoalignmt), 0 },
				119	{ offsetof(xfs_sb_t, sb_unit), 0 },
				120	{ offsetof(xfs_sb_t, sb_width), 0 },
				121	{ offsetof(xfs_sb_t, sb_dirblklog), 0 },
				122	{ offsetof(xfs_sb_t, sb_logsectlog), 0 },
				123	{ offsetof(xfs_sb_t, sb_logsectsize),0 },
				124	{ offsetof(xfs_sb_t, sb_logsunit), 0 },
				125	{ offsetof(xfs_sb_t, sb_features2), 0 },
				126	{ sizeof(xfs_sb_t), 0 }
				127	};
				128
				129	/*
				130	* Return a pointer to an initialized xfs_mount structure.
				131	*/
				132	xfs_mount_t *
				133	xfs_mount_init(void)
				134	{
				135	xfs_mount_t *mp;
				136
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	137	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
				138
				139	if (xfs_icsb_init_counters(mp)) {
				140	mp->m_flags \|= XFS_MOUNT_NO_PERCPU_SB;
				141	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	142
				143	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
				144	spinlock_init(&mp->m_sb_lock, "xfs_sb");
Jes Sorensen	794ee1b	2006-01-09 15:59:21 -0800	[diff] [blame]	145	mutex_init(&mp->m_ilock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	146	initnsema(&mp->m_growlock, 1, "xfs_grow");
				147	/*
				148	* Initialize the AIL.
				149	*/
				150	xfs_trans_ail_init(mp);
				151
				152	atomic_set(&mp->m_active_trans, 0);
				153
				154	return mp;
				155	}
				156
				157	/*
				158	* Free up the resources associated with a mount structure. Assume that
				159	* the structure was initially zeroed, so we can tell which fields got
				160	* initialized.
				161	*/
				162	void
				163	xfs_mount_free(
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	164	xfs_mount_t *mp,
				165	int remove_bhv)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	166	{
				167	if (mp->m_ihash)
				168	xfs_ihash_free(mp);
				169	if (mp->m_chash)
				170	xfs_chash_free(mp);
				171
				172	if (mp->m_perag) {
				173	int agno;
				174
				175	for (agno = 0; agno < mp->m_maxagi; agno++)
				176	if (mp->m_perag[agno].pagb_list)
				177	kmem_free(mp->m_perag[agno].pagb_list,
				178	sizeof(xfs_perag_busy_t) *
				179	XFS_PAGB_NUM_SLOTS);
				180	kmem_free(mp->m_perag,
				181	sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
				182	}
				183
				184	AIL_LOCK_DESTROY(&mp->m_ail_lock);
				185	spinlock_destroy(&mp->m_sb_lock);
				186	mutex_destroy(&mp->m_ilock);
				187	freesema(&mp->m_growlock);
				188	if (mp->m_quotainfo)
				189	XFS_QM_DONE(mp);
				190
				191	if (mp->m_fsname != NULL)
				192	kmem_free(mp->m_fsname, mp->m_fsname_len);
Nathan Scott	fc1f8c1	2005-11-02 11:44:33 +1100	[diff] [blame]	193	if (mp->m_rtname != NULL)
				194	kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
				195	if (mp->m_logname != NULL)
				196	kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	197
				198	if (remove_bhv) {
				199	struct vfs *vfsp = XFS_MTOVFS(mp);
				200
				201	bhv_remove_all_vfsops(vfsp, 0);
				202	VFS_REMOVEBHV(vfsp, &mp->m_bhv);
				203	}
				204
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	205	xfs_icsb_destroy_counters(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	206	kmem_free(mp, sizeof(xfs_mount_t));
				207	}
				208
				209
				210	/*
				211	* Check the validity of the SB found.
				212	*/
				213	STATIC int
				214	xfs_mount_validate_sb(
				215	xfs_mount_t *mp,
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	216	xfs_sb_t *sbp,
				217	int flags)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	218	{
				219	/*
				220	* If the log device and data device have the
				221	* same device number, the log is internal.
				222	* Consequently, the sb_logstart should be non-zero. If
				223	* we have a zero sb_logstart in this case, we may be trying to mount
				224	* a volume filesystem in a non-volume manner.
				225	*/
				226	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	227	xfs_fs_mount_cmn_err(flags, "bad magic number");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	228	return XFS_ERROR(EWRONGFS);
				229	}
				230
				231	if (!XFS_SB_GOOD_VERSION(sbp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	232	xfs_fs_mount_cmn_err(flags, "bad version");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	233	return XFS_ERROR(EWRONGFS);
				234	}
				235
				236	if (unlikely(
				237	sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	238	xfs_fs_mount_cmn_err(flags,
				239	"filesystem is marked as having an external log; "
				240	"specify logdev on the\nmount command line.");
				241	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	242	}
				243
				244	if (unlikely(
				245	sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	246	xfs_fs_mount_cmn_err(flags,
				247	"filesystem is marked as having an internal log; "
				248	"do not specify logdev on\nthe mount command line.");
				249	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	250	}
				251
				252	/*
				253	* More sanity checking. These were stolen directly from
				254	* xfs_repair.
				255	*/
				256	if (unlikely(
				257	sbp->sb_agcount <= 0 \|\|
				258	sbp->sb_sectsize < XFS_MIN_SECTORSIZE \|\|
				259	sbp->sb_sectsize > XFS_MAX_SECTORSIZE \|\|
				260	sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG \|\|
				261	sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG \|\|
				262	sbp->sb_blocksize < XFS_MIN_BLOCKSIZE \|\|
				263	sbp->sb_blocksize > XFS_MAX_BLOCKSIZE \|\|
				264	sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG \|\|
				265	sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG \|\|
				266	sbp->sb_inodesize < XFS_DINODE_MIN_SIZE \|\|
				267	sbp->sb_inodesize > XFS_DINODE_MAX_SIZE \|\|
Nathan Scott	9f989c9	2006-03-14 13:29:32 +1100	[diff] [blame]	268	sbp->sb_inodelog < XFS_DINODE_MIN_LOG \|\|
				269	sbp->sb_inodelog > XFS_DINODE_MAX_LOG \|\|
				270	(sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) \|\|
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	271	(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) \|\|
				272	(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) \|\|
Nathan Scott	e50bd16	2006-04-11 15:10:45 +1000	[diff] [blame]	273	(sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	274	xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	275	return XFS_ERROR(EFSCORRUPTED);
				276	}
				277
				278	/*
				279	* Sanity check AG count, size fields against data size field
				280	*/
				281	if (unlikely(
				282	sbp->sb_dblocks == 0 \|\|
				283	sbp->sb_dblocks >
				284	(xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks \|\|
				285	sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
				286	sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	287	xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	288	return XFS_ERROR(EFSCORRUPTED);
				289	}
				290
				291	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
				292	ASSERT(sbp->sb_blocklog >= BBSHIFT);
				293
				294	#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
				295	if (unlikely(
				296	(sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX \|\|
				297	(sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
				298	#else /* Limited by UINT_MAX of sectors */
				299	if (unlikely(
				300	(sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX \|\|
				301	(sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
				302	#endif
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	303	xfs_fs_mount_cmn_err(flags,
				304	"file system too large to be mounted on this system.");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	305	return XFS_ERROR(E2BIG);
				306	}
				307
				308	if (unlikely(sbp->sb_inprogress)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	309	xfs_fs_mount_cmn_err(flags, "file system busy");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	310	return XFS_ERROR(EFSCORRUPTED);
				311	}
				312
				313	/*
Nathan Scott	de20614	2005-05-05 13:24:13 -0700	[diff] [blame]	314	* Version 1 directory format has never worked on Linux.
				315	*/
				316	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	317	xfs_fs_mount_cmn_err(flags,
				318	"file system using version 1 directory format");
Nathan Scott	de20614	2005-05-05 13:24:13 -0700	[diff] [blame]	319	return XFS_ERROR(ENOSYS);
				320	}
				321
				322	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	323	* Until this is fixed only page-sized or smaller data blocks work.
				324	*/
				325	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	326	xfs_fs_mount_cmn_err(flags,
				327	"file system with blocksize %d bytes",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	328	sbp->sb_blocksize);
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	329	xfs_fs_mount_cmn_err(flags,
				330	"only pagesize (%ld) or less will currently work.",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	331	PAGE_SIZE);
				332	return XFS_ERROR(ENOSYS);
				333	}
				334
				335	return 0;
				336	}
				337
				338	xfs_agnumber_t
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	339	xfs_initialize_perag(
				340	struct vfs *vfs,
				341	xfs_mount_t *mp,
				342	xfs_agnumber_t agcount)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	343	{
				344	xfs_agnumber_t index, max_metadata;
				345	xfs_perag_t *pag;
				346	xfs_agino_t agino;
				347	xfs_ino_t ino;
				348	xfs_sb_t *sbp = &mp->m_sb;
				349	xfs_ino_t max_inum = XFS_MAXINUMBER_32;
				350
				351	/* Check to see if the filesystem can overflow 32 bit inodes */
				352	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
				353	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
				354
				355	/* Clear the mount flag if no inode can overflow 32 bits
				356	* on this filesystem, or if specifically requested..
				357	*/
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	358	if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	359	mp->m_flags \|= XFS_MOUNT_32BITINODES;
				360	} else {
				361	mp->m_flags &= ~XFS_MOUNT_32BITINODES;
				362	}
				363
				364	/* If we can overflow then setup the ag headers accordingly */
				365	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
				366	/* Calculate how much should be reserved for inodes to
				367	* meet the max inode percentage.
				368	*/
				369	if (mp->m_maxicount) {
				370	__uint64_t icount;
				371
				372	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				373	do_div(icount, 100);
				374	icount += sbp->sb_agblocks - 1;
Eric Sandeen	a749ee8	2005-11-02 15:13:42 +1100	[diff] [blame]	375	do_div(icount, sbp->sb_agblocks);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	376	max_metadata = icount;
				377	} else {
				378	max_metadata = agcount;
				379	}
				380	for (index = 0; index < agcount; index++) {
				381	ino = XFS_AGINO_TO_INO(mp, index, agino);
				382	if (ino > max_inum) {
				383	index++;
				384	break;
				385	}
				386
Nathan Scott	c41564b	2006-03-29 08:55:14 +1000	[diff] [blame]	387	/* This ag is preferred for inodes */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	388	pag = &mp->m_perag[index];
				389	pag->pagi_inodeok = 1;
				390	if (index < max_metadata)
				391	pag->pagf_metadata = 1;
				392	}
				393	} else {
				394	/* Setup default behavior for smaller filesystems */
				395	for (index = 0; index < agcount; index++) {
				396	pag = &mp->m_perag[index];
				397	pag->pagi_inodeok = 1;
				398	}
				399	}
				400	return index;
				401	}
				402
				403	/*
				404	* xfs_xlatesb
				405	*
				406	* data - on disk version of sb
				407	* sb - a superblock
				408	* dir - conversion direction: <0 - convert sb to buf
				409	* >0 - convert buf to sb
				410	* fields - which fields to copy (bitmask)
				411	*/
				412	void
				413	xfs_xlatesb(
				414	void *data,
				415	xfs_sb_t *sb,
				416	int dir,
				417	__int64_t fields)
				418	{
				419	xfs_caddr_t buf_ptr;
				420	xfs_caddr_t mem_ptr;
				421	xfs_sb_field_t f;
				422	int first;
				423	int size;
				424
				425	ASSERT(dir);
				426	ASSERT(fields);
				427
				428	if (!fields)
				429	return;
				430
				431	buf_ptr = (xfs_caddr_t)data;
				432	mem_ptr = (xfs_caddr_t)sb;
				433
				434	while (fields) {
				435	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				436	first = xfs_sb_info[f].offset;
				437	size = xfs_sb_info[f + 1].offset - first;
				438
				439	ASSERT(xfs_sb_info[f].type == 0 \|\| xfs_sb_info[f].type == 1);
				440
				441	if (size == 1 \|\| xfs_sb_info[f].type == 1) {
				442	if (dir > 0) {
				443	memcpy(mem_ptr + first, buf_ptr + first, size);
				444	} else {
				445	memcpy(buf_ptr + first, mem_ptr + first, size);
				446	}
				447	} else {
				448	switch (size) {
				449	case 2:
				450	INT_XLATE((__uint16_t)(buf_ptr+first),
				451	(__uint16_t)(mem_ptr+first),
				452	dir, ARCH_CONVERT);
				453	break;
				454	case 4:
				455	INT_XLATE((__uint32_t)(buf_ptr+first),
				456	(__uint32_t)(mem_ptr+first),
				457	dir, ARCH_CONVERT);
				458	break;
				459	case 8:
				460	INT_XLATE((__uint64_t)(buf_ptr+first),
				461	(__uint64_t)(mem_ptr+first), dir, ARCH_CONVERT);
				462	break;
				463	default:
				464	ASSERT(0);
				465	}
				466	}
				467
				468	fields &= ~(1LL << f);
				469	}
				470	}
				471
				472	/*
				473	* xfs_readsb
				474	*
				475	* Does the initial read of the superblock.
				476	*/
				477	int
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	478	xfs_readsb(xfs_mount_t *mp, int flags)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	479	{
				480	unsigned int sector_size;
				481	unsigned int extra_flags;
				482	xfs_buf_t *bp;
				483	xfs_sb_t *sbp;
				484	int error;
				485
				486	ASSERT(mp->m_sb_bp == NULL);
				487	ASSERT(mp->m_ddev_targp != NULL);
				488
				489	/*
				490	* Allocate a (locked) buffer to hold the superblock.
				491	* This will be kept around at all times to optimize
				492	* access to the superblock.
				493	*/
				494	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
				495	extra_flags = XFS_BUF_LOCK \| XFS_BUF_MANAGE \| XFS_BUF_MAPPED;
				496
				497	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				498	BTOBB(sector_size), extra_flags);
				499	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	500	xfs_fs_mount_cmn_err(flags, "SB read failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	501	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				502	goto fail;
				503	}
				504	ASSERT(XFS_BUF_ISBUSY(bp));
				505	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				506
				507	/*
				508	* Initialize the mount structure from the superblock.
				509	* But first do some basic consistency checking.
				510	*/
				511	sbp = XFS_BUF_TO_SBP(bp);
				512	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
				513
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	514	error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	515	if (error) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	516	xfs_fs_mount_cmn_err(flags, "SB validate failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	517	goto fail;
				518	}
				519
				520	/*
				521	* We must be able to do sector-sized and sector-aligned IO.
				522	*/
				523	if (sector_size > mp->m_sb.sb_sectsize) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	524	xfs_fs_mount_cmn_err(flags,
				525	"device supports only %u byte sectors (not %u)",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	526	sector_size, mp->m_sb.sb_sectsize);
				527	error = ENOSYS;
				528	goto fail;
				529	}
				530
				531	/*
				532	* If device sector size is smaller than the superblock size,
				533	* re-read the superblock so the buffer is correctly sized.
				534	*/
				535	if (sector_size < mp->m_sb.sb_sectsize) {
				536	XFS_BUF_UNMANAGE(bp);
				537	xfs_buf_relse(bp);
				538	sector_size = mp->m_sb.sb_sectsize;
				539	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				540	BTOBB(sector_size), extra_flags);
				541	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	542	xfs_fs_mount_cmn_err(flags, "SB re-read failed");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	543	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				544	goto fail;
				545	}
				546	ASSERT(XFS_BUF_ISBUSY(bp));
				547	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				548	}
				549
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	550	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
				551	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
				552	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
				553
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	554	mp->m_sb_bp = bp;
				555	xfs_buf_relse(bp);
				556	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
				557	return 0;
				558
				559	fail:
				560	if (bp) {
				561	XFS_BUF_UNMANAGE(bp);
				562	xfs_buf_relse(bp);
				563	}
				564	return error;
				565	}
				566
				567
				568	/*
				569	* xfs_mount_common
				570	*
				571	* Mount initialization code establishing various mount
				572	* fields from the superblock associated with the given
				573	* mount structure
				574	*/
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	575	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	576	xfs_mount_common(xfs_mount_t mp, xfs_sb_t sbp)
				577	{
				578	int i;
				579
				580	mp->m_agfrotor = mp->m_agirotor = 0;
				581	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
				582	mp->m_maxagi = mp->m_sb.sb_agcount;
				583	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
				584	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
				585	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
				586	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
				587	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
				588	mp->m_litino = sbp->sb_inodesize -
				589	((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
				590	mp->m_blockmask = sbp->sb_blocksize - 1;
				591	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
				592	mp->m_blockwmask = mp->m_blockwsize - 1;
				593	INIT_LIST_HEAD(&mp->m_del_inodes);
				594
				595	/*
				596	* Setup for attributes, in case they get created.
				597	* This value is for inodes getting attributes for the first time,
				598	* the per-inode value is for old attribute values.
				599	*/
				600	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
				601	switch (sbp->sb_inodesize) {
				602	case 256:
Nathan Scott	d8cc890	2005-11-02 10:34:53 +1100	[diff] [blame]	603	mp->m_attroffset = XFS_LITINO(mp) -
				604	XFS_BMDR_SPACE_CALC(MINABTPTRS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	605	break;
				606	case 512:
				607	case 1024:
				608	case 2048:
Nathan Scott	d8cc890	2005-11-02 10:34:53 +1100	[diff] [blame]	609	mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	610	break;
				611	default:
				612	ASSERT(0);
				613	}
				614	ASSERT(mp->m_attroffset < XFS_LITINO(mp));
				615
				616	for (i = 0; i < 2; i++) {
				617	mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				618	xfs_alloc, i == 0);
				619	mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				620	xfs_alloc, i == 0);
				621	}
				622	for (i = 0; i < 2; i++) {
				623	mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				624	xfs_bmbt, i == 0);
				625	mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				626	xfs_bmbt, i == 0);
				627	}
				628	for (i = 0; i < 2; i++) {
				629	mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				630	xfs_inobt, i == 0);
				631	mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				632	xfs_inobt, i == 0);
				633	}
				634
				635	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
				636	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
				637	sbp->sb_inopblock);
				638	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
				639	}
				640	/*
				641	* xfs_mountfs
				642	*
				643	* This function does the following on an initial mount of a file system:
				644	* - reads the superblock from disk and init the mount struct
				645	* - if we're a 32-bit kernel, do a size check on the superblock
				646	* so we don't mount terabyte filesystems
				647	* - init mount struct realtime fields
				648	* - allocate inode hash table for fs
				649	* - init directory manager
				650	* - perform recovery and init the log manager
				651	*/
				652	int
				653	xfs_mountfs(
				654	vfs_t *vfsp,
				655	xfs_mount_t *mp,
				656	int mfsi_flags)
				657	{
				658	xfs_buf_t *bp;
				659	xfs_sb_t *sbp = &(mp->m_sb);
				660	xfs_inode_t *rip;
				661	vnode_t *rvp = NULL;
				662	int readio_log, writeio_log;
				663	xfs_daddr_t d;
				664	__uint64_t ret64;
				665	__int64_t update_flags;
				666	uint quotamount, quotaflags;
				667	int agno;
				668	int uuid_mounted = 0;
				669	int error = 0;
				670
				671	if (mp->m_sb_bp == NULL) {
Nathan Scott	764d1f8	2006-03-31 13:04:17 +1000	[diff] [blame]	672	if ((error = xfs_readsb(mp, mfsi_flags))) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	673	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	674	}
				675	}
				676	xfs_mount_common(mp, sbp);
				677
				678	/*
				679	* Check if sb_agblocks is aligned at stripe boundary
				680	* If sb_agblocks is NOT aligned turn off m_dalign since
				681	* allocator alignment is within an ag, therefore ag has
				682	* to be aligned at stripe boundary.
				683	*/
				684	update_flags = 0LL;
				685	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
				686	/*
				687	* If stripe unit and stripe width are not multiples
				688	* of the fs blocksize turn off alignment.
				689	*/
				690	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) \|\|
				691	(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
				692	if (mp->m_flags & XFS_MOUNT_RETERR) {
				693	cmn_err(CE_WARN,
				694	"XFS: alignment check 1 failed");
				695	error = XFS_ERROR(EINVAL);
				696	goto error1;
				697	}
				698	mp->m_dalign = mp->m_swidth = 0;
				699	} else {
				700	/*
				701	* Convert the stripe unit and width to FSBs.
				702	*/
				703	mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
				704	if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				705	if (mp->m_flags & XFS_MOUNT_RETERR) {
				706	error = XFS_ERROR(EINVAL);
				707	goto error1;
				708	}
				709	xfs_fs_cmn_err(CE_WARN, mp,
				710	"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
				711	mp->m_dalign, mp->m_swidth,
				712	sbp->sb_agblocks);
				713
				714	mp->m_dalign = 0;
				715	mp->m_swidth = 0;
				716	} else if (mp->m_dalign) {
				717	mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
				718	} else {
				719	if (mp->m_flags & XFS_MOUNT_RETERR) {
				720	xfs_fs_cmn_err(CE_WARN, mp,
				721	"stripe alignment turned off: sunit(%d) less than bsize(%d)",
				722	mp->m_dalign,
				723	mp->m_blockmask +1);
				724	error = XFS_ERROR(EINVAL);
				725	goto error1;
				726	}
				727	mp->m_swidth = 0;
				728	}
				729	}
				730
				731	/*
				732	* Update superblock with new values
				733	* and log changes
				734	*/
				735	if (XFS_SB_VERSION_HASDALIGN(sbp)) {
				736	if (sbp->sb_unit != mp->m_dalign) {
				737	sbp->sb_unit = mp->m_dalign;
				738	update_flags \|= XFS_SB_UNIT;
				739	}
				740	if (sbp->sb_width != mp->m_swidth) {
				741	sbp->sb_width = mp->m_swidth;
				742	update_flags \|= XFS_SB_WIDTH;
				743	}
				744	}
				745	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
				746	XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
				747	mp->m_dalign = sbp->sb_unit;
				748	mp->m_swidth = sbp->sb_width;
				749	}
				750
				751	xfs_alloc_compute_maxlevels(mp);
				752	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
				753	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
				754	xfs_ialloc_compute_maxlevels(mp);
				755
				756	if (sbp->sb_imax_pct) {
				757	__uint64_t icount;
				758
				759	/* Make sure the maximum inode count is a multiple of the
				760	* units we allocate inodes in.
				761	*/
				762
				763	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				764	do_div(icount, 100);
				765	do_div(icount, mp->m_ialloc_blks);
				766	mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
				767	sbp->sb_inopblog;
				768	} else
				769	mp->m_maxicount = 0;
				770
				771	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
				772
				773	/*
				774	* XFS uses the uuid from the superblock as the unique
				775	* identifier for fsid. We can not use the uuid from the volume
				776	* since a single partition filesystem is identical to a single
				777	* partition volume/filesystem.
				778	*/
				779	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
				780	(mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
				781	if (xfs_uuid_mount(mp)) {
				782	error = XFS_ERROR(EINVAL);
				783	goto error1;
				784	}
				785	uuid_mounted=1;
				786	ret64 = uuid_hash64(&sbp->sb_uuid);
				787	memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
				788	}
				789
				790	/*
				791	* Set the default minimum read and write sizes unless
				792	* already specified in a mount option.
				793	* We use smaller I/O sizes when the file system
				794	* is being used for NFS service (wsync mount option).
				795	*/
				796	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
				797	if (mp->m_flags & XFS_MOUNT_WSYNC) {
				798	readio_log = XFS_WSYNC_READIO_LOG;
				799	writeio_log = XFS_WSYNC_WRITEIO_LOG;
				800	} else {
				801	readio_log = XFS_READIO_LOG_LARGE;
				802	writeio_log = XFS_WRITEIO_LOG_LARGE;
				803	}
				804	} else {
				805	readio_log = mp->m_readio_log;
				806	writeio_log = mp->m_writeio_log;
				807	}
				808
				809	/*
				810	* Set the number of readahead buffers to use based on
				811	* physical memory size.
				812	*/
				813	if (xfs_physmem <= 4096) /* <= 16MB */
				814	mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB;
				815	else if (xfs_physmem <= 8192) /* <= 32MB */
				816	mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB;
				817	else
				818	mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32;
				819	if (sbp->sb_blocklog > readio_log) {
				820	mp->m_readio_log = sbp->sb_blocklog;
				821	} else {
				822	mp->m_readio_log = readio_log;
				823	}
				824	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
				825	if (sbp->sb_blocklog > writeio_log) {
				826	mp->m_writeio_log = sbp->sb_blocklog;
				827	} else {
				828	mp->m_writeio_log = writeio_log;
				829	}
				830	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
				831
				832	/*
				833	* Set the inode cluster size based on the physical memory
				834	* size. This may still be overridden by the file system
				835	* block size if it is larger than the chosen cluster size.
				836	*/
				837	if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */
				838	mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE;
				839	} else {
				840	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
				841	}
				842	/*
				843	* Set whether we're using inode alignment.
				844	*/
				845	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
				846	mp->m_sb.sb_inoalignmt >=
				847	XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
				848	mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
				849	else
				850	mp->m_inoalign_mask = 0;
				851	/*
				852	* If we are using stripe alignment, check whether
				853	* the stripe unit is a multiple of the inode alignment
				854	*/
				855	if (mp->m_dalign && mp->m_inoalign_mask &&
				856	!(mp->m_dalign & mp->m_inoalign_mask))
				857	mp->m_sinoalign = mp->m_dalign;
				858	else
				859	mp->m_sinoalign = 0;
				860	/*
				861	* Check that the data (and log if separate) are an ok size.
				862	*/
				863	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
				864	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
				865	cmn_err(CE_WARN, "XFS: size check 1 failed");
				866	error = XFS_ERROR(E2BIG);
				867	goto error1;
				868	}
				869	error = xfs_read_buf(mp, mp->m_ddev_targp,
				870	d - XFS_FSS_TO_BB(mp, 1),
				871	XFS_FSS_TO_BB(mp, 1), 0, &bp);
				872	if (!error) {
				873	xfs_buf_relse(bp);
				874	} else {
				875	cmn_err(CE_WARN, "XFS: size check 2 failed");
				876	if (error == ENOSPC) {
				877	error = XFS_ERROR(E2BIG);
				878	}
				879	goto error1;
				880	}
				881
				882	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
				883	mp->m_logdev_targp != mp->m_ddev_targp) {
				884	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
				885	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
				886	cmn_err(CE_WARN, "XFS: size check 3 failed");
				887	error = XFS_ERROR(E2BIG);
				888	goto error1;
				889	}
				890	error = xfs_read_buf(mp, mp->m_logdev_targp,
				891	d - XFS_FSB_TO_BB(mp, 1),
				892	XFS_FSB_TO_BB(mp, 1), 0, &bp);
				893	if (!error) {
				894	xfs_buf_relse(bp);
				895	} else {
				896	cmn_err(CE_WARN, "XFS: size check 3 failed");
				897	if (error == ENOSPC) {
				898	error = XFS_ERROR(E2BIG);
				899	}
				900	goto error1;
				901	}
				902	}
				903
				904	/*
				905	* Initialize realtime fields in the mount structure
				906	*/
				907	if ((error = xfs_rtmount_init(mp))) {
				908	cmn_err(CE_WARN, "XFS: RT mount failed");
				909	goto error1;
				910	}
				911
				912	/*
				913	* For client case we are done now
				914	*/
				915	if (mfsi_flags & XFS_MFSI_CLIENT) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	916	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	917	}
				918
				919	/*
				920	* Copies the low order bits of the timestamp and the randomly
				921	* set "sequence" number out of a UUID.
				922	*/
				923	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
				924
				925	/*
				926	* The vfs structure needs to have a file system independent
				927	* way of checking for the invariant file system ID. Since it
				928	* can't look at mount structures it has a pointer to the data
				929	* in the mount structure.
				930	*
				931	* File systems that don't support user level file handles (i.e.
				932	* all of them except for XFS) will leave vfs_altfsid as NULL.
				933	*/
				934	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
				935	mp->m_dmevmask = 0; /* not persistent; set after each mount */
				936
				937	/*
				938	* Select the right directory manager.
				939	*/
				940	mp->m_dirops =
				941	XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
				942	xfsv2_dirops :
				943	xfsv1_dirops;
				944
				945	/*
				946	* Initialize directory manager's entries.
				947	*/
				948	XFS_DIR_MOUNT(mp);
				949
				950	/*
				951	* Initialize the attribute manager's entries.
				952	*/
				953	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;
				954
				955	/*
				956	* Initialize the precomputed transaction reservations values.
				957	*/
				958	xfs_trans_init(mp);
				959
				960	/*
				961	* Allocate and initialize the inode hash table for this
				962	* file system.
				963	*/
				964	xfs_ihash_init(mp);
				965	xfs_chash_init(mp);
				966
				967	/*
				968	* Allocate and initialize the per-ag data.
				969	*/
				970	init_rwsem(&mp->m_peraglock);
				971	mp->m_perag =
				972	kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);
				973
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	974	mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	975
				976	/*
				977	* log's mount-time initialization. Perform 1st part recovery if needed
				978	*/
				979	if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */
				980	error = xfs_log_mount(mp, mp->m_logdev_targp,
				981	XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				982	XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
				983	if (error) {
				984	cmn_err(CE_WARN, "XFS: log mount failed");
				985	goto error2;
				986	}
				987	} else { /* No log has been defined */
				988	cmn_err(CE_WARN, "XFS: no log defined");
				989	XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
				990	error = XFS_ERROR(EFSCORRUPTED);
				991	goto error2;
				992	}
				993
				994	/*
				995	* Get and sanity-check the root inode.
				996	* Save the pointer to it in the mount structure.
				997	*/
				998	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
				999	if (error) {
				1000	cmn_err(CE_WARN, "XFS: failed to read root inode");
				1001	goto error3;
				1002	}
				1003
				1004	ASSERT(rip != NULL);
				1005	rvp = XFS_ITOV(rip);
				1006
				1007	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
				1008	cmn_err(CE_WARN, "XFS: corrupted root inode");
				1009	prdev("Root inode %llu is not a directory",
				1010	mp->m_ddev_targp, (unsigned long long)rip->i_ino);
				1011	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				1012	XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				1013	mp);
				1014	error = XFS_ERROR(EFSCORRUPTED);
				1015	goto error4;
				1016	}
				1017	mp->m_rootip = rip; /* save it */
				1018
				1019	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				1020
				1021	/*
				1022	* Initialize realtime inode pointers in the mount structure
				1023	*/
				1024	if ((error = xfs_rtmount_inodes(mp))) {
				1025	/*
				1026	* Free up the root inode.
				1027	*/
				1028	cmn_err(CE_WARN, "XFS: failed to read RT inodes");
				1029	goto error4;
				1030	}
				1031
				1032	/*
				1033	* If fs is not mounted readonly, then update the superblock
				1034	* unit and width changes.
				1035	*/
				1036	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
				1037	xfs_mount_log_sbunit(mp, update_flags);
				1038
				1039	/*
				1040	* Initialise the XFS quota management subsystem for this mount
				1041	*/
				1042	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
				1043	goto error4;
				1044
				1045	/*
				1046	* Finish recovering the file system. This part needed to be
				1047	* delayed until after the root and real-time bitmap inodes
				1048	* were consistently read in.
				1049	*/
				1050	error = xfs_log_mount_finish(mp, mfsi_flags);
				1051	if (error) {
				1052	cmn_err(CE_WARN, "XFS: log mount finish failed");
				1053	goto error4;
				1054	}
				1055
				1056	/*
				1057	* Complete the quota initialisation, post-log-replay component.
				1058	*/
				1059	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
				1060	goto error4;
				1061
				1062	return 0;
				1063
				1064	error4:
				1065	/*
				1066	* Free up the root inode.
				1067	*/
				1068	VN_RELE(rvp);
				1069	error3:
				1070	xfs_log_unmount_dealloc(mp);
				1071	error2:
				1072	xfs_ihash_free(mp);
				1073	xfs_chash_free(mp);
				1074	for (agno = 0; agno < sbp->sb_agcount; agno++)
				1075	if (mp->m_perag[agno].pagb_list)
				1076	kmem_free(mp->m_perag[agno].pagb_list,
				1077	sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
				1078	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
				1079	mp->m_perag = NULL;
				1080	/* FALLTHROUGH */
				1081	error1:
				1082	if (uuid_mounted)
				1083	xfs_uuid_unmount(mp);
				1084	xfs_freesb(mp);
				1085	return error;
				1086	}
				1087
				1088	/*
				1089	* xfs_unmountfs
				1090	*
				1091	* This flushes out the inodes,dquots and the superblock, unmounts the
				1092	* log and makes sure that incore structures are freed.
				1093	*/
				1094	int
				1095	xfs_unmountfs(xfs_mount_t mp, struct cred cr)
				1096	{
				1097	struct vfs *vfsp = XFS_MTOVFS(mp);
				1098	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1099	int64_t fsid;
				1100	#endif
				1101
Christoph Hellwig	efa8027	2005-06-21 15:37:17 +1000	[diff] [blame]	1102	xfs_iflush_all(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1103
Nathan Scott	ee2a4f7	2006-01-11 15:33:36 +1100	[diff] [blame]	1104	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL \| XFS_QMOPT_UMOUNTING);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1105
				1106	/*
				1107	* Flush out the log synchronously so that we know for sure
				1108	* that nothing is pinned. This is important because bflush()
				1109	* will skip pinned buffers.
				1110	*/
				1111	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE \| XFS_LOG_SYNC);
				1112
				1113	xfs_binval(mp->m_ddev_targp);
				1114	if (mp->m_rtdev_targp) {
				1115	xfs_binval(mp->m_rtdev_targp);
				1116	}
				1117
				1118	xfs_unmountfs_writesb(mp);
				1119
				1120	xfs_unmountfs_wait(mp); /* wait for async bufs */
				1121
				1122	xfs_log_unmount(mp); /* Done! No more fs ops. */
				1123
				1124	xfs_freesb(mp);
				1125
				1126	/*
				1127	* All inodes from this mount point should be freed.
				1128	*/
				1129	ASSERT(mp->m_inodes == NULL);
				1130
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1131	xfs_unmountfs_close(mp, cr);
				1132	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
				1133	xfs_uuid_unmount(mp);
				1134
				1135	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1136	/*
				1137	* clear all error tags on this filesystem
				1138	*/
				1139	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
				1140	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
				1141	#endif
				1142	XFS_IODONE(vfsp);
				1143	xfs_mount_free(mp, 1);
				1144	return 0;
				1145	}
				1146
				1147	void
				1148	xfs_unmountfs_close(xfs_mount_t mp, struct cred cr)
				1149	{
				1150	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1151	xfs_free_buftarg(mp->m_logdev_targp, 1);
				1152	if (mp->m_rtdev_targp)
				1153	xfs_free_buftarg(mp->m_rtdev_targp, 1);
				1154	xfs_free_buftarg(mp->m_ddev_targp, 0);
				1155	}
				1156
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	1157	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1158	xfs_unmountfs_wait(xfs_mount_t *mp)
				1159	{
				1160	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1161	xfs_wait_buftarg(mp->m_logdev_targp);
				1162	if (mp->m_rtdev_targp)
				1163	xfs_wait_buftarg(mp->m_rtdev_targp);
				1164	xfs_wait_buftarg(mp->m_ddev_targp);
				1165	}
				1166
				1167	int
				1168	xfs_unmountfs_writesb(xfs_mount_t *mp)
				1169	{
				1170	xfs_buf_t *sbp;
				1171	xfs_sb_t *sb;
				1172	int error = 0;
				1173
				1174	/*
				1175	* skip superblock write if fs is read-only, or
				1176	* if we are doing a forced umount.
				1177	*/
				1178	sbp = xfs_getsb(mp, 0);
				1179	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY \|\|
				1180	XFS_FORCED_SHUTDOWN(mp))) {
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1181
				1182	xfs_icsb_sync_counters(mp);
				1183
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1184	/*
				1185	* mark shared-readonly if desired
				1186	*/
				1187	sb = XFS_BUF_TO_SBP(sbp);
				1188	if (mp->m_mk_sharedro) {
				1189	if (!(sb->sb_flags & XFS_SBF_READONLY))
				1190	sb->sb_flags \|= XFS_SBF_READONLY;
				1191	if (!XFS_SB_VERSION_HASSHARED(sb))
				1192	XFS_SB_VERSION_ADDSHARED(sb);
				1193	xfs_fs_cmn_err(CE_NOTE, mp,
				1194	"Unmounting, marking shared read-only");
				1195	}
				1196	XFS_BUF_UNDONE(sbp);
				1197	XFS_BUF_UNREAD(sbp);
				1198	XFS_BUF_UNDELAYWRITE(sbp);
				1199	XFS_BUF_WRITE(sbp);
				1200	XFS_BUF_UNASYNC(sbp);
				1201	ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
				1202	xfsbdstrat(mp, sbp);
				1203	/* Nevermind errors we might get here. */
				1204	error = xfs_iowait(sbp);
				1205	if (error)
				1206	xfs_ioerror_alert("xfs_unmountfs_writesb",
				1207	mp, sbp, XFS_BUF_ADDR(sbp));
				1208	if (error && mp->m_mk_sharedro)
				1209	xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
				1210	}
				1211	xfs_buf_relse(sbp);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1212	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1213	}
				1214
				1215	/*
				1216	* xfs_mod_sb() can be used to copy arbitrary changes to the
				1217	* in-core superblock into the superblock buffer to be logged.
				1218	* It does not provide the higher level of locking that is
				1219	* needed to protect the in-core superblock from concurrent
				1220	* access.
				1221	*/
				1222	void
				1223	xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
				1224	{
				1225	xfs_buf_t *bp;
				1226	int first;
				1227	int last;
				1228	xfs_mount_t *mp;
				1229	xfs_sb_t *sbp;
				1230	xfs_sb_field_t f;
				1231
				1232	ASSERT(fields);
				1233	if (!fields)
				1234	return;
				1235	mp = tp->t_mountp;
				1236	bp = xfs_trans_getsb(tp, mp, 0);
				1237	sbp = XFS_BUF_TO_SBP(bp);
				1238	first = sizeof(xfs_sb_t);
				1239	last = 0;
				1240
				1241	/* translate/copy */
				1242
				1243	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields);
				1244
				1245	/* find modified range */
				1246
				1247	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				1248	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1249	first = xfs_sb_info[f].offset;
				1250
				1251	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
				1252	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1253	last = xfs_sb_info[f + 1].offset - 1;
				1254
				1255	xfs_trans_log_buf(tp, bp, first, last);
				1256	}
Yingping Lu	d210a28	2006-06-09 14:55:18 +1000	[diff] [blame^]	1257
				1258	/*
				1259	* In order to avoid ENOSPC-related deadlock caused by
				1260	* out-of-order locking of AGF buffer (PV 947395), we place
				1261	* constraints on the relationship among actual allocations for
				1262	* data blocks, freelist blocks, and potential file data bmap
				1263	* btree blocks. However, these restrictions may result in no
				1264	* actual space allocated for a delayed extent, for example, a data
				1265	* block in a certain AG is allocated but there is no additional
				1266	* block for the additional bmap btree block due to a split of the
				1267	* bmap btree of the file. The result of this may lead to an
				1268	* infinite loop in xfssyncd when the file gets flushed to disk and
				1269	* all delayed extents need to be actually allocated. To get around
				1270	* this, we explicitly set aside a few blocks which will not be
				1271	* reserved in delayed allocation. Considering the minimum number of
				1272	* needed freelist blocks is 4 fsbs, a potential split of file's bmap
				1273	* btree requires 1 fsb, so we set the number of set-aside blocks to 8.
				1274	*/
				1275	#define SET_ASIDE_BLOCKS 8
				1276
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1277	/*
				1278	* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
				1279	* a delta to a specified field in the in-core superblock. Simply
				1280	* switch on the field indicated and apply the delta to that field.
				1281	* Fields are not allowed to dip below zero, so if the delta would
				1282	* do this do not apply it and return EINVAL.
				1283	*
				1284	* The SB_LOCK must be held when this routine is called.
				1285	*/
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1286	int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1287	xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
				1288	int delta, int rsvd)
				1289	{
				1290	int scounter; /* short counter for 32 bit fields */
				1291	long long lcounter; /* long counter for 64 bit fields */
				1292	long long res_used, rem;
				1293
				1294	/*
				1295	* With the in-core superblock spin lock held, switch
				1296	* on the indicated field. Apply the delta to the
				1297	* proper field. If the fields value would dip below
				1298	* 0, then do not apply the delta and return EINVAL.
				1299	*/
				1300	switch (field) {
				1301	case XFS_SBS_ICOUNT:
				1302	lcounter = (long long)mp->m_sb.sb_icount;
				1303	lcounter += delta;
				1304	if (lcounter < 0) {
				1305	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1306	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1307	}
				1308	mp->m_sb.sb_icount = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1309	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1310	case XFS_SBS_IFREE:
				1311	lcounter = (long long)mp->m_sb.sb_ifree;
				1312	lcounter += delta;
				1313	if (lcounter < 0) {
				1314	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1315	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1316	}
				1317	mp->m_sb.sb_ifree = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1318	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1319	case XFS_SBS_FDBLOCKS:
				1320
Yingping Lu	d210a28	2006-06-09 14:55:18 +1000	[diff] [blame^]	1321	lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1322	res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
				1323
				1324	if (delta > 0) { /* Putting blocks back */
				1325	if (res_used > delta) {
				1326	mp->m_resblks_avail += delta;
				1327	} else {
				1328	rem = delta - res_used;
				1329	mp->m_resblks_avail = mp->m_resblks;
				1330	lcounter += rem;
				1331	}
				1332	} else { /* Taking blocks away */
				1333
				1334	lcounter += delta;
				1335
				1336	/*
				1337	* If were out of blocks, use any available reserved blocks if
				1338	* were allowed to.
				1339	*/
				1340
				1341	if (lcounter < 0) {
				1342	if (rsvd) {
				1343	lcounter = (long long)mp->m_resblks_avail + delta;
				1344	if (lcounter < 0) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1345	return XFS_ERROR(ENOSPC);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1346	}
				1347	mp->m_resblks_avail = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1348	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1349	} else { /* not reserved */
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1350	return XFS_ERROR(ENOSPC);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1351	}
				1352	}
				1353	}
				1354
Yingping Lu	d210a28	2006-06-09 14:55:18 +1000	[diff] [blame^]	1355	mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1356	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1357	case XFS_SBS_FREXTENTS:
				1358	lcounter = (long long)mp->m_sb.sb_frextents;
				1359	lcounter += delta;
				1360	if (lcounter < 0) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1361	return XFS_ERROR(ENOSPC);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1362	}
				1363	mp->m_sb.sb_frextents = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1364	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1365	case XFS_SBS_DBLOCKS:
				1366	lcounter = (long long)mp->m_sb.sb_dblocks;
				1367	lcounter += delta;
				1368	if (lcounter < 0) {
				1369	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1370	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1371	}
				1372	mp->m_sb.sb_dblocks = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1373	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1374	case XFS_SBS_AGCOUNT:
				1375	scounter = mp->m_sb.sb_agcount;
				1376	scounter += delta;
				1377	if (scounter < 0) {
				1378	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1379	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1380	}
				1381	mp->m_sb.sb_agcount = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1382	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1383	case XFS_SBS_IMAX_PCT:
				1384	scounter = mp->m_sb.sb_imax_pct;
				1385	scounter += delta;
				1386	if (scounter < 0) {
				1387	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1388	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1389	}
				1390	mp->m_sb.sb_imax_pct = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1391	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1392	case XFS_SBS_REXTSIZE:
				1393	scounter = mp->m_sb.sb_rextsize;
				1394	scounter += delta;
				1395	if (scounter < 0) {
				1396	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1397	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1398	}
				1399	mp->m_sb.sb_rextsize = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1400	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1401	case XFS_SBS_RBMBLOCKS:
				1402	scounter = mp->m_sb.sb_rbmblocks;
				1403	scounter += delta;
				1404	if (scounter < 0) {
				1405	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1406	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1407	}
				1408	mp->m_sb.sb_rbmblocks = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1409	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1410	case XFS_SBS_RBLOCKS:
				1411	lcounter = (long long)mp->m_sb.sb_rblocks;
				1412	lcounter += delta;
				1413	if (lcounter < 0) {
				1414	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1415	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1416	}
				1417	mp->m_sb.sb_rblocks = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1418	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1419	case XFS_SBS_REXTENTS:
				1420	lcounter = (long long)mp->m_sb.sb_rextents;
				1421	lcounter += delta;
				1422	if (lcounter < 0) {
				1423	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1424	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1425	}
				1426	mp->m_sb.sb_rextents = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1427	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1428	case XFS_SBS_REXTSLOG:
				1429	scounter = mp->m_sb.sb_rextslog;
				1430	scounter += delta;
				1431	if (scounter < 0) {
				1432	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1433	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1434	}
				1435	mp->m_sb.sb_rextslog = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1436	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1437	default:
				1438	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1439	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1440	}
				1441	}
				1442
				1443	/*
				1444	* xfs_mod_incore_sb() is used to change a field in the in-core
				1445	* superblock structure by the specified delta. This modification
				1446	* is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked()
				1447	* routine to do the work.
				1448	*/
				1449	int
				1450	xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
				1451	{
				1452	unsigned long s;
				1453	int status;
				1454
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1455	/* check for per-cpu counters */
				1456	switch (field) {
				1457	#ifdef HAVE_PERCPU_SB
				1458	case XFS_SBS_ICOUNT:
				1459	case XFS_SBS_IFREE:
				1460	case XFS_SBS_FDBLOCKS:
				1461	if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				1462	status = xfs_icsb_modify_counters(mp, field,
				1463	delta, rsvd);
				1464	break;
				1465	}
				1466	/* FALLTHROUGH */
				1467	#endif
				1468	default:
				1469	s = XFS_SB_LOCK(mp);
				1470	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
				1471	XFS_SB_UNLOCK(mp, s);
				1472	break;
				1473	}
				1474
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1475	return status;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1476	}
				1477
				1478	/*
				1479	* xfs_mod_incore_sb_batch() is used to change more than one field
				1480	* in the in-core superblock structure at a time. This modification
				1481	* is protected by a lock internal to this module. The fields and
				1482	* changes to those fields are specified in the array of xfs_mod_sb
				1483	* structures passed in.
				1484	*
				1485	* Either all of the specified deltas will be applied or none of
				1486	* them will. If any modified field dips below 0, then all modifications
				1487	* will be backed out and EINVAL will be returned.
				1488	*/
				1489	int
				1490	xfs_mod_incore_sb_batch(xfs_mount_t mp, xfs_mod_sb_t msb, uint nmsb, int rsvd)
				1491	{
				1492	unsigned long s;
				1493	int status=0;
				1494	xfs_mod_sb_t *msbp;
				1495
				1496	/*
				1497	* Loop through the array of mod structures and apply each
				1498	* individually. If any fail, then back out all those
				1499	* which have already been applied. Do all of this within
				1500	* the scope of the SB_LOCK so that all of the changes will
				1501	* be atomic.
				1502	*/
				1503	s = XFS_SB_LOCK(mp);
				1504	msbp = &msb[0];
				1505	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
				1506	/*
				1507	* Apply the delta at index n. If it fails, break
				1508	* from the loop so we'll fall into the undo loop
				1509	* below.
				1510	*/
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1511	switch (msbp->msb_field) {
				1512	#ifdef HAVE_PERCPU_SB
				1513	case XFS_SBS_ICOUNT:
				1514	case XFS_SBS_IFREE:
				1515	case XFS_SBS_FDBLOCKS:
				1516	if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				1517	status = xfs_icsb_modify_counters_locked(mp,
				1518	msbp->msb_field,
				1519	msbp->msb_delta, rsvd);
				1520	break;
				1521	}
				1522	/* FALLTHROUGH */
				1523	#endif
				1524	default:
				1525	status = xfs_mod_incore_sb_unlocked(mp,
				1526	msbp->msb_field,
				1527	msbp->msb_delta, rsvd);
				1528	break;
				1529	}
				1530
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1531	if (status != 0) {
				1532	break;
				1533	}
				1534	}
				1535
				1536	/*
				1537	* If we didn't complete the loop above, then back out
				1538	* any changes made to the superblock. If you add code
				1539	* between the loop above and here, make sure that you
				1540	* preserve the value of status. Loop back until
				1541	* we step below the beginning of the array. Make sure
				1542	* we don't touch anything back there.
				1543	*/
				1544	if (status != 0) {
				1545	msbp--;
				1546	while (msbp >= msb) {
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1547	switch (msbp->msb_field) {
				1548	#ifdef HAVE_PERCPU_SB
				1549	case XFS_SBS_ICOUNT:
				1550	case XFS_SBS_IFREE:
				1551	case XFS_SBS_FDBLOCKS:
				1552	if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				1553	status =
				1554	xfs_icsb_modify_counters_locked(mp,
				1555	msbp->msb_field,
				1556	-(msbp->msb_delta),
				1557	rsvd);
				1558	break;
				1559	}
				1560	/* FALLTHROUGH */
				1561	#endif
				1562	default:
				1563	status = xfs_mod_incore_sb_unlocked(mp,
				1564	msbp->msb_field,
				1565	-(msbp->msb_delta),
				1566	rsvd);
				1567	break;
				1568	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1569	ASSERT(status == 0);
				1570	msbp--;
				1571	}
				1572	}
				1573	XFS_SB_UNLOCK(mp, s);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1574	return status;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1575	}
				1576
				1577	/*
				1578	* xfs_getsb() is called to obtain the buffer for the superblock.
				1579	* The buffer is returned locked and read in from disk.
				1580	* The buffer should be released with a call to xfs_brelse().
				1581	*
				1582	* If the flags parameter is BUF_TRYLOCK, then we'll only return
				1583	* the superblock buffer if it can be locked without sleeping.
				1584	* If it can't then we'll return NULL.
				1585	*/
				1586	xfs_buf_t *
				1587	xfs_getsb(
				1588	xfs_mount_t *mp,
				1589	int flags)
				1590	{
				1591	xfs_buf_t *bp;
				1592
				1593	ASSERT(mp->m_sb_bp != NULL);
				1594	bp = mp->m_sb_bp;
				1595	if (flags & XFS_BUF_TRYLOCK) {
				1596	if (!XFS_BUF_CPSEMA(bp)) {
				1597	return NULL;
				1598	}
				1599	} else {
				1600	XFS_BUF_PSEMA(bp, PRIBIO);
				1601	}
				1602	XFS_BUF_HOLD(bp);
				1603	ASSERT(XFS_BUF_ISDONE(bp));
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1604	return bp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1605	}
				1606
				1607	/*
				1608	* Used to free the superblock along various error paths.
				1609	*/
				1610	void
				1611	xfs_freesb(
				1612	xfs_mount_t *mp)
				1613	{
				1614	xfs_buf_t *bp;
				1615
				1616	/*
				1617	* Use xfs_getsb() so that the buffer will be locked
				1618	* when we call xfs_buf_relse().
				1619	*/
				1620	bp = xfs_getsb(mp, 0);
				1621	XFS_BUF_UNMANAGE(bp);
				1622	xfs_buf_relse(bp);
				1623	mp->m_sb_bp = NULL;
				1624	}
				1625
				1626	/*
				1627	* See if the UUID is unique among mounted XFS filesystems.
				1628	* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
				1629	*/
				1630	STATIC int
				1631	xfs_uuid_mount(
				1632	xfs_mount_t *mp)
				1633	{
				1634	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
				1635	cmn_err(CE_WARN,
				1636	"XFS: Filesystem %s has nil UUID - can't mount",
				1637	mp->m_fsname);
				1638	return -1;
				1639	}
				1640	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
				1641	cmn_err(CE_WARN,
				1642	"XFS: Filesystem %s has duplicate UUID - can't mount",
				1643	mp->m_fsname);
				1644	return -1;
				1645	}
				1646	return 0;
				1647	}
				1648
				1649	/*
				1650	* Remove filesystem from the UUID table.
				1651	*/
				1652	STATIC void
				1653	xfs_uuid_unmount(
				1654	xfs_mount_t *mp)
				1655	{
				1656	uuid_table_remove(&mp->m_sb.sb_uuid);
				1657	}
				1658
				1659	/*
				1660	* Used to log changes to the superblock unit and width fields which could
				1661	* be altered by the mount options. Only the first superblock is updated.
				1662	*/
				1663	STATIC void
				1664	xfs_mount_log_sbunit(
				1665	xfs_mount_t *mp,
				1666	__int64_t fields)
				1667	{
				1668	xfs_trans_t *tp;
				1669
				1670	ASSERT(fields & (XFS_SB_UNIT\|XFS_SB_WIDTH\|XFS_SB_UUID));
				1671
				1672	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
				1673	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				1674	XFS_DEFAULT_LOG_COUNT)) {
				1675	xfs_trans_cancel(tp, 0);
				1676	return;
				1677	}
				1678	xfs_mod_sb(tp, fields);
				1679	xfs_trans_commit(tp, 0, NULL);
				1680	}
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1681
				1682
				1683	#ifdef HAVE_PERCPU_SB
				1684	/*
				1685	* Per-cpu incore superblock counters
				1686	*
				1687	* Simple concept, difficult implementation
				1688	*
				1689	* Basically, replace the incore superblock counters with a distributed per cpu
				1690	* counter for contended fields (e.g. free block count).
				1691	*
				1692	* Difficulties arise in that the incore sb is used for ENOSPC checking, and
				1693	* hence needs to be accurately read when we are running low on space. Hence
				1694	* there is a method to enable and disable the per-cpu counters based on how
				1695	* much "stuff" is available in them.
				1696	*
				1697	* Basically, a counter is enabled if there is enough free resource to justify
				1698	* running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
				1699	* ENOSPC), then we disable the counters to synchronise all callers and
				1700	* re-distribute the available resources.
				1701	*
				1702	* If, once we redistributed the available resources, we still get a failure,
				1703	* we disable the per-cpu counter and go through the slow path.
				1704	*
				1705	* The slow path is the current xfs_mod_incore_sb() function. This means that
				1706	* when we disable a per-cpu counter, we need to drain its resources back to
				1707	* the global superblock. We do this after disabling the counter to prevent
				1708	* more threads from queueing up on the counter.
				1709	*
				1710	* Essentially, this means that we still need a lock in the fast path to enable
				1711	* synchronisation between the global counters and the per-cpu counters. This
				1712	* is not a problem because the lock will be local to a CPU almost all the time
				1713	* and have little contention except when we get to ENOSPC conditions.
				1714	*
				1715	* Basically, this lock becomes a barrier that enables us to lock out the fast
				1716	* path while we do things like enabling and disabling counters and
				1717	* synchronising the counters.
				1718	*
				1719	* Locking rules:
				1720	*
				1721	* 1. XFS_SB_LOCK() before picking up per-cpu locks
				1722	* 2. per-cpu locks always picked up via for_each_online_cpu() order
				1723	* 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
				1724	* 4. modifying per-cpu counters requires holding per-cpu lock
				1725	* 5. modifying global counters requires holding XFS_SB_LOCK
				1726	* 6. enabling or disabling a counter requires holding the XFS_SB_LOCK
				1727	* and _none_ of the per-cpu locks.
				1728	*
				1729	* Disabled counters are only ever re-enabled by a balance operation
				1730	* that results in more free resources per CPU than a given threshold.
				1731	* To ensure counters don't remain disabled, they are rebalanced when
				1732	* the global resource goes above a higher threshold (i.e. some hysteresis
				1733	* is present to prevent thrashing).
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1734	*/
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1735
				1736	/*
				1737	* hot-plug CPU notifier support.
				1738	*
				1739	* We cannot use the hotcpu_register() function because it does
				1740	* not allow notifier instances. We need a notifier per filesystem
				1741	* as we need to be able to identify the filesystem to balance
Nathan Scott	c41564b	2006-03-29 08:55:14 +1000	[diff] [blame]	1742	* the counters out. This is achieved by having a notifier block
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1743	* embedded in the xfs_mount_t and doing pointer magic to get the
				1744	* mount pointer from the notifier block address.
				1745	*/
				1746	STATIC int
				1747	xfs_icsb_cpu_notify(
				1748	struct notifier_block *nfb,
				1749	unsigned long action,
				1750	void *hcpu)
				1751	{
				1752	xfs_icsb_cnts_t *cntp;
				1753	xfs_mount_t *mp;
				1754	int s;
				1755
				1756	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
				1757	cntp = (xfs_icsb_cnts_t *)
				1758	per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
				1759	switch (action) {
				1760	case CPU_UP_PREPARE:
				1761	/* Easy Case - initialize the area and locks, and
				1762	* then rebalance when online does everything else for us. */
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	1763	memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1764	break;
				1765	case CPU_ONLINE:
				1766	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
				1767	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
				1768	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
				1769	break;
				1770	case CPU_DEAD:
				1771	/* Disable all the counters, then fold the dead cpu's
				1772	* count into the total on the global superblock and
				1773	* re-enable the counters. */
				1774	s = XFS_SB_LOCK(mp);
				1775	xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
				1776	xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
				1777	xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
				1778
				1779	mp->m_sb.sb_icount += cntp->icsb_icount;
				1780	mp->m_sb.sb_ifree += cntp->icsb_ifree;
				1781	mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
				1782
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	1783	memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1784
				1785	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED);
				1786	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED);
				1787	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED);
				1788	XFS_SB_UNLOCK(mp, s);
				1789	break;
				1790	}
				1791
				1792	return NOTIFY_OK;
				1793	}
				1794
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1795	int
				1796	xfs_icsb_init_counters(
				1797	xfs_mount_t *mp)
				1798	{
				1799	xfs_icsb_cnts_t *cntp;
				1800	int i;
				1801
				1802	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
				1803	if (mp->m_sb_cnts == NULL)
				1804	return -ENOMEM;
				1805
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1806	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
				1807	mp->m_icsb_notifier.priority = 0;
				1808	register_cpu_notifier(&mp->m_icsb_notifier);
				1809
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1810	for_each_online_cpu(i) {
				1811	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	1812	memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1813	}
				1814	/*
				1815	* start with all counters disabled so that the
				1816	* initial balance kicks us off correctly
				1817	*/
				1818	mp->m_icsb_counters = -1;
				1819	return 0;
				1820	}
				1821
				1822	STATIC void
				1823	xfs_icsb_destroy_counters(
				1824	xfs_mount_t *mp)
				1825	{
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1826	if (mp->m_sb_cnts) {
				1827	unregister_cpu_notifier(&mp->m_icsb_notifier);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1828	free_percpu(mp->m_sb_cnts);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame]	1829	}
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1830	}
				1831
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	1832	STATIC inline void
				1833	xfs_icsb_lock_cntr(
				1834	xfs_icsb_cnts_t *icsbp)
				1835	{
				1836	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
				1837	ndelay(1000);
				1838	}
				1839	}
				1840
				1841	STATIC inline void
				1842	xfs_icsb_unlock_cntr(
				1843	xfs_icsb_cnts_t *icsbp)
				1844	{
				1845	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
				1846	}
				1847
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1848
				1849	STATIC inline void
				1850	xfs_icsb_lock_all_counters(
				1851	xfs_mount_t *mp)
				1852	{
				1853	xfs_icsb_cnts_t *cntp;
				1854	int i;
				1855
				1856	for_each_online_cpu(i) {
				1857	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	1858	xfs_icsb_lock_cntr(cntp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1859	}
				1860	}
				1861
				1862	STATIC inline void
				1863	xfs_icsb_unlock_all_counters(
				1864	xfs_mount_t *mp)
				1865	{
				1866	xfs_icsb_cnts_t *cntp;
				1867	int i;
				1868
				1869	for_each_online_cpu(i) {
				1870	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	1871	xfs_icsb_unlock_cntr(cntp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1872	}
				1873	}
				1874
				1875	STATIC void
				1876	xfs_icsb_count(
				1877	xfs_mount_t *mp,
				1878	xfs_icsb_cnts_t *cnt,
				1879	int flags)
				1880	{
				1881	xfs_icsb_cnts_t *cntp;
				1882	int i;
				1883
				1884	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
				1885
				1886	if (!(flags & XFS_ICSB_LAZY_COUNT))
				1887	xfs_icsb_lock_all_counters(mp);
				1888
				1889	for_each_online_cpu(i) {
				1890	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
				1891	cnt->icsb_icount += cntp->icsb_icount;
				1892	cnt->icsb_ifree += cntp->icsb_ifree;
				1893	cnt->icsb_fdblocks += cntp->icsb_fdblocks;
				1894	}
				1895
				1896	if (!(flags & XFS_ICSB_LAZY_COUNT))
				1897	xfs_icsb_unlock_all_counters(mp);
				1898	}
				1899
				1900	STATIC int
				1901	xfs_icsb_counter_disabled(
				1902	xfs_mount_t *mp,
				1903	xfs_sb_field_t field)
				1904	{
				1905	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
				1906	return test_bit(field, &mp->m_icsb_counters);
				1907	}
				1908
				1909	STATIC int
				1910	xfs_icsb_disable_counter(
				1911	xfs_mount_t *mp,
				1912	xfs_sb_field_t field)
				1913	{
				1914	xfs_icsb_cnts_t cnt;
				1915
				1916	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
				1917
				1918	xfs_icsb_lock_all_counters(mp);
				1919	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
				1920	/* drain back to superblock */
				1921
				1922	xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED\|XFS_ICSB_LAZY_COUNT);
				1923	switch(field) {
				1924	case XFS_SBS_ICOUNT:
				1925	mp->m_sb.sb_icount = cnt.icsb_icount;
				1926	break;
				1927	case XFS_SBS_IFREE:
				1928	mp->m_sb.sb_ifree = cnt.icsb_ifree;
				1929	break;
				1930	case XFS_SBS_FDBLOCKS:
				1931	mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
				1932	break;
				1933	default:
				1934	BUG();
				1935	}
				1936	}
				1937
				1938	xfs_icsb_unlock_all_counters(mp);
				1939
				1940	return 0;
				1941	}
				1942
				1943	STATIC void
				1944	xfs_icsb_enable_counter(
				1945	xfs_mount_t *mp,
				1946	xfs_sb_field_t field,
				1947	uint64_t count,
				1948	uint64_t resid)
				1949	{
				1950	xfs_icsb_cnts_t *cntp;
				1951	int i;
				1952
				1953	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
				1954
				1955	xfs_icsb_lock_all_counters(mp);
				1956	for_each_online_cpu(i) {
				1957	cntp = per_cpu_ptr(mp->m_sb_cnts, i);
				1958	switch (field) {
				1959	case XFS_SBS_ICOUNT:
				1960	cntp->icsb_icount = count + resid;
				1961	break;
				1962	case XFS_SBS_IFREE:
				1963	cntp->icsb_ifree = count + resid;
				1964	break;
				1965	case XFS_SBS_FDBLOCKS:
				1966	cntp->icsb_fdblocks = count + resid;
				1967	break;
				1968	default:
				1969	BUG();
				1970	break;
				1971	}
				1972	resid = 0;
				1973	}
				1974	clear_bit(field, &mp->m_icsb_counters);
				1975	xfs_icsb_unlock_all_counters(mp);
				1976	}
				1977
				1978	STATIC void
				1979	xfs_icsb_sync_counters_int(
				1980	xfs_mount_t *mp,
				1981	int flags)
				1982	{
				1983	xfs_icsb_cnts_t cnt;
				1984	int s;
				1985
				1986	/* Pass 1: lock all counters */
				1987	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
				1988	s = XFS_SB_LOCK(mp);
				1989
				1990	xfs_icsb_count(mp, &cnt, flags);
				1991
				1992	/* Step 3: update mp->m_sb fields */
				1993	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
				1994	mp->m_sb.sb_icount = cnt.icsb_icount;
				1995	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
				1996	mp->m_sb.sb_ifree = cnt.icsb_ifree;
				1997	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
				1998	mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
				1999
				2000	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
				2001	XFS_SB_UNLOCK(mp, s);
				2002	}
				2003
				2004	/*
				2005	* Accurate update of per-cpu counters to incore superblock
				2006	*/
				2007	STATIC void
				2008	xfs_icsb_sync_counters(
				2009	xfs_mount_t *mp)
				2010	{
				2011	xfs_icsb_sync_counters_int(mp, 0);
				2012	}
				2013
				2014	/*
				2015	* lazy addition used for things like df, background sb syncs, etc
				2016	*/
				2017	void
				2018	xfs_icsb_sync_counters_lazy(
				2019	xfs_mount_t *mp)
				2020	{
				2021	xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
				2022	}
				2023
				2024	/*
				2025	* Balance and enable/disable counters as necessary.
				2026	*
				2027	* Thresholds for re-enabling counters are somewhat magic.
				2028	* inode counts are chosen to be the same number as single
				2029	* on disk allocation chunk per CPU, and free blocks is
				2030	* something far enough zero that we aren't going thrash
				2031	* when we get near ENOSPC.
				2032	*/
				2033	#define XFS_ICSB_INO_CNTR_REENABLE 64
				2034	#define XFS_ICSB_FDBLK_CNTR_REENABLE 512
				2035	STATIC void
				2036	xfs_icsb_balance_counter(
				2037	xfs_mount_t *mp,
				2038	xfs_sb_field_t field,
				2039	int flags)
				2040	{
				2041	uint64_t count, resid = 0;
				2042	int weight = num_online_cpus();
				2043	int s;
				2044
				2045	if (!(flags & XFS_ICSB_SB_LOCKED))
				2046	s = XFS_SB_LOCK(mp);
				2047
				2048	/* disable counter and sync counter */
				2049	xfs_icsb_disable_counter(mp, field);
				2050
				2051	/* update counters - first CPU gets residual*/
				2052	switch (field) {
				2053	case XFS_SBS_ICOUNT:
				2054	count = mp->m_sb.sb_icount;
				2055	resid = do_div(count, weight);
				2056	if (count < XFS_ICSB_INO_CNTR_REENABLE)
				2057	goto out;
				2058	break;
				2059	case XFS_SBS_IFREE:
				2060	count = mp->m_sb.sb_ifree;
				2061	resid = do_div(count, weight);
				2062	if (count < XFS_ICSB_INO_CNTR_REENABLE)
				2063	goto out;
				2064	break;
				2065	case XFS_SBS_FDBLOCKS:
				2066	count = mp->m_sb.sb_fdblocks;
				2067	resid = do_div(count, weight);
				2068	if (count < XFS_ICSB_FDBLK_CNTR_REENABLE)
				2069	goto out;
				2070	break;
				2071	default:
				2072	BUG();
				2073	break;
				2074	}
				2075
				2076	xfs_icsb_enable_counter(mp, field, count, resid);
				2077	out:
				2078	if (!(flags & XFS_ICSB_SB_LOCKED))
				2079	XFS_SB_UNLOCK(mp, s);
				2080	}
				2081
				2082	STATIC int
				2083	xfs_icsb_modify_counters_int(
				2084	xfs_mount_t *mp,
				2085	xfs_sb_field_t field,
				2086	int delta,
				2087	int rsvd,
				2088	int flags)
				2089	{
				2090	xfs_icsb_cnts_t *icsbp;
				2091	long long lcounter; /* long counter for 64 bit fields */
				2092	int cpu, s, locked = 0;
				2093	int ret = 0, balance_done = 0;
				2094
				2095	again:
				2096	cpu = get_cpu();
				2097	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu),
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2098	xfs_icsb_lock_cntr(icsbp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2099	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
				2100	goto slow_path;
				2101
				2102	switch (field) {
				2103	case XFS_SBS_ICOUNT:
				2104	lcounter = icsbp->icsb_icount;
				2105	lcounter += delta;
				2106	if (unlikely(lcounter < 0))
				2107	goto slow_path;
				2108	icsbp->icsb_icount = lcounter;
				2109	break;
				2110
				2111	case XFS_SBS_IFREE:
				2112	lcounter = icsbp->icsb_ifree;
				2113	lcounter += delta;
				2114	if (unlikely(lcounter < 0))
				2115	goto slow_path;
				2116	icsbp->icsb_ifree = lcounter;
				2117	break;
				2118
				2119	case XFS_SBS_FDBLOCKS:
				2120	BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
				2121
				2122	lcounter = icsbp->icsb_fdblocks;
				2123	lcounter += delta;
				2124	if (unlikely(lcounter < 0))
				2125	goto slow_path;
				2126	icsbp->icsb_fdblocks = lcounter;
				2127	break;
				2128	default:
				2129	BUG();
				2130	break;
				2131	}
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2132	xfs_icsb_unlock_cntr(icsbp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2133	put_cpu();
				2134	if (locked)
				2135	XFS_SB_UNLOCK(mp, s);
				2136	return 0;
				2137
				2138	/*
				2139	* The slow path needs to be run with the SBLOCK
				2140	* held so that we prevent other threads from
				2141	* attempting to run this path at the same time.
				2142	* this provides exclusion for the balancing code,
				2143	* and exclusive fallback if the balance does not
				2144	* provide enough resources to continue in an unlocked
				2145	* manner.
				2146	*/
				2147	slow_path:
David Chinner	01e1b69	2006-03-14 13:29:16 +1100	[diff] [blame]	2148	xfs_icsb_unlock_cntr(icsbp);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	2149	put_cpu();
				2150
				2151	/* need to hold superblock incase we need
				2152	* to disable a counter */
				2153	if (!(flags & XFS_ICSB_SB_LOCKED)) {
				2154	s = XFS_SB_LOCK(mp);
				2155	locked = 1;
				2156	flags \|= XFS_ICSB_SB_LOCKED;
				2157	}
				2158	if (!balance_done) {
				2159	xfs_icsb_balance_counter(mp, field, flags);
				2160	balance_done = 1;
				2161	goto again;
				2162	} else {
				2163	/*
				2164	* we might not have enough on this local
				2165	* cpu to allocate for a bulk request.
				2166	* We need to drain this field from all CPUs
				2167	* and disable the counter fastpath
				2168	*/
				2169	xfs_icsb_disable_counter(mp, field);
				2170	}
				2171
				2172	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
				2173
				2174	if (locked)
				2175	XFS_SB_UNLOCK(mp, s);
				2176	return ret;
				2177	}
				2178
				2179	STATIC int
				2180	xfs_icsb_modify_counters(
				2181	xfs_mount_t *mp,
				2182	xfs_sb_field_t field,
				2183	int delta,
				2184	int rsvd)
				2185	{
				2186	return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
				2187	}
				2188
				2189	/*
				2190	* Called when superblock is already locked
				2191	*/
				2192	STATIC int
				2193	xfs_icsb_modify_counters_locked(
				2194	xfs_mount_t *mp,
				2195	xfs_sb_field_t field,
				2196	int delta,
				2197	int rsvd)
				2198	{
				2199	return xfs_icsb_modify_counters_int(mp, field, delta,
				2200	rsvd, XFS_ICSB_SB_LOCKED);
				2201	}
				2202	#endif