Blame - fs/xfs/xfs_mount.c - kernel/msm

blob: a64110b9023bc45f331fdadf503b1071942790e6 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	2	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				3	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	* published by the Free Software Foundation.
				8	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	18	#include "xfs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	19	#include "xfs_fs.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include "xfs_types.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	21	#include "xfs_bit.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	22	#include "xfs_log.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	23	#include "xfs_inum.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include "xfs_trans.h"
				25	#include "xfs_sb.h"
				26	#include "xfs_ag.h"
				27	#include "xfs_dir.h"
				28	#include "xfs_dir2.h"
				29	#include "xfs_dmapi.h"
				30	#include "xfs_mount.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31	#include "xfs_bmap_btree.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	32	#include "xfs_alloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	33	#include "xfs_ialloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	34	#include "xfs_dir_sf.h"
				35	#include "xfs_dir2_sf.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	36	#include "xfs_attr_sf.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37	#include "xfs_dinode.h"
				38	#include "xfs_inode.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	39	#include "xfs_btree.h"
				40	#include "xfs_ialloc.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41	#include "xfs_alloc.h"
				42	#include "xfs_rtalloc.h"
				43	#include "xfs_bmap.h"
				44	#include "xfs_error.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	45	#include "xfs_rw.h"
				46	#include "xfs_quota.h"
				47	#include "xfs_fsops.h"
				48
				49	STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
				50	STATIC int xfs_uuid_mount(xfs_mount_t *);
				51	STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	52	STATIC void xfs_unmountfs_wait(xfs_mount_t *);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	54
				55	#ifdef HAVE_PERCPU_SB
				56	STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
				57	STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int);
				58	STATIC void xfs_icsb_sync_counters(xfs_mount_t *);
				59	STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
				60	int, int);
				61	STATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
				62	int, int);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame^]	63	STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	64
				65	#else
				66
				67	#define xfs_icsb_destroy_counters(mp) do { } while (0)
				68	#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
				69	#define xfs_icsb_sync_counters(mp) do { } while (0)
				70	#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
				71	#define xfs_icsb_modify_counters_locked(mp, a, b, c) do { } while (0)
				72
				73	#endif
				74
Christoph Hellwig	1df84c9	2006-01-11 15:29:52 +1100	[diff] [blame]	75	static const struct {
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	76	short offset;
				77	short type; /* 0 = integer
				78	* 1 = binary / string (no translation)
				79	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	80	} xfs_sb_info[] = {
				81	{ offsetof(xfs_sb_t, sb_magicnum), 0 },
				82	{ offsetof(xfs_sb_t, sb_blocksize), 0 },
				83	{ offsetof(xfs_sb_t, sb_dblocks), 0 },
				84	{ offsetof(xfs_sb_t, sb_rblocks), 0 },
				85	{ offsetof(xfs_sb_t, sb_rextents), 0 },
				86	{ offsetof(xfs_sb_t, sb_uuid), 1 },
				87	{ offsetof(xfs_sb_t, sb_logstart), 0 },
				88	{ offsetof(xfs_sb_t, sb_rootino), 0 },
				89	{ offsetof(xfs_sb_t, sb_rbmino), 0 },
				90	{ offsetof(xfs_sb_t, sb_rsumino), 0 },
				91	{ offsetof(xfs_sb_t, sb_rextsize), 0 },
				92	{ offsetof(xfs_sb_t, sb_agblocks), 0 },
				93	{ offsetof(xfs_sb_t, sb_agcount), 0 },
				94	{ offsetof(xfs_sb_t, sb_rbmblocks), 0 },
				95	{ offsetof(xfs_sb_t, sb_logblocks), 0 },
				96	{ offsetof(xfs_sb_t, sb_versionnum), 0 },
				97	{ offsetof(xfs_sb_t, sb_sectsize), 0 },
				98	{ offsetof(xfs_sb_t, sb_inodesize), 0 },
				99	{ offsetof(xfs_sb_t, sb_inopblock), 0 },
				100	{ offsetof(xfs_sb_t, sb_fname[0]), 1 },
				101	{ offsetof(xfs_sb_t, sb_blocklog), 0 },
				102	{ offsetof(xfs_sb_t, sb_sectlog), 0 },
				103	{ offsetof(xfs_sb_t, sb_inodelog), 0 },
				104	{ offsetof(xfs_sb_t, sb_inopblog), 0 },
				105	{ offsetof(xfs_sb_t, sb_agblklog), 0 },
				106	{ offsetof(xfs_sb_t, sb_rextslog), 0 },
				107	{ offsetof(xfs_sb_t, sb_inprogress), 0 },
				108	{ offsetof(xfs_sb_t, sb_imax_pct), 0 },
				109	{ offsetof(xfs_sb_t, sb_icount), 0 },
				110	{ offsetof(xfs_sb_t, sb_ifree), 0 },
				111	{ offsetof(xfs_sb_t, sb_fdblocks), 0 },
				112	{ offsetof(xfs_sb_t, sb_frextents), 0 },
				113	{ offsetof(xfs_sb_t, sb_uquotino), 0 },
				114	{ offsetof(xfs_sb_t, sb_gquotino), 0 },
				115	{ offsetof(xfs_sb_t, sb_qflags), 0 },
				116	{ offsetof(xfs_sb_t, sb_flags), 0 },
				117	{ offsetof(xfs_sb_t, sb_shared_vn), 0 },
				118	{ offsetof(xfs_sb_t, sb_inoalignmt), 0 },
				119	{ offsetof(xfs_sb_t, sb_unit), 0 },
				120	{ offsetof(xfs_sb_t, sb_width), 0 },
				121	{ offsetof(xfs_sb_t, sb_dirblklog), 0 },
				122	{ offsetof(xfs_sb_t, sb_logsectlog), 0 },
				123	{ offsetof(xfs_sb_t, sb_logsectsize),0 },
				124	{ offsetof(xfs_sb_t, sb_logsunit), 0 },
				125	{ offsetof(xfs_sb_t, sb_features2), 0 },
				126	{ sizeof(xfs_sb_t), 0 }
				127	};
				128
				129	/*
				130	* Return a pointer to an initialized xfs_mount structure.
				131	*/
				132	xfs_mount_t *
				133	xfs_mount_init(void)
				134	{
				135	xfs_mount_t *mp;
				136
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	137	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
				138
				139	if (xfs_icsb_init_counters(mp)) {
				140	mp->m_flags \|= XFS_MOUNT_NO_PERCPU_SB;
				141	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	142
				143	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
				144	spinlock_init(&mp->m_sb_lock, "xfs_sb");
Jes Sorensen	794ee1b	2006-01-09 15:59:21 -0800	[diff] [blame]	145	mutex_init(&mp->m_ilock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	146	initnsema(&mp->m_growlock, 1, "xfs_grow");
				147	/*
				148	* Initialize the AIL.
				149	*/
				150	xfs_trans_ail_init(mp);
				151
				152	atomic_set(&mp->m_active_trans, 0);
				153
				154	return mp;
				155	}
				156
				157	/*
				158	* Free up the resources associated with a mount structure. Assume that
				159	* the structure was initially zeroed, so we can tell which fields got
				160	* initialized.
				161	*/
				162	void
				163	xfs_mount_free(
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	164	xfs_mount_t *mp,
				165	int remove_bhv)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	166	{
				167	if (mp->m_ihash)
				168	xfs_ihash_free(mp);
				169	if (mp->m_chash)
				170	xfs_chash_free(mp);
				171
				172	if (mp->m_perag) {
				173	int agno;
				174
				175	for (agno = 0; agno < mp->m_maxagi; agno++)
				176	if (mp->m_perag[agno].pagb_list)
				177	kmem_free(mp->m_perag[agno].pagb_list,
				178	sizeof(xfs_perag_busy_t) *
				179	XFS_PAGB_NUM_SLOTS);
				180	kmem_free(mp->m_perag,
				181	sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
				182	}
				183
				184	AIL_LOCK_DESTROY(&mp->m_ail_lock);
				185	spinlock_destroy(&mp->m_sb_lock);
				186	mutex_destroy(&mp->m_ilock);
				187	freesema(&mp->m_growlock);
				188	if (mp->m_quotainfo)
				189	XFS_QM_DONE(mp);
				190
				191	if (mp->m_fsname != NULL)
				192	kmem_free(mp->m_fsname, mp->m_fsname_len);
Nathan Scott	fc1f8c1	2005-11-02 11:44:33 +1100	[diff] [blame]	193	if (mp->m_rtname != NULL)
				194	kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
				195	if (mp->m_logname != NULL)
				196	kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	197
				198	if (remove_bhv) {
				199	struct vfs *vfsp = XFS_MTOVFS(mp);
				200
				201	bhv_remove_all_vfsops(vfsp, 0);
				202	VFS_REMOVEBHV(vfsp, &mp->m_bhv);
				203	}
				204
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	205	xfs_icsb_destroy_counters(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	206	kmem_free(mp, sizeof(xfs_mount_t));
				207	}
				208
				209
				210	/*
				211	* Check the validity of the SB found.
				212	*/
				213	STATIC int
				214	xfs_mount_validate_sb(
				215	xfs_mount_t *mp,
				216	xfs_sb_t *sbp)
				217	{
				218	/*
				219	* If the log device and data device have the
				220	* same device number, the log is internal.
				221	* Consequently, the sb_logstart should be non-zero. If
				222	* we have a zero sb_logstart in this case, we may be trying to mount
				223	* a volume filesystem in a non-volume manner.
				224	*/
				225	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
				226	cmn_err(CE_WARN, "XFS: bad magic number");
				227	return XFS_ERROR(EWRONGFS);
				228	}
				229
				230	if (!XFS_SB_GOOD_VERSION(sbp)) {
				231	cmn_err(CE_WARN, "XFS: bad version");
				232	return XFS_ERROR(EWRONGFS);
				233	}
				234
				235	if (unlikely(
				236	sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
				237	cmn_err(CE_WARN,
				238	"XFS: filesystem is marked as having an external log; "
				239	"specify logdev on the\nmount command line.");
				240	XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(1)",
				241	XFS_ERRLEVEL_HIGH, mp, sbp);
				242	return XFS_ERROR(EFSCORRUPTED);
				243	}
				244
				245	if (unlikely(
				246	sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
				247	cmn_err(CE_WARN,
				248	"XFS: filesystem is marked as having an internal log; "
				249	"don't specify logdev on\nthe mount command line.");
				250	XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(2)",
				251	XFS_ERRLEVEL_HIGH, mp, sbp);
				252	return XFS_ERROR(EFSCORRUPTED);
				253	}
				254
				255	/*
				256	* More sanity checking. These were stolen directly from
				257	* xfs_repair.
				258	*/
				259	if (unlikely(
				260	sbp->sb_agcount <= 0 \|\|
				261	sbp->sb_sectsize < XFS_MIN_SECTORSIZE \|\|
				262	sbp->sb_sectsize > XFS_MAX_SECTORSIZE \|\|
				263	sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG \|\|
				264	sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG \|\|
				265	sbp->sb_blocksize < XFS_MIN_BLOCKSIZE \|\|
				266	sbp->sb_blocksize > XFS_MAX_BLOCKSIZE \|\|
				267	sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG \|\|
				268	sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG \|\|
				269	sbp->sb_inodesize < XFS_DINODE_MIN_SIZE \|\|
				270	sbp->sb_inodesize > XFS_DINODE_MAX_SIZE \|\|
				271	(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) \|\|
				272	(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) \|\|
				273	sbp->sb_imax_pct > 100)) {
				274	cmn_err(CE_WARN, "XFS: SB sanity check 1 failed");
				275	XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)",
				276	XFS_ERRLEVEL_LOW, mp, sbp);
				277	return XFS_ERROR(EFSCORRUPTED);
				278	}
				279
				280	/*
				281	* Sanity check AG count, size fields against data size field
				282	*/
				283	if (unlikely(
				284	sbp->sb_dblocks == 0 \|\|
				285	sbp->sb_dblocks >
				286	(xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks \|\|
				287	sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
				288	sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
				289	cmn_err(CE_WARN, "XFS: SB sanity check 2 failed");
				290	XFS_ERROR_REPORT("xfs_mount_validate_sb(4)",
				291	XFS_ERRLEVEL_LOW, mp);
				292	return XFS_ERROR(EFSCORRUPTED);
				293	}
				294
				295	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
				296	ASSERT(sbp->sb_blocklog >= BBSHIFT);
				297
				298	#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
				299	if (unlikely(
				300	(sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX \|\|
				301	(sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
				302	#else /* Limited by UINT_MAX of sectors */
				303	if (unlikely(
				304	(sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX \|\|
				305	(sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
				306	#endif
				307	cmn_err(CE_WARN,
				308	"XFS: File system is too large to be mounted on this system.");
				309	return XFS_ERROR(E2BIG);
				310	}
				311
				312	if (unlikely(sbp->sb_inprogress)) {
				313	cmn_err(CE_WARN, "XFS: file system busy");
				314	XFS_ERROR_REPORT("xfs_mount_validate_sb(5)",
				315	XFS_ERRLEVEL_LOW, mp);
				316	return XFS_ERROR(EFSCORRUPTED);
				317	}
				318
				319	/*
Nathan Scott	de20614	2005-05-05 13:24:13 -0700	[diff] [blame]	320	* Version 1 directory format has never worked on Linux.
				321	*/
				322	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
				323	cmn_err(CE_WARN,
				324	"XFS: Attempted to mount file system using version 1 directory format");
				325	return XFS_ERROR(ENOSYS);
				326	}
				327
				328	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	329	* Until this is fixed only page-sized or smaller data blocks work.
				330	*/
				331	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
				332	cmn_err(CE_WARN,
				333	"XFS: Attempted to mount file system with blocksize %d bytes",
				334	sbp->sb_blocksize);
				335	cmn_err(CE_WARN,
Christoph Hellwig	da1650a	2005-11-02 10:21:35 +1100	[diff] [blame]	336	"XFS: Only page-sized (%ld) or less blocksizes currently work.",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	337	PAGE_SIZE);
				338	return XFS_ERROR(ENOSYS);
				339	}
				340
				341	return 0;
				342	}
				343
				344	xfs_agnumber_t
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	345	xfs_initialize_perag(
				346	struct vfs *vfs,
				347	xfs_mount_t *mp,
				348	xfs_agnumber_t agcount)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	349	{
				350	xfs_agnumber_t index, max_metadata;
				351	xfs_perag_t *pag;
				352	xfs_agino_t agino;
				353	xfs_ino_t ino;
				354	xfs_sb_t *sbp = &mp->m_sb;
				355	xfs_ino_t max_inum = XFS_MAXINUMBER_32;
				356
				357	/* Check to see if the filesystem can overflow 32 bit inodes */
				358	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
				359	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
				360
				361	/* Clear the mount flag if no inode can overflow 32 bits
				362	* on this filesystem, or if specifically requested..
				363	*/
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	364	if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	365	mp->m_flags \|= XFS_MOUNT_32BITINODES;
				366	} else {
				367	mp->m_flags &= ~XFS_MOUNT_32BITINODES;
				368	}
				369
				370	/* If we can overflow then setup the ag headers accordingly */
				371	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
				372	/* Calculate how much should be reserved for inodes to
				373	* meet the max inode percentage.
				374	*/
				375	if (mp->m_maxicount) {
				376	__uint64_t icount;
				377
				378	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				379	do_div(icount, 100);
				380	icount += sbp->sb_agblocks - 1;
Eric Sandeen	a749ee8	2005-11-02 15:13:42 +1100	[diff] [blame]	381	do_div(icount, sbp->sb_agblocks);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	382	max_metadata = icount;
				383	} else {
				384	max_metadata = agcount;
				385	}
				386	for (index = 0; index < agcount; index++) {
				387	ino = XFS_AGINO_TO_INO(mp, index, agino);
				388	if (ino > max_inum) {
				389	index++;
				390	break;
				391	}
				392
				393	/* This ag is prefered for inodes */
				394	pag = &mp->m_perag[index];
				395	pag->pagi_inodeok = 1;
				396	if (index < max_metadata)
				397	pag->pagf_metadata = 1;
				398	}
				399	} else {
				400	/* Setup default behavior for smaller filesystems */
				401	for (index = 0; index < agcount; index++) {
				402	pag = &mp->m_perag[index];
				403	pag->pagi_inodeok = 1;
				404	}
				405	}
				406	return index;
				407	}
				408
				409	/*
				410	* xfs_xlatesb
				411	*
				412	* data - on disk version of sb
				413	* sb - a superblock
				414	* dir - conversion direction: <0 - convert sb to buf
				415	* >0 - convert buf to sb
				416	* fields - which fields to copy (bitmask)
				417	*/
				418	void
				419	xfs_xlatesb(
				420	void *data,
				421	xfs_sb_t *sb,
				422	int dir,
				423	__int64_t fields)
				424	{
				425	xfs_caddr_t buf_ptr;
				426	xfs_caddr_t mem_ptr;
				427	xfs_sb_field_t f;
				428	int first;
				429	int size;
				430
				431	ASSERT(dir);
				432	ASSERT(fields);
				433
				434	if (!fields)
				435	return;
				436
				437	buf_ptr = (xfs_caddr_t)data;
				438	mem_ptr = (xfs_caddr_t)sb;
				439
				440	while (fields) {
				441	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				442	first = xfs_sb_info[f].offset;
				443	size = xfs_sb_info[f + 1].offset - first;
				444
				445	ASSERT(xfs_sb_info[f].type == 0 \|\| xfs_sb_info[f].type == 1);
				446
				447	if (size == 1 \|\| xfs_sb_info[f].type == 1) {
				448	if (dir > 0) {
				449	memcpy(mem_ptr + first, buf_ptr + first, size);
				450	} else {
				451	memcpy(buf_ptr + first, mem_ptr + first, size);
				452	}
				453	} else {
				454	switch (size) {
				455	case 2:
				456	INT_XLATE((__uint16_t)(buf_ptr+first),
				457	(__uint16_t)(mem_ptr+first),
				458	dir, ARCH_CONVERT);
				459	break;
				460	case 4:
				461	INT_XLATE((__uint32_t)(buf_ptr+first),
				462	(__uint32_t)(mem_ptr+first),
				463	dir, ARCH_CONVERT);
				464	break;
				465	case 8:
				466	INT_XLATE((__uint64_t)(buf_ptr+first),
				467	(__uint64_t)(mem_ptr+first), dir, ARCH_CONVERT);
				468	break;
				469	default:
				470	ASSERT(0);
				471	}
				472	}
				473
				474	fields &= ~(1LL << f);
				475	}
				476	}
				477
				478	/*
				479	* xfs_readsb
				480	*
				481	* Does the initial read of the superblock.
				482	*/
				483	int
				484	xfs_readsb(xfs_mount_t *mp)
				485	{
				486	unsigned int sector_size;
				487	unsigned int extra_flags;
				488	xfs_buf_t *bp;
				489	xfs_sb_t *sbp;
				490	int error;
				491
				492	ASSERT(mp->m_sb_bp == NULL);
				493	ASSERT(mp->m_ddev_targp != NULL);
				494
				495	/*
				496	* Allocate a (locked) buffer to hold the superblock.
				497	* This will be kept around at all times to optimize
				498	* access to the superblock.
				499	*/
				500	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
				501	extra_flags = XFS_BUF_LOCK \| XFS_BUF_MANAGE \| XFS_BUF_MAPPED;
				502
				503	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				504	BTOBB(sector_size), extra_flags);
				505	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
				506	cmn_err(CE_WARN, "XFS: SB read failed");
				507	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				508	goto fail;
				509	}
				510	ASSERT(XFS_BUF_ISBUSY(bp));
				511	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				512
				513	/*
				514	* Initialize the mount structure from the superblock.
				515	* But first do some basic consistency checking.
				516	*/
				517	sbp = XFS_BUF_TO_SBP(bp);
				518	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
				519
				520	error = xfs_mount_validate_sb(mp, &(mp->m_sb));
				521	if (error) {
				522	cmn_err(CE_WARN, "XFS: SB validate failed");
				523	goto fail;
				524	}
				525
				526	/*
				527	* We must be able to do sector-sized and sector-aligned IO.
				528	*/
				529	if (sector_size > mp->m_sb.sb_sectsize) {
				530	cmn_err(CE_WARN,
				531	"XFS: device supports only %u byte sectors (not %u)",
				532	sector_size, mp->m_sb.sb_sectsize);
				533	error = ENOSYS;
				534	goto fail;
				535	}
				536
				537	/*
				538	* If device sector size is smaller than the superblock size,
				539	* re-read the superblock so the buffer is correctly sized.
				540	*/
				541	if (sector_size < mp->m_sb.sb_sectsize) {
				542	XFS_BUF_UNMANAGE(bp);
				543	xfs_buf_relse(bp);
				544	sector_size = mp->m_sb.sb_sectsize;
				545	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				546	BTOBB(sector_size), extra_flags);
				547	if (!bp \|\| XFS_BUF_ISERROR(bp)) {
				548	cmn_err(CE_WARN, "XFS: SB re-read failed");
				549	error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
				550	goto fail;
				551	}
				552	ASSERT(XFS_BUF_ISBUSY(bp));
				553	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
				554	}
				555
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	556	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
				557	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
				558	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
				559
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	560	mp->m_sb_bp = bp;
				561	xfs_buf_relse(bp);
				562	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
				563	return 0;
				564
				565	fail:
				566	if (bp) {
				567	XFS_BUF_UNMANAGE(bp);
				568	xfs_buf_relse(bp);
				569	}
				570	return error;
				571	}
				572
				573
				574	/*
				575	* xfs_mount_common
				576	*
				577	* Mount initialization code establishing various mount
				578	* fields from the superblock associated with the given
				579	* mount structure
				580	*/
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	581	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	582	xfs_mount_common(xfs_mount_t mp, xfs_sb_t sbp)
				583	{
				584	int i;
				585
				586	mp->m_agfrotor = mp->m_agirotor = 0;
				587	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
				588	mp->m_maxagi = mp->m_sb.sb_agcount;
				589	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
				590	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
				591	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
				592	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
				593	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
				594	mp->m_litino = sbp->sb_inodesize -
				595	((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
				596	mp->m_blockmask = sbp->sb_blocksize - 1;
				597	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
				598	mp->m_blockwmask = mp->m_blockwsize - 1;
				599	INIT_LIST_HEAD(&mp->m_del_inodes);
				600
				601	/*
				602	* Setup for attributes, in case they get created.
				603	* This value is for inodes getting attributes for the first time,
				604	* the per-inode value is for old attribute values.
				605	*/
				606	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
				607	switch (sbp->sb_inodesize) {
				608	case 256:
Nathan Scott	d8cc890	2005-11-02 10:34:53 +1100	[diff] [blame]	609	mp->m_attroffset = XFS_LITINO(mp) -
				610	XFS_BMDR_SPACE_CALC(MINABTPTRS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	611	break;
				612	case 512:
				613	case 1024:
				614	case 2048:
Nathan Scott	d8cc890	2005-11-02 10:34:53 +1100	[diff] [blame]	615	mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	616	break;
				617	default:
				618	ASSERT(0);
				619	}
				620	ASSERT(mp->m_attroffset < XFS_LITINO(mp));
				621
				622	for (i = 0; i < 2; i++) {
				623	mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				624	xfs_alloc, i == 0);
				625	mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				626	xfs_alloc, i == 0);
				627	}
				628	for (i = 0; i < 2; i++) {
				629	mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				630	xfs_bmbt, i == 0);
				631	mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				632	xfs_bmbt, i == 0);
				633	}
				634	for (i = 0; i < 2; i++) {
				635	mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
				636	xfs_inobt, i == 0);
				637	mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
				638	xfs_inobt, i == 0);
				639	}
				640
				641	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
				642	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
				643	sbp->sb_inopblock);
				644	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
				645	}
				646	/*
				647	* xfs_mountfs
				648	*
				649	* This function does the following on an initial mount of a file system:
				650	* - reads the superblock from disk and init the mount struct
				651	* - if we're a 32-bit kernel, do a size check on the superblock
				652	* so we don't mount terabyte filesystems
				653	* - init mount struct realtime fields
				654	* - allocate inode hash table for fs
				655	* - init directory manager
				656	* - perform recovery and init the log manager
				657	*/
				658	int
				659	xfs_mountfs(
				660	vfs_t *vfsp,
				661	xfs_mount_t *mp,
				662	int mfsi_flags)
				663	{
				664	xfs_buf_t *bp;
				665	xfs_sb_t *sbp = &(mp->m_sb);
				666	xfs_inode_t *rip;
				667	vnode_t *rvp = NULL;
				668	int readio_log, writeio_log;
				669	xfs_daddr_t d;
				670	__uint64_t ret64;
				671	__int64_t update_flags;
				672	uint quotamount, quotaflags;
				673	int agno;
				674	int uuid_mounted = 0;
				675	int error = 0;
				676
				677	if (mp->m_sb_bp == NULL) {
				678	if ((error = xfs_readsb(mp))) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	679	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	680	}
				681	}
				682	xfs_mount_common(mp, sbp);
				683
				684	/*
				685	* Check if sb_agblocks is aligned at stripe boundary
				686	* If sb_agblocks is NOT aligned turn off m_dalign since
				687	* allocator alignment is within an ag, therefore ag has
				688	* to be aligned at stripe boundary.
				689	*/
				690	update_flags = 0LL;
				691	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
				692	/*
				693	* If stripe unit and stripe width are not multiples
				694	* of the fs blocksize turn off alignment.
				695	*/
				696	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) \|\|
				697	(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
				698	if (mp->m_flags & XFS_MOUNT_RETERR) {
				699	cmn_err(CE_WARN,
				700	"XFS: alignment check 1 failed");
				701	error = XFS_ERROR(EINVAL);
				702	goto error1;
				703	}
				704	mp->m_dalign = mp->m_swidth = 0;
				705	} else {
				706	/*
				707	* Convert the stripe unit and width to FSBs.
				708	*/
				709	mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
				710	if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				711	if (mp->m_flags & XFS_MOUNT_RETERR) {
				712	error = XFS_ERROR(EINVAL);
				713	goto error1;
				714	}
				715	xfs_fs_cmn_err(CE_WARN, mp,
				716	"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
				717	mp->m_dalign, mp->m_swidth,
				718	sbp->sb_agblocks);
				719
				720	mp->m_dalign = 0;
				721	mp->m_swidth = 0;
				722	} else if (mp->m_dalign) {
				723	mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
				724	} else {
				725	if (mp->m_flags & XFS_MOUNT_RETERR) {
				726	xfs_fs_cmn_err(CE_WARN, mp,
				727	"stripe alignment turned off: sunit(%d) less than bsize(%d)",
				728	mp->m_dalign,
				729	mp->m_blockmask +1);
				730	error = XFS_ERROR(EINVAL);
				731	goto error1;
				732	}
				733	mp->m_swidth = 0;
				734	}
				735	}
				736
				737	/*
				738	* Update superblock with new values
				739	* and log changes
				740	*/
				741	if (XFS_SB_VERSION_HASDALIGN(sbp)) {
				742	if (sbp->sb_unit != mp->m_dalign) {
				743	sbp->sb_unit = mp->m_dalign;
				744	update_flags \|= XFS_SB_UNIT;
				745	}
				746	if (sbp->sb_width != mp->m_swidth) {
				747	sbp->sb_width = mp->m_swidth;
				748	update_flags \|= XFS_SB_WIDTH;
				749	}
				750	}
				751	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
				752	XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
				753	mp->m_dalign = sbp->sb_unit;
				754	mp->m_swidth = sbp->sb_width;
				755	}
				756
				757	xfs_alloc_compute_maxlevels(mp);
				758	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
				759	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
				760	xfs_ialloc_compute_maxlevels(mp);
				761
				762	if (sbp->sb_imax_pct) {
				763	__uint64_t icount;
				764
				765	/* Make sure the maximum inode count is a multiple of the
				766	* units we allocate inodes in.
				767	*/
				768
				769	icount = sbp->sb_dblocks * sbp->sb_imax_pct;
				770	do_div(icount, 100);
				771	do_div(icount, mp->m_ialloc_blks);
				772	mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
				773	sbp->sb_inopblog;
				774	} else
				775	mp->m_maxicount = 0;
				776
				777	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
				778
				779	/*
				780	* XFS uses the uuid from the superblock as the unique
				781	* identifier for fsid. We can not use the uuid from the volume
				782	* since a single partition filesystem is identical to a single
				783	* partition volume/filesystem.
				784	*/
				785	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
				786	(mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
				787	if (xfs_uuid_mount(mp)) {
				788	error = XFS_ERROR(EINVAL);
				789	goto error1;
				790	}
				791	uuid_mounted=1;
				792	ret64 = uuid_hash64(&sbp->sb_uuid);
				793	memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
				794	}
				795
				796	/*
				797	* Set the default minimum read and write sizes unless
				798	* already specified in a mount option.
				799	* We use smaller I/O sizes when the file system
				800	* is being used for NFS service (wsync mount option).
				801	*/
				802	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
				803	if (mp->m_flags & XFS_MOUNT_WSYNC) {
				804	readio_log = XFS_WSYNC_READIO_LOG;
				805	writeio_log = XFS_WSYNC_WRITEIO_LOG;
				806	} else {
				807	readio_log = XFS_READIO_LOG_LARGE;
				808	writeio_log = XFS_WRITEIO_LOG_LARGE;
				809	}
				810	} else {
				811	readio_log = mp->m_readio_log;
				812	writeio_log = mp->m_writeio_log;
				813	}
				814
				815	/*
				816	* Set the number of readahead buffers to use based on
				817	* physical memory size.
				818	*/
				819	if (xfs_physmem <= 4096) /* <= 16MB */
				820	mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB;
				821	else if (xfs_physmem <= 8192) /* <= 32MB */
				822	mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB;
				823	else
				824	mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32;
				825	if (sbp->sb_blocklog > readio_log) {
				826	mp->m_readio_log = sbp->sb_blocklog;
				827	} else {
				828	mp->m_readio_log = readio_log;
				829	}
				830	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
				831	if (sbp->sb_blocklog > writeio_log) {
				832	mp->m_writeio_log = sbp->sb_blocklog;
				833	} else {
				834	mp->m_writeio_log = writeio_log;
				835	}
				836	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
				837
				838	/*
				839	* Set the inode cluster size based on the physical memory
				840	* size. This may still be overridden by the file system
				841	* block size if it is larger than the chosen cluster size.
				842	*/
				843	if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */
				844	mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE;
				845	} else {
				846	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
				847	}
				848	/*
				849	* Set whether we're using inode alignment.
				850	*/
				851	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
				852	mp->m_sb.sb_inoalignmt >=
				853	XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
				854	mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
				855	else
				856	mp->m_inoalign_mask = 0;
				857	/*
				858	* If we are using stripe alignment, check whether
				859	* the stripe unit is a multiple of the inode alignment
				860	*/
				861	if (mp->m_dalign && mp->m_inoalign_mask &&
				862	!(mp->m_dalign & mp->m_inoalign_mask))
				863	mp->m_sinoalign = mp->m_dalign;
				864	else
				865	mp->m_sinoalign = 0;
				866	/*
				867	* Check that the data (and log if separate) are an ok size.
				868	*/
				869	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
				870	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
				871	cmn_err(CE_WARN, "XFS: size check 1 failed");
				872	error = XFS_ERROR(E2BIG);
				873	goto error1;
				874	}
				875	error = xfs_read_buf(mp, mp->m_ddev_targp,
				876	d - XFS_FSS_TO_BB(mp, 1),
				877	XFS_FSS_TO_BB(mp, 1), 0, &bp);
				878	if (!error) {
				879	xfs_buf_relse(bp);
				880	} else {
				881	cmn_err(CE_WARN, "XFS: size check 2 failed");
				882	if (error == ENOSPC) {
				883	error = XFS_ERROR(E2BIG);
				884	}
				885	goto error1;
				886	}
				887
				888	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
				889	mp->m_logdev_targp != mp->m_ddev_targp) {
				890	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
				891	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
				892	cmn_err(CE_WARN, "XFS: size check 3 failed");
				893	error = XFS_ERROR(E2BIG);
				894	goto error1;
				895	}
				896	error = xfs_read_buf(mp, mp->m_logdev_targp,
				897	d - XFS_FSB_TO_BB(mp, 1),
				898	XFS_FSB_TO_BB(mp, 1), 0, &bp);
				899	if (!error) {
				900	xfs_buf_relse(bp);
				901	} else {
				902	cmn_err(CE_WARN, "XFS: size check 3 failed");
				903	if (error == ENOSPC) {
				904	error = XFS_ERROR(E2BIG);
				905	}
				906	goto error1;
				907	}
				908	}
				909
				910	/*
				911	* Initialize realtime fields in the mount structure
				912	*/
				913	if ((error = xfs_rtmount_init(mp))) {
				914	cmn_err(CE_WARN, "XFS: RT mount failed");
				915	goto error1;
				916	}
				917
				918	/*
				919	* For client case we are done now
				920	*/
				921	if (mfsi_flags & XFS_MFSI_CLIENT) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	922	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	923	}
				924
				925	/*
				926	* Copies the low order bits of the timestamp and the randomly
				927	* set "sequence" number out of a UUID.
				928	*/
				929	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
				930
				931	/*
				932	* The vfs structure needs to have a file system independent
				933	* way of checking for the invariant file system ID. Since it
				934	* can't look at mount structures it has a pointer to the data
				935	* in the mount structure.
				936	*
				937	* File systems that don't support user level file handles (i.e.
				938	* all of them except for XFS) will leave vfs_altfsid as NULL.
				939	*/
				940	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
				941	mp->m_dmevmask = 0; /* not persistent; set after each mount */
				942
				943	/*
				944	* Select the right directory manager.
				945	*/
				946	mp->m_dirops =
				947	XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
				948	xfsv2_dirops :
				949	xfsv1_dirops;
				950
				951	/*
				952	* Initialize directory manager's entries.
				953	*/
				954	XFS_DIR_MOUNT(mp);
				955
				956	/*
				957	* Initialize the attribute manager's entries.
				958	*/
				959	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;
				960
				961	/*
				962	* Initialize the precomputed transaction reservations values.
				963	*/
				964	xfs_trans_init(mp);
				965
				966	/*
				967	* Allocate and initialize the inode hash table for this
				968	* file system.
				969	*/
				970	xfs_ihash_init(mp);
				971	xfs_chash_init(mp);
				972
				973	/*
				974	* Allocate and initialize the per-ag data.
				975	*/
				976	init_rwsem(&mp->m_peraglock);
				977	mp->m_perag =
				978	kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);
				979
Nathan Scott	c11e2c3	2005-11-02 15:11:45 +1100	[diff] [blame]	980	mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	981
				982	/*
				983	* log's mount-time initialization. Perform 1st part recovery if needed
				984	*/
				985	if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */
				986	error = xfs_log_mount(mp, mp->m_logdev_targp,
				987	XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				988	XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
				989	if (error) {
				990	cmn_err(CE_WARN, "XFS: log mount failed");
				991	goto error2;
				992	}
				993	} else { /* No log has been defined */
				994	cmn_err(CE_WARN, "XFS: no log defined");
				995	XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
				996	error = XFS_ERROR(EFSCORRUPTED);
				997	goto error2;
				998	}
				999
				1000	/*
				1001	* Get and sanity-check the root inode.
				1002	* Save the pointer to it in the mount structure.
				1003	*/
				1004	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
				1005	if (error) {
				1006	cmn_err(CE_WARN, "XFS: failed to read root inode");
				1007	goto error3;
				1008	}
				1009
				1010	ASSERT(rip != NULL);
				1011	rvp = XFS_ITOV(rip);
				1012
				1013	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
				1014	cmn_err(CE_WARN, "XFS: corrupted root inode");
				1015	prdev("Root inode %llu is not a directory",
				1016	mp->m_ddev_targp, (unsigned long long)rip->i_ino);
				1017	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				1018	XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				1019	mp);
				1020	error = XFS_ERROR(EFSCORRUPTED);
				1021	goto error4;
				1022	}
				1023	mp->m_rootip = rip; /* save it */
				1024
				1025	xfs_iunlock(rip, XFS_ILOCK_EXCL);
				1026
				1027	/*
				1028	* Initialize realtime inode pointers in the mount structure
				1029	*/
				1030	if ((error = xfs_rtmount_inodes(mp))) {
				1031	/*
				1032	* Free up the root inode.
				1033	*/
				1034	cmn_err(CE_WARN, "XFS: failed to read RT inodes");
				1035	goto error4;
				1036	}
				1037
				1038	/*
				1039	* If fs is not mounted readonly, then update the superblock
				1040	* unit and width changes.
				1041	*/
				1042	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
				1043	xfs_mount_log_sbunit(mp, update_flags);
				1044
				1045	/*
				1046	* Initialise the XFS quota management subsystem for this mount
				1047	*/
				1048	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
				1049	goto error4;
				1050
				1051	/*
				1052	* Finish recovering the file system. This part needed to be
				1053	* delayed until after the root and real-time bitmap inodes
				1054	* were consistently read in.
				1055	*/
				1056	error = xfs_log_mount_finish(mp, mfsi_flags);
				1057	if (error) {
				1058	cmn_err(CE_WARN, "XFS: log mount finish failed");
				1059	goto error4;
				1060	}
				1061
				1062	/*
				1063	* Complete the quota initialisation, post-log-replay component.
				1064	*/
				1065	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
				1066	goto error4;
				1067
				1068	return 0;
				1069
				1070	error4:
				1071	/*
				1072	* Free up the root inode.
				1073	*/
				1074	VN_RELE(rvp);
				1075	error3:
				1076	xfs_log_unmount_dealloc(mp);
				1077	error2:
				1078	xfs_ihash_free(mp);
				1079	xfs_chash_free(mp);
				1080	for (agno = 0; agno < sbp->sb_agcount; agno++)
				1081	if (mp->m_perag[agno].pagb_list)
				1082	kmem_free(mp->m_perag[agno].pagb_list,
				1083	sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
				1084	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
				1085	mp->m_perag = NULL;
				1086	/* FALLTHROUGH */
				1087	error1:
				1088	if (uuid_mounted)
				1089	xfs_uuid_unmount(mp);
				1090	xfs_freesb(mp);
				1091	return error;
				1092	}
				1093
				1094	/*
				1095	* xfs_unmountfs
				1096	*
				1097	* This flushes out the inodes,dquots and the superblock, unmounts the
				1098	* log and makes sure that incore structures are freed.
				1099	*/
				1100	int
				1101	xfs_unmountfs(xfs_mount_t mp, struct cred cr)
				1102	{
				1103	struct vfs *vfsp = XFS_MTOVFS(mp);
				1104	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1105	int64_t fsid;
				1106	#endif
				1107
Christoph Hellwig	efa8027	2005-06-21 15:37:17 +1000	[diff] [blame]	1108	xfs_iflush_all(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1109
Nathan Scott	ee2a4f7	2006-01-11 15:33:36 +1100	[diff] [blame]	1110	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL \| XFS_QMOPT_UMOUNTING);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1111
				1112	/*
				1113	* Flush out the log synchronously so that we know for sure
				1114	* that nothing is pinned. This is important because bflush()
				1115	* will skip pinned buffers.
				1116	*/
				1117	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE \| XFS_LOG_SYNC);
				1118
				1119	xfs_binval(mp->m_ddev_targp);
				1120	if (mp->m_rtdev_targp) {
				1121	xfs_binval(mp->m_rtdev_targp);
				1122	}
				1123
				1124	xfs_unmountfs_writesb(mp);
				1125
				1126	xfs_unmountfs_wait(mp); /* wait for async bufs */
				1127
				1128	xfs_log_unmount(mp); /* Done! No more fs ops. */
				1129
				1130	xfs_freesb(mp);
				1131
				1132	/*
				1133	* All inodes from this mount point should be freed.
				1134	*/
				1135	ASSERT(mp->m_inodes == NULL);
				1136
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1137	xfs_unmountfs_close(mp, cr);
				1138	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
				1139	xfs_uuid_unmount(mp);
				1140
				1141	#if defined(DEBUG) \|\| defined(INDUCE_IO_ERROR)
				1142	/*
				1143	* clear all error tags on this filesystem
				1144	*/
				1145	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
				1146	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
				1147	#endif
				1148	XFS_IODONE(vfsp);
				1149	xfs_mount_free(mp, 1);
				1150	return 0;
				1151	}
				1152
				1153	void
				1154	xfs_unmountfs_close(xfs_mount_t mp, struct cred cr)
				1155	{
				1156	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1157	xfs_free_buftarg(mp->m_logdev_targp, 1);
				1158	if (mp->m_rtdev_targp)
				1159	xfs_free_buftarg(mp->m_rtdev_targp, 1);
				1160	xfs_free_buftarg(mp->m_ddev_targp, 0);
				1161	}
				1162
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	1163	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1164	xfs_unmountfs_wait(xfs_mount_t *mp)
				1165	{
				1166	if (mp->m_logdev_targp != mp->m_ddev_targp)
				1167	xfs_wait_buftarg(mp->m_logdev_targp);
				1168	if (mp->m_rtdev_targp)
				1169	xfs_wait_buftarg(mp->m_rtdev_targp);
				1170	xfs_wait_buftarg(mp->m_ddev_targp);
				1171	}
				1172
				1173	int
				1174	xfs_unmountfs_writesb(xfs_mount_t *mp)
				1175	{
				1176	xfs_buf_t *sbp;
				1177	xfs_sb_t *sb;
				1178	int error = 0;
				1179
				1180	/*
				1181	* skip superblock write if fs is read-only, or
				1182	* if we are doing a forced umount.
				1183	*/
				1184	sbp = xfs_getsb(mp, 0);
				1185	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY \|\|
				1186	XFS_FORCED_SHUTDOWN(mp))) {
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1187
				1188	xfs_icsb_sync_counters(mp);
				1189
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1190	/*
				1191	* mark shared-readonly if desired
				1192	*/
				1193	sb = XFS_BUF_TO_SBP(sbp);
				1194	if (mp->m_mk_sharedro) {
				1195	if (!(sb->sb_flags & XFS_SBF_READONLY))
				1196	sb->sb_flags \|= XFS_SBF_READONLY;
				1197	if (!XFS_SB_VERSION_HASSHARED(sb))
				1198	XFS_SB_VERSION_ADDSHARED(sb);
				1199	xfs_fs_cmn_err(CE_NOTE, mp,
				1200	"Unmounting, marking shared read-only");
				1201	}
				1202	XFS_BUF_UNDONE(sbp);
				1203	XFS_BUF_UNREAD(sbp);
				1204	XFS_BUF_UNDELAYWRITE(sbp);
				1205	XFS_BUF_WRITE(sbp);
				1206	XFS_BUF_UNASYNC(sbp);
				1207	ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
				1208	xfsbdstrat(mp, sbp);
				1209	/* Nevermind errors we might get here. */
				1210	error = xfs_iowait(sbp);
				1211	if (error)
				1212	xfs_ioerror_alert("xfs_unmountfs_writesb",
				1213	mp, sbp, XFS_BUF_ADDR(sbp));
				1214	if (error && mp->m_mk_sharedro)
				1215	xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
				1216	}
				1217	xfs_buf_relse(sbp);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1218	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1219	}
				1220
				1221	/*
				1222	* xfs_mod_sb() can be used to copy arbitrary changes to the
				1223	* in-core superblock into the superblock buffer to be logged.
				1224	* It does not provide the higher level of locking that is
				1225	* needed to protect the in-core superblock from concurrent
				1226	* access.
				1227	*/
				1228	void
				1229	xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
				1230	{
				1231	xfs_buf_t *bp;
				1232	int first;
				1233	int last;
				1234	xfs_mount_t *mp;
				1235	xfs_sb_t *sbp;
				1236	xfs_sb_field_t f;
				1237
				1238	ASSERT(fields);
				1239	if (!fields)
				1240	return;
				1241	mp = tp->t_mountp;
				1242	bp = xfs_trans_getsb(tp, mp, 0);
				1243	sbp = XFS_BUF_TO_SBP(bp);
				1244	first = sizeof(xfs_sb_t);
				1245	last = 0;
				1246
				1247	/* translate/copy */
				1248
				1249	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields);
				1250
				1251	/* find modified range */
				1252
				1253	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				1254	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1255	first = xfs_sb_info[f].offset;
				1256
				1257	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
				1258	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
				1259	last = xfs_sb_info[f + 1].offset - 1;
				1260
				1261	xfs_trans_log_buf(tp, bp, first, last);
				1262	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1263	/*
				1264	* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
				1265	* a delta to a specified field in the in-core superblock. Simply
				1266	* switch on the field indicated and apply the delta to that field.
				1267	* Fields are not allowed to dip below zero, so if the delta would
				1268	* do this do not apply it and return EINVAL.
				1269	*
				1270	* The SB_LOCK must be held when this routine is called.
				1271	*/
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1272	int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1273	xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
				1274	int delta, int rsvd)
				1275	{
				1276	int scounter; /* short counter for 32 bit fields */
				1277	long long lcounter; /* long counter for 64 bit fields */
				1278	long long res_used, rem;
				1279
				1280	/*
				1281	* With the in-core superblock spin lock held, switch
				1282	* on the indicated field. Apply the delta to the
				1283	* proper field. If the fields value would dip below
				1284	* 0, then do not apply the delta and return EINVAL.
				1285	*/
				1286	switch (field) {
				1287	case XFS_SBS_ICOUNT:
				1288	lcounter = (long long)mp->m_sb.sb_icount;
				1289	lcounter += delta;
				1290	if (lcounter < 0) {
				1291	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1292	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1293	}
				1294	mp->m_sb.sb_icount = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1295	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1296	case XFS_SBS_IFREE:
				1297	lcounter = (long long)mp->m_sb.sb_ifree;
				1298	lcounter += delta;
				1299	if (lcounter < 0) {
				1300	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1301	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1302	}
				1303	mp->m_sb.sb_ifree = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1304	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1305	case XFS_SBS_FDBLOCKS:
				1306
				1307	lcounter = (long long)mp->m_sb.sb_fdblocks;
				1308	res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
				1309
				1310	if (delta > 0) { /* Putting blocks back */
				1311	if (res_used > delta) {
				1312	mp->m_resblks_avail += delta;
				1313	} else {
				1314	rem = delta - res_used;
				1315	mp->m_resblks_avail = mp->m_resblks;
				1316	lcounter += rem;
				1317	}
				1318	} else { /* Taking blocks away */
				1319
				1320	lcounter += delta;
				1321
				1322	/*
				1323	* If were out of blocks, use any available reserved blocks if
				1324	* were allowed to.
				1325	*/
				1326
				1327	if (lcounter < 0) {
				1328	if (rsvd) {
				1329	lcounter = (long long)mp->m_resblks_avail + delta;
				1330	if (lcounter < 0) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1331	return XFS_ERROR(ENOSPC);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1332	}
				1333	mp->m_resblks_avail = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1334	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1335	} else { /* not reserved */
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1336	return XFS_ERROR(ENOSPC);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1337	}
				1338	}
				1339	}
				1340
				1341	mp->m_sb.sb_fdblocks = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1342	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1343	case XFS_SBS_FREXTENTS:
				1344	lcounter = (long long)mp->m_sb.sb_frextents;
				1345	lcounter += delta;
				1346	if (lcounter < 0) {
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1347	return XFS_ERROR(ENOSPC);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1348	}
				1349	mp->m_sb.sb_frextents = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1350	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1351	case XFS_SBS_DBLOCKS:
				1352	lcounter = (long long)mp->m_sb.sb_dblocks;
				1353	lcounter += delta;
				1354	if (lcounter < 0) {
				1355	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1356	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1357	}
				1358	mp->m_sb.sb_dblocks = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1359	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1360	case XFS_SBS_AGCOUNT:
				1361	scounter = mp->m_sb.sb_agcount;
				1362	scounter += delta;
				1363	if (scounter < 0) {
				1364	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1365	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1366	}
				1367	mp->m_sb.sb_agcount = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1368	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1369	case XFS_SBS_IMAX_PCT:
				1370	scounter = mp->m_sb.sb_imax_pct;
				1371	scounter += delta;
				1372	if (scounter < 0) {
				1373	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1374	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1375	}
				1376	mp->m_sb.sb_imax_pct = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1377	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1378	case XFS_SBS_REXTSIZE:
				1379	scounter = mp->m_sb.sb_rextsize;
				1380	scounter += delta;
				1381	if (scounter < 0) {
				1382	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1383	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1384	}
				1385	mp->m_sb.sb_rextsize = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1386	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1387	case XFS_SBS_RBMBLOCKS:
				1388	scounter = mp->m_sb.sb_rbmblocks;
				1389	scounter += delta;
				1390	if (scounter < 0) {
				1391	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1392	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1393	}
				1394	mp->m_sb.sb_rbmblocks = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1395	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1396	case XFS_SBS_RBLOCKS:
				1397	lcounter = (long long)mp->m_sb.sb_rblocks;
				1398	lcounter += delta;
				1399	if (lcounter < 0) {
				1400	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1401	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1402	}
				1403	mp->m_sb.sb_rblocks = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1404	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1405	case XFS_SBS_REXTENTS:
				1406	lcounter = (long long)mp->m_sb.sb_rextents;
				1407	lcounter += delta;
				1408	if (lcounter < 0) {
				1409	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1410	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1411	}
				1412	mp->m_sb.sb_rextents = lcounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1413	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1414	case XFS_SBS_REXTSLOG:
				1415	scounter = mp->m_sb.sb_rextslog;
				1416	scounter += delta;
				1417	if (scounter < 0) {
				1418	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1419	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1420	}
				1421	mp->m_sb.sb_rextslog = scounter;
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1422	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1423	default:
				1424	ASSERT(0);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1425	return XFS_ERROR(EINVAL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1426	}
				1427	}
				1428
				1429	/*
				1430	* xfs_mod_incore_sb() is used to change a field in the in-core
				1431	* superblock structure by the specified delta. This modification
				1432	* is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked()
				1433	* routine to do the work.
				1434	*/
				1435	int
				1436	xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
				1437	{
				1438	unsigned long s;
				1439	int status;
				1440
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1441	/* check for per-cpu counters */
				1442	switch (field) {
				1443	#ifdef HAVE_PERCPU_SB
				1444	case XFS_SBS_ICOUNT:
				1445	case XFS_SBS_IFREE:
				1446	case XFS_SBS_FDBLOCKS:
				1447	if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				1448	status = xfs_icsb_modify_counters(mp, field,
				1449	delta, rsvd);
				1450	break;
				1451	}
				1452	/* FALLTHROUGH */
				1453	#endif
				1454	default:
				1455	s = XFS_SB_LOCK(mp);
				1456	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
				1457	XFS_SB_UNLOCK(mp, s);
				1458	break;
				1459	}
				1460
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1461	return status;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1462	}
				1463
				1464	/*
				1465	* xfs_mod_incore_sb_batch() is used to change more than one field
				1466	* in the in-core superblock structure at a time. This modification
				1467	* is protected by a lock internal to this module. The fields and
				1468	* changes to those fields are specified in the array of xfs_mod_sb
				1469	* structures passed in.
				1470	*
				1471	* Either all of the specified deltas will be applied or none of
				1472	* them will. If any modified field dips below 0, then all modifications
				1473	* will be backed out and EINVAL will be returned.
				1474	*/
				1475	int
				1476	xfs_mod_incore_sb_batch(xfs_mount_t mp, xfs_mod_sb_t msb, uint nmsb, int rsvd)
				1477	{
				1478	unsigned long s;
				1479	int status=0;
				1480	xfs_mod_sb_t *msbp;
				1481
				1482	/*
				1483	* Loop through the array of mod structures and apply each
				1484	* individually. If any fail, then back out all those
				1485	* which have already been applied. Do all of this within
				1486	* the scope of the SB_LOCK so that all of the changes will
				1487	* be atomic.
				1488	*/
				1489	s = XFS_SB_LOCK(mp);
				1490	msbp = &msb[0];
				1491	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
				1492	/*
				1493	* Apply the delta at index n. If it fails, break
				1494	* from the loop so we'll fall into the undo loop
				1495	* below.
				1496	*/
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1497	switch (msbp->msb_field) {
				1498	#ifdef HAVE_PERCPU_SB
				1499	case XFS_SBS_ICOUNT:
				1500	case XFS_SBS_IFREE:
				1501	case XFS_SBS_FDBLOCKS:
				1502	if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				1503	status = xfs_icsb_modify_counters_locked(mp,
				1504	msbp->msb_field,
				1505	msbp->msb_delta, rsvd);
				1506	break;
				1507	}
				1508	/* FALLTHROUGH */
				1509	#endif
				1510	default:
				1511	status = xfs_mod_incore_sb_unlocked(mp,
				1512	msbp->msb_field,
				1513	msbp->msb_delta, rsvd);
				1514	break;
				1515	}
				1516
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1517	if (status != 0) {
				1518	break;
				1519	}
				1520	}
				1521
				1522	/*
				1523	* If we didn't complete the loop above, then back out
				1524	* any changes made to the superblock. If you add code
				1525	* between the loop above and here, make sure that you
				1526	* preserve the value of status. Loop back until
				1527	* we step below the beginning of the array. Make sure
				1528	* we don't touch anything back there.
				1529	*/
				1530	if (status != 0) {
				1531	msbp--;
				1532	while (msbp >= msb) {
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1533	switch (msbp->msb_field) {
				1534	#ifdef HAVE_PERCPU_SB
				1535	case XFS_SBS_ICOUNT:
				1536	case XFS_SBS_IFREE:
				1537	case XFS_SBS_FDBLOCKS:
				1538	if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				1539	status =
				1540	xfs_icsb_modify_counters_locked(mp,
				1541	msbp->msb_field,
				1542	-(msbp->msb_delta),
				1543	rsvd);
				1544	break;
				1545	}
				1546	/* FALLTHROUGH */
				1547	#endif
				1548	default:
				1549	status = xfs_mod_incore_sb_unlocked(mp,
				1550	msbp->msb_field,
				1551	-(msbp->msb_delta),
				1552	rsvd);
				1553	break;
				1554	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1555	ASSERT(status == 0);
				1556	msbp--;
				1557	}
				1558	}
				1559	XFS_SB_UNLOCK(mp, s);
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1560	return status;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1561	}
				1562
				1563	/*
				1564	* xfs_getsb() is called to obtain the buffer for the superblock.
				1565	* The buffer is returned locked and read in from disk.
				1566	* The buffer should be released with a call to xfs_brelse().
				1567	*
				1568	* If the flags parameter is BUF_TRYLOCK, then we'll only return
				1569	* the superblock buffer if it can be locked without sleeping.
				1570	* If it can't then we'll return NULL.
				1571	*/
				1572	xfs_buf_t *
				1573	xfs_getsb(
				1574	xfs_mount_t *mp,
				1575	int flags)
				1576	{
				1577	xfs_buf_t *bp;
				1578
				1579	ASSERT(mp->m_sb_bp != NULL);
				1580	bp = mp->m_sb_bp;
				1581	if (flags & XFS_BUF_TRYLOCK) {
				1582	if (!XFS_BUF_CPSEMA(bp)) {
				1583	return NULL;
				1584	}
				1585	} else {
				1586	XFS_BUF_PSEMA(bp, PRIBIO);
				1587	}
				1588	XFS_BUF_HOLD(bp);
				1589	ASSERT(XFS_BUF_ISDONE(bp));
Jesper Juhl	014c254	2006-01-15 02:37:08 +0100	[diff] [blame]	1590	return bp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1591	}
				1592
				1593	/*
				1594	* Used to free the superblock along various error paths.
				1595	*/
				1596	void
				1597	xfs_freesb(
				1598	xfs_mount_t *mp)
				1599	{
				1600	xfs_buf_t *bp;
				1601
				1602	/*
				1603	* Use xfs_getsb() so that the buffer will be locked
				1604	* when we call xfs_buf_relse().
				1605	*/
				1606	bp = xfs_getsb(mp, 0);
				1607	XFS_BUF_UNMANAGE(bp);
				1608	xfs_buf_relse(bp);
				1609	mp->m_sb_bp = NULL;
				1610	}
				1611
				1612	/*
				1613	* See if the UUID is unique among mounted XFS filesystems.
				1614	* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
				1615	*/
				1616	STATIC int
				1617	xfs_uuid_mount(
				1618	xfs_mount_t *mp)
				1619	{
				1620	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
				1621	cmn_err(CE_WARN,
				1622	"XFS: Filesystem %s has nil UUID - can't mount",
				1623	mp->m_fsname);
				1624	return -1;
				1625	}
				1626	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
				1627	cmn_err(CE_WARN,
				1628	"XFS: Filesystem %s has duplicate UUID - can't mount",
				1629	mp->m_fsname);
				1630	return -1;
				1631	}
				1632	return 0;
				1633	}
				1634
				1635	/*
				1636	* Remove filesystem from the UUID table.
				1637	*/
				1638	STATIC void
				1639	xfs_uuid_unmount(
				1640	xfs_mount_t *mp)
				1641	{
				1642	uuid_table_remove(&mp->m_sb.sb_uuid);
				1643	}
				1644
				1645	/*
				1646	* Used to log changes to the superblock unit and width fields which could
				1647	* be altered by the mount options. Only the first superblock is updated.
				1648	*/
				1649	STATIC void
				1650	xfs_mount_log_sbunit(
				1651	xfs_mount_t *mp,
				1652	__int64_t fields)
				1653	{
				1654	xfs_trans_t *tp;
				1655
				1656	ASSERT(fields & (XFS_SB_UNIT\|XFS_SB_WIDTH\|XFS_SB_UUID));
				1657
				1658	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
				1659	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				1660	XFS_DEFAULT_LOG_COUNT)) {
				1661	xfs_trans_cancel(tp, 0);
				1662	return;
				1663	}
				1664	xfs_mod_sb(tp, fields);
				1665	xfs_trans_commit(tp, 0, NULL);
				1666	}
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1667
				1668
				1669	#ifdef HAVE_PERCPU_SB
				1670	/*
				1671	* Per-cpu incore superblock counters
				1672	*
				1673	* Simple concept, difficult implementation
				1674	*
				1675	* Basically, replace the incore superblock counters with a distributed per cpu
				1676	* counter for contended fields (e.g. free block count).
				1677	*
				1678	* Difficulties arise in that the incore sb is used for ENOSPC checking, and
				1679	* hence needs to be accurately read when we are running low on space. Hence
				1680	* there is a method to enable and disable the per-cpu counters based on how
				1681	* much "stuff" is available in them.
				1682	*
				1683	* Basically, a counter is enabled if there is enough free resource to justify
				1684	* running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
				1685	* ENOSPC), then we disable the counters to synchronise all callers and
				1686	* re-distribute the available resources.
				1687	*
				1688	* If, once we redistributed the available resources, we still get a failure,
				1689	* we disable the per-cpu counter and go through the slow path.
				1690	*
				1691	* The slow path is the current xfs_mod_incore_sb() function. This means that
				1692	* when we disable a per-cpu counter, we need to drain its resources back to
				1693	* the global superblock. We do this after disabling the counter to prevent
				1694	* more threads from queueing up on the counter.
				1695	*
				1696	* Essentially, this means that we still need a lock in the fast path to enable
				1697	* synchronisation between the global counters and the per-cpu counters. This
				1698	* is not a problem because the lock will be local to a CPU almost all the time
				1699	* and have little contention except when we get to ENOSPC conditions.
				1700	*
				1701	* Basically, this lock becomes a barrier that enables us to lock out the fast
				1702	* path while we do things like enabling and disabling counters and
				1703	* synchronising the counters.
				1704	*
				1705	* Locking rules:
				1706	*
				1707	* 1. XFS_SB_LOCK() before picking up per-cpu locks
				1708	* 2. per-cpu locks always picked up via for_each_online_cpu() order
				1709	* 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
				1710	* 4. modifying per-cpu counters requires holding per-cpu lock
				1711	* 5. modifying global counters requires holding XFS_SB_LOCK
				1712	* 6. enabling or disabling a counter requires holding the XFS_SB_LOCK
				1713	* and _none_ of the per-cpu locks.
				1714	*
				1715	* Disabled counters are only ever re-enabled by a balance operation
				1716	* that results in more free resources per CPU than a given threshold.
				1717	* To ensure counters don't remain disabled, they are rebalanced when
				1718	* the global resource goes above a higher threshold (i.e. some hysteresis
				1719	* is present to prevent thrashing).
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1720	*/
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame^]	1721
				1722	/*
				1723	* hot-plug CPU notifier support.
				1724	*
				1725	* We cannot use the hotcpu_register() function because it does
				1726	* not allow notifier instances. We need a notifier per filesystem
				1727	* as we need to be able to identify the filesystem to balance
				1728	* the counters out. This is acheived by having a notifier block
				1729	* embedded in the xfs_mount_t and doing pointer magic to get the
				1730	* mount pointer from the notifier block address.
				1731	*/
				1732	STATIC int
				1733	xfs_icsb_cpu_notify(
				1734	struct notifier_block *nfb,
				1735	unsigned long action,
				1736	void *hcpu)
				1737	{
				1738	xfs_icsb_cnts_t *cntp;
				1739	xfs_mount_t *mp;
				1740	int s;
				1741
				1742	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
				1743	cntp = (xfs_icsb_cnts_t *)
				1744	per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
				1745	switch (action) {
				1746	case CPU_UP_PREPARE:
				1747	/* Easy Case - initialize the area and locks, and
				1748	* then rebalance when online does everything else for us. */
				1749	spin_lock_init(&cntp->icsb_lock);
				1750	cntp->icsb_icount = 0;
				1751	cntp->icsb_ifree = 0;
				1752	cntp->icsb_fdblocks = 0;
				1753	break;
				1754	case CPU_ONLINE:
				1755	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
				1756	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
				1757	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
				1758	break;
				1759	case CPU_DEAD:
				1760	/* Disable all the counters, then fold the dead cpu's
				1761	* count into the total on the global superblock and
				1762	* re-enable the counters. */
				1763	s = XFS_SB_LOCK(mp);
				1764	xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
				1765	xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
				1766	xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
				1767
				1768	mp->m_sb.sb_icount += cntp->icsb_icount;
				1769	mp->m_sb.sb_ifree += cntp->icsb_ifree;
				1770	mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
				1771
				1772	cntp->icsb_icount = 0;
				1773	cntp->icsb_ifree = 0;
				1774	cntp->icsb_fdblocks = 0;
				1775
				1776	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED);
				1777	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED);
				1778	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED);
				1779	XFS_SB_UNLOCK(mp, s);
				1780	break;
				1781	}
				1782
				1783	return NOTIFY_OK;
				1784	}
				1785
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1786	int
				1787	xfs_icsb_init_counters(
				1788	xfs_mount_t *mp)
				1789	{
				1790	xfs_icsb_cnts_t *cntp;
				1791	int i;
				1792
				1793	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
				1794	if (mp->m_sb_cnts == NULL)
				1795	return -ENOMEM;
				1796
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame^]	1797	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
				1798	mp->m_icsb_notifier.priority = 0;
				1799	register_cpu_notifier(&mp->m_icsb_notifier);
				1800
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1801	for_each_online_cpu(i) {
				1802	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
				1803	spin_lock_init(&cntp->icsb_lock);
				1804	}
				1805	/*
				1806	* start with all counters disabled so that the
				1807	* initial balance kicks us off correctly
				1808	*/
				1809	mp->m_icsb_counters = -1;
				1810	return 0;
				1811	}
				1812
				1813	STATIC void
				1814	xfs_icsb_destroy_counters(
				1815	xfs_mount_t *mp)
				1816	{
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame^]	1817	if (mp->m_sb_cnts) {
				1818	unregister_cpu_notifier(&mp->m_icsb_notifier);
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1819	free_percpu(mp->m_sb_cnts);
David Chinner	e8234a6	2006-03-14 13:23:52 +1100	[diff] [blame^]	1820	}
David Chinner	8d280b9	2006-03-14 13:13:09 +1100	[diff] [blame]	1821	}
				1822
				1823
				1824	STATIC inline void
				1825	xfs_icsb_lock_all_counters(
				1826	xfs_mount_t *mp)
				1827	{
				1828	xfs_icsb_cnts_t *cntp;
				1829	int i;
				1830
				1831	for_each_online_cpu(i) {
				1832	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
				1833	spin_lock(&cntp->icsb_lock);
				1834	}
				1835	}
				1836
				1837	STATIC inline void
				1838	xfs_icsb_unlock_all_counters(
				1839	xfs_mount_t *mp)
				1840	{
				1841	xfs_icsb_cnts_t *cntp;
				1842	int i;
				1843
				1844	for_each_online_cpu(i) {
				1845	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
				1846	spin_unlock(&cntp->icsb_lock);
				1847	}
				1848	}
				1849
				1850	STATIC void
				1851	xfs_icsb_count(
				1852	xfs_mount_t *mp,
				1853	xfs_icsb_cnts_t *cnt,
				1854	int flags)
				1855	{
				1856	xfs_icsb_cnts_t *cntp;
				1857	int i;
				1858
				1859	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
				1860
				1861	if (!(flags & XFS_ICSB_LAZY_COUNT))
				1862	xfs_icsb_lock_all_counters(mp);
				1863
				1864	for_each_online_cpu(i) {
				1865	cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
				1866	cnt->icsb_icount += cntp->icsb_icount;
				1867	cnt->icsb_ifree += cntp->icsb_ifree;
				1868	cnt->icsb_fdblocks += cntp->icsb_fdblocks;
				1869	}
				1870
				1871	if (!(flags & XFS_ICSB_LAZY_COUNT))
				1872	xfs_icsb_unlock_all_counters(mp);
				1873	}
				1874
				1875	STATIC int
				1876	xfs_icsb_counter_disabled(
				1877	xfs_mount_t *mp,
				1878	xfs_sb_field_t field)
				1879	{
				1880	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
				1881	return test_bit(field, &mp->m_icsb_counters);
				1882	}
				1883
				1884	STATIC int
				1885	xfs_icsb_disable_counter(
				1886	xfs_mount_t *mp,
				1887	xfs_sb_field_t field)
				1888	{
				1889	xfs_icsb_cnts_t cnt;
				1890
				1891	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
				1892
				1893	xfs_icsb_lock_all_counters(mp);
				1894	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
				1895	/* drain back to superblock */
				1896
				1897	xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED\|XFS_ICSB_LAZY_COUNT);
				1898	switch(field) {
				1899	case XFS_SBS_ICOUNT:
				1900	mp->m_sb.sb_icount = cnt.icsb_icount;
				1901	break;
				1902	case XFS_SBS_IFREE:
				1903	mp->m_sb.sb_ifree = cnt.icsb_ifree;
				1904	break;
				1905	case XFS_SBS_FDBLOCKS:
				1906	mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
				1907	break;
				1908	default:
				1909	BUG();
				1910	}
				1911	}
				1912
				1913	xfs_icsb_unlock_all_counters(mp);
				1914
				1915	return 0;
				1916	}
				1917
				1918	STATIC void
				1919	xfs_icsb_enable_counter(
				1920	xfs_mount_t *mp,
				1921	xfs_sb_field_t field,
				1922	uint64_t count,
				1923	uint64_t resid)
				1924	{
				1925	xfs_icsb_cnts_t *cntp;
				1926	int i;
				1927
				1928	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
				1929
				1930	xfs_icsb_lock_all_counters(mp);
				1931	for_each_online_cpu(i) {
				1932	cntp = per_cpu_ptr(mp->m_sb_cnts, i);
				1933	switch (field) {
				1934	case XFS_SBS_ICOUNT:
				1935	cntp->icsb_icount = count + resid;
				1936	break;
				1937	case XFS_SBS_IFREE:
				1938	cntp->icsb_ifree = count + resid;
				1939	break;
				1940	case XFS_SBS_FDBLOCKS:
				1941	cntp->icsb_fdblocks = count + resid;
				1942	break;
				1943	default:
				1944	BUG();
				1945	break;
				1946	}
				1947	resid = 0;
				1948	}
				1949	clear_bit(field, &mp->m_icsb_counters);
				1950	xfs_icsb_unlock_all_counters(mp);
				1951	}
				1952
				1953	STATIC void
				1954	xfs_icsb_sync_counters_int(
				1955	xfs_mount_t *mp,
				1956	int flags)
				1957	{
				1958	xfs_icsb_cnts_t cnt;
				1959	int s;
				1960
				1961	/* Pass 1: lock all counters */
				1962	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
				1963	s = XFS_SB_LOCK(mp);
				1964
				1965	xfs_icsb_count(mp, &cnt, flags);
				1966
				1967	/* Step 3: update mp->m_sb fields */
				1968	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
				1969	mp->m_sb.sb_icount = cnt.icsb_icount;
				1970	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
				1971	mp->m_sb.sb_ifree = cnt.icsb_ifree;
				1972	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
				1973	mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
				1974
				1975	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
				1976	XFS_SB_UNLOCK(mp, s);
				1977	}
				1978
				1979	/*
				1980	* Accurate update of per-cpu counters to incore superblock
				1981	*/
				1982	STATIC void
				1983	xfs_icsb_sync_counters(
				1984	xfs_mount_t *mp)
				1985	{
				1986	xfs_icsb_sync_counters_int(mp, 0);
				1987	}
				1988
				1989	/*
				1990	* lazy addition used for things like df, background sb syncs, etc
				1991	*/
				1992	void
				1993	xfs_icsb_sync_counters_lazy(
				1994	xfs_mount_t *mp)
				1995	{
				1996	xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
				1997	}
				1998
				1999	/*
				2000	* Balance and enable/disable counters as necessary.
				2001	*
				2002	* Thresholds for re-enabling counters are somewhat magic.
				2003	* inode counts are chosen to be the same number as single
				2004	* on disk allocation chunk per CPU, and free blocks is
				2005	* something far enough zero that we aren't going thrash
				2006	* when we get near ENOSPC.
				2007	*/
				2008	#define XFS_ICSB_INO_CNTR_REENABLE 64
				2009	#define XFS_ICSB_FDBLK_CNTR_REENABLE 512
				2010	STATIC void
				2011	xfs_icsb_balance_counter(
				2012	xfs_mount_t *mp,
				2013	xfs_sb_field_t field,
				2014	int flags)
				2015	{
				2016	uint64_t count, resid = 0;
				2017	int weight = num_online_cpus();
				2018	int s;
				2019
				2020	if (!(flags & XFS_ICSB_SB_LOCKED))
				2021	s = XFS_SB_LOCK(mp);
				2022
				2023	/* disable counter and sync counter */
				2024	xfs_icsb_disable_counter(mp, field);
				2025
				2026	/* update counters - first CPU gets residual*/
				2027	switch (field) {
				2028	case XFS_SBS_ICOUNT:
				2029	count = mp->m_sb.sb_icount;
				2030	resid = do_div(count, weight);
				2031	if (count < XFS_ICSB_INO_CNTR_REENABLE)
				2032	goto out;
				2033	break;
				2034	case XFS_SBS_IFREE:
				2035	count = mp->m_sb.sb_ifree;
				2036	resid = do_div(count, weight);
				2037	if (count < XFS_ICSB_INO_CNTR_REENABLE)
				2038	goto out;
				2039	break;
				2040	case XFS_SBS_FDBLOCKS:
				2041	count = mp->m_sb.sb_fdblocks;
				2042	resid = do_div(count, weight);
				2043	if (count < XFS_ICSB_FDBLK_CNTR_REENABLE)
				2044	goto out;
				2045	break;
				2046	default:
				2047	BUG();
				2048	break;
				2049	}
				2050
				2051	xfs_icsb_enable_counter(mp, field, count, resid);
				2052	out:
				2053	if (!(flags & XFS_ICSB_SB_LOCKED))
				2054	XFS_SB_UNLOCK(mp, s);
				2055	}
				2056
				2057	STATIC int
				2058	xfs_icsb_modify_counters_int(
				2059	xfs_mount_t *mp,
				2060	xfs_sb_field_t field,
				2061	int delta,
				2062	int rsvd,
				2063	int flags)
				2064	{
				2065	xfs_icsb_cnts_t *icsbp;
				2066	long long lcounter; /* long counter for 64 bit fields */
				2067	int cpu, s, locked = 0;
				2068	int ret = 0, balance_done = 0;
				2069
				2070	again:
				2071	cpu = get_cpu();
				2072	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu),
				2073	spin_lock(&icsbp->icsb_lock);
				2074	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
				2075	goto slow_path;
				2076
				2077	switch (field) {
				2078	case XFS_SBS_ICOUNT:
				2079	lcounter = icsbp->icsb_icount;
				2080	lcounter += delta;
				2081	if (unlikely(lcounter < 0))
				2082	goto slow_path;
				2083	icsbp->icsb_icount = lcounter;
				2084	break;
				2085
				2086	case XFS_SBS_IFREE:
				2087	lcounter = icsbp->icsb_ifree;
				2088	lcounter += delta;
				2089	if (unlikely(lcounter < 0))
				2090	goto slow_path;
				2091	icsbp->icsb_ifree = lcounter;
				2092	break;
				2093
				2094	case XFS_SBS_FDBLOCKS:
				2095	BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
				2096
				2097	lcounter = icsbp->icsb_fdblocks;
				2098	lcounter += delta;
				2099	if (unlikely(lcounter < 0))
				2100	goto slow_path;
				2101	icsbp->icsb_fdblocks = lcounter;
				2102	break;
				2103	default:
				2104	BUG();
				2105	break;
				2106	}
				2107	spin_unlock(&icsbp->icsb_lock);
				2108	put_cpu();
				2109	if (locked)
				2110	XFS_SB_UNLOCK(mp, s);
				2111	return 0;
				2112
				2113	/*
				2114	* The slow path needs to be run with the SBLOCK
				2115	* held so that we prevent other threads from
				2116	* attempting to run this path at the same time.
				2117	* this provides exclusion for the balancing code,
				2118	* and exclusive fallback if the balance does not
				2119	* provide enough resources to continue in an unlocked
				2120	* manner.
				2121	*/
				2122	slow_path:
				2123	spin_unlock(&icsbp->icsb_lock);
				2124	put_cpu();
				2125
				2126	/* need to hold superblock incase we need
				2127	* to disable a counter */
				2128	if (!(flags & XFS_ICSB_SB_LOCKED)) {
				2129	s = XFS_SB_LOCK(mp);
				2130	locked = 1;
				2131	flags \|= XFS_ICSB_SB_LOCKED;
				2132	}
				2133	if (!balance_done) {
				2134	xfs_icsb_balance_counter(mp, field, flags);
				2135	balance_done = 1;
				2136	goto again;
				2137	} else {
				2138	/*
				2139	* we might not have enough on this local
				2140	* cpu to allocate for a bulk request.
				2141	* We need to drain this field from all CPUs
				2142	* and disable the counter fastpath
				2143	*/
				2144	xfs_icsb_disable_counter(mp, field);
				2145	}
				2146
				2147	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
				2148
				2149	if (locked)
				2150	XFS_SB_UNLOCK(mp, s);
				2151	return ret;
				2152	}
				2153
				2154	STATIC int
				2155	xfs_icsb_modify_counters(
				2156	xfs_mount_t *mp,
				2157	xfs_sb_field_t field,
				2158	int delta,
				2159	int rsvd)
				2160	{
				2161	return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
				2162	}
				2163
				2164	/*
				2165	* Called when superblock is already locked
				2166	*/
				2167	STATIC int
				2168	xfs_icsb_modify_counters_locked(
				2169	xfs_mount_t *mp,
				2170	xfs_sb_field_t field,
				2171	int delta,
				2172	int rsvd)
				2173	{
				2174	return xfs_icsb_modify_counters_int(mp, field, delta,
				2175	rsvd, XFS_ICSB_SB_LOCKED);
				2176	}
				2177	#endif