Blame - fs/xfs/xfs_filestream.c - kernel/msm

blob: 6c87c8f304efb8f7d887c7c41a667e4c85105882 [file] [log] [blame]

David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	1	/*
				2	* Copyright (c) 2006-2007 Silicon Graphics, Inc.
				3	* All Rights Reserved.
				4	*
				5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
				7	* published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
				17	*/
				18	#include "xfs.h"
				19	#include "xfs_bmap_btree.h"
				20	#include "xfs_inum.h"
				21	#include "xfs_dir2.h"
				22	#include "xfs_dir2_sf.h"
				23	#include "xfs_attr_sf.h"
				24	#include "xfs_dinode.h"
				25	#include "xfs_inode.h"
				26	#include "xfs_ag.h"
				27	#include "xfs_dmapi.h"
				28	#include "xfs_log.h"
				29	#include "xfs_trans.h"
				30	#include "xfs_sb.h"
				31	#include "xfs_mount.h"
				32	#include "xfs_bmap.h"
				33	#include "xfs_alloc.h"
				34	#include "xfs_utils.h"
				35	#include "xfs_mru_cache.h"
				36	#include "xfs_filestream.h"
				37
				38	#ifdef XFS_FILESTREAMS_TRACE
				39
				40	ktrace_t *xfs_filestreams_trace_buf;
				41
				42	STATIC void
				43	xfs_filestreams_trace(
				44	xfs_mount_t mp, / mount point */
				45	int type, /* type of trace */
				46	const char func, / source function */
				47	int line, /* source line number */
				48	__psunsigned_t arg0,
				49	__psunsigned_t arg1,
				50	__psunsigned_t arg2,
				51	__psunsigned_t arg3,
				52	__psunsigned_t arg4,
				53	__psunsigned_t arg5)
				54	{
				55	ktrace_enter(xfs_filestreams_trace_buf,
				56	(void *)(__psint_t)(type \| (line << 16)),
				57	(void *)func,
				58	(void *)(__psunsigned_t)current_pid(),
				59	(void *)mp,
				60	(void *)(__psunsigned_t)arg0,
				61	(void *)(__psunsigned_t)arg1,
				62	(void *)(__psunsigned_t)arg2,
				63	(void *)(__psunsigned_t)arg3,
				64	(void *)(__psunsigned_t)arg4,
				65	(void *)(__psunsigned_t)arg5,
				66	NULL, NULL, NULL, NULL, NULL, NULL);
				67	}
				68
				69	#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0)
				70	#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0)
				71	#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0)
				72	#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0)
				73	#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0)
				74	#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0)
				75	#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
Harvey Harrison	34a622b	2008-04-10 12:19:21 +1000	[diff] [blame]	76	xfs_filestreams_trace(mp, t, __func__, __LINE__, \
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	77	(__psunsigned_t)a0, (__psunsigned_t)a1, \
				78	(__psunsigned_t)a2, (__psunsigned_t)a3, \
				79	(__psunsigned_t)a4, (__psunsigned_t)a5)
				80
				81	#define TRACE_AG_SCAN(mp, ag, ag2) \
				82	TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
				83	#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
				84	TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
				85	#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
				86	TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
				87	cnt, free, scan, flag)
				88	#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
				89	TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
				90	#define TRACE_FREE(mp, ip, pip, ag, cnt) \
				91	TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
				92	#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
				93	TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
				94	#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
				95	TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
				96	#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
				97	TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
				98	#define TRACE_ORPHAN(mp, ip, ag) \
				99	TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
				100
				101
				102	#else
				103	#define TRACE_AG_SCAN(mp, ag, ag2)
				104	#define TRACE_AG_PICK1(mp, max_ag, maxfree)
				105	#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
				106	#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
				107	#define TRACE_FREE(mp, ip, pip, ag, cnt)
				108	#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
				109	#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
				110	#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
				111	#define TRACE_ORPHAN(mp, ip, ag)
				112	#endif
				113
				114	static kmem_zone_t *item_zone;
				115
				116	/*
				117	* Structure for associating a file or a directory with an allocation group.
				118	* The parent directory pointer is only needed for files, but since there will
				119	* generally be vastly more files than directories in the cache, using the same
				120	* data structure simplifies the code with very little memory overhead.
				121	*/
				122	typedef struct fstrm_item
				123	{
				124	xfs_agnumber_t ag; /* AG currently in use for the file/directory. */
				125	xfs_inode_t ip; / inode self-pointer. */
				126	xfs_inode_t pip; / Parent directory inode pointer. */
				127	} fstrm_item_t;
				128
				129
				130	/*
				131	* Scan the AGs starting at startag looking for an AG that isn't in use and has
				132	* at least minlen blocks free.
				133	*/
				134	static int
				135	_xfs_filestream_pick_ag(
				136	xfs_mount_t *mp,
				137	xfs_agnumber_t startag,
				138	xfs_agnumber_t *agp,
				139	int flags,
				140	xfs_extlen_t minlen)
				141	{
				142	int err, trylock, nscan;
Dave Chinner	6cc8764	2009-03-16 08:29:46 +0100	[diff] [blame]	143	xfs_extlen_t longest, free, minfree, maxfree = 0;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	144	xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
				145	struct xfs_perag *pag;
				146
				147	/* 2% of an AG's blocks must be free for it to be chosen. */
				148	minfree = mp->m_sb.sb_agblocks / 50;
				149
				150	ag = startag;
				151	*agp = NULLAGNUMBER;
				152
				153	/* For the first pass, don't sleep trying to init the per-AG. */
				154	trylock = XFS_ALLOC_FLAG_TRYLOCK;
				155
				156	for (nscan = 0; 1; nscan++) {
				157
				158	TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag));
				159
				160	pag = mp->m_perag + ag;
				161
				162	if (!pag->pagf_init) {
				163	err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
				164	if (err && !trylock)
				165	return err;
				166	}
				167
				168	/* Might fail sometimes during the 1st pass with trylock set. */
				169	if (!pag->pagf_init)
				170	goto next_ag;
				171
				172	/* Keep track of the AG with the most free blocks. */
				173	if (pag->pagf_freeblks > maxfree) {
				174	maxfree = pag->pagf_freeblks;
				175	max_ag = ag;
				176	}
				177
				178	/*
				179	* The AG reference count does two things: it enforces mutual
				180	* exclusion when examining the suitability of an AG in this
				181	* loop, and it guards against two filestreams being established
				182	* in the same AG as each other.
				183	*/
				184	if (xfs_filestream_get_ag(mp, ag) > 1) {
				185	xfs_filestream_put_ag(mp, ag);
				186	goto next_ag;
				187	}
				188
Dave Chinner	6cc8764	2009-03-16 08:29:46 +0100	[diff] [blame]	189	longest = xfs_alloc_longest_free_extent(mp, pag);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	190	if (((minlen && longest >= minlen) \|\|
				191	(!minlen && pag->pagf_freeblks >= minfree)) &&
				192	(!pag->pagf_metadata \|\| !(flags & XFS_PICK_USERDATA) \|\|
				193	(flags & XFS_PICK_LOWSPACE))) {
				194
				195	/* Break out, retaining the reference on the AG. */
				196	free = pag->pagf_freeblks;
				197	*agp = ag;
				198	break;
				199	}
				200
				201	/* Drop the reference on this AG, it's not usable. */
				202	xfs_filestream_put_ag(mp, ag);
				203	next_ag:
				204	/* Move to the next AG, wrapping to AG 0 if necessary. */
				205	if (++ag >= mp->m_sb.sb_agcount)
				206	ag = 0;
				207
				208	/* If a full pass of the AGs hasn't been done yet, continue. */
				209	if (ag != startag)
				210	continue;
				211
				212	/* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
				213	if (trylock != 0) {
				214	trylock = 0;
				215	continue;
				216	}
				217
				218	/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
				219	if (!(flags & XFS_PICK_LOWSPACE)) {
				220	flags \|= XFS_PICK_LOWSPACE;
				221	continue;
				222	}
				223
				224	/*
				225	* Take the AG with the most free space, regardless of whether
				226	* it's already in use by another filestream.
				227	*/
				228	if (max_ag != NULLAGNUMBER) {
				229	xfs_filestream_get_ag(mp, max_ag);
				230	TRACE_AG_PICK1(mp, max_ag, maxfree);
				231	free = maxfree;
				232	*agp = max_ag;
				233	break;
				234	}
				235
				236	/* take AG 0 if none matched */
				237	TRACE_AG_PICK1(mp, max_ag, maxfree);
				238	*agp = 0;
				239	return 0;
				240	}
				241
				242	TRACE_AG_PICK2(mp, startag, agp, xfs_filestream_peek_ag(mp, agp),
				243	free, nscan, flags);
				244
				245	return 0;
				246	}
				247
				248	/*
				249	* Set the allocation group number for a file or a directory, updating inode
				250	* references and per-AG references as appropriate. Must be called with the
				251	* m_peraglock held in read mode.
				252	*/
				253	static int
				254	_xfs_filestream_update_ag(
				255	xfs_inode_t *ip,
				256	xfs_inode_t *pip,
				257	xfs_agnumber_t ag)
				258	{
				259	int err = 0;
				260	xfs_mount_t *mp;
				261	xfs_mru_cache_t *cache;
				262	fstrm_item_t *item;
				263	xfs_agnumber_t old_ag;
				264	xfs_inode_t *old_pip;
				265
				266	/*
				267	* Either ip is a regular file and pip is a directory, or ip is a
				268	* directory and pip is NULL.
				269	*/
				270	ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
				271	(pip->i_d.di_mode & S_IFDIR)) \|\|
				272	((ip->i_d.di_mode & S_IFDIR) && !pip)));
				273
				274	mp = ip->i_mount;
				275	cache = mp->m_filestream;
				276
				277	item = xfs_mru_cache_lookup(cache, ip->i_ino);
				278	if (item) {
				279	ASSERT(item->ip == ip);
				280	old_ag = item->ag;
				281	item->ag = ag;
				282	old_pip = item->pip;
				283	item->pip = pip;
				284	xfs_mru_cache_done(cache);
				285
				286	/*
				287	* If the AG has changed, drop the old ref and take a new one,
				288	* effectively transferring the reference from old to new AG.
				289	*/
				290	if (ag != old_ag) {
				291	xfs_filestream_put_ag(mp, old_ag);
				292	xfs_filestream_get_ag(mp, ag);
				293	}
				294
				295	/*
				296	* If ip is a file and its pip has changed, drop the old ref and
				297	* take a new one.
				298	*/
				299	if (pip && pip != old_pip) {
				300	IRELE(old_pip);
				301	IHOLD(pip);
				302	}
				303
				304	TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
				305	ag, xfs_filestream_peek_ag(mp, ag));
				306	return 0;
				307	}
				308
				309	item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
				310	if (!item)
				311	return ENOMEM;
				312
				313	item->ag = ag;
				314	item->ip = ip;
				315	item->pip = pip;
				316
				317	err = xfs_mru_cache_insert(cache, ip->i_ino, item);
				318	if (err) {
				319	kmem_zone_free(item_zone, item);
				320	return err;
				321	}
				322
				323	/* Take a reference on the AG. */
				324	xfs_filestream_get_ag(mp, ag);
				325
				326	/*
				327	* Take a reference on the inode itself regardless of whether it's a
				328	* regular file or a directory.
				329	*/
				330	IHOLD(ip);
				331
				332	/*
				333	* In the case of a regular file, take a reference on the parent inode
				334	* as well to ensure it remains in-core.
				335	*/
				336	if (pip)
				337	IHOLD(pip);
				338
				339	TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
				340	ag, xfs_filestream_peek_ag(mp, ag));
				341
				342	return 0;
				343	}
				344
				345	/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
David Chinner	a8272ce	2007-11-23 16:28:09 +1100	[diff] [blame]	346	STATIC void
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	347	xfs_fstrm_free_func(
Eric Sandeen	bcc7b44	2007-08-30 17:21:38 +1000	[diff] [blame]	348	unsigned long ino,
				349	void *data)
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	350	{
Eric Sandeen	bcc7b44	2007-08-30 17:21:38 +1000	[diff] [blame]	351	fstrm_item_t item = (fstrm_item_t )data;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	352	xfs_inode_t *ip = item->ip;
				353	int ref;
				354
				355	ASSERT(ip->i_ino == ino);
				356
				357	xfs_iflags_clear(ip, XFS_IFILESTREAM);
				358
				359	/* Drop the reference taken on the AG when the item was added. */
				360	ref = xfs_filestream_put_ag(ip->i_mount, item->ag);
				361
				362	ASSERT(ref >= 0);
				363	TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
				364	xfs_filestream_peek_ag(ip->i_mount, item->ag));
				365
				366	/*
				367	* _xfs_filestream_update_ag() always takes a reference on the inode
				368	* itself, whether it's a file or a directory. Release it here.
				369	* This can result in the inode being freed and so we must
				370	* not hold any inode locks when freeing filesstreams objects
				371	* otherwise we can deadlock here.
				372	*/
				373	IRELE(ip);
				374
				375	/*
				376	* In the case of a regular file, _xfs_filestream_update_ag() also
				377	* takes a ref on the parent inode to keep it in-core. Release that
				378	* too.
				379	*/
				380	if (item->pip)
				381	IRELE(item->pip);
				382
				383	/* Finally, free the memory allocated for the item. */
				384	kmem_zone_free(item_zone, item);
				385	}
				386
				387	/*
				388	* xfs_filestream_init() is called at xfs initialisation time to set up the
				389	* memory zone that will be used for filestream data structure allocation.
				390	*/
				391	int
				392	xfs_filestream_init(void)
				393	{
				394	item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
Christoph Hellwig	9f8868f	2008-07-18 17:11:46 +1000	[diff] [blame]	395	if (!item_zone)
				396	return -ENOMEM;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	397	#ifdef XFS_FILESTREAMS_TRACE
Lachlan McIlroy	5695ef4	2008-08-13 16:51:57 +1000	[diff] [blame]	398	xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	399	#endif
Christoph Hellwig	9f8868f	2008-07-18 17:11:46 +1000	[diff] [blame]	400	return 0;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	401	}
				402
				403	/*
				404	* xfs_filestream_uninit() is called at xfs termination time to destroy the
				405	* memory zone that was used for filestream data structure allocation.
				406	*/
				407	void
				408	xfs_filestream_uninit(void)
				409	{
				410	#ifdef XFS_FILESTREAMS_TRACE
				411	ktrace_free(xfs_filestreams_trace_buf);
				412	#endif
				413	kmem_zone_destroy(item_zone);
				414	}
				415
				416	/*
				417	* xfs_filestream_mount() is called when a file system is mounted with the
				418	* filestream option. It is responsible for allocating the data structures
				419	* needed to track the new file system's file streams.
				420	*/
				421	int
				422	xfs_filestream_mount(
				423	xfs_mount_t *mp)
				424	{
				425	int err;
				426	unsigned int lifetime, grp_count;
				427
				428	/*
				429	* The filestream timer tunable is currently fixed within the range of
				430	* one second to four minutes, with five seconds being the default. The
				431	* group count is somewhat arbitrary, but it'd be nice to adhere to the
				432	* timer tunable to within about 10 percent. This requires at least 10
				433	* groups.
				434	*/
				435	lifetime = xfs_fstrm_centisecs * 10;
				436	grp_count = 10;
				437
				438	err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
Eric Sandeen	bcc7b44	2007-08-30 17:21:38 +1000	[diff] [blame]	439	xfs_fstrm_free_func);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	440
				441	return err;
				442	}
				443
				444	/*
				445	* xfs_filestream_unmount() is called when a file system that was mounted with
				446	* the filestream option is unmounted. It drains the data structures created
				447	* to track the file system's file streams and frees all the memory that was
				448	* allocated.
				449	*/
				450	void
				451	xfs_filestream_unmount(
				452	xfs_mount_t *mp)
				453	{
				454	xfs_mru_cache_destroy(mp->m_filestream);
				455	}
				456
				457	/*
				458	* If the mount point's m_perag array is going to be reallocated, all
				459	* outstanding cache entries must be flushed to avoid accessing reference count
				460	* addresses that have been freed. The call to xfs_filestream_flush() must be
				461	* made inside the block that holds the m_peraglock in write mode to do the
				462	* reallocation.
				463	*/
				464	void
				465	xfs_filestream_flush(
				466	xfs_mount_t *mp)
				467	{
David Chinner	65de556	2007-08-16 15:21:11 +1000	[diff] [blame]	468	xfs_mru_cache_flush(mp->m_filestream);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	469	}
				470
				471	/*
				472	* Return the AG of the filestream the file or directory belongs to, or
				473	* NULLAGNUMBER otherwise.
				474	*/
				475	xfs_agnumber_t
				476	xfs_filestream_lookup_ag(
				477	xfs_inode_t *ip)
				478	{
				479	xfs_mru_cache_t *cache;
				480	fstrm_item_t *item;
				481	xfs_agnumber_t ag;
				482	int ref;
				483
				484	if (!(ip->i_d.di_mode & (S_IFREG \| S_IFDIR))) {
				485	ASSERT(0);
				486	return NULLAGNUMBER;
				487	}
				488
				489	cache = ip->i_mount->m_filestream;
				490	item = xfs_mru_cache_lookup(cache, ip->i_ino);
				491	if (!item) {
				492	TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
				493	return NULLAGNUMBER;
				494	}
				495
				496	ASSERT(ip == item->ip);
				497	ag = item->ag;
				498	ref = xfs_filestream_peek_ag(ip->i_mount, ag);
				499	xfs_mru_cache_done(cache);
				500
				501	TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref);
				502	return ag;
				503	}
				504
				505	/*
				506	* xfs_filestream_associate() should only be called to associate a regular file
				507	* with its parent directory. Calling it with a child directory isn't
				508	* appropriate because filestreams don't apply to entire directory hierarchies.
				509	* Creating a file in a child directory of an existing filestream directory
				510	* starts a new filestream with its own allocation group association.
				511	*
				512	* Returns < 0 on error, 0 if successful association occurred, > 0 if
				513	* we failed to get an association because of locking issues.
				514	*/
				515	int
				516	xfs_filestream_associate(
				517	xfs_inode_t *pip,
				518	xfs_inode_t *ip)
				519	{
				520	xfs_mount_t *mp;
				521	xfs_mru_cache_t *cache;
				522	fstrm_item_t *item;
				523	xfs_agnumber_t ag, rotorstep, startag;
				524	int err = 0;
				525
				526	ASSERT(pip->i_d.di_mode & S_IFDIR);
				527	ASSERT(ip->i_d.di_mode & S_IFREG);
				528	if (!(pip->i_d.di_mode & S_IFDIR) \|\| !(ip->i_d.di_mode & S_IFREG))
				529	return -EINVAL;
				530
				531	mp = pip->i_mount;
				532	cache = mp->m_filestream;
				533	down_read(&mp->m_peraglock);
				534
				535	/*
				536	* We have a problem, Houston.
				537	*
				538	* Taking the iolock here violates inode locking order - we already
				539	* hold the ilock. Hence if we block getting this lock we may never
				540	* wake. Unfortunately, that means if we can't get the lock, we're
				541	* screwed in terms of getting a stream association - we can't spin
				542	* waiting for the lock because someone else is waiting on the lock we
				543	* hold and we cannot drop that as we are in a transaction here.
				544	*
				545	* Lucky for us, this inversion is rarely a problem because it's a
				546	* directory inode that we are trying to lock here and that means the
				547	* only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
				548	* used. i.e. freeze, remount-ro, quotasync or unmount.
				549	*
				550	* So, if we can't get the iolock without sleeping then just give up
				551	*/
				552	if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) {
				553	up_read(&mp->m_peraglock);
				554	return 1;
				555	}
				556
				557	/* If the parent directory is already in the cache, use its AG. */
				558	item = xfs_mru_cache_lookup(cache, pip->i_ino);
				559	if (item) {
				560	ASSERT(item->ip == pip);
				561	ag = item->ag;
				562	xfs_mru_cache_done(cache);
				563
				564	TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
				565	err = _xfs_filestream_update_ag(ip, pip, ag);
				566
				567	goto exit;
				568	}
				569
				570	/*
				571	* Set the starting AG using the rotor for inode32, otherwise
				572	* use the directory inode's AG.
				573	*/
				574	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
				575	rotorstep = xfs_rotorstep;
				576	startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
				577	mp->m_agfrotor = (mp->m_agfrotor + 1) %
				578	(mp->m_sb.sb_agcount * rotorstep);
				579	} else
				580	startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
				581
				582	/* Pick a new AG for the parent inode starting at startag. */
				583	err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
				584	if (err \|\| ag == NULLAGNUMBER)
				585	goto exit_did_pick;
				586
				587	/* Associate the parent inode with the AG. */
				588	err = _xfs_filestream_update_ag(pip, NULL, ag);
				589	if (err)
				590	goto exit_did_pick;
				591
				592	/* Associate the file inode with the AG. */
				593	err = _xfs_filestream_update_ag(ip, pip, ag);
				594	if (err)
				595	goto exit_did_pick;
				596
				597	TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
				598
				599	exit_did_pick:
				600	/*
				601	* If _xfs_filestream_pick_ag() returned a valid AG, remove the
				602	* reference it took on it, since the file and directory will have taken
				603	* their own now if they were successfully cached.
				604	*/
				605	if (ag != NULLAGNUMBER)
				606	xfs_filestream_put_ag(mp, ag);
				607
				608	exit:
				609	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
				610	up_read(&mp->m_peraglock);
				611	return -err;
				612	}
				613
				614	/*
				615	* Pick a new allocation group for the current file and its file stream. This
				616	* function is called by xfs_bmap_filestreams() with the mount point's per-ag
				617	* lock held.
				618	*/
				619	int
				620	xfs_filestream_new_ag(
				621	xfs_bmalloca_t *ap,
				622	xfs_agnumber_t *agp)
				623	{
				624	int flags, err;
				625	xfs_inode_t ip, pip = NULL;
				626	xfs_mount_t *mp;
				627	xfs_mru_cache_t *cache;
				628	xfs_extlen_t minlen;
				629	fstrm_item_t dir, file;
				630	xfs_agnumber_t ag = NULLAGNUMBER;
				631
				632	ip = ap->ip;
				633	mp = ip->i_mount;
				634	cache = mp->m_filestream;
				635	minlen = ap->alen;
				636	*agp = NULLAGNUMBER;
				637
				638	/*
				639	* Look for the file in the cache, removing it if it's found. Doing
				640	* this allows it to be held across the dir lookup that follows.
				641	*/
				642	file = xfs_mru_cache_remove(cache, ip->i_ino);
				643	if (file) {
				644	ASSERT(ip == file->ip);
				645
				646	/* Save the file's parent inode and old AG number for later. */
				647	pip = file->pip;
				648	ag = file->ag;
				649
				650	/* Look for the file's directory in the cache. */
				651	dir = xfs_mru_cache_lookup(cache, pip->i_ino);
				652	if (dir) {
				653	ASSERT(pip == dir->ip);
				654
				655	/*
				656	* If the directory has already moved on to a new AG,
				657	* use that AG as the new AG for the file. Don't
				658	* forget to twiddle the AG refcounts to match the
				659	* movement.
				660	*/
				661	if (dir->ag != file->ag) {
				662	xfs_filestream_put_ag(mp, file->ag);
				663	xfs_filestream_get_ag(mp, dir->ag);
				664	*agp = file->ag = dir->ag;
				665	}
				666
				667	xfs_mru_cache_done(cache);
				668	}
				669
				670	/*
				671	* Put the file back in the cache. If this fails, the free
				672	* function needs to be called to tidy up in the same way as if
				673	* the item had simply expired from the cache.
				674	*/
				675	err = xfs_mru_cache_insert(cache, ip->i_ino, file);
				676	if (err) {
				677	xfs_fstrm_free_func(ip->i_ino, file);
				678	return err;
				679	}
				680
				681	/*
				682	* If the file's AG was moved to the directory's new AG, there's
				683	* nothing more to be done.
				684	*/
				685	if (*agp != NULLAGNUMBER) {
				686	TRACE_MOVEAG(mp, ip, pip,
				687	ag, xfs_filestream_peek_ag(mp, ag),
				688	agp, xfs_filestream_peek_ag(mp, agp));
				689	return 0;
				690	}
				691	}
				692
				693	/*
				694	* If the file's parent directory is known, take its iolock in exclusive
				695	* mode to prevent two sibling files from racing each other to migrate
				696	* themselves and their parent to different AGs.
				697	*/
				698	if (pip)
				699	xfs_ilock(pip, XFS_IOLOCK_EXCL);
				700
				701	/*
				702	* A new AG needs to be found for the file. If the file's parent
				703	* directory is also known, it will be moved to the new AG as well to
				704	* ensure that files created inside it in future use the new AG.
				705	*/
				706	ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
				707	flags = (ap->userdata ? XFS_PICK_USERDATA : 0) \|
				708	(ap->low ? XFS_PICK_LOWSPACE : 0);
				709
				710	err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
				711	if (err \|\| *agp == NULLAGNUMBER)
				712	goto exit;
				713
				714	/*
				715	* If the file wasn't found in the file cache, then its parent directory
				716	* inode isn't known. For this to have happened, the file must either
				717	* be pre-existing, or it was created long enough ago that its cache
				718	* entry has expired. This isn't the sort of usage that the filestreams
				719	* allocator is trying to optimise, so there's no point trying to track
				720	* its new AG somehow in the filestream data structures.
				721	*/
				722	if (!pip) {
				723	TRACE_ORPHAN(mp, ip, *agp);
				724	goto exit;
				725	}
				726
				727	/* Associate the parent inode with the AG. */
				728	err = _xfs_filestream_update_ag(pip, NULL, *agp);
				729	if (err)
				730	goto exit;
				731
				732	/* Associate the file inode with the AG. */
				733	err = _xfs_filestream_update_ag(ip, pip, *agp);
				734	if (err)
				735	goto exit;
				736
				737	TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
				738	agp, xfs_filestream_peek_ag(mp, agp));
				739
				740	exit:
				741	/*
				742	* If _xfs_filestream_pick_ag() returned a valid AG, remove the
				743	* reference it took on it, since the file and directory will have taken
				744	* their own now if they were successfully cached.
				745	*/
				746	if (*agp != NULLAGNUMBER)
				747	xfs_filestream_put_ag(mp, *agp);
				748	else
				749	*agp = 0;
				750
				751	if (pip)
				752	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
				753
				754	return err;
				755	}
				756
				757	/*
				758	* Remove an association between an inode and a filestream object.
				759	* Typically this is done on last close of an unlinked file.
				760	*/
				761	void
				762	xfs_filestream_deassociate(
				763	xfs_inode_t *ip)
				764	{
				765	xfs_mru_cache_t *cache = ip->i_mount->m_filestream;
				766
				767	xfs_mru_cache_delete(cache, ip->i_ino);
				768	}