Blame - fs/xfs/linux-2.6/xfs_sync.c - kernel/msm-4.9

blob: ee1648b179f71886b7659f9f28dfaffdabdbfb2a [file] [log] [blame]

David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	1	/*
				2	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				3	* All Rights Reserved.
				4	*
				5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
				7	* published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
				17	*/
				18	#include "xfs.h"
				19	#include "xfs_fs.h"
				20	#include "xfs_types.h"
				21	#include "xfs_bit.h"
				22	#include "xfs_log.h"
				23	#include "xfs_inum.h"
				24	#include "xfs_trans.h"
				25	#include "xfs_sb.h"
				26	#include "xfs_ag.h"
				27	#include "xfs_dir2.h"
				28	#include "xfs_dmapi.h"
				29	#include "xfs_mount.h"
				30	#include "xfs_bmap_btree.h"
				31	#include "xfs_alloc_btree.h"
				32	#include "xfs_ialloc_btree.h"
				33	#include "xfs_btree.h"
				34	#include "xfs_dir2_sf.h"
				35	#include "xfs_attr_sf.h"
				36	#include "xfs_inode.h"
				37	#include "xfs_dinode.h"
				38	#include "xfs_error.h"
				39	#include "xfs_mru_cache.h"
				40	#include "xfs_filestream.h"
				41	#include "xfs_vnodeops.h"
				42	#include "xfs_utils.h"
				43	#include "xfs_buf_item.h"
				44	#include "xfs_inode_item.h"
				45	#include "xfs_rw.h"
				46
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	47	#include <linux/kthread.h>
				48	#include <linux/freezer.h>
				49
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	50	/*
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	51	* Sync all the inodes in the given AG according to the
				52	* direction given by the flags.
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	53	*/
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	54	STATIC int
				55	xfs_sync_inodes_ag(
				56	xfs_mount_t *mp,
				57	int ag,
David Chinner	2030b5a	2008-10-30 17:15:12 +1100	[diff] [blame]	58	int flags)
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	59	{
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	60	xfs_perag_t *pag = &mp->m_perag[ag];
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	61	int nr_found;
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame^]	62	uint32_t first_index = 0;
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	63	int error = 0;
				64	int last_error = 0;
				65	int fflag = XFS_B_ASYNC;
				66	int lock_flags = XFS_ILOCK_SHARED;
				67
				68	if (flags & SYNC_DELWRI)
				69	fflag = XFS_B_DELWRI;
				70	if (flags & SYNC_WAIT)
				71	fflag = 0; /* synchronous overrides all */
				72
David Chinner	cb56a4b	2008-10-30 17:16:00 +1100	[diff] [blame]	73	if (flags & SYNC_DELWRI) {
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	74	/*
				75	* We need the I/O lock if we're going to call any of
				76	* the flush/inval routines.
				77	*/
				78	lock_flags \|= XFS_IOLOCK_SHARED;
				79	}
				80
				81	do {
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	82	struct inode *inode;
				83	boolean_t inode_refed;
				84	xfs_inode_t *ip = NULL;
				85
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	86	/*
				87	* use a gang lookup to find the next inode in the tree
				88	* as the tree is sparse and a gang lookup walks to find
				89	* the number of objects requested.
				90	*/
				91	read_lock(&pag->pag_ici_lock);
				92	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
				93	(void**)&ip, first_index, 1);
				94
				95	if (!nr_found) {
				96	read_unlock(&pag->pag_ici_lock);
				97	break;
				98	}
				99
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame^]	100	/*
				101	* Update the index for the next lookup. Catch overflows
				102	* into the next AG range which can occur if we have inodes
				103	* in the last block of the AG and we are currently
				104	* pointing to the last inode.
				105	*/
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	106	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame^]	107	if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
				108	read_unlock(&pag->pag_ici_lock);
				109	break;
				110	}
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	111
				112	/*
				113	* skip inodes in reclaim. Let xfs_syncsub do that for
				114	* us so we don't need to worry.
				115	*/
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	116	if (xfs_iflags_test(ip, (XFS_IRECLAIM\|XFS_IRECLAIMABLE))) {
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	117	read_unlock(&pag->pag_ici_lock);
				118	continue;
				119	}
				120
				121	/* bad inodes are dealt with elsewhere */
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	122	inode = VFS_I(ip);
				123	if (is_bad_inode(inode)) {
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	124	read_unlock(&pag->pag_ici_lock);
				125	continue;
				126	}
				127
				128	/* nothing to sync during shutdown */
David Chinner	cb56a4b	2008-10-30 17:16:00 +1100	[diff] [blame]	129	if (XFS_FORCED_SHUTDOWN(mp)) {
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	130	read_unlock(&pag->pag_ici_lock);
				131	return 0;
				132	}
				133
				134	/*
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	135	* If we can't get a reference on the VFS_I, the inode must be
				136	* in reclaim. If we can get the inode lock without blocking,
				137	* it is safe to flush the inode because we hold the tree lock
				138	* and xfs_iextract will block right now. Hence if we lock the
				139	* inode while holding the tree lock, xfs_ireclaim() is
				140	* guaranteed to block on the inode lock we now hold and hence
				141	* it is safe to reference the inode until we drop the inode
				142	* locks completely.
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	143	*/
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	144	inode_refed = B_FALSE;
				145	if (igrab(inode)) {
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	146	read_unlock(&pag->pag_ici_lock);
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	147	xfs_ilock(ip, lock_flags);
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	148	inode_refed = B_TRUE;
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	149	} else {
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	150	if (!xfs_ilock_nowait(ip, lock_flags)) {
				151	/* leave it to reclaim */
				152	read_unlock(&pag->pag_ici_lock);
				153	continue;
				154	}
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	155	read_unlock(&pag->pag_ici_lock);
				156	}
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	157
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	158	/*
				159	* If we have to flush data or wait for I/O completion
				160	* we need to drop the ilock that we currently hold.
				161	* If we need to drop the lock, insert a marker if we
				162	* have not already done so.
				163	*/
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	164	if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	165	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				166	error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
				167	if (flags & SYNC_IOWAIT)
				168	vn_iowait(ip);
				169	xfs_ilock(ip, XFS_ILOCK_SHARED);
				170	}
				171
				172	if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
				173	if (flags & SYNC_WAIT) {
				174	xfs_iflock(ip);
				175	if (!xfs_inode_clean(ip))
				176	error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
				177	else
				178	xfs_ifunlock(ip);
				179	} else if (xfs_iflock_nowait(ip)) {
				180	if (!xfs_inode_clean(ip))
				181	error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
				182	else
				183	xfs_ifunlock(ip);
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	184	}
				185	}
				186
				187	if (lock_flags)
				188	xfs_iunlock(ip, lock_flags);
				189
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	190	if (inode_refed) {
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	191	IRELE(ip);
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	192	}
				193
				194	if (error)
				195	last_error = error;
				196	/*
				197	* bail out if the filesystem is corrupted.
				198	*/
				199	if (error == EFSCORRUPTED)
				200	return XFS_ERROR(error);
				201
				202	} while (nr_found);
				203
				204	return last_error;
				205	}
				206
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	207	int
				208	xfs_sync_inodes(
				209	xfs_mount_t *mp,
David Chinner	2030b5a	2008-10-30 17:15:12 +1100	[diff] [blame]	210	int flags)
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	211	{
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	212	int error;
				213	int last_error;
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	214	int i;
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	215	int lflags = XFS_LOG_FORCE;
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	216
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	217	if (mp->m_flags & XFS_MOUNT_RDONLY)
				218	return 0;
				219	error = 0;
				220	last_error = 0;
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	221
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	222	if (flags & SYNC_WAIT)
				223	lflags \|= XFS_LOG_SYNC;
				224
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	225	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
				226	if (!mp->m_perag[i].pag_ici_init)
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	227	continue;
David Chinner	2030b5a	2008-10-30 17:15:12 +1100	[diff] [blame]	228	error = xfs_sync_inodes_ag(mp, i, flags);
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	229	if (error)
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	230	last_error = error;
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	231	if (error == EFSCORRUPTED)
				232	break;
				233	}
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	234	if (flags & SYNC_DELWRI)
				235	xfs_log_force(mp, 0, lflags);
				236
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	237	return XFS_ERROR(last_error);
				238	}
				239
Christoph Hellwig	2af75df	2008-10-30 17:14:53 +1100	[diff] [blame]	240	STATIC int
				241	xfs_commit_dummy_trans(
				242	struct xfs_mount *mp,
				243	uint log_flags)
				244	{
				245	struct xfs_inode *ip = mp->m_rootip;
				246	struct xfs_trans *tp;
				247	int error;
				248
				249	/*
				250	* Put a dummy transaction in the log to tell recovery
				251	* that all others are OK.
				252	*/
				253	tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
				254	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
				255	if (error) {
				256	xfs_trans_cancel(tp, 0);
				257	return error;
				258	}
				259
				260	xfs_ilock(ip, XFS_ILOCK_EXCL);
				261
				262	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				263	xfs_trans_ihold(tp, ip);
				264	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				265	/* XXX(hch): ignoring the error here.. */
				266	error = xfs_trans_commit(tp, 0);
				267
				268	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				269
				270	xfs_log_force(mp, 0, log_flags);
				271	return 0;
				272	}
				273
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	274	int
Christoph Hellwig	2af75df	2008-10-30 17:14:53 +1100	[diff] [blame]	275	xfs_sync_fsdata(
				276	struct xfs_mount *mp,
				277	int flags)
				278	{
				279	struct xfs_buf *bp;
				280	struct xfs_buf_log_item *bip;
				281	int error = 0;
				282
				283	/*
				284	* If this is xfssyncd() then only sync the superblock if we can
				285	* lock it without sleeping and it is not pinned.
				286	*/
				287	if (flags & SYNC_BDFLUSH) {
				288	ASSERT(!(flags & SYNC_WAIT));
				289
				290	bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
				291	if (!bp)
				292	goto out;
				293
				294	bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
				295	if (!bip \|\| !xfs_buf_item_dirty(bip) \|\| XFS_BUF_ISPINNED(bp))
				296	goto out_brelse;
				297	} else {
				298	bp = xfs_getsb(mp, 0);
				299
				300	/*
				301	* If the buffer is pinned then push on the log so we won't
				302	* get stuck waiting in the write for someone, maybe
				303	* ourselves, to flush the log.
				304	*
				305	* Even though we just pushed the log above, we did not have
				306	* the superblock buffer locked at that point so it can
				307	* become pinned in between there and here.
				308	*/
				309	if (XFS_BUF_ISPINNED(bp))
				310	xfs_log_force(mp, 0, XFS_LOG_FORCE);
				311	}
				312
				313
				314	if (flags & SYNC_WAIT)
				315	XFS_BUF_UNASYNC(bp);
				316	else
				317	XFS_BUF_ASYNC(bp);
				318
				319	return xfs_bwrite(mp, bp);
				320
				321	out_brelse:
				322	xfs_buf_relse(bp);
				323	out:
				324	return error;
				325	}
				326
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	327	/*
David Chinner	a4e4c4f	2008-10-30 17:16:11 +1100	[diff] [blame]	328	* When remounting a filesystem read-only or freezing the filesystem, we have
				329	* two phases to execute. This first phase is syncing the data before we
				330	* quiesce the filesystem, and the second is flushing all the inodes out after
				331	* we've waited for all the transactions created by the first phase to
				332	* complete. The second phase ensures that the inodes are written to their
				333	* location on disk rather than just existing in transactions in the log. This
				334	* means after a quiesce there is no log replay required to write the inodes to
				335	* disk (this is the main difference between a sync and a quiesce).
				336	*/
				337	/*
				338	* First stage of freeze - no writers will make progress now we are here,
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	339	* so we flush delwri and delalloc buffers here, then wait for all I/O to
				340	* complete. Data is frozen at that point. Metadata is not frozen,
David Chinner	a4e4c4f	2008-10-30 17:16:11 +1100	[diff] [blame]	341	* transactions can still occur here so don't bother flushing the buftarg
				342	* because it'll just get dirty again.
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	343	*/
				344	int
				345	xfs_quiesce_data(
				346	struct xfs_mount *mp)
				347	{
				348	int error;
				349
				350	/* push non-blocking */
				351	xfs_sync_inodes(mp, SYNC_DELWRI\|SYNC_BDFLUSH);
				352	XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
				353	xfs_filestream_flush(mp);
				354
				355	/* push and block */
				356	xfs_sync_inodes(mp, SYNC_DELWRI\|SYNC_WAIT\|SYNC_IOWAIT);
				357	XFS_QM_DQSYNC(mp, SYNC_WAIT);
				358
David Chinner	a4e4c4f	2008-10-30 17:16:11 +1100	[diff] [blame]	359	/* write superblock and hoover up shutdown errors */
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	360	error = xfs_sync_fsdata(mp, 0);
				361
David Chinner	a4e4c4f	2008-10-30 17:16:11 +1100	[diff] [blame]	362	/* flush data-only devices */
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	363	if (mp->m_rtdev_targp)
				364	XFS_bflush(mp->m_rtdev_targp);
				365
				366	return error;
				367	}
				368
David Chinner	76bf105	2008-10-30 17:16:21 +1100	[diff] [blame]	369	STATIC void
				370	xfs_quiesce_fs(
				371	struct xfs_mount *mp)
				372	{
				373	int count = 0, pincount;
				374
				375	xfs_flush_buftarg(mp->m_ddev_targp, 0);
David Chinner	1dc3318	2008-10-30 17:37:15 +1100	[diff] [blame]	376	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
David Chinner	76bf105	2008-10-30 17:16:21 +1100	[diff] [blame]	377
				378	/*
				379	* This loop must run at least twice. The first instance of the loop
				380	* will flush most meta data but that will generate more meta data
				381	* (typically directory updates). Which then must be flushed and
				382	* logged before we can write the unmount record.
				383	*/
				384	do {
				385	xfs_sync_inodes(mp, SYNC_ATTR\|SYNC_WAIT);
				386	pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
				387	if (!pincount) {
				388	delay(50);
				389	count++;
				390	}
				391	} while (count < 2);
				392	}
				393
				394	/*
				395	* Second stage of a quiesce. The data is already synced, now we have to take
				396	* care of the metadata. New transactions are already blocked, so we need to
				397	* wait for any remaining transactions to drain out before proceding.
				398	*/
				399	void
				400	xfs_quiesce_attr(
				401	struct xfs_mount *mp)
				402	{
				403	int error = 0;
				404
				405	/* wait for all modifications to complete */
				406	while (atomic_read(&mp->m_active_trans) > 0)
				407	delay(100);
				408
				409	/* flush inodes and push all remaining buffers out to disk */
				410	xfs_quiesce_fs(mp);
				411
				412	ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
				413
				414	/* Push the superblock and write an unmount record */
				415	error = xfs_log_sbcount(mp, 1);
				416	if (error)
				417	xfs_fs_cmn_err(CE_WARN, mp,
				418	"xfs_attr_quiesce: failed to log sb changes. "
				419	"Frozen image may not be consistent.");
				420	xfs_log_unmount_write(mp);
				421	xfs_unmountfs_writesb(mp);
				422	}
				423
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	424	/*
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	425	* Enqueue a work item to be picked up by the vfs xfssyncd thread.
				426	* Doing this has two advantages:
				427	* - It saves on stack space, which is tight in certain situations
				428	* - It can be used (with care) as a mechanism to avoid deadlocks.
				429	* Flushing while allocating in a full filesystem requires both.
				430	*/
				431	STATIC void
				432	xfs_syncd_queue_work(
				433	struct xfs_mount *mp,
				434	void *data,
				435	void (syncer)(struct xfs_mount , void *))
				436	{
				437	struct bhv_vfs_sync_work *work;
				438
				439	work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
				440	INIT_LIST_HEAD(&work->w_list);
				441	work->w_syncer = syncer;
				442	work->w_data = data;
				443	work->w_mount = mp;
				444	spin_lock(&mp->m_sync_lock);
				445	list_add_tail(&work->w_list, &mp->m_sync_list);
				446	spin_unlock(&mp->m_sync_lock);
				447	wake_up_process(mp->m_sync_task);
				448	}
				449
				450	/*
				451	* Flush delayed allocate data, attempting to free up reserved space
				452	* from existing allocations. At this point a new allocation attempt
				453	* has failed with ENOSPC and we are in the process of scratching our
				454	* heads, looking about for more room...
				455	*/
				456	STATIC void
				457	xfs_flush_inode_work(
				458	struct xfs_mount *mp,
				459	void *arg)
				460	{
				461	struct inode *inode = arg;
				462	filemap_flush(inode->i_mapping);
				463	iput(inode);
				464	}
				465
				466	void
				467	xfs_flush_inode(
				468	xfs_inode_t *ip)
				469	{
				470	struct inode *inode = VFS_I(ip);
				471
				472	igrab(inode);
				473	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
				474	delay(msecs_to_jiffies(500));
				475	}
				476
				477	/*
				478	* This is the "bigger hammer" version of xfs_flush_inode_work...
				479	* (IOW, "If at first you don't succeed, use a Bigger Hammer").
				480	*/
				481	STATIC void
				482	xfs_flush_device_work(
				483	struct xfs_mount *mp,
				484	void *arg)
				485	{
				486	struct inode *inode = arg;
				487	sync_blockdev(mp->m_super->s_bdev);
				488	iput(inode);
				489	}
				490
				491	void
				492	xfs_flush_device(
				493	xfs_inode_t *ip)
				494	{
				495	struct inode *inode = VFS_I(ip);
				496
				497	igrab(inode);
				498	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
				499	delay(msecs_to_jiffies(500));
				500	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE\|XFS_LOG_SYNC);
				501	}
				502
David Chinner	aacaa88	2008-10-30 17:15:29 +1100	[diff] [blame]	503	/*
				504	* Every sync period we need to unpin all items, reclaim inodes, sync
				505	* quota and write out the superblock. We might need to cover the log
				506	* to indicate it is idle.
				507	*/
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	508	STATIC void
				509	xfs_sync_worker(
				510	struct xfs_mount *mp,
				511	void *unused)
				512	{
				513	int error;
				514
David Chinner	aacaa88	2008-10-30 17:15:29 +1100	[diff] [blame]	515	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
				516	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
David Chinner	1dc3318	2008-10-30 17:37:15 +1100	[diff] [blame]	517	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
David Chinner	aacaa88	2008-10-30 17:15:29 +1100	[diff] [blame]	518	/* dgc: errors ignored here */
				519	error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
				520	error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
				521	if (xfs_log_need_covered(mp))
				522	error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
				523	}
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	524	mp->m_sync_seq++;
				525	wake_up(&mp->m_wait_single_sync_task);
				526	}
				527
				528	STATIC int
				529	xfssyncd(
				530	void *arg)
				531	{
				532	struct xfs_mount *mp = arg;
				533	long timeleft;
				534	bhv_vfs_sync_work_t work, n;
				535	LIST_HEAD (tmp);
				536
				537	set_freezable();
				538	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
				539	for (;;) {
				540	timeleft = schedule_timeout_interruptible(timeleft);
				541	/* swsusp */
				542	try_to_freeze();
				543	if (kthread_should_stop() && list_empty(&mp->m_sync_list))
				544	break;
				545
				546	spin_lock(&mp->m_sync_lock);
				547	/*
				548	* We can get woken by laptop mode, to do a sync -
				549	* that's the (only!) case where the list would be
				550	* empty with time remaining.
				551	*/
				552	if (!timeleft \|\| list_empty(&mp->m_sync_list)) {
				553	if (!timeleft)
				554	timeleft = xfs_syncd_centisecs *
				555	msecs_to_jiffies(10);
				556	INIT_LIST_HEAD(&mp->m_sync_work.w_list);
				557	list_add_tail(&mp->m_sync_work.w_list,
				558	&mp->m_sync_list);
				559	}
				560	list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
				561	list_move(&work->w_list, &tmp);
				562	spin_unlock(&mp->m_sync_lock);
				563
				564	list_for_each_entry_safe(work, n, &tmp, w_list) {
				565	(*work->w_syncer)(mp, work->w_data);
				566	list_del(&work->w_list);
				567	if (work == &mp->m_sync_work)
				568	continue;
				569	kmem_free(work);
				570	}
				571	}
				572
				573	return 0;
				574	}
				575
				576	int
				577	xfs_syncd_init(
				578	struct xfs_mount *mp)
				579	{
				580	mp->m_sync_work.w_syncer = xfs_sync_worker;
				581	mp->m_sync_work.w_mount = mp;
				582	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
				583	if (IS_ERR(mp->m_sync_task))
				584	return -PTR_ERR(mp->m_sync_task);
				585	return 0;
				586	}
				587
				588	void
				589	xfs_syncd_stop(
				590	struct xfs_mount *mp)
				591	{
				592	kthread_stop(mp->m_sync_task);
				593	}
				594
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	595	int
David Chinner	1dc3318	2008-10-30 17:37:15 +1100	[diff] [blame]	596	xfs_reclaim_inode(
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	597	xfs_inode_t *ip,
				598	int locked,
				599	int sync_mode)
				600	{
				601	xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
				602
				603	/* The hash lock here protects a thread in xfs_iget_core from
				604	* racing with us on linking the inode back with a vnode.
				605	* Once we have the XFS_IRECLAIM flag set it will not touch
				606	* us.
				607	*/
				608	write_lock(&pag->pag_ici_lock);
				609	spin_lock(&ip->i_flags_lock);
				610	if (__xfs_iflags_test(ip, XFS_IRECLAIM) \|\|
				611	!__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
				612	spin_unlock(&ip->i_flags_lock);
				613	write_unlock(&pag->pag_ici_lock);
				614	if (locked) {
				615	xfs_ifunlock(ip);
				616	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				617	}
				618	return 1;
				619	}
				620	__xfs_iflags_set(ip, XFS_IRECLAIM);
				621	spin_unlock(&ip->i_flags_lock);
				622	write_unlock(&pag->pag_ici_lock);
				623	xfs_put_perag(ip->i_mount, pag);
				624
				625	/*
				626	* If the inode is still dirty, then flush it out. If the inode
				627	* is not in the AIL, then it will be OK to flush it delwri as
				628	* long as xfs_iflush() does not keep any references to the inode.
				629	* We leave that decision up to xfs_iflush() since it has the
				630	* knowledge of whether it's OK to simply do a delwri flush of
				631	* the inode or whether we need to wait until the inode is
				632	* pulled from the AIL.
				633	* We get the flush lock regardless, though, just to make sure
				634	* we don't free it while it is being flushed.
				635	*/
				636	if (!locked) {
				637	xfs_ilock(ip, XFS_ILOCK_EXCL);
				638	xfs_iflock(ip);
				639	}
				640
				641	/*
				642	* In the case of a forced shutdown we rely on xfs_iflush() to
				643	* wait for the inode to be unpinned before returning an error.
				644	*/
				645	if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
				646	/* synchronize with xfs_iflush_done */
				647	xfs_iflock(ip);
				648	xfs_ifunlock(ip);
				649	}
				650
				651	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				652	xfs_ireclaim(ip);
				653	return 0;
				654	}
				655
David Chinner	1165451	2008-10-30 17:37:49 +1100	[diff] [blame]	656	/*
				657	* We set the inode flag atomically with the radix tree tag.
				658	* Once we get tag lookups on the radix tree, this inode flag
				659	* can go away.
				660	*/
David Chinner	396beb8	2008-10-30 17:37:26 +1100	[diff] [blame]	661	void
				662	xfs_inode_set_reclaim_tag(
				663	xfs_inode_t *ip)
				664	{
				665	xfs_mount_t *mp = ip->i_mount;
				666	xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
				667
				668	read_lock(&pag->pag_ici_lock);
				669	spin_lock(&ip->i_flags_lock);
				670	radix_tree_tag_set(&pag->pag_ici_root,
				671	XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
David Chinner	1165451	2008-10-30 17:37:49 +1100	[diff] [blame]	672	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
David Chinner	396beb8	2008-10-30 17:37:26 +1100	[diff] [blame]	673	spin_unlock(&ip->i_flags_lock);
				674	read_unlock(&pag->pag_ici_lock);
				675	xfs_put_perag(mp, pag);
				676	}
				677
				678	void
				679	__xfs_inode_clear_reclaim_tag(
				680	xfs_mount_t *mp,
				681	xfs_perag_t *pag,
				682	xfs_inode_t *ip)
				683	{
				684	radix_tree_tag_clear(&pag->pag_ici_root,
				685	XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
				686	}
				687
				688	void
				689	xfs_inode_clear_reclaim_tag(
				690	xfs_inode_t *ip)
				691	{
				692	xfs_mount_t *mp = ip->i_mount;
				693	xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
				694
				695	read_lock(&pag->pag_ici_lock);
				696	spin_lock(&ip->i_flags_lock);
				697	__xfs_inode_clear_reclaim_tag(mp, pag, ip);
				698	spin_unlock(&ip->i_flags_lock);
				699	read_unlock(&pag->pag_ici_lock);
				700	xfs_put_perag(mp, pag);
				701	}
				702
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	703
				704	STATIC void
				705	xfs_reclaim_inodes_ag(
				706	xfs_mount_t *mp,
				707	int ag,
				708	int noblock,
				709	int mode)
				710	{
				711	xfs_inode_t *ip = NULL;
				712	xfs_perag_t *pag = &mp->m_perag[ag];
				713	int nr_found;
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame^]	714	uint32_t first_index;
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	715	int skipped;
				716
				717	restart:
				718	first_index = 0;
				719	skipped = 0;
				720	do {
				721	/*
				722	* use a gang lookup to find the next inode in the tree
				723	* as the tree is sparse and a gang lookup walks to find
				724	* the number of objects requested.
				725	*/
				726	read_lock(&pag->pag_ici_lock);
				727	nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
				728	(void**)&ip, first_index, 1,
				729	XFS_ICI_RECLAIM_TAG);
				730
				731	if (!nr_found) {
				732	read_unlock(&pag->pag_ici_lock);
				733	break;
				734	}
				735
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame^]	736	/*
				737	* Update the index for the next lookup. Catch overflows
				738	* into the next AG range which can occur if we have inodes
				739	* in the last block of the AG and we are currently
				740	* pointing to the last inode.
				741	*/
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	742	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame^]	743	if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
				744	read_unlock(&pag->pag_ici_lock);
				745	break;
				746	}
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	747
				748	ASSERT(xfs_iflags_test(ip, (XFS_IRECLAIMABLE\|XFS_IRECLAIM)));
				749
				750	/* ignore if already under reclaim */
				751	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
				752	read_unlock(&pag->pag_ici_lock);
				753	continue;
				754	}
				755
				756	if (noblock) {
				757	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
				758	read_unlock(&pag->pag_ici_lock);
				759	continue;
				760	}
				761	if (xfs_ipincount(ip) \|\|
				762	!xfs_iflock_nowait(ip)) {
				763	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				764	read_unlock(&pag->pag_ici_lock);
				765	continue;
				766	}
				767	}
				768	read_unlock(&pag->pag_ici_lock);
				769
				770	/*
				771	* hmmm - this is an inode already in reclaim. Do
				772	* we even bother catching it here?
				773	*/
				774	if (xfs_reclaim_inode(ip, noblock, mode))
				775	skipped++;
				776	} while (nr_found);
				777
				778	if (skipped) {
				779	delay(1);
				780	goto restart;
				781	}
				782	return;
				783
				784	}
				785
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	786	int
David Chinner	1dc3318	2008-10-30 17:37:15 +1100	[diff] [blame]	787	xfs_reclaim_inodes(
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	788	xfs_mount_t *mp,
				789	int noblock,
				790	int mode)
				791	{
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	792	int i;
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	793
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	794	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
				795	if (!mp->m_perag[i].pag_ici_init)
				796	continue;
				797	xfs_reclaim_inodes_ag(mp, i, noblock, mode);
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	798	}
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	799	return 0;
				800	}
				801
				802