Blame - fs/fs-writeback.c - kernel/msm-4.9

blob: 0639024d83a98e95a69c2be9bac2c8c548c0de43 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* fs/fs-writeback.c
				3	*
				4	* Copyright (C) 2002, Linus Torvalds.
				5	*
				6	* Contains all the functions related to writing back and waiting
				7	* upon dirty inodes against superblocks, and writing back dirty
				8	* pages against inodes. ie: data writeback. Writeout of the
				9	* inode itself is not handled here.
				10	*
				11	* 10Apr2002 akpm@zip.com.au
				12	* Split out of fs/inode.c
				13	* Additions for address_space-based writeback
				14	*/
				15
				16	#include <linux/kernel.h>
				17	#include <linux/spinlock.h>
				18	#include <linux/sched.h>
				19	#include <linux/fs.h>
				20	#include <linux/mm.h>
				21	#include <linux/writeback.h>
				22	#include <linux/blkdev.h>
				23	#include <linux/backing-dev.h>
				24	#include <linux/buffer_head.h>
David Howells	07f3f05	2006-09-30 20:52:18 +0200	[diff] [blame^]	25	#include "internal.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	26
				27	/**
				28	* __mark_inode_dirty - internal function
				29	* @inode: inode to mark
				30	* @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
				31	* Mark an inode as dirty. Callers should use mark_inode_dirty or
				32	* mark_inode_dirty_sync.
				33	*
				34	* Put the inode on the super block's dirty list.
				35	*
				36	* CAREFUL! We mark it dirty unconditionally, but move it onto the
				37	* dirty list only if it is hashed or if it refers to a blockdev.
				38	* If it was not hashed, it will never be added to the dirty list
				39	* even if it is later hashed, as it will have been marked dirty already.
				40	*
				41	* In short, make sure you hash any inodes _before_ you start marking
				42	* them dirty.
				43	*
				44	* This function must be atomic for the I_DIRTY_PAGES case -
				45	* set_page_dirty() is called under spinlock in several places.
				46	*
				47	* Note that for blockdevs, inode->dirtied_when represents the dirtying time of
				48	* the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
				49	* the kernel-internal blockdev inode represents the dirtying time of the
				50	* blockdev's pages. This is why for I_DIRTY_PAGES we always use
				51	* page->mapping->host, so the page-dirtying time is recorded in the internal
				52	* blockdev inode.
				53	*/
				54	void __mark_inode_dirty(struct inode *inode, int flags)
				55	{
				56	struct super_block *sb = inode->i_sb;
				57
				58	/*
				59	* Don't do this for I_DIRTY_PAGES - that doesn't actually
				60	* dirty the inode itself
				61	*/
				62	if (flags & (I_DIRTY_SYNC \| I_DIRTY_DATASYNC)) {
				63	if (sb->s_op->dirty_inode)
				64	sb->s_op->dirty_inode(inode);
				65	}
				66
				67	/*
				68	* make sure that changes are seen by all cpus before we test i_state
				69	* -- mikulas
				70	*/
				71	smp_mb();
				72
				73	/* avoid the locking if we can */
				74	if ((inode->i_state & flags) == flags)
				75	return;
				76
				77	if (unlikely(block_dump)) {
				78	struct dentry *dentry = NULL;
				79	const char *name = "?";
				80
				81	if (!list_empty(&inode->i_dentry)) {
				82	dentry = list_entry(inode->i_dentry.next,
				83	struct dentry, d_alias);
				84	if (dentry && dentry->d_name.name)
				85	name = (const char *) dentry->d_name.name;
				86	}
				87
				88	if (inode->i_ino \|\| strcmp(inode->i_sb->s_id, "bdev"))
				89	printk(KERN_DEBUG
				90	"%s(%d): dirtied inode %lu (%s) on %s\n",
				91	current->comm, current->pid, inode->i_ino,
				92	name, inode->i_sb->s_id);
				93	}
				94
				95	spin_lock(&inode_lock);
				96	if ((inode->i_state & flags) != flags) {
				97	const int was_dirty = inode->i_state & I_DIRTY;
				98
				99	inode->i_state \|= flags;
				100
				101	/*
				102	* If the inode is locked, just update its dirty state.
				103	* The unlocker will place the inode on the appropriate
				104	* superblock list, based upon its state.
				105	*/
				106	if (inode->i_state & I_LOCK)
				107	goto out;
				108
				109	/*
				110	* Only add valid (hashed) inodes to the superblock's
				111	* dirty list. Add blockdev inodes as well.
				112	*/
				113	if (!S_ISBLK(inode->i_mode)) {
				114	if (hlist_unhashed(&inode->i_hash))
				115	goto out;
				116	}
				117	if (inode->i_state & (I_FREEING\|I_CLEAR))
				118	goto out;
				119
				120	/*
				121	* If the inode was already on s_dirty or s_io, don't
				122	* reposition it (that would break s_dirty time-ordering).
				123	*/
				124	if (!was_dirty) {
				125	inode->dirtied_when = jiffies;
				126	list_move(&inode->i_list, &sb->s_dirty);
				127	}
				128	}
				129	out:
				130	spin_unlock(&inode_lock);
				131	}
				132
				133	EXPORT_SYMBOL(__mark_inode_dirty);
				134
				135	static int write_inode(struct inode *inode, int sync)
				136	{
				137	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
				138	return inode->i_sb->s_op->write_inode(inode, sync);
				139	return 0;
				140	}
				141
				142	/*
				143	* Write a single inode's dirty pages and inode data out to disk.
				144	* If `wait' is set, wait on the writeout.
				145	*
				146	* The whole writeout design is quite complex and fragile. We want to avoid
				147	* starvation of particular inodes when others are being redirtied, prevent
				148	* livelocks, etc.
				149	*
				150	* Called under inode_lock.
				151	*/
				152	static int
				153	__sync_single_inode(struct inode inode, struct writeback_control wbc)
				154	{
				155	unsigned dirty;
				156	struct address_space *mapping = inode->i_mapping;
				157	struct super_block *sb = inode->i_sb;
				158	int wait = wbc->sync_mode == WB_SYNC_ALL;
				159	int ret;
				160
				161	BUG_ON(inode->i_state & I_LOCK);
				162
				163	/* Set I_LOCK, reset I_DIRTY */
				164	dirty = inode->i_state & I_DIRTY;
				165	inode->i_state \|= I_LOCK;
				166	inode->i_state &= ~I_DIRTY;
				167
				168	spin_unlock(&inode_lock);
				169
				170	ret = do_writepages(mapping, wbc);
				171
				172	/* Don't write the inode if only I_DIRTY_PAGES was set */
				173	if (dirty & (I_DIRTY_SYNC \| I_DIRTY_DATASYNC)) {
				174	int err = write_inode(inode, wait);
				175	if (ret == 0)
				176	ret = err;
				177	}
				178
				179	if (wait) {
				180	int err = filemap_fdatawait(mapping);
				181	if (ret == 0)
				182	ret = err;
				183	}
				184
				185	spin_lock(&inode_lock);
				186	inode->i_state &= ~I_LOCK;
				187	if (!(inode->i_state & I_FREEING)) {
				188	if (!(inode->i_state & I_DIRTY) &&
				189	mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
				190	/*
				191	* We didn't write back all the pages. nfs_writepages()
				192	* sometimes bales out without doing anything. Redirty
				193	* the inode. It is still on sb->s_io.
				194	*/
				195	if (wbc->for_kupdate) {
				196	/*
				197	* For the kupdate function we leave the inode
				198	* at the head of sb_dirty so it will get more
				199	* writeout as soon as the queue becomes
				200	* uncongested.
				201	*/
				202	inode->i_state \|= I_DIRTY_PAGES;
				203	list_move_tail(&inode->i_list, &sb->s_dirty);
				204	} else {
				205	/*
				206	* Otherwise fully redirty the inode so that
				207	* other inodes on this superblock will get some
				208	* writeout. Otherwise heavy writing to one
				209	* file would indefinitely suspend writeout of
				210	* all the other files.
				211	*/
				212	inode->i_state \|= I_DIRTY_PAGES;
				213	inode->dirtied_when = jiffies;
				214	list_move(&inode->i_list, &sb->s_dirty);
				215	}
				216	} else if (inode->i_state & I_DIRTY) {
				217	/*
				218	* Someone redirtied the inode while were writing back
				219	* the pages.
				220	*/
				221	list_move(&inode->i_list, &sb->s_dirty);
				222	} else if (atomic_read(&inode->i_count)) {
				223	/*
				224	* The inode is clean, inuse
				225	*/
				226	list_move(&inode->i_list, &inode_in_use);
				227	} else {
				228	/*
				229	* The inode is clean, unused
				230	*/
				231	list_move(&inode->i_list, &inode_unused);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	232	}
				233	}
				234	wake_up_inode(inode);
				235	return ret;
				236	}
				237
				238	/*
Andrea Arcangeli	7f04c26	2005-10-30 15:03:05 -0800	[diff] [blame]	239	* Write out an inode's dirty pages. Called under inode_lock. Either the
				240	* caller has ref on the inode (either via __iget or via syscall against an fd)
				241	* or the inode has I_WILL_FREE set (via generic_forget_inode)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	242	*/
				243	static int
Andrea Arcangeli	7f04c26	2005-10-30 15:03:05 -0800	[diff] [blame]	244	__writeback_single_inode(struct inode inode, struct writeback_control wbc)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	245	{
				246	wait_queue_head_t *wqh;
				247
Andrea Arcangeli	7f04c26	2005-10-30 15:03:05 -0800	[diff] [blame]	248	if (!atomic_read(&inode->i_count))
Andrea Arcangeli	659603e	2005-10-31 14:08:54 -0800	[diff] [blame]	249	WARN_ON(!(inode->i_state & (I_WILL_FREE\|I_FREEING)));
Andrea Arcangeli	7f04c26	2005-10-30 15:03:05 -0800	[diff] [blame]	250	else
				251	WARN_ON(inode->i_state & I_WILL_FREE);
				252
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	253	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) {
				254	list_move(&inode->i_list, &inode->i_sb->s_dirty);
				255	return 0;
				256	}
				257
				258	/*
				259	* It's a data-integrity sync. We must wait.
				260	*/
				261	if (inode->i_state & I_LOCK) {
				262	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LOCK);
				263
				264	wqh = bit_waitqueue(&inode->i_state, __I_LOCK);
				265	do {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	266	spin_unlock(&inode_lock);
				267	__wait_on_bit(wqh, &wq, inode_wait,
				268	TASK_UNINTERRUPTIBLE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	269	spin_lock(&inode_lock);
				270	} while (inode->i_state & I_LOCK);
				271	}
				272	return __sync_single_inode(inode, wbc);
				273	}
				274
				275	/*
				276	* Write out a superblock's list of dirty inodes. A wait will be performed
				277	* upon no inodes, all inodes or the final one, depending upon sync_mode.
				278	*
				279	* If older_than_this is non-NULL, then only write out inodes which
				280	* had their first dirtying at a time earlier than *older_than_this.
				281	*
				282	* If we're a pdlfush thread, then implement pdflush collision avoidance
				283	* against the entire list.
				284	*
				285	* WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
				286	* that it can be located for waiting on in __writeback_single_inode().
				287	*
				288	* Called under inode_lock.
				289	*
				290	* If `bdi' is non-zero then we're being asked to writeback a specific queue.
				291	* This function assumes that the blockdev superblock's inodes are backed by
				292	* a variety of queues, so all inodes are searched. For other superblocks,
				293	* assume that all inodes are backed by the same queue.
				294	*
				295	* FIXME: this linear search could get expensive with many fileystems. But
				296	* how to fix? We need to go from an address_space to all inodes which share
				297	* a queue with that address_space. (Easy: have a global "dirty superblocks"
				298	* list).
				299	*
				300	* The inodes to be written are parked on sb->s_io. They are moved back onto
				301	* sb->s_dirty as they are selected for writing. This way, none can be missed
				302	* on the writer throttling path, and we get decent balancing between many
				303	* throttled threads: we don't want them all piling up on __wait_on_inode.
				304	*/
				305	static void
				306	sync_sb_inodes(struct super_block sb, struct writeback_control wbc)
				307	{
				308	const unsigned long start = jiffies; /* livelock avoidance */
				309
				310	if (!wbc->for_kupdate \|\| list_empty(&sb->s_io))
				311	list_splice_init(&sb->s_dirty, &sb->s_io);
				312
				313	while (!list_empty(&sb->s_io)) {
				314	struct inode *inode = list_entry(sb->s_io.prev,
				315	struct inode, i_list);
				316	struct address_space *mapping = inode->i_mapping;
				317	struct backing_dev_info *bdi = mapping->backing_dev_info;
				318	long pages_skipped;
				319
				320	if (!bdi_cap_writeback_dirty(bdi)) {
				321	list_move(&inode->i_list, &sb->s_dirty);
				322	if (sb == blockdev_superblock) {
				323	/*
				324	* Dirty memory-backed blockdev: the ramdisk
				325	* driver does this. Skip just this inode
				326	*/
				327	continue;
				328	}
				329	/*
				330	* Dirty memory-backed inode against a filesystem other
				331	* than the kernel-internal bdev filesystem. Skip the
				332	* entire superblock.
				333	*/
				334	break;
				335	}
				336
				337	if (wbc->nonblocking && bdi_write_congested(bdi)) {
				338	wbc->encountered_congestion = 1;
				339	if (sb != blockdev_superblock)
				340	break; /* Skip a congested fs */
				341	list_move(&inode->i_list, &sb->s_dirty);
				342	continue; /* Skip a congested blockdev */
				343	}
				344
				345	if (wbc->bdi && bdi != wbc->bdi) {
				346	if (sb != blockdev_superblock)
				347	break; /* fs has the wrong queue */
				348	list_move(&inode->i_list, &sb->s_dirty);
				349	continue; /* blockdev has wrong queue */
				350	}
				351
				352	/* Was this inode dirtied after sync_sb_inodes was called? */
				353	if (time_after(inode->dirtied_when, start))
				354	break;
				355
				356	/* Was this inode dirtied too recently? */
				357	if (wbc->older_than_this && time_after(inode->dirtied_when,
				358	*wbc->older_than_this))
				359	break;
				360
				361	/* Is another pdflush already flushing this queue? */
				362	if (current_is_pdflush() && !writeback_acquire(bdi))
				363	break;
				364
				365	BUG_ON(inode->i_state & I_FREEING);
				366	__iget(inode);
				367	pages_skipped = wbc->pages_skipped;
				368	__writeback_single_inode(inode, wbc);
				369	if (wbc->sync_mode == WB_SYNC_HOLD) {
				370	inode->dirtied_when = jiffies;
				371	list_move(&inode->i_list, &sb->s_dirty);
				372	}
				373	if (current_is_pdflush())
				374	writeback_release(bdi);
				375	if (wbc->pages_skipped != pages_skipped) {
				376	/*
				377	* writeback is not making progress due to locked
				378	* buffers. Skip this inode for now.
				379	*/
				380	list_move(&inode->i_list, &sb->s_dirty);
				381	}
				382	spin_unlock(&inode_lock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	383	iput(inode);
OGAWA Hirofumi	4ffc844	2006-03-25 03:07:44 -0800	[diff] [blame]	384	cond_resched();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	385	spin_lock(&inode_lock);
				386	if (wbc->nr_to_write <= 0)
				387	break;
				388	}
				389	return; /* Leave any unwritten inodes on s_io */
				390	}
				391
				392	/*
				393	* Start writeback of dirty pagecache data against all unlocked inodes.
				394	*
				395	* Note:
				396	* We don't need to grab a reference to superblock here. If it has non-empty
				397	* ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
				398	* past sync_inodes_sb() until both the ->s_dirty and ->s_io lists are
				399	* empty. Since __sync_single_inode() regains inode_lock before it finally moves
				400	* inode from superblock lists we are OK.
				401	*
				402	* If `older_than_this' is non-zero then only flush inodes which have a
				403	* flushtime older than *older_than_this.
				404	*
				405	* If `bdi' is non-zero then we will scan the first inode against each
				406	* superblock until we find the matching ones. One group will be the dirty
				407	* inodes against a filesystem. Then when we hit the dummy blockdev superblock,
				408	* sync_sb_inodes will seekout the blockdev which matches `bdi'. Maybe not
				409	* super-efficient but we're about to do a ton of I/O...
				410	*/
				411	void
				412	writeback_inodes(struct writeback_control *wbc)
				413	{
				414	struct super_block *sb;
				415
				416	might_sleep();
				417	spin_lock(&sb_lock);
				418	restart:
				419	sb = sb_entry(super_blocks.prev);
				420	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
				421	if (!list_empty(&sb->s_dirty) \|\| !list_empty(&sb->s_io)) {
				422	/* we're making our own get_super here */
				423	sb->s_count++;
				424	spin_unlock(&sb_lock);
				425	/*
				426	* If we can't get the readlock, there's no sense in
				427	* waiting around, most of the time the FS is going to
				428	* be unmounted by the time it is released.
				429	*/
				430	if (down_read_trylock(&sb->s_umount)) {
				431	if (sb->s_root) {
				432	spin_lock(&inode_lock);
				433	sync_sb_inodes(sb, wbc);
				434	spin_unlock(&inode_lock);
				435	}
				436	up_read(&sb->s_umount);
				437	}
				438	spin_lock(&sb_lock);
				439	if (__put_super_and_need_restart(sb))
				440	goto restart;
				441	}
				442	if (wbc->nr_to_write <= 0)
				443	break;
				444	}
				445	spin_unlock(&sb_lock);
				446	}
				447
				448	/*
				449	* writeback and wait upon the filesystem's dirty inodes. The caller will
				450	* do this in two passes - one to write, and one to wait. WB_SYNC_HOLD is
				451	* used to park the written inodes on sb->s_dirty for the wait pass.
				452	*
				453	* A finite limit is set on the number of pages which will be written.
				454	* To prevent infinite livelock of sys_sync().
				455	*
				456	* We add in the number of potentially dirty inodes, because each inode write
				457	* can dirty pagecache in the underlying blockdev.
				458	*/
				459	void sync_inodes_sb(struct super_block *sb, int wait)
				460	{
				461	struct writeback_control wbc = {
				462	.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
OGAWA Hirofumi	111ebb6	2006-06-23 02:03:26 -0700	[diff] [blame]	463	.range_start = 0,
				464	.range_end = LLONG_MAX,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	465	};
Christoph Lameter	b1e7a8f	2006-06-30 01:55:39 -0700	[diff] [blame]	466	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
Christoph Lameter	fd39fc8	2006-06-30 01:55:40 -0700	[diff] [blame]	467	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	468
				469	wbc.nr_to_write = nr_dirty + nr_unstable +
				470	(inodes_stat.nr_inodes - inodes_stat.nr_unused) +
				471	nr_dirty + nr_unstable;
				472	wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */
				473	spin_lock(&inode_lock);
				474	sync_sb_inodes(sb, &wbc);
				475	spin_unlock(&inode_lock);
				476	}
				477
				478	/*
				479	* Rather lame livelock avoidance.
				480	*/
				481	static void set_sb_syncing(int val)
				482	{
				483	struct super_block *sb;
				484	spin_lock(&sb_lock);
				485	sb = sb_entry(super_blocks.prev);
				486	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
				487	sb->s_syncing = val;
				488	}
				489	spin_unlock(&sb_lock);
				490	}
				491
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	492	/**
Martin Waitz	67be2dd	2005-05-01 08:59:26 -0700	[diff] [blame]	493	* sync_inodes - writes all inodes to disk
				494	* @wait: wait for completion
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	495	*
				496	* sync_inodes() goes through each super block's dirty inode list, writes the
				497	* inodes out, waits on the writeout and puts the inodes back on the normal
				498	* list.
				499	*
				500	* This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle
				501	* part of the sync functions is that the blockdev "superblock" is processed
				502	* last. This is because the write_inode() function of a typical fs will
				503	* perform no I/O, but will mark buffers in the blockdev mapping as dirty.
				504	* What we want to do is to perform all that dirtying first, and then write
				505	* back all those inode blocks via the blockdev mapping in one sweep. So the
				506	* additional (somewhat redundant) sync_blockdev() calls here are to make
				507	* sure that really happens. Because if we call sync_inodes_sb(wait=1) with
				508	* outstanding dirty inodes, the writeback goes block-at-a-time within the
				509	* filesystem's write_inode(). This is extremely slow.
				510	*/
Kirill Korotaev	618f063	2005-06-23 00:09:54 -0700	[diff] [blame]	511	static void __sync_inodes(int wait)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	512	{
				513	struct super_block *sb;
				514
Kirill Korotaev	618f063	2005-06-23 00:09:54 -0700	[diff] [blame]	515	spin_lock(&sb_lock);
				516	restart:
				517	list_for_each_entry(sb, &super_blocks, s_list) {
				518	if (sb->s_syncing)
				519	continue;
				520	sb->s_syncing = 1;
				521	sb->s_count++;
				522	spin_unlock(&sb_lock);
				523	down_read(&sb->s_umount);
				524	if (sb->s_root) {
				525	sync_inodes_sb(sb, wait);
				526	sync_blockdev(sb->s_bdev);
				527	}
				528	up_read(&sb->s_umount);
				529	spin_lock(&sb_lock);
				530	if (__put_super_and_need_restart(sb))
				531	goto restart;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	532	}
Kirill Korotaev	618f063	2005-06-23 00:09:54 -0700	[diff] [blame]	533	spin_unlock(&sb_lock);
				534	}
				535
				536	void sync_inodes(int wait)
				537	{
				538	set_sb_syncing(0);
				539	__sync_inodes(0);
				540
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	541	if (wait) {
				542	set_sb_syncing(0);
Kirill Korotaev	618f063	2005-06-23 00:09:54 -0700	[diff] [blame]	543	__sync_inodes(1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	544	}
				545	}
				546
				547	/**
Andrea Arcangeli	7f04c26	2005-10-30 15:03:05 -0800	[diff] [blame]	548	* write_inode_now - write an inode to disk
				549	* @inode: inode to write to disk
				550	* @sync: whether the write should be synchronous or not
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	551	*
Andrea Arcangeli	7f04c26	2005-10-30 15:03:05 -0800	[diff] [blame]	552	* This function commits an inode to disk immediately if it is dirty. This is
				553	* primarily needed by knfsd.
				554	*
				555	* The caller must either have a ref on the inode or must have set I_WILL_FREE.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	556	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	557	int write_inode_now(struct inode *inode, int sync)
				558	{
				559	int ret;
				560	struct writeback_control wbc = {
				561	.nr_to_write = LONG_MAX,
				562	.sync_mode = WB_SYNC_ALL,
OGAWA Hirofumi	111ebb6	2006-06-23 02:03:26 -0700	[diff] [blame]	563	.range_start = 0,
				564	.range_end = LLONG_MAX,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	565	};
				566
				567	if (!mapping_cap_writeback_dirty(inode->i_mapping))
Andrew Morton	49364ce	2005-11-07 00:59:15 -0800	[diff] [blame]	568	wbc.nr_to_write = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	569
				570	might_sleep();
				571	spin_lock(&inode_lock);
				572	ret = __writeback_single_inode(inode, &wbc);
				573	spin_unlock(&inode_lock);
				574	if (sync)
				575	wait_on_inode(inode);
				576	return ret;
				577	}
				578	EXPORT_SYMBOL(write_inode_now);
				579
				580	/**
				581	* sync_inode - write an inode and its pages to disk.
				582	* @inode: the inode to sync
				583	* @wbc: controls the writeback mode
				584	*
				585	* sync_inode() will write an inode and its pages to disk. It will also
				586	* correctly update the inode on its superblock's dirty inode lists and will
				587	* update inode->i_state.
				588	*
				589	* The caller must have a ref on the inode.
				590	*/
				591	int sync_inode(struct inode inode, struct writeback_control wbc)
				592	{
				593	int ret;
				594
				595	spin_lock(&inode_lock);
				596	ret = __writeback_single_inode(inode, wbc);
				597	spin_unlock(&inode_lock);
				598	return ret;
				599	}
				600	EXPORT_SYMBOL(sync_inode);
				601
				602	/**
				603	* generic_osync_inode - flush all dirty data for a given inode to disk
				604	* @inode: inode to write
Martin Waitz	67be2dd	2005-05-01 08:59:26 -0700	[diff] [blame]	605	* @mapping: the address_space that should be flushed
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	606	* @what: what to write and wait upon
				607	*
				608	* This can be called by file_write functions for files which have the
				609	* O_SYNC flag set, to flush dirty writes to disk.
				610	*
				611	* @what is a bitmask, specifying which part of the inode's data should be
Randy Dunlap	b8887e6	2005-11-07 01:01:07 -0800	[diff] [blame]	612	* written and waited upon.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	613	*
				614	* OSYNC_DATA: i_mapping's dirty data
				615	* OSYNC_METADATA: the buffers at i_mapping->private_list
				616	* OSYNC_INODE: the inode itself
				617	*/
				618
				619	int generic_osync_inode(struct inode inode, struct address_space mapping, int what)
				620	{
				621	int err = 0;
				622	int need_write_inode_now = 0;
				623	int err2;
				624
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	625	if (what & OSYNC_DATA)
				626	err = filemap_fdatawrite(mapping);
				627	if (what & (OSYNC_METADATA\|OSYNC_DATA)) {
				628	err2 = sync_mapping_buffers(mapping);
				629	if (!err)
				630	err = err2;
				631	}
				632	if (what & OSYNC_DATA) {
				633	err2 = filemap_fdatawait(mapping);
				634	if (!err)
				635	err = err2;
				636	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	637
				638	spin_lock(&inode_lock);
				639	if ((inode->i_state & I_DIRTY) &&
				640	((what & OSYNC_INODE) \|\| (inode->i_state & I_DIRTY_DATASYNC)))
				641	need_write_inode_now = 1;
				642	spin_unlock(&inode_lock);
				643
				644	if (need_write_inode_now) {
				645	err2 = write_inode_now(inode, 1);
				646	if (!err)
				647	err = err2;
				648	}
				649	else
				650	wait_on_inode(inode);
				651
				652	return err;
				653	}
				654
				655	EXPORT_SYMBOL(generic_osync_inode);
				656
				657	/**
				658	* writeback_acquire: attempt to get exclusive writeback access to a device
				659	* @bdi: the device's backing_dev_info structure
				660	*
				661	* It is a waste of resources to have more than one pdflush thread blocked on
				662	* a single request queue. Exclusion at the request_queue level is obtained
				663	* via a flag in the request_queue's backing_dev_info.state.
				664	*
				665	* Non-request_queue-backed address_spaces will share default_backing_dev_info,
				666	* unless they implement their own. Which is somewhat inefficient, as this
				667	* may prevent concurrent writeback against multiple devices.
				668	*/
				669	int writeback_acquire(struct backing_dev_info *bdi)
				670	{
				671	return !test_and_set_bit(BDI_pdflush, &bdi->state);
				672	}
				673
				674	/**
				675	* writeback_in_progress: determine whether there is writeback in progress
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	676	* @bdi: the device's backing_dev_info structure.
Randy Dunlap	b8887e6	2005-11-07 01:01:07 -0800	[diff] [blame]	677	*
				678	* Determine whether there is writeback in progress against a backing device.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	679	*/
				680	int writeback_in_progress(struct backing_dev_info *bdi)
				681	{
				682	return test_bit(BDI_pdflush, &bdi->state);
				683	}
				684
				685	/**
				686	* writeback_release: relinquish exclusive writeback access against a device.
				687	* @bdi: the device's backing_dev_info structure
				688	*/
				689	void writeback_release(struct backing_dev_info *bdi)
				690	{
				691	BUG_ON(!writeback_in_progress(bdi));
				692	clear_bit(BDI_pdflush, &bdi->state);
				693	}