Blame - fs/inotify.c - kernel/msm-4.9

blob: fb4803131423faa49acb1fcb76c247de34ef5fb3 [file] [log] [blame]

Robert Love	0eeca28	2005-07-12 17:06:03 -0400	[diff] [blame]	1	/*
				2	* fs/inotify.c - inode-based file event notifications
				3	*
				4	* Authors:
				5	* John McCutchan <ttb@tentacle.dhs.org>
				6	* Robert Love <rml@novell.com>
				7	*
				8	* Copyright (C) 2005 John McCutchan
				9	*
				10	* This program is free software; you can redistribute it and/or modify it
				11	* under the terms of the GNU General Public License as published by the
				12	* Free Software Foundation; either version 2, or (at your option) any
				13	* later version.
				14	*
				15	* This program is distributed in the hope that it will be useful, but
				16	* WITHOUT ANY WARRANTY; without even the implied warranty of
				17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				18	* General Public License for more details.
				19	*/
				20
				21	#include <linux/module.h>
				22	#include <linux/kernel.h>
				23	#include <linux/sched.h>
				24	#include <linux/spinlock.h>
				25	#include <linux/idr.h>
				26	#include <linux/slab.h>
				27	#include <linux/fs.h>
				28	#include <linux/file.h>
				29	#include <linux/mount.h>
				30	#include <linux/namei.h>
				31	#include <linux/poll.h>
				32	#include <linux/device.h>
				33	#include <linux/miscdevice.h>
				34	#include <linux/init.h>
				35	#include <linux/list.h>
				36	#include <linux/writeback.h>
				37	#include <linux/inotify.h>
				38
				39	#include <asm/ioctls.h>
				40
				41	static atomic_t inotify_cookie;
				42
				43	static kmem_cache_t *watch_cachep;
				44	static kmem_cache_t *event_cachep;
				45
				46	static struct vfsmount *inotify_mnt;
				47
Robert Love	0399cb0	2005-07-13 12:38:18 -0400	[diff] [blame^]	48	/* these are configurable via /proc/sys/fs/inotify/ */
				49	int inotify_max_user_instances;
Robert Love	0eeca28	2005-07-12 17:06:03 -0400	[diff] [blame]	50	int inotify_max_user_watches;
				51	int inotify_max_queued_events;
				52
				53	/*
				54	* Lock ordering:
				55	*
				56	* dentry->d_lock (used to keep d_move() away from dentry->d_parent)
				57	* iprune_sem (synchronize shrink_icache_memory())
				58	* inode_lock (protects the super_block->s_inodes list)
				59	* inode->inotify_sem (protects inode->inotify_watches and watches->i_list)
				60	* inotify_dev->sem (protects inotify_device and watches->d_list)
				61	*/
				62
				63	/*
				64	* Lifetimes of the three main data structures--inotify_device, inode, and
				65	* inotify_watch--are managed by reference count.
				66	*
				67	* inotify_device: Lifetime is from open until release. Additional references
				68	* can bump the count via get_inotify_dev() and drop the count via
				69	* put_inotify_dev().
				70	*
				71	* inotify_watch: Lifetime is from create_watch() to the final put_inotify_watch().
				72	* Additional references can bump the count via get_inotify_watch() and drop
				73	* the count via put_inotify_watch().
				74	*
				75	* inode: Pinned so long as the inode is associated with a watch, from
				76	* create_watch() to put_inotify_watch().
				77	*/
				78
				79	/*
				80	* struct inotify_device - represents an open instance of an inotify device
				81	*
				82	* This structure is protected by the semaphore 'sem'.
				83	*/
				84	struct inotify_device {
				85	wait_queue_head_t wq; /* wait queue for i/o */
				86	struct idr idr; /* idr mapping wd -> watch */
				87	struct semaphore sem; /* protects this bad boy */
				88	struct list_head events; /* list of queued events */
				89	struct list_head watches; /* list of watches */
				90	atomic_t count; /* reference count */
				91	struct user_struct user; / user who opened this dev */
				92	unsigned int queue_size; /* size of the queue (bytes) */
				93	unsigned int event_count; /* number of pending events */
				94	unsigned int max_events; /* maximum number of events */
				95	};
				96
				97	/*
				98	* struct inotify_kernel_event - An inotify event, originating from a watch and
				99	* queued for user-space. A list of these is attached to each instance of the
				100	* device. In read(), this list is walked and all events that can fit in the
				101	* buffer are returned.
				102	*
				103	* Protected by dev->sem of the device in which we are queued.
				104	*/
				105	struct inotify_kernel_event {
				106	struct inotify_event event; /* the user-space event */
				107	struct list_head list; /* entry in inotify_device's list */
				108	char name; / filename, if any */
				109	};
				110
				111	/*
				112	* struct inotify_watch - represents a watch request on a specific inode
				113	*
				114	* d_list is protected by dev->sem of the associated watch->dev.
				115	* i_list and mask are protected by inode->inotify_sem of the associated inode.
				116	* dev, inode, and wd are never written to once the watch is created.
				117	*/
				118	struct inotify_watch {
				119	struct list_head d_list; /* entry in inotify_device's list */
				120	struct list_head i_list; /* entry in inode's list */
				121	atomic_t count; /* reference count */
				122	struct inotify_device dev; / associated device */
				123	struct inode inode; / associated inode */
				124	s32 wd; /* watch descriptor */
				125	u32 mask; /* event mask for this watch */
				126	};
				127
#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

/* handed to every entry below as .extra1 (lower bound for the handler) */
static int zero;

/*
 * sysctl entries exposing the three inotify limits.  Each entry is an int
 * handled by proc_dointvec_minmax; the table ends with a zero ctl_name.
 */
ctl_table inotify_table[] = {
	{
		.ctl_name	= INOTIFY_MAX_USER_INSTANCES,
		.procname	= "max_user_instances",
		.data		= &inotify_max_user_instances,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
	},
	{
		.ctl_name	= INOTIFY_MAX_USER_WATCHES,
		.procname	= "max_user_watches",
		.data		= &inotify_max_user_watches,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
	},
	{
		.ctl_name	= INOTIFY_MAX_QUEUED_EVENTS,
		.procname	= "max_queued_events",
		.data		= &inotify_max_queued_events,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
	},
	{ .ctl_name = 0 }
};
#endif /* CONFIG_SYSCTL */
				168
Robert Love	0eeca28	2005-07-12 17:06:03 -0400	[diff] [blame]	169	static inline void get_inotify_dev(struct inotify_device *dev)
				170	{
				171	atomic_inc(&dev->count);
				172	}
				173
				174	static inline void put_inotify_dev(struct inotify_device *dev)
				175	{
				176	if (atomic_dec_and_test(&dev->count)) {
				177	atomic_dec(&dev->user->inotify_devs);
				178	free_uid(dev->user);
				179	kfree(dev);
				180	}
				181	}
				182
				183	static inline void get_inotify_watch(struct inotify_watch *watch)
				184	{
				185	atomic_inc(&watch->count);
				186	}
				187
				188	/*
				189	* put_inotify_watch - decrements the ref count on a given watch. cleans up
				190	* the watch and its references if the count reaches zero.
				191	*/
				192	static inline void put_inotify_watch(struct inotify_watch *watch)
				193	{
				194	if (atomic_dec_and_test(&watch->count)) {
				195	put_inotify_dev(watch->dev);
				196	iput(watch->inode);
				197	kmem_cache_free(watch_cachep, watch);
				198	}
				199	}
				200
				201	/*
				202	* kernel_event - create a new kernel event with the given parameters
				203	*
				204	* This function can sleep.
				205	*/
				206	static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
				207	const char *name)
				208	{
				209	struct inotify_kernel_event *kevent;
				210
				211	kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
				212	if (unlikely(!kevent))
				213	return NULL;
				214
				215	/* we hand this out to user-space, so zero it just in case */
				216	memset(&kevent->event, 0, sizeof(struct inotify_event));
				217
				218	kevent->event.wd = wd;
				219	kevent->event.mask = mask;
				220	kevent->event.cookie = cookie;
				221
				222	INIT_LIST_HEAD(&kevent->list);
				223
				224	if (name) {
				225	size_t len, rem, event_size = sizeof(struct inotify_event);
				226
				227	/*
				228	* We need to pad the filename so as to properly align an
				229	* array of inotify_event structures. Because the structure is
				230	* small and the common case is a small filename, we just round
				231	* up to the next multiple of the structure's sizeof. This is
				232	* simple and safe for all architectures.
				233	*/
				234	len = strlen(name) + 1;
				235	rem = event_size - len;
				236	if (len > event_size) {
				237	rem = event_size - (len % event_size);
				238	if (len % event_size == 0)
				239	rem = 0;
				240	}
				241
				242	kevent->name = kmalloc(len + rem, GFP_KERNEL);
				243	if (unlikely(!kevent->name)) {
				244	kmem_cache_free(event_cachep, kevent);
				245	return NULL;
				246	}
				247	memcpy(kevent->name, name, len);
				248	if (rem)
				249	memset(kevent->name + len, 0, rem);
				250	kevent->event.len = len + rem;
				251	} else {
				252	kevent->event.len = 0;
				253	kevent->name = NULL;
				254	}
				255
				256	return kevent;
				257	}
				258
				259	/*
				260	* inotify_dev_get_event - return the next event in the given dev's queue
				261	*
				262	* Caller must hold dev->sem.
				263	*/
				264	static inline struct inotify_kernel_event *
				265	inotify_dev_get_event(struct inotify_device *dev)
				266	{
				267	return list_entry(dev->events.next, struct inotify_kernel_event, list);
				268	}
				269
				270	/*
				271	* inotify_dev_queue_event - add a new event to the given device
				272	*
				273	* Caller must hold dev->sem. Can sleep (calls kernel_event()).
				274	*/
				275	static void inotify_dev_queue_event(struct inotify_device *dev,
				276	struct inotify_watch *watch, u32 mask,
				277	u32 cookie, const char *name)
				278	{
				279	struct inotify_kernel_event kevent, last;
				280
				281	/* coalescing: drop this event if it is a dupe of the previous */
				282	last = inotify_dev_get_event(dev);
				283	if (last && last->event.mask == mask && last->event.wd == watch->wd &&
				284	last->event.cookie == cookie) {
				285	const char *lastname = last->name;
				286
				287	if (!name && !lastname)
				288	return;
				289	if (name && lastname && !strcmp(lastname, name))
				290	return;
				291	}
				292
				293	/* the queue overflowed and we already sent the Q_OVERFLOW event */
				294	if (unlikely(dev->event_count > dev->max_events))
				295	return;
				296
				297	/* if the queue overflows, we need to notify user space */
				298	if (unlikely(dev->event_count == dev->max_events))
				299	kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
				300	else
				301	kevent = kernel_event(watch->wd, mask, cookie, name);
				302
				303	if (unlikely(!kevent))
				304	return;
				305
				306	/* queue the event and wake up anyone waiting */
				307	dev->event_count++;
				308	dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
				309	list_add_tail(&kevent->list, &dev->events);
				310	wake_up_interruptible(&dev->wq);
				311	}
				312
				313	/*
				314	* remove_kevent - cleans up and ultimately frees the given kevent
				315	*
				316	* Caller must hold dev->sem.
				317	*/
				318	static void remove_kevent(struct inotify_device *dev,
				319	struct inotify_kernel_event *kevent)
				320	{
				321	list_del(&kevent->list);
				322
				323	dev->event_count--;
				324	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
				325
				326	kfree(kevent->name);
				327	kmem_cache_free(event_cachep, kevent);
				328	}
				329
				330	/*
				331	* inotify_dev_event_dequeue - destroy an event on the given device
				332	*
				333	* Caller must hold dev->sem.
				334	*/
				335	static void inotify_dev_event_dequeue(struct inotify_device *dev)
				336	{
				337	if (!list_empty(&dev->events)) {
				338	struct inotify_kernel_event *kevent;
				339	kevent = inotify_dev_get_event(dev);
				340	remove_kevent(dev, kevent);
				341	}
				342	}
				343
				344	/*
				345	* inotify_dev_get_wd - returns the next WD for use by the given dev
				346	*
				347	* Callers must hold dev->sem. This function can sleep.
				348	*/
				349	static int inotify_dev_get_wd(struct inotify_device *dev,
				350	struct inotify_watch *watch)
				351	{
				352	int ret;
				353
				354	do {
				355	if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL)))
				356	return -ENOSPC;
				357	ret = idr_get_new(&dev->idr, watch, &watch->wd);
				358	} while (ret == -EAGAIN);
				359
				360	return ret;
				361	}
				362
				363	/*
				364	* find_inode - resolve a user-given path to a specific inode and return a nd
				365	*/
				366	static int find_inode(const char __user dirname, struct nameidata nd)
				367	{
				368	int error;
				369
				370	error = __user_walk(dirname, LOOKUP_FOLLOW, nd);
				371	if (error)
				372	return error;
				373	/* you can only watch an inode if you have read permissions on it */
				374	error = permission(nd->dentry->d_inode, MAY_READ, NULL);
				375	if (error)
				376	path_release (nd);
				377	return error;
				378	}
				379
				380	/*
				381	* create_watch - creates a watch on the given device.
				382	*
				383	* Callers must hold dev->sem. Calls inotify_dev_get_wd() so may sleep.
				384	* Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
				385	*/
				386	static struct inotify_watch create_watch(struct inotify_device dev,
				387	u32 mask, struct inode *inode)
				388	{
				389	struct inotify_watch *watch;
				390	int ret;
				391
				392	if (atomic_read(&dev->user->inotify_watches) >= inotify_max_user_watches)
				393	return ERR_PTR(-ENOSPC);
				394
				395	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
				396	if (unlikely(!watch))
				397	return ERR_PTR(-ENOMEM);
				398
				399	ret = inotify_dev_get_wd(dev, watch);
				400	if (unlikely(ret)) {
				401	kmem_cache_free(watch_cachep, watch);
				402	return ERR_PTR(ret);
				403	}
				404
				405	watch->mask = mask;
				406	atomic_set(&watch->count, 0);
				407	INIT_LIST_HEAD(&watch->d_list);
				408	INIT_LIST_HEAD(&watch->i_list);
				409
				410	/* save a reference to device and bump the count to make it official */
				411	get_inotify_dev(dev);
				412	watch->dev = dev;
				413
				414	/*
				415	* Save a reference to the inode and bump the ref count to make it
				416	* official. We hold a reference to nameidata, which makes this safe.
				417	*/
				418	watch->inode = igrab(inode);
				419
				420	/* bump our own count, corresponding to our entry in dev->watches */
				421	get_inotify_watch(watch);
				422
				423	atomic_inc(&dev->user->inotify_watches);
				424
				425	return watch;
				426	}
				427
				428	/*
				429	* inotify_find_dev - find the watch associated with the given inode and dev
				430	*
				431	* Callers must hold inode->inotify_sem.
				432	*/
				433	static struct inotify_watch inode_find_dev(struct inode inode,
				434	struct inotify_device *dev)
				435	{
				436	struct inotify_watch *watch;
				437
				438	list_for_each_entry(watch, &inode->inotify_watches, i_list) {
				439	if (watch->dev == dev)
				440	return watch;
				441	}
				442
				443	return NULL;
				444	}
				445
				446	/*
				447	* remove_watch_no_event - remove_watch() without the IN_IGNORED event.
				448	*/
				449	static void remove_watch_no_event(struct inotify_watch *watch,
				450	struct inotify_device *dev)
				451	{
				452	list_del(&watch->i_list);
				453	list_del(&watch->d_list);
				454
				455	atomic_dec(&dev->user->inotify_watches);
				456	idr_remove(&dev->idr, watch->wd);
				457	put_inotify_watch(watch);
				458	}
				459
				460	/*
				461	* remove_watch - Remove a watch from both the device and the inode. Sends
				462	* the IN_IGNORED event to the given device signifying that the inode is no
				463	* longer watched.
				464	*
				465	* Callers must hold both inode->inotify_sem and dev->sem. We drop a
				466	* reference to the inode before returning.
				467	*
				468	* The inode is not iput() so as to remain atomic. If the inode needs to be
				469	* iput(), the call returns one. Otherwise, it returns zero.
				470	*/
				471	static void remove_watch(struct inotify_watch watch,struct inotify_device dev)
				472	{
				473	inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
				474	remove_watch_no_event(watch, dev);
				475	}
				476
				477	/*
				478	* inotify_inode_watched - returns nonzero if there are watches on this inode
				479	* and zero otherwise. We call this lockless, we do not care if we race.
				480	*/
				481	static inline int inotify_inode_watched(struct inode *inode)
				482	{
				483	return !list_empty(&inode->inotify_watches);
				484	}
				485
				486	/* Kernel API */
				487
				488	/**
				489	* inotify_inode_queue_event - queue an event to all watches on this inode
				490	* @inode: inode event is originating from
				491	* @mask: event mask describing this event
				492	* @cookie: cookie for synchronization, or zero
				493	* @name: filename, if any
				494	*/
				495	void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
				496	const char *name)
				497	{
				498	struct inotify_watch watch, next;
				499
				500	if (!inotify_inode_watched(inode))
				501	return;
				502
				503	down(&inode->inotify_sem);
				504	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
				505	u32 watch_mask = watch->mask;
				506	if (watch_mask & mask) {
				507	struct inotify_device *dev = watch->dev;
				508	get_inotify_watch(watch);
				509	down(&dev->sem);
				510	inotify_dev_queue_event(dev, watch, mask, cookie, name);
				511	if (watch_mask & IN_ONESHOT)
				512	remove_watch_no_event(watch, dev);
				513	up(&dev->sem);
				514	put_inotify_watch(watch);
				515	}
				516	}
				517	up(&inode->inotify_sem);
				518	}
				519	EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
				520
				521	/**
				522	* inotify_dentry_parent_queue_event - queue an event to a dentry's parent
				523	* @dentry: the dentry in question, we queue against this dentry's parent
				524	* @mask: event mask describing this event
				525	* @cookie: cookie for synchronization, or zero
				526	* @name: filename, if any
				527	*/
				528	void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
				529	u32 cookie, const char *name)
				530	{
				531	struct dentry *parent;
				532	struct inode *inode;
				533
				534	spin_lock(&dentry->d_lock);
				535	parent = dentry->d_parent;
				536	inode = parent->d_inode;
				537
				538	if (inotify_inode_watched(inode)) {
				539	dget(parent);
				540	spin_unlock(&dentry->d_lock);
				541	inotify_inode_queue_event(inode, mask, cookie, name);
				542	dput(parent);
				543	} else
				544	spin_unlock(&dentry->d_lock);
				545	}
				546	EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
				547
				548	/**
				549	* inotify_get_cookie - return a unique cookie for use in synchronizing events.
				550	*/
				551	u32 inotify_get_cookie(void)
				552	{
				553	return atomic_inc_return(&inotify_cookie);
				554	}
				555	EXPORT_SYMBOL_GPL(inotify_get_cookie);
				556
				557	/**
				558	* inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
				559	* @list: list of inodes being unmounted (sb->s_inodes)
				560	*
				561	* Called with inode_lock held, protecting the unmounting super block's list
				562	* of inodes, and with iprune_sem held, keeping shrink_icache_memory() at bay.
				563	* We temporarily drop inode_lock, however, and CAN block.
				564	*/
				565	void inotify_unmount_inodes(struct list_head *list)
				566	{
				567	struct inode inode, next_i, *need_iput = NULL;
				568
				569	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
				570	struct inotify_watch watch, next_w;
				571	struct inode *need_iput_tmp;
				572	struct list_head *watches;
				573
				574	/*
				575	* If i_count is zero, the inode cannot have any watches and
				576	* doing an __iget/iput with MS_ACTIVE clear would actually
				577	* evict all inodes with zero i_count from icache which is
				578	* unnecessarily violent and may in fact be illegal to do.
				579	*/
				580	if (!atomic_read(&inode->i_count))
				581	continue;
				582
				583	/*
				584	* We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
				585	* I_WILL_FREE which is fine because by that point the inode
				586	* cannot have any associated watches.
				587	*/
				588	if (inode->i_state & (I_CLEAR \| I_FREEING \| I_WILL_FREE))
				589	continue;
				590
				591	need_iput_tmp = need_iput;
				592	need_iput = NULL;
				593	/* In case the remove_watch() drops a reference. */
				594	if (inode != need_iput_tmp)
				595	__iget(inode);
				596	else
				597	need_iput_tmp = NULL;
				598	/* In case the dropping of a reference would nuke next_i. */
				599	if ((&next_i->i_sb_list != list) &&
				600	atomic_read(&next_i->i_count) &&
				601	!(next_i->i_state & (I_CLEAR \| I_FREEING \|
				602	I_WILL_FREE))) {
				603	__iget(next_i);
				604	need_iput = next_i;
				605	}
				606
				607	/*
				608	* We can safely drop inode_lock here because we hold
				609	* references on both inode and next_i. Also no new inodes
				610	* will be added since the umount has begun. Finally,
				611	* iprune_sem keeps shrink_icache_memory() away.
				612	*/
				613	spin_unlock(&inode_lock);
				614
				615	if (need_iput_tmp)
				616	iput(need_iput_tmp);
				617
				618	/* for each watch, send IN_UNMOUNT and then remove it */
				619	down(&inode->inotify_sem);
				620	watches = &inode->inotify_watches;
				621	list_for_each_entry_safe(watch, next_w, watches, i_list) {
				622	struct inotify_device *dev = watch->dev;
				623	down(&dev->sem);
				624	inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL);
				625	remove_watch(watch, dev);
				626	up(&dev->sem);
				627	}
				628	up(&inode->inotify_sem);
				629	iput(inode);
				630
				631	spin_lock(&inode_lock);
				632	}
				633	}
				634	EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
				635
				636	/**
				637	* inotify_inode_is_dead - an inode has been deleted, cleanup any watches
				638	* @inode: inode that is about to be removed
				639	*/
				640	void inotify_inode_is_dead(struct inode *inode)
				641	{
				642	struct inotify_watch watch, next;
				643
				644	down(&inode->inotify_sem);
				645	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
				646	struct inotify_device *dev = watch->dev;
				647	down(&dev->sem);
				648	remove_watch(watch, dev);
				649	up(&dev->sem);
				650	}
				651	up(&inode->inotify_sem);
				652	}
				653	EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
				654
				655	/* Device Interface */
				656
				657	static unsigned int inotify_poll(struct file file, poll_table wait)
				658	{
				659	struct inotify_device *dev = file->private_data;
				660	int ret = 0;
				661
				662	poll_wait(file, &dev->wq, wait);
				663	down(&dev->sem);
				664	if (!list_empty(&dev->events))
				665	ret = POLLIN \| POLLRDNORM;
				666	up(&dev->sem);
				667
				668	return ret;
				669	}
				670
				671	static ssize_t inotify_read(struct file file, char __user buf,
				672	size_t count, loff_t *pos)
				673	{
				674	size_t event_size = sizeof (struct inotify_event);
				675	struct inotify_device *dev;
				676	char __user *start;
				677	int ret;
				678	DEFINE_WAIT(wait);
				679
				680	start = buf;
				681	dev = file->private_data;
				682
				683	while (1) {
				684	int events;
				685
				686	prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
				687
				688	down(&dev->sem);
				689	events = !list_empty(&dev->events);
				690	up(&dev->sem);
				691	if (events) {
				692	ret = 0;
				693	break;
				694	}
				695
				696	if (file->f_flags & O_NONBLOCK) {
				697	ret = -EAGAIN;
				698	break;
				699	}
				700
				701	if (signal_pending(current)) {
				702	ret = -EINTR;
				703	break;
				704	}
				705
				706	schedule();
				707	}
				708
				709	finish_wait(&dev->wq, &wait);
				710	if (ret)
				711	return ret;
				712
				713	down(&dev->sem);
				714	while (1) {
				715	struct inotify_kernel_event *kevent;
				716
				717	ret = buf - start;
				718	if (list_empty(&dev->events))
				719	break;
				720
				721	kevent = inotify_dev_get_event(dev);
				722	if (event_size + kevent->event.len > count)
				723	break;
				724
				725	if (copy_to_user(buf, &kevent->event, event_size)) {
				726	ret = -EFAULT;
				727	break;
				728	}
				729	buf += event_size;
				730	count -= event_size;
				731
				732	if (kevent->name) {
				733	if (copy_to_user(buf, kevent->name, kevent->event.len)){
				734	ret = -EFAULT;
				735	break;
				736	}
				737	buf += kevent->event.len;
				738	count -= kevent->event.len;
				739	}
				740
				741	remove_kevent(dev, kevent);
				742	}
				743	up(&dev->sem);
				744
				745	return ret;
				746	}
				747
				748	static int inotify_release(struct inode ignored, struct file file)
				749	{
				750	struct inotify_device *dev = file->private_data;
				751
				752	/*
				753	* Destroy all of the watches on this device. Unfortunately, not very
				754	* pretty. We cannot do a simple iteration over the list, because we
				755	* do not know the inode until we iterate to the watch. But we need to
				756	* hold inode->inotify_sem before dev->sem. The following works.
				757	*/
				758	while (1) {
				759	struct inotify_watch *watch;
				760	struct list_head *watches;
				761	struct inode *inode;
				762
				763	down(&dev->sem);
				764	watches = &dev->watches;
				765	if (list_empty(watches)) {
				766	up(&dev->sem);
				767	break;
				768	}
				769	watch = list_entry(watches->next, struct inotify_watch, d_list);
				770	get_inotify_watch(watch);
				771	up(&dev->sem);
				772
				773	inode = watch->inode;
				774	down(&inode->inotify_sem);
				775	down(&dev->sem);
				776	remove_watch_no_event(watch, dev);
				777	up(&dev->sem);
				778	up(&inode->inotify_sem);
				779	put_inotify_watch(watch);
				780	}
				781
				782	/* destroy all of the events on this device */
				783	down(&dev->sem);
				784	while (!list_empty(&dev->events))
				785	inotify_dev_event_dequeue(dev);
				786	up(&dev->sem);
				787
				788	/* free this device: the put matching the get in inotify_open() */
				789	put_inotify_dev(dev);
				790
				791	return 0;
				792	}
				793
				794	/*
				795	* inotify_ignore - handle the INOTIFY_IGNORE ioctl, asking that a given wd be
				796	* removed from the device.
				797	*
				798	* Can sleep.
				799	*/
				800	static int inotify_ignore(struct inotify_device *dev, s32 wd)
				801	{
				802	struct inotify_watch *watch;
				803	struct inode *inode;
				804
				805	down(&dev->sem);
				806	watch = idr_find(&dev->idr, wd);
				807	if (unlikely(!watch)) {
				808	up(&dev->sem);
				809	return -EINVAL;
				810	}
				811	get_inotify_watch(watch);
				812	inode = watch->inode;
				813	up(&dev->sem);
				814
				815	down(&inode->inotify_sem);
				816	down(&dev->sem);
				817
				818	/* make sure that we did not race */
				819	watch = idr_find(&dev->idr, wd);
				820	if (likely(watch))
				821	remove_watch(watch, dev);
				822
				823	up(&dev->sem);
				824	up(&inode->inotify_sem);
				825	put_inotify_watch(watch);
				826
				827	return 0;
				828	}
				829
				830	static long inotify_ioctl(struct file *file, unsigned int cmd,
				831	unsigned long arg)
				832	{
				833	struct inotify_device *dev;
				834	void __user *p;
				835	int ret = -ENOTTY;
				836
				837	dev = file->private_data;
				838	p = (void __user *) arg;
				839
				840	switch (cmd) {
				841	case FIONREAD:
				842	ret = put_user(dev->queue_size, (int __user *) p);
				843	break;
				844	}
				845
				846	return ret;
				847	}
				848
				849	static struct file_operations inotify_fops = {
				850	.poll = inotify_poll,
				851	.read = inotify_read,
				852	.release = inotify_release,
				853	.unlocked_ioctl = inotify_ioctl,
				854	.compat_ioctl = inotify_ioctl,
				855	};
				856
				857	asmlinkage long sys_inotify_init(void)
				858	{
				859	struct inotify_device *dev;
				860	struct user_struct *user;
				861	int ret = -ENOTTY;
				862	int fd;
				863	struct file *filp;
				864
				865	fd = get_unused_fd();
				866	if (fd < 0) {
				867	ret = fd;
				868	goto out;
				869	}
				870
				871	filp = get_empty_filp();
				872	if (!filp) {
				873	put_unused_fd(fd);
				874	ret = -ENFILE;
				875	goto out;
				876	}
				877	filp->f_op = &inotify_fops;
				878	filp->f_vfsmnt = mntget(inotify_mnt);
				879	filp->f_dentry = dget(inotify_mnt->mnt_root);
				880	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
				881	filp->f_mode = FMODE_READ;
				882	filp->f_flags = O_RDONLY;
				883
				884	user = get_uid(current->user);
				885
Robert Love	0399cb0	2005-07-13 12:38:18 -0400	[diff] [blame^]	886	if (unlikely(atomic_read(&user->inotify_devs) >= inotify_max_user_instances)) {
Robert Love	0eeca28	2005-07-12 17:06:03 -0400	[diff] [blame]	887	ret = -EMFILE;
				888	goto out_err;
				889	}
				890
				891	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
				892	if (unlikely(!dev)) {
				893	ret = -ENOMEM;
				894	goto out_err;
				895	}
				896
				897	idr_init(&dev->idr);
				898	INIT_LIST_HEAD(&dev->events);
				899	INIT_LIST_HEAD(&dev->watches);
				900	init_waitqueue_head(&dev->wq);
				901	sema_init(&dev->sem, 1);
				902	dev->event_count = 0;
				903	dev->queue_size = 0;
				904	dev->max_events = inotify_max_queued_events;
				905	dev->user = user;
				906	atomic_set(&dev->count, 0);
				907
				908	get_inotify_dev(dev);
				909	atomic_inc(&user->inotify_devs);
				910
				911	filp->private_data = dev;
				912	fd_install (fd, filp);
				913	return fd;
				914	out_err:
				915	put_unused_fd (fd);
				916	put_filp (filp);
				917	free_uid(user);
				918	out:
				919	return ret;
				920	}
				921
				922	asmlinkage long sys_inotify_add_watch(int fd, const char *path, u32 mask)
				923	{
				924	struct inotify_watch watch, old;
				925	struct inode *inode;
				926	struct inotify_device *dev;
				927	struct nameidata nd;
				928	struct file *filp;
				929	int ret;
				930
				931	filp = fget(fd);
				932	if (!filp)
				933	return -EBADF;
				934
				935	dev = filp->private_data;
				936
				937	ret = find_inode ((const char __user*)path, &nd);
				938	if (ret)
				939	goto fput_and_out;
				940
				941	/* Held in place by reference in nd */
				942	inode = nd.dentry->d_inode;
				943
				944	down(&inode->inotify_sem);
				945	down(&dev->sem);
				946
				947	/* don't let user-space set invalid bits: we don't want flags set */
				948	mask &= IN_ALL_EVENTS;
				949	if (!mask) {
				950	ret = -EINVAL;
				951	goto out;
				952	}
				953
				954	/*
				955	* Handle the case of re-adding a watch on an (inode,dev) pair that we
				956	* are already watching. We just update the mask and return its wd.
				957	*/
				958	old = inode_find_dev(inode, dev);
				959	if (unlikely(old)) {
				960	old->mask = mask;
				961	ret = old->wd;
				962	goto out;
				963	}
				964
				965	watch = create_watch(dev, mask, inode);
				966	if (unlikely(IS_ERR(watch))) {
				967	ret = PTR_ERR(watch);
				968	goto out;
				969	}
				970
				971	/* Add the watch to the device's and the inode's list */
				972	list_add(&watch->d_list, &dev->watches);
				973	list_add(&watch->i_list, &inode->inotify_watches);
				974	ret = watch->wd;
				975	out:
				976	path_release (&nd);
				977	up(&dev->sem);
				978	up(&inode->inotify_sem);
				979	fput_and_out:
				980	fput(filp);
				981	return ret;
				982	}
				983
				984	asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
				985	{
				986	struct file *filp;
				987	struct inotify_device *dev;
				988	int ret;
				989
				990	filp = fget(fd);
				991	if (!filp)
				992	return -EBADF;
				993	dev = filp->private_data;
				994	ret = inotify_ignore (dev, wd);
				995	fput(filp);
				996	return ret;
				997	}
				998
				999	static struct super_block *
				1000	inotify_get_sb(struct file_system_type *fs_type, int flags,
				1001	const char dev_name, void data)
				1002	{
				1003	return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
				1004	}
				1005
				1006	static struct file_system_type inotify_fs_type = {
				1007	.name = "inotifyfs",
				1008	.get_sb = inotify_get_sb,
				1009	.kill_sb = kill_anon_super,
				1010	};
				1011
				1012	/*
				1013	* inotify_init - Our initialization function. Note that we cannnot return
				1014	* error because we have compiled-in VFS hooks. So an (unlikely) failure here
				1015	* must result in panic().
				1016	*/
				1017	static int __init inotify_init(void)
				1018	{
				1019	register_filesystem(&inotify_fs_type);
				1020	inotify_mnt = kern_mount(&inotify_fs_type);
				1021
				1022	inotify_max_queued_events = 8192;
Robert Love	0399cb0	2005-07-13 12:38:18 -0400	[diff] [blame^]	1023	inotify_max_user_instances = 8;
Robert Love	0eeca28	2005-07-12 17:06:03 -0400	[diff] [blame]	1024	inotify_max_user_watches = 8192;
				1025
				1026	atomic_set(&inotify_cookie, 0);
				1027
				1028	watch_cachep = kmem_cache_create("inotify_watch_cache",
				1029	sizeof(struct inotify_watch),
				1030	0, SLAB_PANIC, NULL, NULL);
				1031	event_cachep = kmem_cache_create("inotify_event_cache",
				1032	sizeof(struct inotify_kernel_event),
				1033	0, SLAB_PANIC, NULL, NULL);
				1034
				1035	printk(KERN_INFO "inotify syscall\n");
				1036
				1037	return 0;
				1038	}
				1039
				1040	module_init(inotify_init);