Blame - fs/namei.c - kernel/msm-4.19

blob: fa8df81ce8cadd841e7f73aff5a40aa59dfcf73b [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/fs/namei.c
				3	*
				4	* Copyright (C) 1991, 1992 Linus Torvalds
				5	*/
				6
				7	/*
				8	* Some corrections by tytso.
				9	*/
				10
				11	/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
				12	* lookup logic.
				13	*/
				14	/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
				15	*/
				16
				17	#include <linux/init.h>
				18	#include <linux/module.h>
				19	#include <linux/slab.h>
				20	#include <linux/fs.h>
				21	#include <linux/namei.h>
				22	#include <linux/quotaops.h>
				23	#include <linux/pagemap.h>
				24	#include <linux/dnotify.h>
				25	#include <linux/smp_lock.h>
				26	#include <linux/personality.h>
				27	#include <linux/security.h>
				28	#include <linux/syscalls.h>
				29	#include <linux/mount.h>
				30	#include <linux/audit.h>
				31	#include <asm/namei.h>
				32	#include <asm/uaccess.h>
				33
				34	#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
				35
				36	/* [Feb-1997 T. Schoebel-Theuer]
				37	* Fundamental changes in the pathname lookup mechanisms (namei)
				38	* were necessary because of omirr. The reason is that omirr needs
				39	* to know the _real_ pathname, not the user-supplied one, in case
				40	* of symlinks (and also when transname replacements occur).
				41	*
				42	* The new code replaces the old recursive symlink resolution with
				43	* an iterative one (in case of non-nested symlink chains). It does
				44	* this with calls to <fs>_follow_link().
				45	* As a side effect, dir_namei(), _namei() and follow_link() are now
				46	* replaced with a single function lookup_dentry() that can handle all
				47	* the special cases of the former code.
				48	*
				49	* With the new dcache, the pathname is stored at each inode, at least as
				50	* long as the refcount of the inode is positive. As a side effect, the
				51	* size of the dcache depends on the inode cache and thus is dynamic.
				52	*
				53	* [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
				54	* resolution to correspond with current state of the code.
				55	*
				56	* Note that the symlink resolution is not completely iterative.
				57	* There is still a significant amount of tail- and mid- recursion in
				58	* the algorithm. Also, note that <fs>_readlink() is not used in
				59	* lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
				60	* may return different results than <fs>_follow_link(). Many virtual
				61	* filesystems (including /proc) exhibit this behavior.
				62	*/
				63
				64	/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
				65	* New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
				66	* and the name already exists in form of a symlink, try to create the new
				67	* name indicated by the symlink. The old code always complained that the
				68	* name already exists, due to not following the symlink even if its target
				69	* is nonexistent. The new semantics affects also mknod() and link() when
				70	* the name is a symlink pointing to a non-existent name.
				71	*
				72	* I don't know which semantics is the right one, since I have no access
				73	* to standards. But I found by trial that HP-UX 9.0 has the full "new"
				74	* semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
				75	* "old" one. Personally, I think the new semantics is much more logical.
				76	* Note that "ln old new" where "new" is a symlink pointing to a non-existing
				77	* file does succeed in both HP-UX and SunOs, but not in Solaris
				78	* and in the old Linux semantics.
				79	*/
				80
				81	/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
				82	* semantics. See the comments in "open_namei" and "do_link" below.
				83	*
				84	* [10-Sep-98 Alan Modra] Another symlink change.
				85	*/
				86
				87	/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
				88	* inside the path - always follow.
				89	* in the last component in creation/removal/renaming - never follow.
				90	* if LOOKUP_FOLLOW passed - follow.
				91	* if the pathname has trailing slashes - follow.
				92	* otherwise - don't follow.
				93	* (applied in that order).
				94	*
				95	* [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
				96	* restored for 2.4. This is the last surviving part of old 4.2BSD bug.
				97	* During the 2.4 we need to fix the userland stuff depending on it -
				98	* hopefully we will be able to get rid of that wart in 2.5. So far only
				99	* XEmacs seems to be relying on it...
				100	*/
				101	/*
				102	* [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
				103	* implemented. Let's see if raised priority of ->s_vfs_rename_sem gives
				104	* any extra contention...
				105	*/
				106
				107	/* In order to reduce some races, while at the same time doing additional
				108	* checking and hopefully speeding things up, we copy filenames to the
				109	* kernel data space before using them..
				110	*
				111	* POSIX.1 2.4: an empty pathname is invalid (ENOENT).
				112	* PATH_MAX includes the nul terminator --RR.
				113	*/
				114	static inline int do_getname(const char __user filename, char page)
				115	{
				116	int retval;
				117	unsigned long len = PATH_MAX;
				118
				119	if (!segment_eq(get_fs(), KERNEL_DS)) {
				120	if ((unsigned long) filename >= TASK_SIZE)
				121	return -EFAULT;
				122	if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
				123	len = TASK_SIZE - (unsigned long) filename;
				124	}
				125
				126	retval = strncpy_from_user(page, filename, len);
				127	if (retval > 0) {
				128	if (retval < len)
				129	return 0;
				130	return -ENAMETOOLONG;
				131	} else if (!retval)
				132	retval = -ENOENT;
				133	return retval;
				134	}
				135
				136	char * getname(const char __user * filename)
				137	{
				138	char tmp, result;
				139
				140	result = ERR_PTR(-ENOMEM);
				141	tmp = __getname();
				142	if (tmp) {
				143	int retval = do_getname(filename, tmp);
				144
				145	result = tmp;
				146	if (retval < 0) {
				147	__putname(tmp);
				148	result = ERR_PTR(retval);
				149	}
				150	}
				151	audit_getname(result);
				152	return result;
				153	}
				154
				155	#ifdef CONFIG_AUDITSYSCALL
				156	void putname(const char *name)
				157	{
				158	if (unlikely(current->audit_context))
				159	audit_putname(name);
				160	else
				161	__putname(name);
				162	}
				163	EXPORT_SYMBOL(putname);
				164	#endif
				165
				166
				167	/**
				168	* generic_permission - check for access rights on a Posix-like filesystem
				169	* @inode: inode to check access rights for
				170	* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
				171	* @check_acl: optional callback to check for Posix ACLs
				172	*
				173	* Used to check for read/write/execute permissions on a file.
				174	* We use "fsuid" for this, letting us set arbitrary permissions
				175	* for filesystem access without changing the "normal" uids which
				176	* are used for other things..
				177	*/
				178	int generic_permission(struct inode *inode, int mask,
				179	int (check_acl)(struct inode inode, int mask))
				180	{
				181	umode_t mode = inode->i_mode;
				182
				183	if (current->fsuid == inode->i_uid)
				184	mode >>= 6;
				185	else {
				186	if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
				187	int error = check_acl(inode, mask);
				188	if (error == -EACCES)
				189	goto check_capabilities;
				190	else if (error != -EAGAIN)
				191	return error;
				192	}
				193
				194	if (in_group_p(inode->i_gid))
				195	mode >>= 3;
				196	}
				197
				198	/*
				199	* If the DACs are ok we don't need any capability check.
				200	*/
				201	if (((mode & mask & (MAY_READ\|MAY_WRITE\|MAY_EXEC)) == mask))
				202	return 0;
				203
				204	check_capabilities:
				205	/*
				206	* Read/write DACs are always overridable.
				207	* Executable DACs are overridable if at least one exec bit is set.
				208	*/
				209	if (!(mask & MAY_EXEC) \|\|
				210	(inode->i_mode & S_IXUGO) \|\| S_ISDIR(inode->i_mode))
				211	if (capable(CAP_DAC_OVERRIDE))
				212	return 0;
				213
				214	/*
				215	* Searching includes executable on directories, else just read.
				216	*/
				217	if (mask == MAY_READ \|\| (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
				218	if (capable(CAP_DAC_READ_SEARCH))
				219	return 0;
				220
				221	return -EACCES;
				222	}
				223
				224	int permission(struct inode inode, int mask, struct nameidata nd)
				225	{
				226	int retval, submask;
				227
				228	if (mask & MAY_WRITE) {
				229	umode_t mode = inode->i_mode;
				230
				231	/*
				232	* Nobody gets write access to a read-only fs.
				233	*/
				234	if (IS_RDONLY(inode) &&
				235	(S_ISREG(mode) \|\| S_ISDIR(mode) \|\| S_ISLNK(mode)))
				236	return -EROFS;
				237
				238	/*
				239	* Nobody gets write access to an immutable file.
				240	*/
				241	if (IS_IMMUTABLE(inode))
				242	return -EACCES;
				243	}
				244
				245
				246	/* Ordinary permission routines do not understand MAY_APPEND. */
				247	submask = mask & ~MAY_APPEND;
				248	if (inode->i_op && inode->i_op->permission)
				249	retval = inode->i_op->permission(inode, submask, nd);
				250	else
				251	retval = generic_permission(inode, submask, NULL);
				252	if (retval)
				253	return retval;
				254
				255	return security_inode_permission(inode, mask, nd);
				256	}
				257
				258	/*
				259	* get_write_access() gets write permission for a file.
				260	* put_write_access() releases this write permission.
				261	* This is used for regular files.
				262	* We cannot support write (and maybe mmap read-write shared) accesses and
				263	* MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
				264	* can have the following values:
				265	* 0: no writers, no VM_DENYWRITE mappings
				266	* < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
				267	* > 0: (i_writecount) users are writing to the file.
				268	*
				269	* Normally we operate on that counter with atomic_{inc,dec} and it's safe
				270	* except for the cases where we don't hold i_writecount yet. Then we need to
				271	* use {get,deny}_write_access() - these functions check the sign and refuse
				272	* to do the change if sign is wrong. Exclusion between them is provided by
				273	* the inode->i_lock spinlock.
				274	*/
				275
				276	int get_write_access(struct inode * inode)
				277	{
				278	spin_lock(&inode->i_lock);
				279	if (atomic_read(&inode->i_writecount) < 0) {
				280	spin_unlock(&inode->i_lock);
				281	return -ETXTBSY;
				282	}
				283	atomic_inc(&inode->i_writecount);
				284	spin_unlock(&inode->i_lock);
				285
				286	return 0;
				287	}
				288
				289	int deny_write_access(struct file * file)
				290	{
				291	struct inode *inode = file->f_dentry->d_inode;
				292
				293	spin_lock(&inode->i_lock);
				294	if (atomic_read(&inode->i_writecount) > 0) {
				295	spin_unlock(&inode->i_lock);
				296	return -ETXTBSY;
				297	}
				298	atomic_dec(&inode->i_writecount);
				299	spin_unlock(&inode->i_lock);
				300
				301	return 0;
				302	}
				303
				304	void path_release(struct nameidata *nd)
				305	{
				306	dput(nd->dentry);
				307	mntput(nd->mnt);
				308	}
				309
				310	/*
				311	* umount() mustn't call path_release()/mntput() as that would clear
				312	* mnt_expiry_mark
				313	*/
				314	void path_release_on_umount(struct nameidata *nd)
				315	{
				316	dput(nd->dentry);
				317	_mntput(nd->mnt);
				318	}
				319
				320	/*
				321	* Internal lookup() using the new generic dcache.
				322	* SMP-safe
				323	*/
				324	static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
				325	{
				326	struct dentry * dentry = __d_lookup(parent, name);
				327
				328	/* lockess __d_lookup may fail due to concurrent d_move()
				329	* in some unrelated directory, so try with d_lookup
				330	*/
				331	if (!dentry)
				332	dentry = d_lookup(parent, name);
				333
				334	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
				335	if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) {
				336	dput(dentry);
				337	dentry = NULL;
				338	}
				339	}
				340	return dentry;
				341	}
				342
				343	/*
				344	* Short-cut version of permission(), for calling by
				345	* path_walk(), when dcache lock is held. Combines parts
				346	* of permission() and generic_permission(), and tests ONLY for
				347	* MAY_EXEC permission.
				348	*
				349	* If appropriate, check DAC only. If not appropriate, or
				350	* short-cut DAC fails, then call permission() to do more
				351	* complete permission check.
				352	*/
				353	static inline int exec_permission_lite(struct inode *inode,
				354	struct nameidata *nd)
				355	{
				356	umode_t mode = inode->i_mode;
				357
				358	if (inode->i_op && inode->i_op->permission)
				359	return -EAGAIN;
				360
				361	if (current->fsuid == inode->i_uid)
				362	mode >>= 6;
				363	else if (in_group_p(inode->i_gid))
				364	mode >>= 3;
				365
				366	if (mode & MAY_EXEC)
				367	goto ok;
				368
				369	if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
				370	goto ok;
				371
				372	if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
				373	goto ok;
				374
				375	if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
				376	goto ok;
				377
				378	return -EACCES;
				379	ok:
				380	return security_inode_permission(inode, MAY_EXEC, nd);
				381	}
				382
				383	/*
				384	* This is called when everything else fails, and we actually have
				385	* to go to the low-level filesystem to find out what we should do..
				386	*
				387	* We get the directory semaphore, and after getting that we also
				388	* make sure that nobody added the entry to the dcache in the meantime..
				389	* SMP-safe
				390	*/
				391	static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
				392	{
				393	struct dentry * result;
				394	struct inode *dir = parent->d_inode;
				395
				396	down(&dir->i_sem);
				397	/*
				398	* First re-do the cached lookup just in case it was created
				399	* while we waited for the directory semaphore..
				400	*
				401	* FIXME! This could use version numbering or similar to
				402	* avoid unnecessary cache lookups.
				403	*
				404	* The "dcache_lock" is purely to protect the RCU list walker
				405	* from concurrent renames at this point (we mustn't get false
				406	* negatives from the RCU list walk here, unlike the optimistic
				407	* fast walk).
				408	*
				409	* so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
				410	*/
				411	result = d_lookup(parent, name);
				412	if (!result) {
				413	struct dentry * dentry = d_alloc(parent, name);
				414	result = ERR_PTR(-ENOMEM);
				415	if (dentry) {
				416	result = dir->i_op->lookup(dir, dentry, nd);
				417	if (result)
				418	dput(dentry);
				419	else
				420	result = dentry;
				421	}
				422	up(&dir->i_sem);
				423	return result;
				424	}
				425
				426	/*
				427	* Uhhuh! Nasty case: the cache was re-populated while
				428	* we waited on the semaphore. Need to revalidate.
				429	*/
				430	up(&dir->i_sem);
				431	if (result->d_op && result->d_op->d_revalidate) {
				432	if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
				433	dput(result);
				434	result = ERR_PTR(-ENOENT);
				435	}
				436	}
				437	return result;
				438	}
				439
				440	static int __emul_lookup_dentry(const char , struct nameidata );
				441
				442	/* SMP-safe */
				443	static inline int
				444	walk_init_root(const char name, struct nameidata nd)
				445	{
				446	read_lock(&current->fs->lock);
				447	if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
				448	nd->mnt = mntget(current->fs->altrootmnt);
				449	nd->dentry = dget(current->fs->altroot);
				450	read_unlock(&current->fs->lock);
				451	if (__emul_lookup_dentry(name,nd))
				452	return 0;
				453	read_lock(&current->fs->lock);
				454	}
				455	nd->mnt = mntget(current->fs->rootmnt);
				456	nd->dentry = dget(current->fs->root);
				457	read_unlock(&current->fs->lock);
				458	return 1;
				459	}
				460
				461	static inline int __vfs_follow_link(struct nameidata nd, const char link)
				462	{
				463	int res = 0;
				464	char *name;
				465	if (IS_ERR(link))
				466	goto fail;
				467
				468	if (*link == '/') {
				469	path_release(nd);
				470	if (!walk_init_root(link, nd))
				471	/* weird __emul_prefix() stuff did it */
				472	goto out;
				473	}
				474	res = link_path_walk(link, nd);
				475	out:
				476	if (nd->depth \|\| res \|\| nd->last_type!=LAST_NORM)
				477	return res;
				478	/*
				479	* If it is an iterative symlinks resolution in open_namei() we
				480	* have to copy the last component. And all that crap because of
				481	* bloody create() on broken symlinks. Furrfu...
				482	*/
				483	name = __getname();
				484	if (unlikely(!name)) {
				485	path_release(nd);
				486	return -ENOMEM;
				487	}
				488	strcpy(name, nd->last.name);
				489	nd->last.name = name;
				490	return 0;
				491	fail:
				492	path_release(nd);
				493	return PTR_ERR(link);
				494	}
				495
/*
 * A (mount, dentry) pair, used by the lookup helpers in this file to
 * carry the result of a single lookup step.
 */
struct path {
	struct vfsmount *mnt;
	struct dentry *dentry;
};
				500
Al Viro	cd4e91d	2005-06-06 13:36:03 -0700	[diff] [blame]	501	static inline int __do_follow_link(struct path path, struct nameidata nd)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	502	{
				503	int error;
Al Viro	cd4e91d	2005-06-06 13:36:03 -0700	[diff] [blame]	504	struct dentry *dentry = path->dentry;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	505
Al Viro	d671a1c	2005-06-06 13:36:14 -0700	[diff] [blame]	506	touch_atime(path->mnt, dentry);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	507	nd_set_link(nd, NULL);
Al Viro	cd4e91d	2005-06-06 13:36:03 -0700	[diff] [blame]	508
Al Viro	39ca6d4	2005-06-06 13:36:12 -0700	[diff] [blame]	509	if (path->mnt == nd->mnt)
				510	mntget(path->mnt);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	511	error = dentry->d_inode->i_op->follow_link(dentry, nd);
				512	if (!error) {
				513	char *s = nd_get_link(nd);
				514	if (s)
				515	error = __vfs_follow_link(nd, s);
				516	if (dentry->d_inode->i_op->put_link)
				517	dentry->d_inode->i_op->put_link(dentry, nd);
				518	}
Al Viro	cd4e91d	2005-06-06 13:36:03 -0700	[diff] [blame]	519	dput(dentry);
				520	mntput(path->mnt);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	521
				522	return error;
				523	}
				524
				525	/*
				526	* This limits recursive symlink follows to 8, while
				527	* limiting consecutive symlinks to 40.
				528	*
				529	* Without that kind of total limit, nasty chains of consecutive
				530	* symlinks can cause almost arbitrarily long lookups.
				531	*/
Al Viro	90ebe56	2005-06-06 13:35:58 -0700	[diff] [blame]	532	static inline int do_follow_link(struct path path, struct nameidata nd)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	533	{
				534	int err = -ELOOP;
				535	if (current->link_count >= MAX_NESTED_LINKS)
				536	goto loop;
				537	if (current->total_link_count >= 40)
				538	goto loop;
				539	BUG_ON(nd->depth >= MAX_NESTED_LINKS);
				540	cond_resched();
Al Viro	90ebe56	2005-06-06 13:35:58 -0700	[diff] [blame]	541	err = security_inode_follow_link(path->dentry, nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	542	if (err)
				543	goto loop;
				544	current->link_count++;
				545	current->total_link_count++;
				546	nd->depth++;
Al Viro	cd4e91d	2005-06-06 13:36:03 -0700	[diff] [blame]	547	err = __do_follow_link(path, nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	548	current->link_count--;
				549	nd->depth--;
				550	return err;
				551	loop:
Al Viro	5f92b3bc	2005-06-06 13:35:59 -0700	[diff] [blame]	552	dput(path->dentry);
Al Viro	d9d29a2	2005-06-06 13:36:11 -0700	[diff] [blame]	553	if (path->mnt != nd->mnt)
				554	mntput(path->mnt);
Al Viro	839d9f9	2005-06-06 13:36:02 -0700	[diff] [blame]	555	path_release(nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	556	return err;
				557	}
				558
				559	int follow_up(struct vfsmount mnt, struct dentry dentry)
				560	{
				561	struct vfsmount *parent;
				562	struct dentry *mountpoint;
				563	spin_lock(&vfsmount_lock);
				564	parent=(*mnt)->mnt_parent;
				565	if (parent == *mnt) {
				566	spin_unlock(&vfsmount_lock);
				567	return 0;
				568	}
				569	mntget(parent);
				570	mountpoint=dget((*mnt)->mnt_mountpoint);
				571	spin_unlock(&vfsmount_lock);
				572	dput(*dentry);
				573	*dentry = mountpoint;
				574	mntput(*mnt);
				575	*mnt = parent;
				576	return 1;
				577	}
				578
				579	/* no need for dcache_lock, as serialization is taken care in
				580	* namespace.c
				581	*/
Al Viro	463ffb2	2005-06-06 13:36:05 -0700	[diff] [blame]	582	static int __follow_mount(struct path *path)
				583	{
				584	int res = 0;
				585	while (d_mountpoint(path->dentry)) {
				586	struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
				587	if (!mounted)
				588	break;
				589	dput(path->dentry);
				590	if (res)
				591	mntput(path->mnt);
				592	path->mnt = mounted;
				593	path->dentry = dget(mounted->mnt_root);
				594	res = 1;
				595	}
				596	return res;
				597	}
				598
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	599	static void follow_mount(struct vfsmount mnt, struct dentry dentry)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	600	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	601	while (d_mountpoint(*dentry)) {
				602	struct vfsmount mounted = lookup_mnt(mnt, *dentry);
				603	if (!mounted)
				604	break;
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	605	dput(*dentry);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	606	mntput(*mnt);
				607	*mnt = mounted;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	608	*dentry = dget(mounted->mnt_root);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	609	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	610	}
				611
				612	/* no need for dcache_lock, as serialization is taken care in
				613	* namespace.c
				614	*/
Al Viro	e13b210	2005-06-06 13:36:06 -0700	[diff] [blame]	615	int follow_down(struct vfsmount mnt, struct dentry dentry)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	616	{
				617	struct vfsmount *mounted;
				618
				619	mounted = lookup_mnt(mnt, dentry);
				620	if (mounted) {
Al Viro	e13b210	2005-06-06 13:36:06 -0700	[diff] [blame]	621	dput(*dentry);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	622	mntput(*mnt);
				623	*mnt = mounted;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	624	*dentry = dget(mounted->mnt_root);
				625	return 1;
				626	}
				627	return 0;
				628	}
				629
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	630	static inline void follow_dotdot(struct nameidata *nd)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	631	{
				632	while(1) {
				633	struct vfsmount *parent;
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	634	struct dentry *old = nd->dentry;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	635
				636	read_lock(&current->fs->lock);
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	637	if (nd->dentry == current->fs->root &&
				638	nd->mnt == current->fs->rootmnt) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	639	read_unlock(&current->fs->lock);
				640	break;
				641	}
				642	read_unlock(&current->fs->lock);
				643	spin_lock(&dcache_lock);
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	644	if (nd->dentry != nd->mnt->mnt_root) {
				645	nd->dentry = dget(nd->dentry->d_parent);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	646	spin_unlock(&dcache_lock);
				647	dput(old);
				648	break;
				649	}
				650	spin_unlock(&dcache_lock);
				651	spin_lock(&vfsmount_lock);
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	652	parent = nd->mnt->mnt_parent;
				653	if (parent == nd->mnt) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	654	spin_unlock(&vfsmount_lock);
				655	break;
				656	}
				657	mntget(parent);
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	658	nd->dentry = dget(nd->mnt->mnt_mountpoint);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	659	spin_unlock(&vfsmount_lock);
				660	dput(old);
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	661	mntput(nd->mnt);
				662	nd->mnt = parent;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	663	}
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	664	follow_mount(&nd->mnt, &nd->dentry);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	665	}
				666
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	667	/*
				668	* It's more convoluted than I'd like it to be, but... it's still fairly
				669	* small and for now I'd prefer to have fast path as straight as possible.
				670	* It _is_ time-critical.
				671	*/
				672	static int do_lookup(struct nameidata nd, struct qstr name,
				673	struct path *path)
				674	{
				675	struct vfsmount *mnt = nd->mnt;
				676	struct dentry *dentry = __d_lookup(nd->dentry, name);
				677
				678	if (!dentry)
				679	goto need_lookup;
				680	if (dentry->d_op && dentry->d_op->d_revalidate)
				681	goto need_revalidate;
				682	done:
				683	path->mnt = mnt;
				684	path->dentry = dentry;
Al Viro	634ee70	2005-06-06 13:36:13 -0700	[diff] [blame]	685	__follow_mount(path);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	686	return 0;
				687
				688	need_lookup:
				689	dentry = real_lookup(nd->dentry, name, nd);
				690	if (IS_ERR(dentry))
				691	goto fail;
				692	goto done;
				693
				694	need_revalidate:
				695	if (dentry->d_op->d_revalidate(dentry, nd))
				696	goto done;
				697	if (d_invalidate(dentry))
				698	goto done;
				699	dput(dentry);
				700	goto need_lookup;
				701
				702	fail:
				703	return PTR_ERR(dentry);
				704	}
				705
				706	/*
				707	* Name resolution.
Prasanna Meda	ea3834d	2005-04-29 16:00:17 +0100	[diff] [blame]	708	* This is the basic name resolution function, turning a pathname into
				709	* the final dentry. We expect 'base' to be positive and a directory.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	710	*
Prasanna Meda	ea3834d	2005-04-29 16:00:17 +0100	[diff] [blame]	711	* Returns 0 and nd will have valid dentry and mnt on success.
				712	* Returns error and drops reference to input namei data on failure.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	713	*/
				714	static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
				715	{
				716	struct path next;
				717	struct inode *inode;
				718	int err;
				719	unsigned int lookup_flags = nd->flags;
				720
				721	while (*name=='/')
				722	name++;
				723	if (!*name)
				724	goto return_reval;
				725
				726	inode = nd->dentry->d_inode;
				727	if (nd->depth)
				728	lookup_flags = LOOKUP_FOLLOW;
				729
				730	/* At this point we know we have a real path component. */
				731	for(;;) {
				732	unsigned long hash;
				733	struct qstr this;
				734	unsigned int c;
				735
				736	err = exec_permission_lite(inode, nd);
				737	if (err == -EAGAIN) {
				738	err = permission(inode, MAY_EXEC, nd);
				739	}
				740	if (err)
				741	break;
				742
				743	this.name = name;
				744	c = (const unsigned char )name;
				745
				746	hash = init_name_hash();
				747	do {
				748	name++;
				749	hash = partial_name_hash(c, hash);
				750	c = (const unsigned char )name;
				751	} while (c && (c != '/'));
				752	this.len = name - (const char *) this.name;
				753	this.hash = end_name_hash(hash);
				754
				755	/* remove trailing slashes? */
				756	if (!c)
				757	goto last_component;
				758	while (*++name == '/');
				759	if (!*name)
				760	goto last_with_slashes;
				761
				762	/*
				763	* "." and ".." are special - ".." especially so because it has
				764	* to be able to know about the current root directory and
				765	* parent relationships.
				766	*/
				767	if (this.name[0] == '.') switch (this.len) {
				768	default:
				769	break;
				770	case 2:
				771	if (this.name[1] != '.')
				772	break;
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	773	follow_dotdot(nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	774	inode = nd->dentry->d_inode;
				775	/* fallthrough */
				776	case 1:
				777	continue;
				778	}
				779	/*
				780	* See if the low-level filesystem might want
				781	* to use its own hash..
				782	*/
				783	if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
				784	err = nd->dentry->d_op->d_hash(nd->dentry, &this);
				785	if (err < 0)
				786	break;
				787	}
				788	nd->flags \|= LOOKUP_CONTINUE;
				789	/* This does the actual lookups.. */
				790	err = do_lookup(nd, &this, &next);
				791	if (err)
				792	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	793
				794	err = -ENOENT;
				795	inode = next.dentry->d_inode;
				796	if (!inode)
				797	goto out_dput;
				798	err = -ENOTDIR;
				799	if (!inode->i_op)
				800	goto out_dput;
				801
				802	if (inode->i_op->follow_link) {
Al Viro	90ebe56	2005-06-06 13:35:58 -0700	[diff] [blame]	803	err = do_follow_link(&next, nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	804	if (err)
				805	goto return_err;
				806	err = -ENOENT;
				807	inode = nd->dentry->d_inode;
				808	if (!inode)
				809	break;
				810	err = -ENOTDIR;
				811	if (!inode->i_op)
				812	break;
				813	} else {
				814	dput(nd->dentry);
Al Viro	2f12dbf	2005-06-06 13:36:07 -0700	[diff] [blame]	815	if (nd->mnt != next.mnt)
				816	mntput(nd->mnt);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	817	nd->mnt = next.mnt;
				818	nd->dentry = next.dentry;
				819	}
				820	err = -ENOTDIR;
				821	if (!inode->i_op->lookup)
				822	break;
				823	continue;
				824	/* here ends the main loop */
				825
				826	last_with_slashes:
				827	lookup_flags \|= LOOKUP_FOLLOW \| LOOKUP_DIRECTORY;
				828	last_component:
				829	nd->flags &= ~LOOKUP_CONTINUE;
				830	if (lookup_flags & LOOKUP_PARENT)
				831	goto lookup_parent;
				832	if (this.name[0] == '.') switch (this.len) {
				833	default:
				834	break;
				835	case 2:
				836	if (this.name[1] != '.')
				837	break;
Al Viro	58c465e	2005-06-06 13:36:13 -0700	[diff] [blame]	838	follow_dotdot(nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	839	inode = nd->dentry->d_inode;
				840	/* fallthrough */
				841	case 1:
				842	goto return_reval;
				843	}
				844	if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
				845	err = nd->dentry->d_op->d_hash(nd->dentry, &this);
				846	if (err < 0)
				847	break;
				848	}
				849	err = do_lookup(nd, &this, &next);
				850	if (err)
				851	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	852	inode = next.dentry->d_inode;
				853	if ((lookup_flags & LOOKUP_FOLLOW)
				854	&& inode && inode->i_op && inode->i_op->follow_link) {
Al Viro	90ebe56	2005-06-06 13:35:58 -0700	[diff] [blame]	855	err = do_follow_link(&next, nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	856	if (err)
				857	goto return_err;
				858	inode = nd->dentry->d_inode;
				859	} else {
				860	dput(nd->dentry);
Al Viro	2f12dbf	2005-06-06 13:36:07 -0700	[diff] [blame]	861	if (nd->mnt != next.mnt)
				862	mntput(nd->mnt);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	863	nd->mnt = next.mnt;
				864	nd->dentry = next.dentry;
				865	}
				866	err = -ENOENT;
				867	if (!inode)
				868	break;
				869	if (lookup_flags & LOOKUP_DIRECTORY) {
				870	err = -ENOTDIR;
				871	if (!inode->i_op \|\| !inode->i_op->lookup)
				872	break;
				873	}
				874	goto return_base;
				875	lookup_parent:
				876	nd->last = this;
				877	nd->last_type = LAST_NORM;
				878	if (this.name[0] != '.')
				879	goto return_base;
				880	if (this.len == 1)
				881	nd->last_type = LAST_DOT;
				882	else if (this.len == 2 && this.name[1] == '.')
				883	nd->last_type = LAST_DOTDOT;
				884	else
				885	goto return_base;
				886	return_reval:
				887	/*
				888	* We bypassed the ordinary revalidation routines.
				889	* We may need to check the cached dentry for staleness.
				890	*/
				891	if (nd->dentry && nd->dentry->d_sb &&
				892	(nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
				893	err = -ESTALE;
				894	/* Note: we do not d_invalidate() */
				895	if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
				896	break;
				897	}
				898	return_base:
				899	return 0;
				900	out_dput:
				901	dput(next.dentry);
Al Viro	2f12dbf	2005-06-06 13:36:07 -0700	[diff] [blame]	902	if (nd->mnt != next.mnt)
Al Viro	d9d29a2	2005-06-06 13:36:11 -0700	[diff] [blame]	903	mntput(next.mnt);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	904	break;
				905	}
				906	path_release(nd);
				907	return_err:
				908	return err;
				909	}
				910
				911	/*
				912	* Wrapper to retry pathname resolution whenever the underlying
				913	* file system returns an ESTALE.
				914	*
				915	* Retry the whole path once, forcing real lookup requests
				916	* instead of relying on the dcache.
				917	*/
				918	int fastcall link_path_walk(const char name, struct nameidata nd)
				919	{
				920	struct nameidata save = *nd;
				921	int result;
				922
				923	/* make sure the stuff we saved doesn't go away */
				924	dget(save.dentry);
				925	mntget(save.mnt);
				926
				927	result = __link_path_walk(name, nd);
				928	if (result == -ESTALE) {
				929	*nd = save;
				930	dget(nd->dentry);
				931	mntget(nd->mnt);
				932	nd->flags \|= LOOKUP_REVAL;
				933	result = __link_path_walk(name, nd);
				934	}
				935
				936	dput(save.dentry);
				937	mntput(save.mnt);
				938
				939	return result;
				940	}
				941
				942	int fastcall path_walk(const char * name, struct nameidata *nd)
				943	{
				944	current->total_link_count = 0;
				945	return link_path_walk(name, nd);
				946	}
				947
Prasanna Meda	ea3834d	2005-04-29 16:00:17 +0100	[diff] [blame]	948	/*
				949	* SMP-safe: Returns 1 and nd will have valid dentry and mnt, if
				950	* everything is done. Returns 0 and drops input nd, if lookup failed;
				951	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	952	static int __emul_lookup_dentry(const char name, struct nameidata nd)
				953	{
				954	if (path_walk(name, nd))
				955	return 0; /* something went wrong... */
				956
				957	if (!nd->dentry->d_inode \|\| S_ISDIR(nd->dentry->d_inode->i_mode)) {
				958	struct dentry *old_dentry = nd->dentry;
				959	struct vfsmount *old_mnt = nd->mnt;
				960	struct qstr last = nd->last;
				961	int last_type = nd->last_type;
				962	/*
				963	* NAME was not found in alternate root or it's a directory. Try to find
				964	* it in the normal root:
				965	*/
				966	nd->last_type = LAST_ROOT;
				967	read_lock(&current->fs->lock);
				968	nd->mnt = mntget(current->fs->rootmnt);
				969	nd->dentry = dget(current->fs->root);
				970	read_unlock(&current->fs->lock);
				971	if (path_walk(name, nd) == 0) {
				972	if (nd->dentry->d_inode) {
				973	dput(old_dentry);
				974	mntput(old_mnt);
				975	return 1;
				976	}
				977	path_release(nd);
				978	}
				979	nd->dentry = old_dentry;
				980	nd->mnt = old_mnt;
				981	nd->last = last;
				982	nd->last_type = last_type;
				983	}
				984	return 1;
				985	}
				986
				987	void set_fs_altroot(void)
				988	{
				989	char *emul = __emul_prefix();
				990	struct nameidata nd;
				991	struct vfsmount mnt = NULL, oldmnt;
				992	struct dentry dentry = NULL, olddentry;
				993	int err;
				994
				995	if (!emul)
				996	goto set_it;
				997	err = path_lookup(emul, LOOKUP_FOLLOW\|LOOKUP_DIRECTORY\|LOOKUP_NOALT, &nd);
				998	if (!err) {
				999	mnt = nd.mnt;
				1000	dentry = nd.dentry;
				1001	}
				1002	set_it:
				1003	write_lock(&current->fs->lock);
				1004	oldmnt = current->fs->altrootmnt;
				1005	olddentry = current->fs->altroot;
				1006	current->fs->altrootmnt = mnt;
				1007	current->fs->altroot = dentry;
				1008	write_unlock(&current->fs->lock);
				1009	if (olddentry) {
				1010	dput(olddentry);
				1011	mntput(oldmnt);
				1012	}
				1013	}
				1014
Prasanna Meda	ea3834d	2005-04-29 16:00:17 +0100	[diff] [blame]	1015	/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1016	int fastcall path_lookup(const char name, unsigned int flags, struct nameidata nd)
				1017	{
Prasanna Meda	ea3834d	2005-04-29 16:00:17 +0100	[diff] [blame]	1018	int retval = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1019
				1020	nd->last_type = LAST_ROOT; /* if there are only slashes... */
				1021	nd->flags = flags;
				1022	nd->depth = 0;
				1023
				1024	read_lock(&current->fs->lock);
				1025	if (*name=='/') {
				1026	if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
				1027	nd->mnt = mntget(current->fs->altrootmnt);
				1028	nd->dentry = dget(current->fs->altroot);
				1029	read_unlock(&current->fs->lock);
				1030	if (__emul_lookup_dentry(name,nd))
Prasanna Meda	ea3834d	2005-04-29 16:00:17 +0100	[diff] [blame]	1031	goto out; /* found in altroot */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1032	read_lock(&current->fs->lock);
				1033	}
				1034	nd->mnt = mntget(current->fs->rootmnt);
				1035	nd->dentry = dget(current->fs->root);
				1036	} else {
				1037	nd->mnt = mntget(current->fs->pwdmnt);
				1038	nd->dentry = dget(current->fs->pwd);
				1039	}
				1040	read_unlock(&current->fs->lock);
				1041	current->total_link_count = 0;
				1042	retval = link_path_walk(name, nd);
Prasanna Meda	ea3834d	2005-04-29 16:00:17 +0100	[diff] [blame]	1043	out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1044	if (unlikely(current->audit_context
				1045	&& nd && nd->dentry && nd->dentry->d_inode))
				1046	audit_inode(name, nd->dentry->d_inode);
				1047	return retval;
				1048	}
				1049
				1050	/*
				1051	* Restricted form of lookup. Doesn't follow links, single-component only,
				1052	* needs parent already locked. Doesn't follow mounts.
				1053	* SMP-safe.
				1054	*/
				1055	static struct dentry * __lookup_hash(struct qstr name, struct dentry base, struct nameidata *nd)
				1056	{
				1057	struct dentry * dentry;
				1058	struct inode *inode;
				1059	int err;
				1060
				1061	inode = base->d_inode;
				1062	err = permission(inode, MAY_EXEC, nd);
				1063	dentry = ERR_PTR(err);
				1064	if (err)
				1065	goto out;
				1066
				1067	/*
				1068	* See if the low-level filesystem might want
				1069	* to use its own hash..
				1070	*/
				1071	if (base->d_op && base->d_op->d_hash) {
				1072	err = base->d_op->d_hash(base, name);
				1073	dentry = ERR_PTR(err);
				1074	if (err < 0)
				1075	goto out;
				1076	}
				1077
				1078	dentry = cached_lookup(base, name, nd);
				1079	if (!dentry) {
				1080	struct dentry *new = d_alloc(base, name);
				1081	dentry = ERR_PTR(-ENOMEM);
				1082	if (!new)
				1083	goto out;
				1084	dentry = inode->i_op->lookup(inode, new, nd);
				1085	if (!dentry)
				1086	dentry = new;
				1087	else
				1088	dput(new);
				1089	}
				1090	out:
				1091	return dentry;
				1092	}
				1093
				1094	struct dentry * lookup_hash(struct qstr name, struct dentry base)
				1095	{
				1096	return __lookup_hash(name, base, NULL);
				1097	}
				1098
				1099	/* SMP-safe */
				1100	struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
				1101	{
				1102	unsigned long hash;
				1103	struct qstr this;
				1104	unsigned int c;
				1105
				1106	this.name = name;
				1107	this.len = len;
				1108	if (!len)
				1109	goto access;
				1110
				1111	hash = init_name_hash();
				1112	while (len--) {
				1113	c = (const unsigned char )name++;
				1114	if (c == '/' \|\| c == '\0')
				1115	goto access;
				1116	hash = partial_name_hash(c, hash);
				1117	}
				1118	this.hash = end_name_hash(hash);
				1119
				1120	return lookup_hash(&this, base);
				1121	access:
				1122	return ERR_PTR(-EACCES);
				1123	}
				1124
				1125	/*
				1126	* namei()
				1127	*
				1128	* is used by most simple commands to get the inode of a specified name.
				1129	* Open, link etc use their own routines, but this is enough for things
				1130	* like 'chmod' etc.
				1131	*
				1132	* namei exists in two versions: namei/lnamei. The only difference is
				1133	* that namei follows links, while lnamei does not.
				1134	* SMP-safe
				1135	*/
				1136	int fastcall __user_walk(const char __user name, unsigned flags, struct nameidata nd)
				1137	{
				1138	char *tmp = getname(name);
				1139	int err = PTR_ERR(tmp);
				1140
				1141	if (!IS_ERR(tmp)) {
				1142	err = path_lookup(tmp, flags, nd);
				1143	putname(tmp);
				1144	}
				1145	return err;
				1146	}
				1147
				1148	/*
				1149	* It's inline, so penalty for filesystems that don't use sticky bit is
				1150	* minimal.
				1151	*/
				1152	static inline int check_sticky(struct inode dir, struct inode inode)
				1153	{
				1154	if (!(dir->i_mode & S_ISVTX))
				1155	return 0;
				1156	if (inode->i_uid == current->fsuid)
				1157	return 0;
				1158	if (dir->i_uid == current->fsuid)
				1159	return 0;
				1160	return !capable(CAP_FOWNER);
				1161	}
				1162
				1163	/*
				1164	* Check whether we can remove a link victim from directory dir, check
				1165	* whether the type of victim is right.
				1166	* 1. We can't do it if dir is read-only (done in permission())
				1167	* 2. We should have write and exec permissions on dir
				1168	* 3. We can't remove anything from append-only dir
				1169	* 4. We can't do anything with immutable dir (done in permission())
				1170	* 5. If the sticky bit on dir is set we should either
				1171	* a. be owner of dir, or
				1172	* b. be owner of victim, or
				1173	* c. have CAP_FOWNER capability
				1174	* 6. If the victim is append-only or immutable we can't do antyhing with
				1175	* links pointing to it.
				1176	* 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
				1177	* 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
				1178	* 9. We can't remove a root or mountpoint.
				1179	* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
				1180	* nfs_async_unlink().
				1181	*/
				1182	static inline int may_delete(struct inode dir,struct dentry victim,int isdir)
				1183	{
				1184	int error;
				1185
				1186	if (!victim->d_inode)
				1187	return -ENOENT;
				1188
				1189	BUG_ON(victim->d_parent->d_inode != dir);
				1190
				1191	error = permission(dir,MAY_WRITE \| MAY_EXEC, NULL);
				1192	if (error)
				1193	return error;
				1194	if (IS_APPEND(dir))
				1195	return -EPERM;
				1196	if (check_sticky(dir, victim->d_inode)\|\|IS_APPEND(victim->d_inode)\|\|
				1197	IS_IMMUTABLE(victim->d_inode))
				1198	return -EPERM;
				1199	if (isdir) {
				1200	if (!S_ISDIR(victim->d_inode->i_mode))
				1201	return -ENOTDIR;
				1202	if (IS_ROOT(victim))
				1203	return -EBUSY;
				1204	} else if (S_ISDIR(victim->d_inode->i_mode))
				1205	return -EISDIR;
				1206	if (IS_DEADDIR(dir))
				1207	return -ENOENT;
				1208	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
				1209	return -EBUSY;
				1210	return 0;
				1211	}
				1212
				1213	/* Check whether we can create an object with dentry child in directory
				1214	* dir.
				1215	* 1. We can't do it if child already exists (open has special treatment for
				1216	* this case, but since we are inlined it's OK)
				1217	* 2. We can't do it if dir is read-only (done in permission())
				1218	* 3. We should have write and exec permissions on dir
				1219	* 4. We can't do it if dir is immutable (done in permission())
				1220	*/
				1221	static inline int may_create(struct inode dir, struct dentry child,
				1222	struct nameidata *nd)
				1223	{
				1224	if (child->d_inode)
				1225	return -EEXIST;
				1226	if (IS_DEADDIR(dir))
				1227	return -ENOENT;
				1228	return permission(dir,MAY_WRITE \| MAY_EXEC, nd);
				1229	}
				1230
				1231	/*
				1232	* Special case: O_CREAT\|O_EXCL implies O_NOFOLLOW for security
				1233	* reasons.
				1234	*
				1235	* O_DIRECTORY translates into forcing a directory lookup.
				1236	*/
				1237	static inline int lookup_flags(unsigned int f)
				1238	{
				1239	unsigned long retval = LOOKUP_FOLLOW;
				1240
				1241	if (f & O_NOFOLLOW)
				1242	retval &= ~LOOKUP_FOLLOW;
				1243
				1244	if ((f & (O_CREAT\|O_EXCL)) == (O_CREAT\|O_EXCL))
				1245	retval &= ~LOOKUP_FOLLOW;
				1246
				1247	if (f & O_DIRECTORY)
				1248	retval \|= LOOKUP_DIRECTORY;
				1249
				1250	return retval;
				1251	}
				1252
				1253	/*
				1254	* p1 and p2 should be directories on the same fs.
				1255	*/
				1256	struct dentry lock_rename(struct dentry p1, struct dentry *p2)
				1257	{
				1258	struct dentry *p;
				1259
				1260	if (p1 == p2) {
				1261	down(&p1->d_inode->i_sem);
				1262	return NULL;
				1263	}
				1264
				1265	down(&p1->d_inode->i_sb->s_vfs_rename_sem);
				1266
				1267	for (p = p1; p->d_parent != p; p = p->d_parent) {
				1268	if (p->d_parent == p2) {
				1269	down(&p2->d_inode->i_sem);
				1270	down(&p1->d_inode->i_sem);
				1271	return p;
				1272	}
				1273	}
				1274
				1275	for (p = p2; p->d_parent != p; p = p->d_parent) {
				1276	if (p->d_parent == p1) {
				1277	down(&p1->d_inode->i_sem);
				1278	down(&p2->d_inode->i_sem);
				1279	return p;
				1280	}
				1281	}
				1282
				1283	down(&p1->d_inode->i_sem);
				1284	down(&p2->d_inode->i_sem);
				1285	return NULL;
				1286	}
				1287
				1288	void unlock_rename(struct dentry p1, struct dentry p2)
				1289	{
				1290	up(&p1->d_inode->i_sem);
				1291	if (p1 != p2) {
				1292	up(&p2->d_inode->i_sem);
				1293	up(&p1->d_inode->i_sb->s_vfs_rename_sem);
				1294	}
				1295	}
				1296
				1297	int vfs_create(struct inode dir, struct dentry dentry, int mode,
				1298	struct nameidata *nd)
				1299	{
				1300	int error = may_create(dir, dentry, nd);
				1301
				1302	if (error)
				1303	return error;
				1304
				1305	if (!dir->i_op \|\| !dir->i_op->create)
				1306	return -EACCES; /* shouldn't it be ENOSYS? */
				1307	mode &= S_IALLUGO;
				1308	mode \|= S_IFREG;
				1309	error = security_inode_create(dir, dentry, mode);
				1310	if (error)
				1311	return error;
				1312	DQUOT_INIT(dir);
				1313	error = dir->i_op->create(dir, dentry, mode, nd);
				1314	if (!error) {
				1315	inode_dir_notify(dir, DN_CREATE);
				1316	security_inode_post_create(dir, dentry, mode);
				1317	}
				1318	return error;
				1319	}
				1320
				1321	int may_open(struct nameidata *nd, int acc_mode, int flag)
				1322	{
				1323	struct dentry *dentry = nd->dentry;
				1324	struct inode *inode = dentry->d_inode;
				1325	int error;
				1326
				1327	if (!inode)
				1328	return -ENOENT;
				1329
				1330	if (S_ISLNK(inode->i_mode))
				1331	return -ELOOP;
				1332
				1333	if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
				1334	return -EISDIR;
				1335
				1336	error = permission(inode, acc_mode, nd);
				1337	if (error)
				1338	return error;
				1339
				1340	/*
				1341	* FIFO's, sockets and device files are special: they don't
				1342	* actually live on the filesystem itself, and as such you
				1343	* can write to them even if the filesystem is read-only.
				1344	*/
				1345	if (S_ISFIFO(inode->i_mode) \|\| S_ISSOCK(inode->i_mode)) {
				1346	flag &= ~O_TRUNC;
				1347	} else if (S_ISBLK(inode->i_mode) \|\| S_ISCHR(inode->i_mode)) {
				1348	if (nd->mnt->mnt_flags & MNT_NODEV)
				1349	return -EACCES;
				1350
				1351	flag &= ~O_TRUNC;
				1352	} else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
				1353	return -EROFS;
				1354	/*
				1355	* An append-only file must be opened in append mode for writing.
				1356	*/
				1357	if (IS_APPEND(inode)) {
				1358	if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
				1359	return -EPERM;
				1360	if (flag & O_TRUNC)
				1361	return -EPERM;
				1362	}
				1363
				1364	/* O_NOATIME can only be set by the owner or superuser */
				1365	if (flag & O_NOATIME)
				1366	if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
				1367	return -EPERM;
				1368
				1369	/*
				1370	* Ensure there are no outstanding leases on the file.
				1371	*/
				1372	error = break_lease(inode, flag);
				1373	if (error)
				1374	return error;
				1375
				1376	if (flag & O_TRUNC) {
				1377	error = get_write_access(inode);
				1378	if (error)
				1379	return error;
				1380
				1381	/*
				1382	* Refuse to truncate files with mandatory locks held on them.
				1383	*/
				1384	error = locks_verify_locked(inode);
				1385	if (!error) {
				1386	DQUOT_INIT(inode);
				1387
				1388	error = do_truncate(dentry, 0);
				1389	}
				1390	put_write_access(inode);
				1391	if (error)
				1392	return error;
				1393	} else
				1394	if (flag & FMODE_WRITE)
				1395	DQUOT_INIT(inode);
				1396
				1397	return 0;
				1398	}
				1399
				1400	/*
				1401	* open_namei()
				1402	*
				1403	* namei for open - this is in fact almost the whole open-routine.
				1404	*
				1405	* Note that the low bits of "flag" aren't the same as in the open
				1406	* system call - they are 00 - no permissions needed
				1407	* 01 - read permission needed
				1408	* 10 - write permission needed
				1409	* 11 - read/write permissions needed
				1410	* which is a lot more logical, and also allows the "no perm" needed
				1411	* for symlinks (where the permissions are checked later).
				1412	* SMP-safe
				1413	*/
				1414	int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
				1415	{
				1416	int acc_mode, error = 0;
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1417	struct path path;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1418	struct dentry *dir;
				1419	int count = 0;
				1420
				1421	acc_mode = ACC_MODE(flag);
				1422
				1423	/* Allow the LSM permission hook to distinguish append
				1424	access from general write access. */
				1425	if (flag & O_APPEND)
				1426	acc_mode \|= MAY_APPEND;
				1427
				1428	/* Fill in the open() intent data */
				1429	nd->intent.open.flags = flag;
				1430	nd->intent.open.create_mode = mode;
				1431
				1432	/*
				1433	* The simplest case - just a plain lookup.
				1434	*/
				1435	if (!(flag & O_CREAT)) {
				1436	error = path_lookup(pathname, lookup_flags(flag)\|LOOKUP_OPEN, nd);
				1437	if (error)
				1438	return error;
				1439	goto ok;
				1440	}
				1441
				1442	/*
				1443	* Create - we need to know the parent.
				1444	*/
				1445	error = path_lookup(pathname, LOOKUP_PARENT\|LOOKUP_OPEN\|LOOKUP_CREATE, nd);
				1446	if (error)
				1447	return error;
				1448
				1449	/*
				1450	* We have the parent and last component. First of all, check
				1451	* that we are not asked to creat(2) an obvious directory - that
				1452	* will not do.
				1453	*/
				1454	error = -EISDIR;
				1455	if (nd->last_type != LAST_NORM \|\| nd->last.name[nd->last.len])
				1456	goto exit;
				1457
				1458	dir = nd->dentry;
				1459	nd->flags &= ~LOOKUP_PARENT;
				1460	down(&dir->d_inode->i_sem);
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1461	path.dentry = __lookup_hash(&nd->last, nd->dentry, nd);
Al Viro	d73ffe1	2005-06-06 13:36:01 -0700	[diff] [blame]	1462	path.mnt = nd->mnt;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1463
				1464	do_last:
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1465	error = PTR_ERR(path.dentry);
				1466	if (IS_ERR(path.dentry)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1467	up(&dir->d_inode->i_sem);
				1468	goto exit;
				1469	}
				1470
				1471	/* Negative dentry, just create the file */
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1472	if (!path.dentry->d_inode) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1473	if (!IS_POSIXACL(dir->d_inode))
				1474	mode &= ~current->fs->umask;
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1475	error = vfs_create(dir->d_inode, path.dentry, mode, nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1476	up(&dir->d_inode->i_sem);
				1477	dput(nd->dentry);
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1478	nd->dentry = path.dentry;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1479	if (error)
				1480	goto exit;
				1481	/* Don't check for write permission, don't truncate */
				1482	acc_mode = 0;
				1483	flag &= ~O_TRUNC;
				1484	goto ok;
				1485	}
				1486
				1487	/*
				1488	* It already exists.
				1489	*/
				1490	up(&dir->d_inode->i_sem);
				1491
				1492	error = -EEXIST;
				1493	if (flag & O_EXCL)
				1494	goto exit_dput;
				1495
Al Viro	e13b210	2005-06-06 13:36:06 -0700	[diff] [blame]	1496	if (__follow_mount(&path)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1497	error = -ELOOP;
Al Viro	ba7a4c1	2005-06-06 13:36:08 -0700	[diff] [blame]	1498	if (flag & O_NOFOLLOW)
				1499	goto exit_dput;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1500	}
				1501	error = -ENOENT;
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1502	if (!path.dentry->d_inode)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1503	goto exit_dput;
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1504	if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1505	goto do_link;
				1506
				1507	dput(nd->dentry);
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1508	nd->dentry = path.dentry;
Al Viro	a15a3f6	2005-06-06 13:36:08 -0700	[diff] [blame]	1509	if (nd->mnt != path.mnt)
				1510	mntput(nd->mnt);
				1511	nd->mnt = path.mnt;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1512	error = -EISDIR;
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1513	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1514	goto exit;
				1515	ok:
				1516	error = may_open(nd, acc_mode, flag);
				1517	if (error)
				1518	goto exit;
				1519	return 0;
				1520
				1521	exit_dput:
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1522	dput(path.dentry);
Al Viro	a15a3f6	2005-06-06 13:36:08 -0700	[diff] [blame]	1523	if (nd->mnt != path.mnt)
Al Viro	ba7a4c1	2005-06-06 13:36:08 -0700	[diff] [blame]	1524	mntput(path.mnt);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1525	exit:
				1526	path_release(nd);
				1527	return error;
				1528
				1529	do_link:
				1530	error = -ELOOP;
				1531	if (flag & O_NOFOLLOW)
				1532	goto exit_dput;
				1533	/*
				1534	* This is subtle. Instead of calling do_follow_link() we do the
				1535	* thing by hands. The reason is that this way we have zero link_count
				1536	* and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
				1537	* After that we have the parent and last component, i.e.
				1538	* we are in the same situation as after the first path_walk().
				1539	* Well, almost - if the last component is normal we get its copy
				1540	* stored in nd->last.name and we will have to putname() it when we
				1541	* are done. Procfs-like symlinks just set LAST_BIND.
				1542	*/
				1543	nd->flags \|= LOOKUP_PARENT;
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1544	error = security_inode_follow_link(path.dentry, nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1545	if (error)
				1546	goto exit_dput;
Al Viro	cd4e91d	2005-06-06 13:36:03 -0700	[diff] [blame]	1547	error = __do_follow_link(&path, nd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1548	if (error)
				1549	return error;
				1550	nd->flags &= ~LOOKUP_PARENT;
Al Viro	d671d5e	2005-06-06 13:36:04 -0700	[diff] [blame]	1551	if (nd->last_type == LAST_BIND)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1552	goto ok;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1553	error = -EISDIR;
				1554	if (nd->last_type != LAST_NORM)
				1555	goto exit;
				1556	if (nd->last.name[nd->last.len]) {
				1557	putname(nd->last.name);
				1558	goto exit;
				1559	}
				1560	error = -ELOOP;
				1561	if (count++==32) {
				1562	putname(nd->last.name);
				1563	goto exit;
				1564	}
				1565	dir = nd->dentry;
				1566	down(&dir->d_inode->i_sem);
Al Viro	4e7506e	2005-06-06 13:36:00 -0700	[diff] [blame]	1567	path.dentry = __lookup_hash(&nd->last, nd->dentry, nd);
Al Viro	d671d5e	2005-06-06 13:36:04 -0700	[diff] [blame]	1568	path.mnt = nd->mnt;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1569	putname(nd->last.name);
				1570	goto do_last;
				1571	}
				1572
				1573	/**
				1574	* lookup_create - lookup a dentry, creating it if it doesn't exist
				1575	* @nd: nameidata info
				1576	* @is_dir: directory flag
				1577	*
				1578	* Simple function to lookup and return a dentry and create it
				1579	* if it doesn't exist. Is SMP-safe.
Christoph Hellwig	c663e5d	2005-06-23 00:09:49 -0700	[diff] [blame]	1580	*
				1581	* Returns with nd->dentry->d_inode->i_sem locked.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1582	*/
				1583	struct dentry lookup_create(struct nameidata nd, int is_dir)
				1584	{
Christoph Hellwig	c663e5d	2005-06-23 00:09:49 -0700	[diff] [blame]	1585	struct dentry *dentry = ERR_PTR(-EEXIST);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1586
				1587	down(&nd->dentry->d_inode->i_sem);
Christoph Hellwig	c663e5d	2005-06-23 00:09:49 -0700	[diff] [blame]	1588	/*
				1589	* Yucky last component or no last component at all?
				1590	* (foo/., foo/.., /////)
				1591	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1592	if (nd->last_type != LAST_NORM)
				1593	goto fail;
				1594	nd->flags &= ~LOOKUP_PARENT;
Christoph Hellwig	c663e5d	2005-06-23 00:09:49 -0700	[diff] [blame]	1595
				1596	/*
				1597	* Do the final lookup.
				1598	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1599	dentry = lookup_hash(&nd->last, nd->dentry);
				1600	if (IS_ERR(dentry))
				1601	goto fail;
Christoph Hellwig	c663e5d	2005-06-23 00:09:49 -0700	[diff] [blame]	1602
				1603	/*
				1604	* Special case - lookup gave negative, but... we had foo/bar/
				1605	* From the vfs_mknod() POV we just have a negative dentry -
				1606	* all is fine. Let's be bastards - you had / on the end, you've
				1607	* been asking for (non-existent) directory. -ENOENT for you.
				1608	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1609	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
				1610	goto enoent;
				1611	return dentry;
				1612	enoent:
				1613	dput(dentry);
				1614	dentry = ERR_PTR(-ENOENT);
				1615	fail:
				1616	return dentry;
				1617	}
Christoph Hellwig	f81a0bf	2005-05-19 12:26:43 -0700	[diff] [blame]	1618	EXPORT_SYMBOL_GPL(lookup_create);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1619
				1620	int vfs_mknod(struct inode dir, struct dentry dentry, int mode, dev_t dev)
				1621	{
				1622	int error = may_create(dir, dentry, NULL);
				1623
				1624	if (error)
				1625	return error;
				1626
				1627	if ((S_ISCHR(mode) \|\| S_ISBLK(mode)) && !capable(CAP_MKNOD))
				1628	return -EPERM;
				1629
				1630	if (!dir->i_op \|\| !dir->i_op->mknod)
				1631	return -EPERM;
				1632
				1633	error = security_inode_mknod(dir, dentry, mode, dev);
				1634	if (error)
				1635	return error;
				1636
				1637	DQUOT_INIT(dir);
				1638	error = dir->i_op->mknod(dir, dentry, mode, dev);
				1639	if (!error) {
				1640	inode_dir_notify(dir, DN_CREATE);
				1641	security_inode_post_mknod(dir, dentry, mode, dev);
				1642	}
				1643	return error;
				1644	}
				1645
				1646	asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
				1647	{
				1648	int error = 0;
				1649	char * tmp;
				1650	struct dentry * dentry;
				1651	struct nameidata nd;
				1652
				1653	if (S_ISDIR(mode))
				1654	return -EPERM;
				1655	tmp = getname(filename);
				1656	if (IS_ERR(tmp))
				1657	return PTR_ERR(tmp);
				1658
				1659	error = path_lookup(tmp, LOOKUP_PARENT, &nd);
				1660	if (error)
				1661	goto out;
				1662	dentry = lookup_create(&nd, 0);
				1663	error = PTR_ERR(dentry);
				1664
				1665	if (!IS_POSIXACL(nd.dentry->d_inode))
				1666	mode &= ~current->fs->umask;
				1667	if (!IS_ERR(dentry)) {
				1668	switch (mode & S_IFMT) {
				1669	case 0: case S_IFREG:
				1670	error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
				1671	break;
				1672	case S_IFCHR: case S_IFBLK:
				1673	error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
				1674	new_decode_dev(dev));
				1675	break;
				1676	case S_IFIFO: case S_IFSOCK:
				1677	error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
				1678	break;
				1679	case S_IFDIR:
				1680	error = -EPERM;
				1681	break;
				1682	default:
				1683	error = -EINVAL;
				1684	}
				1685	dput(dentry);
				1686	}
				1687	up(&nd.dentry->d_inode->i_sem);
				1688	path_release(&nd);
				1689	out:
				1690	putname(tmp);
				1691
				1692	return error;
				1693	}
				1694
				1695	int vfs_mkdir(struct inode dir, struct dentry dentry, int mode)
				1696	{
				1697	int error = may_create(dir, dentry, NULL);
				1698
				1699	if (error)
				1700	return error;
				1701
				1702	if (!dir->i_op \|\| !dir->i_op->mkdir)
				1703	return -EPERM;
				1704
				1705	mode &= (S_IRWXUGO\|S_ISVTX);
				1706	error = security_inode_mkdir(dir, dentry, mode);
				1707	if (error)
				1708	return error;
				1709
				1710	DQUOT_INIT(dir);
				1711	error = dir->i_op->mkdir(dir, dentry, mode);
				1712	if (!error) {
				1713	inode_dir_notify(dir, DN_CREATE);
				1714	security_inode_post_mkdir(dir,dentry, mode);
				1715	}
				1716	return error;
				1717	}
				1718
				1719	asmlinkage long sys_mkdir(const char __user * pathname, int mode)
				1720	{
				1721	int error = 0;
				1722	char * tmp;
				1723
				1724	tmp = getname(pathname);
				1725	error = PTR_ERR(tmp);
				1726	if (!IS_ERR(tmp)) {
				1727	struct dentry *dentry;
				1728	struct nameidata nd;
				1729
				1730	error = path_lookup(tmp, LOOKUP_PARENT, &nd);
				1731	if (error)
				1732	goto out;
				1733	dentry = lookup_create(&nd, 1);
				1734	error = PTR_ERR(dentry);
				1735	if (!IS_ERR(dentry)) {
				1736	if (!IS_POSIXACL(nd.dentry->d_inode))
				1737	mode &= ~current->fs->umask;
				1738	error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
				1739	dput(dentry);
				1740	}
				1741	up(&nd.dentry->d_inode->i_sem);
				1742	path_release(&nd);
				1743	out:
				1744	putname(tmp);
				1745	}
				1746
				1747	return error;
				1748	}
				1749
				1750	/*
				1751	* We try to drop the dentry early: we should have
				1752	* a usage count of 2 if we're the only user of this
				1753	* dentry, and if that is true (possibly after pruning
				1754	* the dcache), then we drop the dentry now.
				1755	*
				1756	* A low-level filesystem can, if it choses, legally
				1757	* do a
				1758	*
				1759	* if (!d_unhashed(dentry))
				1760	* return -EBUSY;
				1761	*
				1762	* if it cannot handle the case of removing a directory
				1763	* that is still in use by something else..
				1764	*/
				1765	void dentry_unhash(struct dentry *dentry)
				1766	{
				1767	dget(dentry);
				1768	if (atomic_read(&dentry->d_count))
				1769	shrink_dcache_parent(dentry);
				1770	spin_lock(&dcache_lock);
				1771	spin_lock(&dentry->d_lock);
				1772	if (atomic_read(&dentry->d_count) == 2)
				1773	__d_drop(dentry);
				1774	spin_unlock(&dentry->d_lock);
				1775	spin_unlock(&dcache_lock);
				1776	}
				1777
				1778	int vfs_rmdir(struct inode dir, struct dentry dentry)
				1779	{
				1780	int error = may_delete(dir, dentry, 1);
				1781
				1782	if (error)
				1783	return error;
				1784
				1785	if (!dir->i_op \|\| !dir->i_op->rmdir)
				1786	return -EPERM;
				1787
				1788	DQUOT_INIT(dir);
				1789
				1790	down(&dentry->d_inode->i_sem);
				1791	dentry_unhash(dentry);
				1792	if (d_mountpoint(dentry))
				1793	error = -EBUSY;
				1794	else {
				1795	error = security_inode_rmdir(dir, dentry);
				1796	if (!error) {
				1797	error = dir->i_op->rmdir(dir, dentry);
				1798	if (!error)
				1799	dentry->d_inode->i_flags \|= S_DEAD;
				1800	}
				1801	}
				1802	up(&dentry->d_inode->i_sem);
				1803	if (!error) {
				1804	inode_dir_notify(dir, DN_DELETE);
				1805	d_delete(dentry);
				1806	}
				1807	dput(dentry);
				1808
				1809	return error;
				1810	}
				1811
				1812	asmlinkage long sys_rmdir(const char __user * pathname)
				1813	{
				1814	int error = 0;
				1815	char * name;
				1816	struct dentry *dentry;
				1817	struct nameidata nd;
				1818
				1819	name = getname(pathname);
				1820	if(IS_ERR(name))
				1821	return PTR_ERR(name);
				1822
				1823	error = path_lookup(name, LOOKUP_PARENT, &nd);
				1824	if (error)
				1825	goto exit;
				1826
				1827	switch(nd.last_type) {
				1828	case LAST_DOTDOT:
				1829	error = -ENOTEMPTY;
				1830	goto exit1;
				1831	case LAST_DOT:
				1832	error = -EINVAL;
				1833	goto exit1;
				1834	case LAST_ROOT:
				1835	error = -EBUSY;
				1836	goto exit1;
				1837	}
				1838	down(&nd.dentry->d_inode->i_sem);
				1839	dentry = lookup_hash(&nd.last, nd.dentry);
				1840	error = PTR_ERR(dentry);
				1841	if (!IS_ERR(dentry)) {
				1842	error = vfs_rmdir(nd.dentry->d_inode, dentry);
				1843	dput(dentry);
				1844	}
				1845	up(&nd.dentry->d_inode->i_sem);
				1846	exit1:
				1847	path_release(&nd);
				1848	exit:
				1849	putname(name);
				1850	return error;
				1851	}
				1852
				1853	int vfs_unlink(struct inode dir, struct dentry dentry)
				1854	{
				1855	int error = may_delete(dir, dentry, 0);
				1856
				1857	if (error)
				1858	return error;
				1859
				1860	if (!dir->i_op \|\| !dir->i_op->unlink)
				1861	return -EPERM;
				1862
				1863	DQUOT_INIT(dir);
				1864
				1865	down(&dentry->d_inode->i_sem);
				1866	if (d_mountpoint(dentry))
				1867	error = -EBUSY;
				1868	else {
				1869	error = security_inode_unlink(dir, dentry);
				1870	if (!error)
				1871	error = dir->i_op->unlink(dir, dentry);
				1872	}
				1873	up(&dentry->d_inode->i_sem);
				1874
				1875	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
				1876	if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
				1877	d_delete(dentry);
				1878	inode_dir_notify(dir, DN_DELETE);
				1879	}
				1880	return error;
				1881	}
				1882
				1883	/*
				1884	* Make sure that the actual truncation of the file will occur outside its
				1885	* directory's i_sem. Truncate can take a long time if there is a lot of
				1886	* writeout happening, and we don't want to prevent access to the directory
				1887	* while waiting on the I/O.
				1888	*/
				1889	asmlinkage long sys_unlink(const char __user * pathname)
				1890	{
				1891	int error = 0;
				1892	char * name;
				1893	struct dentry *dentry;
				1894	struct nameidata nd;
				1895	struct inode *inode = NULL;
				1896
				1897	name = getname(pathname);
				1898	if(IS_ERR(name))
				1899	return PTR_ERR(name);
				1900
				1901	error = path_lookup(name, LOOKUP_PARENT, &nd);
				1902	if (error)
				1903	goto exit;
				1904	error = -EISDIR;
				1905	if (nd.last_type != LAST_NORM)
				1906	goto exit1;
				1907	down(&nd.dentry->d_inode->i_sem);
				1908	dentry = lookup_hash(&nd.last, nd.dentry);
				1909	error = PTR_ERR(dentry);
				1910	if (!IS_ERR(dentry)) {
				1911	/* Why not before? Because we want correct error value */
				1912	if (nd.last.name[nd.last.len])
				1913	goto slashes;
				1914	inode = dentry->d_inode;
				1915	if (inode)
				1916	atomic_inc(&inode->i_count);
				1917	error = vfs_unlink(nd.dentry->d_inode, dentry);
				1918	exit2:
				1919	dput(dentry);
				1920	}
				1921	up(&nd.dentry->d_inode->i_sem);
				1922	if (inode)
				1923	iput(inode); /* truncate the inode here */
				1924	exit1:
				1925	path_release(&nd);
				1926	exit:
				1927	putname(name);
				1928	return error;
				1929
				1930	slashes:
				1931	error = !dentry->d_inode ? -ENOENT :
				1932	S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
				1933	goto exit2;
				1934	}
				1935
				1936	int vfs_symlink(struct inode dir, struct dentry dentry, const char *oldname, int mode)
				1937	{
				1938	int error = may_create(dir, dentry, NULL);
				1939
				1940	if (error)
				1941	return error;
				1942
				1943	if (!dir->i_op \|\| !dir->i_op->symlink)
				1944	return -EPERM;
				1945
				1946	error = security_inode_symlink(dir, dentry, oldname);
				1947	if (error)
				1948	return error;
				1949
				1950	DQUOT_INIT(dir);
				1951	error = dir->i_op->symlink(dir, dentry, oldname);
				1952	if (!error) {
				1953	inode_dir_notify(dir, DN_CREATE);
				1954	security_inode_post_symlink(dir, dentry, oldname);
				1955	}
				1956	return error;
				1957	}
				1958
				1959	asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname)
				1960	{
				1961	int error = 0;
				1962	char * from;
				1963	char * to;
				1964
				1965	from = getname(oldname);
				1966	if(IS_ERR(from))
				1967	return PTR_ERR(from);
				1968	to = getname(newname);
				1969	error = PTR_ERR(to);
				1970	if (!IS_ERR(to)) {
				1971	struct dentry *dentry;
				1972	struct nameidata nd;
				1973
				1974	error = path_lookup(to, LOOKUP_PARENT, &nd);
				1975	if (error)
				1976	goto out;
				1977	dentry = lookup_create(&nd, 0);
				1978	error = PTR_ERR(dentry);
				1979	if (!IS_ERR(dentry)) {
				1980	error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
				1981	dput(dentry);
				1982	}
				1983	up(&nd.dentry->d_inode->i_sem);
				1984	path_release(&nd);
				1985	out:
				1986	putname(to);
				1987	}
				1988	putname(from);
				1989	return error;
				1990	}
				1991
				1992	int vfs_link(struct dentry old_dentry, struct inode dir, struct dentry *new_dentry)
				1993	{
				1994	struct inode *inode = old_dentry->d_inode;
				1995	int error;
				1996
				1997	if (!inode)
				1998	return -ENOENT;
				1999
				2000	error = may_create(dir, new_dentry, NULL);
				2001	if (error)
				2002	return error;
				2003
				2004	if (dir->i_sb != inode->i_sb)
				2005	return -EXDEV;
				2006
				2007	/*
				2008	* A link to an append-only or immutable file cannot be created.
				2009	*/
				2010	if (IS_APPEND(inode) \|\| IS_IMMUTABLE(inode))
				2011	return -EPERM;
				2012	if (!dir->i_op \|\| !dir->i_op->link)
				2013	return -EPERM;
				2014	if (S_ISDIR(old_dentry->d_inode->i_mode))
				2015	return -EPERM;
				2016
				2017	error = security_inode_link(old_dentry, dir, new_dentry);
				2018	if (error)
				2019	return error;
				2020
				2021	down(&old_dentry->d_inode->i_sem);
				2022	DQUOT_INIT(dir);
				2023	error = dir->i_op->link(old_dentry, dir, new_dentry);
				2024	up(&old_dentry->d_inode->i_sem);
				2025	if (!error) {
				2026	inode_dir_notify(dir, DN_CREATE);
				2027	security_inode_post_link(old_dentry, dir, new_dentry);
				2028	}
				2029	return error;
				2030	}
				2031
				2032	/*
				2033	* Hardlinks are often used in delicate situations. We avoid
				2034	* security-related surprises by not following symlinks on the
				2035	* newname. --KAB
				2036	*
				2037	* We don't follow them on the oldname either to be compatible
				2038	* with linux 2.0, and to avoid hard-linking to directories
				2039	* and other special files. --ADM
				2040	*/
				2041	asmlinkage long sys_link(const char __user * oldname, const char __user * newname)
				2042	{
				2043	struct dentry *new_dentry;
				2044	struct nameidata nd, old_nd;
				2045	int error;
				2046	char * to;
				2047
				2048	to = getname(newname);
				2049	if (IS_ERR(to))
				2050	return PTR_ERR(to);
				2051
				2052	error = __user_walk(oldname, 0, &old_nd);
				2053	if (error)
				2054	goto exit;
				2055	error = path_lookup(to, LOOKUP_PARENT, &nd);
				2056	if (error)
				2057	goto out;
				2058	error = -EXDEV;
				2059	if (old_nd.mnt != nd.mnt)
				2060	goto out_release;
				2061	new_dentry = lookup_create(&nd, 0);
				2062	error = PTR_ERR(new_dentry);
				2063	if (!IS_ERR(new_dentry)) {
				2064	error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
				2065	dput(new_dentry);
				2066	}
				2067	up(&nd.dentry->d_inode->i_sem);
				2068	out_release:
				2069	path_release(&nd);
				2070	out:
				2071	path_release(&old_nd);
				2072	exit:
				2073	putname(to);
				2074
				2075	return error;
				2076	}
				2077
				2078	/*
				2079	* The worst of all namespace operations - renaming directory. "Perverted"
				2080	* doesn't even start to describe it. Somebody in UCB had a heck of a trip...
				2081	* Problems:
				2082	* a) we can get into loop creation. Check is done in is_subdir().
				2083	* b) race potential - two innocent renames can create a loop together.
				2084	* That's where 4.4 screws up. Current fix: serialization on
				2085	* sb->s_vfs_rename_sem. We might be more accurate, but that's another
				2086	* story.
				2087	* c) we have to lock _three_ objects - parents and victim (if it exists).
				2088	* And that - after we got ->i_sem on parents (until then we don't know
				2089	* whether the target exists). Solution: try to be smart with locking
				2090	* order for inodes. We rely on the fact that tree topology may change
				2091	* only under ->s_vfs_rename_sem _and_ that parent of the object we
				2092	* move will be locked. Thus we can rank directories by the tree
				2093	* (ancestors first) and rank all non-directories after them.
				2094	* That works since everybody except rename does "lock parent, lookup,
				2095	* lock child" and rename is under ->s_vfs_rename_sem.
				2096	* HOWEVER, it relies on the assumption that any object with ->lookup()
				2097	* has no more than 1 dentry. If "hybrid" objects will ever appear,
				2098	* we'd better make sure that there's no link(2) for them.
				2099	* d) some filesystems don't support opened-but-unlinked directories,
				2100	* either because of layout or because they are not ready to deal with
				2101	* all cases correctly. The latter will be fixed (taking this sort of
				2102	* stuff into VFS), but the former is not going away. Solution: the same
				2103	* trick as in rmdir().
				2104	* e) conversion from fhandle to dentry may come in the wrong moment - when
				2105	* we are removing the target. Solution: we will have to grab ->i_sem
				2106	* in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
				2107	* ->i_sem on parents, which works but leads to some truly excessive
				2108	* locking].
				2109	*/
Adrian Bunk	75c96f8	2005-05-05 16:16:09 -0700	[diff] [blame]	2110	static int vfs_rename_dir(struct inode old_dir, struct dentry old_dentry,
				2111	struct inode new_dir, struct dentry new_dentry)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2112	{
				2113	int error = 0;
				2114	struct inode *target;
				2115
				2116	/*
				2117	* If we are going to change the parent - check write permissions,
				2118	* we'll need to flip '..'.
				2119	*/
				2120	if (new_dir != old_dir) {
				2121	error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
				2122	if (error)
				2123	return error;
				2124	}
				2125
				2126	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
				2127	if (error)
				2128	return error;
				2129
				2130	target = new_dentry->d_inode;
				2131	if (target) {
				2132	down(&target->i_sem);
				2133	dentry_unhash(new_dentry);
				2134	}
				2135	if (d_mountpoint(old_dentry)\|\|d_mountpoint(new_dentry))
				2136	error = -EBUSY;
				2137	else
				2138	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
				2139	if (target) {
				2140	if (!error)
				2141	target->i_flags \|= S_DEAD;
				2142	up(&target->i_sem);
				2143	if (d_unhashed(new_dentry))
				2144	d_rehash(new_dentry);
				2145	dput(new_dentry);
				2146	}
				2147	if (!error) {
				2148	d_move(old_dentry,new_dentry);
				2149	security_inode_post_rename(old_dir, old_dentry,
				2150	new_dir, new_dentry);
				2151	}
				2152	return error;
				2153	}
				2154
Adrian Bunk	75c96f8	2005-05-05 16:16:09 -0700	[diff] [blame]	2155	static int vfs_rename_other(struct inode old_dir, struct dentry old_dentry,
				2156	struct inode new_dir, struct dentry new_dentry)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2157	{
				2158	struct inode *target;
				2159	int error;
				2160
				2161	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
				2162	if (error)
				2163	return error;
				2164
				2165	dget(new_dentry);
				2166	target = new_dentry->d_inode;
				2167	if (target)
				2168	down(&target->i_sem);
				2169	if (d_mountpoint(old_dentry)\|\|d_mountpoint(new_dentry))
				2170	error = -EBUSY;
				2171	else
				2172	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
				2173	if (!error) {
				2174	/* The following d_move() should become unconditional */
				2175	if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
				2176	d_move(old_dentry, new_dentry);
				2177	security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
				2178	}
				2179	if (target)
				2180	up(&target->i_sem);
				2181	dput(new_dentry);
				2182	return error;
				2183	}
				2184
				2185	int vfs_rename(struct inode old_dir, struct dentry old_dentry,
				2186	struct inode new_dir, struct dentry new_dentry)
				2187	{
				2188	int error;
				2189	int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
				2190
				2191	if (old_dentry->d_inode == new_dentry->d_inode)
				2192	return 0;
				2193
				2194	error = may_delete(old_dir, old_dentry, is_dir);
				2195	if (error)
				2196	return error;
				2197
				2198	if (!new_dentry->d_inode)
				2199	error = may_create(new_dir, new_dentry, NULL);
				2200	else
				2201	error = may_delete(new_dir, new_dentry, is_dir);
				2202	if (error)
				2203	return error;
				2204
				2205	if (!old_dir->i_op \|\| !old_dir->i_op->rename)
				2206	return -EPERM;
				2207
				2208	DQUOT_INIT(old_dir);
				2209	DQUOT_INIT(new_dir);
				2210
				2211	if (is_dir)
				2212	error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
				2213	else
				2214	error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
				2215	if (!error) {
				2216	if (old_dir == new_dir)
				2217	inode_dir_notify(old_dir, DN_RENAME);
				2218	else {
				2219	inode_dir_notify(old_dir, DN_DELETE);
				2220	inode_dir_notify(new_dir, DN_CREATE);
				2221	}
				2222	}
				2223	return error;
				2224	}
				2225
				2226	static inline int do_rename(const char * oldname, const char * newname)
				2227	{
				2228	int error = 0;
				2229	struct dentry * old_dir, * new_dir;
				2230	struct dentry * old_dentry, *new_dentry;
				2231	struct dentry * trap;
				2232	struct nameidata oldnd, newnd;
				2233
				2234	error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
				2235	if (error)
				2236	goto exit;
				2237
				2238	error = path_lookup(newname, LOOKUP_PARENT, &newnd);
				2239	if (error)
				2240	goto exit1;
				2241
				2242	error = -EXDEV;
				2243	if (oldnd.mnt != newnd.mnt)
				2244	goto exit2;
				2245
				2246	old_dir = oldnd.dentry;
				2247	error = -EBUSY;
				2248	if (oldnd.last_type != LAST_NORM)
				2249	goto exit2;
				2250
				2251	new_dir = newnd.dentry;
				2252	if (newnd.last_type != LAST_NORM)
				2253	goto exit2;
				2254
				2255	trap = lock_rename(new_dir, old_dir);
				2256
				2257	old_dentry = lookup_hash(&oldnd.last, old_dir);
				2258	error = PTR_ERR(old_dentry);
				2259	if (IS_ERR(old_dentry))
				2260	goto exit3;
				2261	/* source must exist */
				2262	error = -ENOENT;
				2263	if (!old_dentry->d_inode)
				2264	goto exit4;
				2265	/* unless the source is a directory trailing slashes give -ENOTDIR */
				2266	if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
				2267	error = -ENOTDIR;
				2268	if (oldnd.last.name[oldnd.last.len])
				2269	goto exit4;
				2270	if (newnd.last.name[newnd.last.len])
				2271	goto exit4;
				2272	}
				2273	/* source should not be ancestor of target */
				2274	error = -EINVAL;
				2275	if (old_dentry == trap)
				2276	goto exit4;
				2277	new_dentry = lookup_hash(&newnd.last, new_dir);
				2278	error = PTR_ERR(new_dentry);
				2279	if (IS_ERR(new_dentry))
				2280	goto exit4;
				2281	/* target should not be an ancestor of source */
				2282	error = -ENOTEMPTY;
				2283	if (new_dentry == trap)
				2284	goto exit5;
				2285
				2286	error = vfs_rename(old_dir->d_inode, old_dentry,
				2287	new_dir->d_inode, new_dentry);
				2288	exit5:
				2289	dput(new_dentry);
				2290	exit4:
				2291	dput(old_dentry);
				2292	exit3:
				2293	unlock_rename(new_dir, old_dir);
				2294	exit2:
				2295	path_release(&newnd);
				2296	exit1:
				2297	path_release(&oldnd);
				2298	exit:
				2299	return error;
				2300	}
				2301
				2302	asmlinkage long sys_rename(const char __user * oldname, const char __user * newname)
				2303	{
				2304	int error;
				2305	char * from;
				2306	char * to;
				2307
				2308	from = getname(oldname);
				2309	if(IS_ERR(from))
				2310	return PTR_ERR(from);
				2311	to = getname(newname);
				2312	error = PTR_ERR(to);
				2313	if (!IS_ERR(to)) {
				2314	error = do_rename(from,to);
				2315	putname(to);
				2316	}
				2317	putname(from);
				2318	return error;
				2319	}
				2320
				2321	int vfs_readlink(struct dentry dentry, char __user buffer, int buflen, const char *link)
				2322	{
				2323	int len;
				2324
				2325	len = PTR_ERR(link);
				2326	if (IS_ERR(link))
				2327	goto out;
				2328
				2329	len = strlen(link);
				2330	if (len > (unsigned) buflen)
				2331	len = buflen;
				2332	if (copy_to_user(buffer, link, len))
				2333	len = -EFAULT;
				2334	out:
				2335	return len;
				2336	}
				2337
				2338	/*
				2339	* A helper for ->readlink(). This should be used ONLY for symlinks that
				2340	* have ->follow_link() touching nd only in nd_set_link(). Using (or not
				2341	* using) it for any given inode is up to filesystem.
				2342	*/
				2343	int generic_readlink(struct dentry dentry, char __user buffer, int buflen)
				2344	{
				2345	struct nameidata nd;
				2346	int res;
				2347	nd.depth = 0;
				2348	res = dentry->d_inode->i_op->follow_link(dentry, &nd);
				2349	if (!res) {
				2350	res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
				2351	if (dentry->d_inode->i_op->put_link)
				2352	dentry->d_inode->i_op->put_link(dentry, &nd);
				2353	}
				2354	return res;
				2355	}
				2356
				2357	int vfs_follow_link(struct nameidata nd, const char link)
				2358	{
				2359	return __vfs_follow_link(nd, link);
				2360	}
				2361
				2362	/* get the link contents into pagecache */
				2363	static char page_getlink(struct dentry dentry, struct page **ppage)
				2364	{
				2365	struct page * page;
				2366	struct address_space *mapping = dentry->d_inode->i_mapping;
				2367	page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
				2368	NULL);
				2369	if (IS_ERR(page))
				2370	goto sync_fail;
				2371	wait_on_page_locked(page);
				2372	if (!PageUptodate(page))
				2373	goto async_fail;
				2374	*ppage = page;
				2375	return kmap(page);
				2376
				2377	async_fail:
				2378	page_cache_release(page);
				2379	return ERR_PTR(-EIO);
				2380
				2381	sync_fail:
				2382	return (char*)page;
				2383	}
				2384
				2385	int page_readlink(struct dentry dentry, char __user buffer, int buflen)
				2386	{
				2387	struct page *page = NULL;
				2388	char *s = page_getlink(dentry, &page);
				2389	int res = vfs_readlink(dentry,buffer,buflen,s);
				2390	if (page) {
				2391	kunmap(page);
				2392	page_cache_release(page);
				2393	}
				2394	return res;
				2395	}
				2396
				2397	int page_follow_link_light(struct dentry dentry, struct nameidata nd)
				2398	{
				2399	struct page *page;
				2400	nd_set_link(nd, page_getlink(dentry, &page));
				2401	return 0;
				2402	}
				2403
				2404	void page_put_link(struct dentry dentry, struct nameidata nd)
				2405	{
				2406	if (!IS_ERR(nd_get_link(nd))) {
				2407	struct page *page;
				2408	page = find_get_page(dentry->d_inode->i_mapping, 0);
				2409	if (!page)
				2410	BUG();
				2411	kunmap(page);
				2412	page_cache_release(page);
				2413	page_cache_release(page);
				2414	}
				2415	}
				2416
				2417	int page_symlink(struct inode inode, const char symname, int len)
				2418	{
				2419	struct address_space *mapping = inode->i_mapping;
				2420	struct page *page = grab_cache_page(mapping, 0);
				2421	int err = -ENOMEM;
				2422	char *kaddr;
				2423
				2424	if (!page)
				2425	goto fail;
				2426	err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
				2427	if (err)
				2428	goto fail_map;
				2429	kaddr = kmap_atomic(page, KM_USER0);
				2430	memcpy(kaddr, symname, len-1);
				2431	kunmap_atomic(kaddr, KM_USER0);
				2432	mapping->a_ops->commit_write(NULL, page, 0, len-1);
				2433	/*
				2434	* Notice that we are _not_ going to block here - end of page is
				2435	* unmapped, so this will only try to map the rest of page, see
				2436	* that it is unmapped (typically even will not look into inode -
				2437	* ->i_size will be enough for everything) and zero it out.
				2438	* OTOH it's obviously correct and should make the page up-to-date.
				2439	*/
				2440	if (!PageUptodate(page)) {
				2441	err = mapping->a_ops->readpage(NULL, page);
				2442	wait_on_page_locked(page);
				2443	} else {
				2444	unlock_page(page);
				2445	}
				2446	page_cache_release(page);
				2447	if (err < 0)
				2448	goto fail;
				2449	mark_inode_dirty(inode);
				2450	return 0;
				2451	fail_map:
				2452	unlock_page(page);
				2453	page_cache_release(page);
				2454	fail:
				2455	return err;
				2456	}
				2457
				2458	struct inode_operations page_symlink_inode_operations = {
				2459	.readlink = generic_readlink,
				2460	.follow_link = page_follow_link_light,
				2461	.put_link = page_put_link,
				2462	};
				2463
				2464	EXPORT_SYMBOL(__user_walk);
				2465	EXPORT_SYMBOL(follow_down);
				2466	EXPORT_SYMBOL(follow_up);
				2467	EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
				2468	EXPORT_SYMBOL(getname);
				2469	EXPORT_SYMBOL(lock_rename);
				2470	EXPORT_SYMBOL(lookup_hash);
				2471	EXPORT_SYMBOL(lookup_one_len);
				2472	EXPORT_SYMBOL(page_follow_link_light);
				2473	EXPORT_SYMBOL(page_put_link);
				2474	EXPORT_SYMBOL(page_readlink);
				2475	EXPORT_SYMBOL(page_symlink);
				2476	EXPORT_SYMBOL(page_symlink_inode_operations);
				2477	EXPORT_SYMBOL(path_lookup);
				2478	EXPORT_SYMBOL(path_release);
				2479	EXPORT_SYMBOL(path_walk);
				2480	EXPORT_SYMBOL(permission);
				2481	EXPORT_SYMBOL(unlock_rename);
				2482	EXPORT_SYMBOL(vfs_create);
				2483	EXPORT_SYMBOL(vfs_follow_link);
				2484	EXPORT_SYMBOL(vfs_link);
				2485	EXPORT_SYMBOL(vfs_mkdir);
				2486	EXPORT_SYMBOL(vfs_mknod);
				2487	EXPORT_SYMBOL(generic_permission);
				2488	EXPORT_SYMBOL(vfs_readlink);
				2489	EXPORT_SYMBOL(vfs_rename);
				2490	EXPORT_SYMBOL(vfs_rmdir);
				2491	EXPORT_SYMBOL(vfs_symlink);
				2492	EXPORT_SYMBOL(vfs_unlink);
				2493	EXPORT_SYMBOL(dentry_unhash);
				2494	EXPORT_SYMBOL(generic_readlink);