Blame - fs/nfsd/vfs.c - kernel/msm-5.4

blob: e3e9d217236e46e65d60a8a9d0c722451f752e4c [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame^]	1	#define MSNFS /* HACK HACK */
				2	/*
				3	* linux/fs/nfsd/vfs.c
				4	*
				5	* File operations used by nfsd. Some of these have been ripped from
				6	* other parts of the kernel because they weren't exported, others
				7	* are partial duplicates with added or changed functionality.
				8	*
				9	* Note that several functions dget() the dentry upon which they want
				10	* to act, most notably those that create directory entries. Response
				11	* dentry's are dput()'d if necessary in the release callback.
				12	* So if you notice code paths that apparently fail to dput() the
				13	* dentry, don't worry--they have been taken care of.
				14	*
				15	* Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
				16	* Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
				17	*/
				18
				19	#include <linux/config.h>
				20	#include <linux/string.h>
				21	#include <linux/time.h>
				22	#include <linux/errno.h>
				23	#include <linux/fs.h>
				24	#include <linux/file.h>
				25	#include <linux/mount.h>
				26	#include <linux/major.h>
				27	#include <linux/ext2_fs.h>
				28	#include <linux/proc_fs.h>
				29	#include <linux/stat.h>
				30	#include <linux/fcntl.h>
				31	#include <linux/net.h>
				32	#include <linux/unistd.h>
				33	#include <linux/slab.h>
				34	#include <linux/pagemap.h>
				35	#include <linux/in.h>
				36	#include <linux/module.h>
				37	#include <linux/namei.h>
				38	#include <linux/vfs.h>
				39	#include <linux/delay.h>
				40	#include <linux/sunrpc/svc.h>
				41	#include <linux/nfsd/nfsd.h>
				42	#ifdef CONFIG_NFSD_V3
				43	#include <linux/nfs3.h>
				44	#include <linux/nfsd/xdr3.h>
				45	#endif /* CONFIG_NFSD_V3 */
				46	#include <linux/nfsd/nfsfh.h>
				47	#include <linux/quotaops.h>
				48	#include <linux/dnotify.h>
				49	#ifdef CONFIG_NFSD_V4
				50	#include <linux/posix_acl.h>
				51	#include <linux/posix_acl_xattr.h>
				52	#include <linux/xattr_acl.h>
				53	#include <linux/xattr.h>
				54	#include <linux/nfs4.h>
				55	#include <linux/nfs4_acl.h>
				56	#include <linux/nfsd_idmap.h>
				57	#include <linux/security.h>
				58	#endif /* CONFIG_NFSD_V4 */
				59
				60	#include <asm/uaccess.h>
				61
				62	#define NFSDDBG_FACILITY NFSDDBG_FILEOP
				63	#define NFSD_PARANOIA
				64
				65
				66	/* We must ignore files (but only files) which might have mandatory
				67	* locks on them because there is no way to know if the accesser has
				68	* the lock.
				69	*/
				70	#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i))
				71
				72	/*
				73	* This is a cache of readahead params that help us choose the proper
				74	* readahead strategy. Initially, we set all readahead parameters to 0
				75	* and let the VFS handle things.
				76	* If you increase the number of cached files very much, you'll need to
				77	* add a hash table here.
				78	*/
				79	struct raparms {
				80	struct raparms *p_next;
				81	unsigned int p_count;
				82	ino_t p_ino;
				83	dev_t p_dev;
				84	int p_set;
				85	struct file_ra_state p_ra;
				86	};
				87
				88	static struct raparms * raparml;
				89	static struct raparms * raparm_cache;
				90
				91	/*
				92	* Called from nfsd_lookup and encode_dirent. Check if we have crossed
				93	* a mount point.
				94	* Returns -EAGAIN leaving dpp and expp unchanged,
				95	* or nfs_ok having possibly changed dpp and expp
				96	*/
				97	int
				98	nfsd_cross_mnt(struct svc_rqst rqstp, struct dentry *dpp,
				99	struct svc_export **expp)
				100	{
				101	struct svc_export exp = expp, *exp2 = NULL;
				102	struct dentry dentry = dpp;
				103	struct vfsmount *mnt = mntget(exp->ex_mnt);
				104	struct dentry *mounts = dget(dentry);
				105	int err = nfs_ok;
				106
				107	while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
				108
				109	exp2 = exp_get_by_name(exp->ex_client, mnt, mounts, &rqstp->rq_chandle);
				110	if (IS_ERR(exp2)) {
				111	err = PTR_ERR(exp2);
				112	dput(mounts);
				113	mntput(mnt);
				114	goto out;
				115	}
				116	if (exp2 && ((exp->ex_flags & NFSEXP_CROSSMOUNT) \|\| EX_NOHIDE(exp2))) {
				117	/* successfully crossed mount point */
				118	exp_put(exp);
				119	*expp = exp2;
				120	dput(dentry);
				121	*dpp = mounts;
				122	} else {
				123	if (exp2) exp_put(exp2);
				124	dput(mounts);
				125	}
				126	mntput(mnt);
				127	out:
				128	return err;
				129	}
				130
				131	/*
				132	* Look up one component of a pathname.
				133	* N.B. After this call _both_ fhp and resfh need an fh_put
				134	*
				135	* If the lookup would cross a mountpoint, and the mounted filesystem
				136	* is exported to the client with NFSEXP_NOHIDE, then the lookup is
				137	* accepted as it stands and the mounted directory is
				138	* returned. Otherwise the covered directory is returned.
				139	* NOTE: this mountpoint crossing is not supported properly by all
				140	* clients and is explicitly disallowed for NFSv3
				141	* NeilBrown <neilb@cse.unsw.edu.au>
				142	*/
				143	int
				144	nfsd_lookup(struct svc_rqst rqstp, struct svc_fh fhp, const char *name,
				145	int len, struct svc_fh *resfh)
				146	{
				147	struct svc_export *exp;
				148	struct dentry *dparent;
				149	struct dentry *dentry;
				150	int err;
				151
				152	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
				153
				154	/* Obtain dentry and export. */
				155	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC);
				156	if (err)
				157	return err;
				158
				159	dparent = fhp->fh_dentry;
				160	exp = fhp->fh_export;
				161	exp_get(exp);
				162
				163	err = nfserr_acces;
				164
				165	/* Lookup the name, but don't follow links */
				166	if (isdotent(name, len)) {
				167	if (len==1)
				168	dentry = dget(dparent);
				169	else if (dparent != exp->ex_dentry) {
				170	dentry = dget_parent(dparent);
				171	} else if (!EX_NOHIDE(exp))
				172	dentry = dget(dparent); /* .. == . just like at / */
				173	else {
				174	/* checking mountpoint crossing is very different when stepping up */
				175	struct svc_export *exp2 = NULL;
				176	struct dentry *dp;
				177	struct vfsmount *mnt = mntget(exp->ex_mnt);
				178	dentry = dget(dparent);
				179	while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
				180	;
				181	dp = dget_parent(dentry);
				182	dput(dentry);
				183	dentry = dp;
				184
				185	exp2 = exp_parent(exp->ex_client, mnt, dentry,
				186	&rqstp->rq_chandle);
				187	if (IS_ERR(exp2)) {
				188	err = PTR_ERR(exp2);
				189	dput(dentry);
				190	mntput(mnt);
				191	goto out_nfserr;
				192	}
				193	if (!exp2) {
				194	dput(dentry);
				195	dentry = dget(dparent);
				196	} else {
				197	exp_put(exp);
				198	exp = exp2;
				199	}
				200	mntput(mnt);
				201	}
				202	} else {
				203	fh_lock(fhp);
				204	dentry = lookup_one_len(name, dparent, len);
				205	err = PTR_ERR(dentry);
				206	if (IS_ERR(dentry))
				207	goto out_nfserr;
				208	/*
				209	* check if we have crossed a mount point ...
				210	*/
				211	if (d_mountpoint(dentry)) {
				212	if ((err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
				213	dput(dentry);
				214	goto out_nfserr;
				215	}
				216	}
				217	}
				218	/*
				219	* Note: we compose the file handle now, but as the
				220	* dentry may be negative, it may need to be updated.
				221	*/
				222	err = fh_compose(resfh, exp, dentry, fhp);
				223	if (!err && !dentry->d_inode)
				224	err = nfserr_noent;
				225	dput(dentry);
				226	out:
				227	exp_put(exp);
				228	return err;
				229
				230	out_nfserr:
				231	err = nfserrno(err);
				232	goto out;
				233	}
				234
				235	/*
				236	* Set various file attributes.
				237	* N.B. After this call fhp needs an fh_put
				238	*/
				239	int
				240	nfsd_setattr(struct svc_rqst rqstp, struct svc_fh fhp, struct iattr *iap,
				241	int check_guard, time_t guardtime)
				242	{
				243	struct dentry *dentry;
				244	struct inode *inode;
				245	int accmode = MAY_SATTR;
				246	int ftype = 0;
				247	int imode;
				248	int err;
				249	int size_change = 0;
				250
				251	if (iap->ia_valid & (ATTR_ATIME \| ATTR_MTIME \| ATTR_SIZE))
				252	accmode \|= MAY_WRITE\|MAY_OWNER_OVERRIDE;
				253	if (iap->ia_valid & ATTR_SIZE)
				254	ftype = S_IFREG;
				255
				256	/* Get inode */
				257	err = fh_verify(rqstp, fhp, ftype, accmode);
				258	if (err \|\| !iap->ia_valid)
				259	goto out;
				260
				261	dentry = fhp->fh_dentry;
				262	inode = dentry->d_inode;
				263
				264	/* NFSv2 does not differentiate between "set-[ac]time-to-now"
				265	* which only requires access, and "set-[ac]time-to-X" which
				266	* requires ownership.
				267	* So if it looks like it might be "set both to the same time which
				268	* is close to now", and if inode_change_ok fails, then we
				269	* convert to "set to now" instead of "set to explicit time"
				270	*
				271	* We only call inode_change_ok as the last test as technically
				272	* it is not an interface that we should be using. It is only
				273	* valid if the filesystem does not define it's own i_op->setattr.
				274	*/
				275	#define BOTH_TIME_SET (ATTR_ATIME_SET \| ATTR_MTIME_SET)
				276	#define MAX_TOUCH_TIME_ERROR (30*60)
				277	if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET
				278	&& iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec
				279	) {
				280	/* Looks probable. Now just make sure time is in the right ballpark.
				281	* Solaris, at least, doesn't seem to care what the time request is.
				282	* We require it be within 30 minutes of now.
				283	*/
				284	time_t delta = iap->ia_atime.tv_sec - get_seconds();
				285	if (delta<0) delta = -delta;
				286	if (delta < MAX_TOUCH_TIME_ERROR &&
				287	inode_change_ok(inode, iap) != 0) {
				288	/* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME
				289	* this will cause notify_change to set these times to "now"
				290	*/
				291	iap->ia_valid &= ~BOTH_TIME_SET;
				292	}
				293	}
				294
				295	/* The size case is special. It changes the file as well as the attributes. */
				296	if (iap->ia_valid & ATTR_SIZE) {
				297	if (iap->ia_size < inode->i_size) {
				298	err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC\|MAY_OWNER_OVERRIDE);
				299	if (err)
				300	goto out;
				301	}
				302
				303	/*
				304	* If we are changing the size of the file, then
				305	* we need to break all leases.
				306	*/
				307	err = break_lease(inode, FMODE_WRITE \| O_NONBLOCK);
				308	if (err == -EWOULDBLOCK)
				309	err = -ETIMEDOUT;
				310	if (err) /* ENOMEM or EWOULDBLOCK */
				311	goto out_nfserr;
				312
				313	err = get_write_access(inode);
				314	if (err)
				315	goto out_nfserr;
				316
				317	size_change = 1;
				318	err = locks_verify_truncate(inode, NULL, iap->ia_size);
				319	if (err) {
				320	put_write_access(inode);
				321	goto out_nfserr;
				322	}
				323	DQUOT_INIT(inode);
				324	}
				325
				326	imode = inode->i_mode;
				327	if (iap->ia_valid & ATTR_MODE) {
				328	iap->ia_mode &= S_IALLUGO;
				329	imode = iap->ia_mode \|= (imode & ~S_IALLUGO);
				330	}
				331
				332	/* Revoke setuid/setgid bit on chown/chgrp */
				333	if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
				334	iap->ia_valid \|= ATTR_KILL_SUID;
				335	if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
				336	iap->ia_valid \|= ATTR_KILL_SGID;
				337
				338	/* Change the attributes. */
				339
				340	iap->ia_valid \|= ATTR_CTIME;
				341
				342	err = nfserr_notsync;
				343	if (!check_guard \|\| guardtime == inode->i_ctime.tv_sec) {
				344	fh_lock(fhp);
				345	err = notify_change(dentry, iap);
				346	err = nfserrno(err);
				347	fh_unlock(fhp);
				348	}
				349	if (size_change)
				350	put_write_access(inode);
				351	if (!err)
				352	if (EX_ISSYNC(fhp->fh_export))
				353	write_inode_now(inode, 1);
				354	out:
				355	return err;
				356
				357	out_nfserr:
				358	err = nfserrno(err);
				359	goto out;
				360	}
				361
				362	#if defined(CONFIG_NFSD_V4)
				363
				364	static int
				365	set_nfsv4_acl_one(struct dentry dentry, struct posix_acl pacl, char *key)
				366	{
				367	int len;
				368	size_t buflen;
				369	char *buf = NULL;
				370	int error = 0;
				371	struct inode *inode = dentry->d_inode;
				372
				373	buflen = posix_acl_xattr_size(pacl->a_count);
				374	buf = kmalloc(buflen, GFP_KERNEL);
				375	error = -ENOMEM;
				376	if (buf == NULL)
				377	goto out;
				378
				379	len = posix_acl_to_xattr(pacl, buf, buflen);
				380	if (len < 0) {
				381	error = len;
				382	goto out;
				383	}
				384
				385	error = -EOPNOTSUPP;
				386	if (inode->i_op && inode->i_op->setxattr) {
				387	down(&inode->i_sem);
				388	security_inode_setxattr(dentry, key, buf, len, 0);
				389	error = inode->i_op->setxattr(dentry, key, buf, len, 0);
				390	if (!error)
				391	security_inode_post_setxattr(dentry, key, buf, len, 0);
				392	up(&inode->i_sem);
				393	}
				394	out:
				395	kfree(buf);
				396	return error;
				397	}
				398
				399	int
				400	nfsd4_set_nfs4_acl(struct svc_rqst rqstp, struct svc_fh fhp,
				401	struct nfs4_acl *acl)
				402	{
				403	int error;
				404	struct dentry *dentry;
				405	struct inode *inode;
				406	struct posix_acl pacl = NULL, dpacl = NULL;
				407	unsigned int flags = 0;
				408
				409	/* Get inode */
				410	error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR);
				411	if (error)
				412	goto out;
				413
				414	dentry = fhp->fh_dentry;
				415	inode = dentry->d_inode;
				416	if (S_ISDIR(inode->i_mode))
				417	flags = NFS4_ACL_DIR;
				418
				419	error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
				420	if (error == -EINVAL) {
				421	error = nfserr_attrnotsupp;
				422	goto out;
				423	} else if (error < 0)
				424	goto out_nfserr;
				425
				426	if (pacl) {
				427	error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS);
				428	if (error < 0)
				429	goto out_nfserr;
				430	}
				431
				432	if (dpacl) {
				433	error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT);
				434	if (error < 0)
				435	goto out_nfserr;
				436	}
				437
				438	error = nfs_ok;
				439
				440	out:
				441	posix_acl_release(pacl);
				442	posix_acl_release(dpacl);
				443	return (error);
				444	out_nfserr:
				445	error = nfserrno(error);
				446	goto out;
				447	}
				448
				449	static struct posix_acl *
				450	_get_posix_acl(struct dentry dentry, char key)
				451	{
				452	struct inode *inode = dentry->d_inode;
				453	char *buf = NULL;
				454	int buflen, error = 0;
				455	struct posix_acl *pacl = NULL;
				456
				457	error = -EOPNOTSUPP;
				458	if (inode->i_op == NULL)
				459	goto out_err;
				460	if (inode->i_op->getxattr == NULL)
				461	goto out_err;
				462
				463	error = security_inode_getxattr(dentry, key);
				464	if (error)
				465	goto out_err;
				466
				467	buflen = inode->i_op->getxattr(dentry, key, NULL, 0);
				468	if (buflen <= 0) {
				469	error = buflen < 0 ? buflen : -ENODATA;
				470	goto out_err;
				471	}
				472
				473	buf = kmalloc(buflen, GFP_KERNEL);
				474	if (buf == NULL) {
				475	error = -ENOMEM;
				476	goto out_err;
				477	}
				478
				479	error = inode->i_op->getxattr(dentry, key, buf, buflen);
				480	if (error < 0)
				481	goto out_err;
				482
				483	pacl = posix_acl_from_xattr(buf, buflen);
				484	out:
				485	kfree(buf);
				486	return pacl;
				487	out_err:
				488	pacl = ERR_PTR(error);
				489	goto out;
				490	}
				491
				492	int
				493	nfsd4_get_nfs4_acl(struct svc_rqst rqstp, struct dentry dentry, struct nfs4_acl **acl)
				494	{
				495	struct inode *inode = dentry->d_inode;
				496	int error = 0;
				497	struct posix_acl pacl = NULL, dpacl = NULL;
				498	unsigned int flags = 0;
				499
				500	pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS);
				501	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
				502	pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
				503	if (IS_ERR(pacl)) {
				504	error = PTR_ERR(pacl);
				505	pacl = NULL;
				506	goto out;
				507	}
				508
				509	if (S_ISDIR(inode->i_mode)) {
				510	dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT);
				511	if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
				512	dpacl = NULL;
				513	else if (IS_ERR(dpacl)) {
				514	error = PTR_ERR(dpacl);
				515	dpacl = NULL;
				516	goto out;
				517	}
				518	flags = NFS4_ACL_DIR;
				519	}
				520
				521	*acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags);
				522	if (IS_ERR(*acl)) {
				523	error = PTR_ERR(*acl);
				524	*acl = NULL;
				525	}
				526	out:
				527	posix_acl_release(pacl);
				528	posix_acl_release(dpacl);
				529	return error;
				530	}
				531
				532	#endif /* defined(CONFIG_NFSD_V4) */
				533
				534	#ifdef CONFIG_NFSD_V3
				535	/*
				536	* Check server access rights to a file system object
				537	*/
				538	struct accessmap {
				539	u32 access;
				540	int how;
				541	};
				542	static struct accessmap nfs3_regaccess[] = {
				543	{ NFS3_ACCESS_READ, MAY_READ },
				544	{ NFS3_ACCESS_EXECUTE, MAY_EXEC },
				545	{ NFS3_ACCESS_MODIFY, MAY_WRITE\|MAY_TRUNC },
				546	{ NFS3_ACCESS_EXTEND, MAY_WRITE },
				547
				548	{ 0, 0 }
				549	};
				550
				551	static struct accessmap nfs3_diraccess[] = {
				552	{ NFS3_ACCESS_READ, MAY_READ },
				553	{ NFS3_ACCESS_LOOKUP, MAY_EXEC },
				554	{ NFS3_ACCESS_MODIFY, MAY_EXEC\|MAY_WRITE\|MAY_TRUNC },
				555	{ NFS3_ACCESS_EXTEND, MAY_EXEC\|MAY_WRITE },
				556	{ NFS3_ACCESS_DELETE, MAY_REMOVE },
				557
				558	{ 0, 0 }
				559	};
				560
				561	static struct accessmap nfs3_anyaccess[] = {
				562	/* Some clients - Solaris 2.6 at least, make an access call
				563	* to the server to check for access for things like /dev/null
				564	* (which really, the server doesn't care about). So
				565	* We provide simple access checking for them, looking
				566	* mainly at mode bits, and we make sure to ignore read-only
				567	* filesystem checks
				568	*/
				569	{ NFS3_ACCESS_READ, MAY_READ },
				570	{ NFS3_ACCESS_EXECUTE, MAY_EXEC },
				571	{ NFS3_ACCESS_MODIFY, MAY_WRITE\|MAY_LOCAL_ACCESS },
				572	{ NFS3_ACCESS_EXTEND, MAY_WRITE\|MAY_LOCAL_ACCESS },
				573
				574	{ 0, 0 }
				575	};
				576
				577	int
				578	nfsd_access(struct svc_rqst rqstp, struct svc_fh fhp, u32 access, u32 supported)
				579	{
				580	struct accessmap *map;
				581	struct svc_export *export;
				582	struct dentry *dentry;
				583	u32 query, result = 0, sresult = 0;
				584	unsigned int error;
				585
				586	error = fh_verify(rqstp, fhp, 0, MAY_NOP);
				587	if (error)
				588	goto out;
				589
				590	export = fhp->fh_export;
				591	dentry = fhp->fh_dentry;
				592
				593	if (S_ISREG(dentry->d_inode->i_mode))
				594	map = nfs3_regaccess;
				595	else if (S_ISDIR(dentry->d_inode->i_mode))
				596	map = nfs3_diraccess;
				597	else
				598	map = nfs3_anyaccess;
				599
				600
				601	query = *access;
				602	for (; map->access; map++) {
				603	if (map->access & query) {
				604	unsigned int err2;
				605
				606	sresult \|= map->access;
				607
				608	err2 = nfsd_permission(export, dentry, map->how);
				609	switch (err2) {
				610	case nfs_ok:
				611	result \|= map->access;
				612	break;
				613
				614	/* the following error codes just mean the access was not allowed,
				615	* rather than an error occurred */
				616	case nfserr_rofs:
				617	case nfserr_acces:
				618	case nfserr_perm:
				619	/* simply don't "or" in the access bit. */
				620	break;
				621	default:
				622	error = err2;
				623	goto out;
				624	}
				625	}
				626	}
				627	*access = result;
				628	if (supported)
				629	*supported = sresult;
				630
				631	out:
				632	return error;
				633	}
				634	#endif /* CONFIG_NFSD_V3 */
				635
				636
				637
				638	/*
				639	* Open an existing file or directory.
				640	* The access argument indicates the type of open (read/write/lock)
				641	* N.B. After this call fhp needs an fh_put
				642	*/
				643	int
				644	nfsd_open(struct svc_rqst rqstp, struct svc_fh fhp, int type,
				645	int access, struct file **filp)
				646	{
				647	struct dentry *dentry;
				648	struct inode *inode;
				649	int flags = O_RDONLY\|O_LARGEFILE, err;
				650
				651	/*
				652	* If we get here, then the client has already done an "open",
				653	* and (hopefully) checked permission - so allow OWNER_OVERRIDE
				654	* in case a chmod has now revoked permission.
				655	*/
				656	err = fh_verify(rqstp, fhp, type, access \| MAY_OWNER_OVERRIDE);
				657	if (err)
				658	goto out;
				659
				660	dentry = fhp->fh_dentry;
				661	inode = dentry->d_inode;
				662
				663	/* Disallow write access to files with the append-only bit set
				664	* or any access when mandatory locking enabled
				665	*/
				666	err = nfserr_perm;
				667	if (IS_APPEND(inode) && (access & MAY_WRITE))
				668	goto out;
				669	if (IS_ISMNDLK(inode))
				670	goto out;
				671
				672	if (!inode->i_fop)
				673	goto out;
				674
				675	/*
				676	* Check to see if there are any leases on this file.
				677	* This may block while leases are broken.
				678	*/
				679	err = break_lease(inode, O_NONBLOCK \| ((access & MAY_WRITE) ? FMODE_WRITE : 0));
				680	if (err == -EWOULDBLOCK)
				681	err = -ETIMEDOUT;
				682	if (err) /* NOMEM or WOULDBLOCK */
				683	goto out_nfserr;
				684
				685	if (access & MAY_WRITE) {
				686	flags = O_WRONLY\|O_LARGEFILE;
				687
				688	DQUOT_INIT(inode);
				689	}
				690	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags);
				691	if (IS_ERR(*filp))
				692	err = PTR_ERR(*filp);
				693	out_nfserr:
				694	if (err)
				695	err = nfserrno(err);
				696	out:
				697	return err;
				698	}
				699
				700	/*
				701	* Close a file.
				702	*/
				703	void
				704	nfsd_close(struct file *filp)
				705	{
				706	fput(filp);
				707	}
				708
				709	/*
				710	* Sync a file
				711	* As this calls fsync (not fdatasync) there is no need for a write_inode
				712	* after it.
				713	*/
				714	static inline void nfsd_dosync(struct file filp, struct dentry dp,
				715	struct file_operations *fop)
				716	{
				717	struct inode *inode = dp->d_inode;
				718	int (fsync) (struct file , struct dentry *, int);
				719
				720	filemap_fdatawrite(inode->i_mapping);
				721	if (fop && (fsync = fop->fsync))
				722	fsync(filp, dp, 0);
				723	filemap_fdatawait(inode->i_mapping);
				724	}
				725
				726
				727	static void
				728	nfsd_sync(struct file *filp)
				729	{
				730	struct inode *inode = filp->f_dentry->d_inode;
				731	dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
				732	down(&inode->i_sem);
				733	nfsd_dosync(filp, filp->f_dentry, filp->f_op);
				734	up(&inode->i_sem);
				735	}
				736
				737	static void
				738	nfsd_sync_dir(struct dentry *dp)
				739	{
				740	nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
				741	}
				742
				743	/*
				744	* Obtain the readahead parameters for the file
				745	* specified by (dev, ino).
				746	*/
				747	static DEFINE_SPINLOCK(ra_lock);
				748
				749	static inline struct raparms *
				750	nfsd_get_raparms(dev_t dev, ino_t ino)
				751	{
				752	struct raparms ra, rap, *frap = NULL;
				753	int depth = 0;
				754
				755	spin_lock(&ra_lock);
				756	for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
				757	if (ra->p_ino == ino && ra->p_dev == dev)
				758	goto found;
				759	depth++;
				760	if (ra->p_count == 0)
				761	frap = rap;
				762	}
				763	depth = nfsdstats.ra_size*11/10;
				764	if (!frap) {
				765	spin_unlock(&ra_lock);
				766	return NULL;
				767	}
				768	rap = frap;
				769	ra = *frap;
				770	ra->p_dev = dev;
				771	ra->p_ino = ino;
				772	ra->p_set = 0;
				773	found:
				774	if (rap != &raparm_cache) {
				775	*rap = ra->p_next;
				776	ra->p_next = raparm_cache;
				777	raparm_cache = ra;
				778	}
				779	ra->p_count++;
				780	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
				781	spin_unlock(&ra_lock);
				782	return ra;
				783	}
				784
				785	/*
				786	* Grab and keep cached pages assosiated with a file in the svc_rqst
				787	* so that they can be passed to the netowork sendmsg/sendpage routines
				788	* directrly. They will be released after the sending has completed.
				789	*/
				790	static int
				791	nfsd_read_actor(read_descriptor_t desc, struct page page, unsigned long offset , unsigned long size)
				792	{
				793	unsigned long count = desc->count;
				794	struct svc_rqst *rqstp = desc->arg.data;
				795
				796	if (size > count)
				797	size = count;
				798
				799	if (rqstp->rq_res.page_len == 0) {
				800	get_page(page);
				801	rqstp->rq_respages[rqstp->rq_resused++] = page;
				802	rqstp->rq_res.page_base = offset;
				803	rqstp->rq_res.page_len = size;
				804	} else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) {
				805	get_page(page);
				806	rqstp->rq_respages[rqstp->rq_resused++] = page;
				807	rqstp->rq_res.page_len += size;
				808	} else {
				809	rqstp->rq_res.page_len += size;
				810	}
				811
				812	desc->count = count - size;
				813	desc->written += size;
				814	return size;
				815	}
				816
				817	static inline int
				818	nfsd_vfs_read(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				819	loff_t offset, struct kvec vec, int vlen, unsigned long count)
				820	{
				821	struct inode *inode;
				822	struct raparms *ra;
				823	mm_segment_t oldfs;
				824	int err;
				825
				826	err = nfserr_perm;
				827	inode = file->f_dentry->d_inode;
				828	#ifdef MSNFS
				829	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
				830	(!lock_may_read(inode, offset, *count)))
				831	goto out;
				832	#endif
				833
				834	/* Get readahead parameters */
				835	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
				836
				837	if (ra && ra->p_set)
				838	file->f_ra = ra->p_ra;
				839
				840	if (file->f_op->sendfile) {
				841	svc_pushback_unused_pages(rqstp);
				842	err = file->f_op->sendfile(file, &offset, *count,
				843	nfsd_read_actor, rqstp);
				844	} else {
				845	oldfs = get_fs();
				846	set_fs(KERNEL_DS);
				847	err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
				848	set_fs(oldfs);
				849	}
				850
				851	/* Write back readahead params */
				852	if (ra) {
				853	spin_lock(&ra_lock);
				854	ra->p_ra = file->f_ra;
				855	ra->p_set = 1;
				856	ra->p_count--;
				857	spin_unlock(&ra_lock);
				858	}
				859
				860	if (err >= 0) {
				861	nfsdstats.io_read += err;
				862	*count = err;
				863	err = 0;
				864	dnotify_parent(file->f_dentry, DN_ACCESS);
				865	} else
				866	err = nfserrno(err);
				867	out:
				868	return err;
				869	}
				870
				871	static inline int
				872	nfsd_vfs_write(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				873	loff_t offset, struct kvec *vec, int vlen,
				874	unsigned long cnt, int *stablep)
				875	{
				876	struct svc_export *exp;
				877	struct dentry *dentry;
				878	struct inode *inode;
				879	mm_segment_t oldfs;
				880	int err = 0;
				881	int stable = *stablep;
				882
				883	err = nfserr_perm;
				884
				885	#ifdef MSNFS
				886	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
				887	(!lock_may_write(file->f_dentry->d_inode, offset, cnt)))
				888	goto out;
				889	#endif
				890
				891	dentry = file->f_dentry;
				892	inode = dentry->d_inode;
				893	exp = fhp->fh_export;
				894
				895	/*
				896	* Request sync writes if
				897	* - the sync export option has been set, or
				898	* - the client requested O_SYNC behavior (NFSv3 feature).
				899	* - The file system doesn't support fsync().
				900	* When gathered writes have been configured for this volume,
				901	* flushing the data to disk is handled separately below.
				902	*/
				903
				904	if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */
				905	stable = 2;
				906	stablep = 2; / FILE_SYNC */
				907	}
				908
				909	if (!EX_ISSYNC(exp))
				910	stable = 0;
				911	if (stable && !EX_WGATHER(exp))
				912	file->f_flags \|= O_SYNC;
				913
				914	/* Write the data. */
				915	oldfs = get_fs(); set_fs(KERNEL_DS);
				916	err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
				917	set_fs(oldfs);
				918	if (err >= 0) {
				919	nfsdstats.io_write += cnt;
				920	dnotify_parent(file->f_dentry, DN_MODIFY);
				921	}
				922
				923	/* clear setuid/setgid flag after write */
				924	if (err >= 0 && (inode->i_mode & (S_ISUID \| S_ISGID))) {
				925	struct iattr ia;
				926	ia.ia_valid = ATTR_KILL_SUID \| ATTR_KILL_SGID;
				927
				928	down(&inode->i_sem);
				929	notify_change(dentry, &ia);
				930	up(&inode->i_sem);
				931	}
				932
				933	if (err >= 0 && stable) {
				934	static ino_t last_ino;
				935	static dev_t last_dev;
				936
				937	/*
				938	* Gathered writes: If another process is currently
				939	* writing to the file, there's a high chance
				940	* this is another nfsd (triggered by a bulk write
				941	* from a client's biod). Rather than syncing the
				942	* file with each write request, we sleep for 10 msec.
				943	*
				944	* I don't know if this roughly approximates
				945	* C. Juszak's idea of gathered writes, but it's a
				946	* nice and simple solution (IMHO), and it seems to
				947	* work:-)
				948	*/
				949	if (EX_WGATHER(exp)) {
				950	if (atomic_read(&inode->i_writecount) > 1
				951	\|\| (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
				952	dprintk("nfsd: write defer %d\n", current->pid);
				953	msleep(10);
				954	dprintk("nfsd: write resume %d\n", current->pid);
				955	}
				956
				957	if (inode->i_state & I_DIRTY) {
				958	dprintk("nfsd: write sync %d\n", current->pid);
				959	nfsd_sync(file);
				960	}
				961	#if 0
				962	wake_up(&inode->i_wait);
				963	#endif
				964	}
				965	last_ino = inode->i_ino;
				966	last_dev = inode->i_sb->s_dev;
				967	}
				968
				969	dprintk("nfsd: write complete err=%d\n", err);
				970	if (err >= 0)
				971	err = 0;
				972	else
				973	err = nfserrno(err);
				974	out:
				975	return err;
				976	}
				977
				978	/*
				979	* Read data from a file. count must contain the requested read count
				980	* on entry. On return, *count contains the number of bytes actually read.
				981	* N.B. After this call fhp needs an fh_put
				982	*/
				983	int
				984	nfsd_read(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				985	loff_t offset, struct kvec *vec, int vlen,
				986	unsigned long *count)
				987	{
				988	int err;
				989
				990	if (file) {
				991	err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
				992	MAY_READ\|MAY_OWNER_OVERRIDE);
				993	if (err)
				994	goto out;
				995	err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
				996	} else {
				997	err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
				998	if (err)
				999	goto out;
				1000	err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
				1001	nfsd_close(file);
				1002	}
				1003	out:
				1004	return err;
				1005	}
				1006
				1007	/*
				1008	* Write data to a file.
				1009	* The stable flag requests synchronous writes.
				1010	* N.B. After this call fhp needs an fh_put
				1011	*/
				1012	int
				1013	nfsd_write(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				1014	loff_t offset, struct kvec *vec, int vlen, unsigned long cnt,
				1015	int *stablep)
				1016	{
				1017	int err = 0;
				1018
				1019	if (file) {
				1020	err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
				1021	MAY_WRITE\|MAY_OWNER_OVERRIDE);
				1022	if (err)
				1023	goto out;
				1024	err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
				1025	stablep);
				1026	} else {
				1027	err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
				1028	if (err)
				1029	goto out;
				1030
				1031	if (cnt)
				1032	err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
				1033	cnt, stablep);
				1034	nfsd_close(file);
				1035	}
				1036	out:
				1037	return err;
				1038	}
				1039
				1040	#ifdef CONFIG_NFSD_V3
				1041	/*
				1042	* Commit all pending writes to stable storage.
				1043	* Strictly speaking, we could sync just the indicated file region here,
				1044	* but there's currently no way we can ask the VFS to do so.
				1045	*
				1046	* Unfortunately we cannot lock the file to make sure we return full WCC
				1047	* data to the client, as locking happens lower down in the filesystem.
				1048	*/
				1049	int
				1050	nfsd_commit(struct svc_rqst rqstp, struct svc_fh fhp,
				1051	loff_t offset, unsigned long count)
				1052	{
				1053	struct file *file;
				1054	int err;
				1055
				1056	if ((u64)count > ~(u64)offset)
				1057	return nfserr_inval;
				1058
				1059	if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0)
				1060	return err;
				1061	if (EX_ISSYNC(fhp->fh_export)) {
				1062	if (file->f_op && file->f_op->fsync) {
				1063	nfsd_sync(file);
				1064	} else {
				1065	err = nfserr_notsupp;
				1066	}
				1067	}
				1068
				1069	nfsd_close(file);
				1070	return err;
				1071	}
				1072	#endif /* CONFIG_NFSD_V3 */
				1073
				1074	/*
				1075	* Create a file (regular, directory, device, fifo); UNIX sockets
				1076	* not yet implemented.
				1077	* If the response fh has been verified, the parent directory should
				1078	* already be locked. Note that the parent directory is left locked.
				1079	*
				1080	* N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
				1081	*/
				1082	int
				1083	nfsd_create(struct svc_rqst rqstp, struct svc_fh fhp,
				1084	char fname, int flen, struct iattr iap,
				1085	int type, dev_t rdev, struct svc_fh *resfhp)
				1086	{
				1087	struct dentry dentry, dchild = NULL;
				1088	struct inode *dirp;
				1089	int err;
				1090
				1091	err = nfserr_perm;
				1092	if (!flen)
				1093	goto out;
				1094	err = nfserr_exist;
				1095	if (isdotent(fname, flen))
				1096	goto out;
				1097
				1098	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
				1099	if (err)
				1100	goto out;
				1101
				1102	dentry = fhp->fh_dentry;
				1103	dirp = dentry->d_inode;
				1104
				1105	err = nfserr_notdir;
				1106	if(!dirp->i_op \|\| !dirp->i_op->lookup)
				1107	goto out;
				1108	/*
				1109	* Check whether the response file handle has been verified yet.
				1110	* If it has, the parent directory should already be locked.
				1111	*/
				1112	if (!resfhp->fh_dentry) {
				1113	/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
				1114	fh_lock(fhp);
				1115	dchild = lookup_one_len(fname, dentry, flen);
				1116	err = PTR_ERR(dchild);
				1117	if (IS_ERR(dchild))
				1118	goto out_nfserr;
				1119	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
				1120	if (err)
				1121	goto out;
				1122	} else {
				1123	/* called from nfsd_proc_create */
				1124	dchild = dget(resfhp->fh_dentry);
				1125	if (!fhp->fh_locked) {
				1126	/* not actually possible */
				1127	printk(KERN_ERR
				1128	"nfsd_create: parent %s/%s not locked!\n",
				1129	dentry->d_parent->d_name.name,
				1130	dentry->d_name.name);
				1131	err = -EIO;
				1132	goto out;
				1133	}
				1134	}
				1135	/*
				1136	* Make sure the child dentry is still negative ...
				1137	*/
				1138	err = nfserr_exist;
				1139	if (dchild->d_inode) {
				1140	dprintk("nfsd_create: dentry %s/%s not negative!\n",
				1141	dentry->d_name.name, dchild->d_name.name);
				1142	goto out;
				1143	}
				1144
				1145	if (!(iap->ia_valid & ATTR_MODE))
				1146	iap->ia_mode = 0;
				1147	iap->ia_mode = (iap->ia_mode & S_IALLUGO) \| type;
				1148
				1149	/*
				1150	* Get the dir op function pointer.
				1151	*/
				1152	err = nfserr_perm;
				1153	switch (type) {
				1154	case S_IFREG:
				1155	err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
				1156	break;
				1157	case S_IFDIR:
				1158	err = vfs_mkdir(dirp, dchild, iap->ia_mode);
				1159	break;
				1160	case S_IFCHR:
				1161	case S_IFBLK:
				1162	case S_IFIFO:
				1163	case S_IFSOCK:
				1164	err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
				1165	break;
				1166	default:
				1167	printk("nfsd: bad file type %o in nfsd_create\n", type);
				1168	err = -EINVAL;
				1169	}
				1170	if (err < 0)
				1171	goto out_nfserr;
				1172
				1173	if (EX_ISSYNC(fhp->fh_export)) {
				1174	nfsd_sync_dir(dentry);
				1175	write_inode_now(dchild->d_inode, 1);
				1176	}
				1177
				1178
				1179	/* Set file attributes. Mode has already been set and
				1180	* setting uid/gid works only for root. Irix appears to
				1181	* send along the gid when it tries to implement setgid
				1182	* directories via NFS.
				1183	*/
				1184	err = 0;
				1185	if ((iap->ia_valid &= ~(ATTR_UID\|ATTR_GID\|ATTR_MODE)) != 0)
				1186	err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
				1187	/*
				1188	* Update the file handle to get the new inode info.
				1189	*/
				1190	if (!err)
				1191	err = fh_update(resfhp);
				1192	out:
				1193	if (dchild && !IS_ERR(dchild))
				1194	dput(dchild);
				1195	return err;
				1196
				1197	out_nfserr:
				1198	err = nfserrno(err);
				1199	goto out;
				1200	}
				1201
				1202	#ifdef CONFIG_NFSD_V3
				1203	/*
				1204	* NFSv3 version of nfsd_create
				1205	*/
				1206	int
				1207	nfsd_create_v3(struct svc_rqst rqstp, struct svc_fh fhp,
				1208	char fname, int flen, struct iattr iap,
				1209	struct svc_fh resfhp, int createmode, u32 verifier,
				1210	int *truncp)
				1211	{
				1212	struct dentry dentry, dchild = NULL;
				1213	struct inode *dirp;
				1214	int err;
				1215	__u32 v_mtime=0, v_atime=0;
				1216	int v_mode=0;
				1217
				1218	err = nfserr_perm;
				1219	if (!flen)
				1220	goto out;
				1221	err = nfserr_exist;
				1222	if (isdotent(fname, flen))
				1223	goto out;
				1224	if (!(iap->ia_valid & ATTR_MODE))
				1225	iap->ia_mode = 0;
				1226	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
				1227	if (err)
				1228	goto out;
				1229
				1230	dentry = fhp->fh_dentry;
				1231	dirp = dentry->d_inode;
				1232
				1233	/* Get all the sanity checks out of the way before
				1234	* we lock the parent. */
				1235	err = nfserr_notdir;
				1236	if(!dirp->i_op \|\| !dirp->i_op->lookup)
				1237	goto out;
				1238	fh_lock(fhp);
				1239
				1240	/*
				1241	* Compose the response file handle.
				1242	*/
				1243	dchild = lookup_one_len(fname, dentry, flen);
				1244	err = PTR_ERR(dchild);
				1245	if (IS_ERR(dchild))
				1246	goto out_nfserr;
				1247
				1248	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
				1249	if (err)
				1250	goto out;
				1251
				1252	if (createmode == NFS3_CREATE_EXCLUSIVE) {
				1253	/* while the verifier would fit in mtime+atime,
				1254	* solaris7 gets confused (bugid 4218508) if these have
				1255	* the high bit set, so we use the mode as well
				1256	*/
				1257	v_mtime = verifier[0]&0x7fffffff;
				1258	v_atime = verifier[1]&0x7fffffff;
				1259	v_mode = S_IFREG
				1260	\| ((verifier[0]&0x80000000) >> (32-7)) /* u+x */
				1261	\| ((verifier[1]&0x80000000) >> (32-9)) /* u+r */
				1262	;
				1263	}
				1264
				1265	if (dchild->d_inode) {
				1266	err = 0;
				1267
				1268	switch (createmode) {
				1269	case NFS3_CREATE_UNCHECKED:
				1270	if (! S_ISREG(dchild->d_inode->i_mode))
				1271	err = nfserr_exist;
				1272	else if (truncp) {
				1273	/* in nfsv4, we need to treat this case a little
				1274	* differently. we don't want to truncate the
				1275	* file now; this would be wrong if the OPEN
				1276	* fails for some other reason. furthermore,
				1277	* if the size is nonzero, we should ignore it
				1278	* according to spec!
				1279	*/
				1280	*truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
				1281	}
				1282	else {
				1283	iap->ia_valid &= ATTR_SIZE;
				1284	goto set_attr;
				1285	}
				1286	break;
				1287	case NFS3_CREATE_EXCLUSIVE:
				1288	if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
				1289	&& dchild->d_inode->i_atime.tv_sec == v_atime
				1290	&& dchild->d_inode->i_mode == v_mode
				1291	&& dchild->d_inode->i_size == 0 )
				1292	break;
				1293	/* fallthru */
				1294	case NFS3_CREATE_GUARDED:
				1295	err = nfserr_exist;
				1296	}
				1297	goto out;
				1298	}
				1299
				1300	err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
				1301	if (err < 0)
				1302	goto out_nfserr;
				1303
				1304	if (EX_ISSYNC(fhp->fh_export)) {
				1305	nfsd_sync_dir(dentry);
				1306	/* setattr will sync the child (or not) */
				1307	}
				1308
				1309	/*
				1310	* Update the filehandle to get the new inode info.
				1311	*/
				1312	err = fh_update(resfhp);
				1313	if (err)
				1314	goto out;
				1315
				1316	if (createmode == NFS3_CREATE_EXCLUSIVE) {
				1317	/* Cram the verifier into atime/mtime/mode */
				1318	iap->ia_valid = ATTR_MTIME\|ATTR_ATIME
				1319	\| ATTR_MTIME_SET\|ATTR_ATIME_SET
				1320	\| ATTR_MODE;
				1321	/* XXX someone who knows this better please fix it for nsec */
				1322	iap->ia_mtime.tv_sec = v_mtime;
				1323	iap->ia_atime.tv_sec = v_atime;
				1324	iap->ia_mtime.tv_nsec = 0;
				1325	iap->ia_atime.tv_nsec = 0;
				1326	iap->ia_mode = v_mode;
				1327	}
				1328
				1329	/* Set file attributes.
				1330	* Mode has already been set but we might need to reset it
				1331	* for CREATE_EXCLUSIVE
				1332	* Irix appears to send along the gid when it tries to
				1333	* implement setgid directories via NFS. Clear out all that cruft.
				1334	*/
				1335	set_attr:
				1336	if ((iap->ia_valid &= ~(ATTR_UID\|ATTR_GID)) != 0)
				1337	err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
				1338
				1339	out:
				1340	fh_unlock(fhp);
				1341	if (dchild && !IS_ERR(dchild))
				1342	dput(dchild);
				1343	return err;
				1344
				1345	out_nfserr:
				1346	err = nfserrno(err);
				1347	goto out;
				1348	}
				1349	#endif /* CONFIG_NFSD_V3 */
				1350
				1351	/*
				1352	* Read a symlink. On entry, *lenp must contain the maximum path length that
				1353	* fits into the buffer. On return, it contains the true length.
				1354	* N.B. After this call fhp needs an fh_put
				1355	*/
				1356	int
				1357	nfsd_readlink(struct svc_rqst rqstp, struct svc_fh fhp, char buf, int lenp)
				1358	{
				1359	struct dentry *dentry;
				1360	struct inode *inode;
				1361	mm_segment_t oldfs;
				1362	int err;
				1363
				1364	err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP);
				1365	if (err)
				1366	goto out;
				1367
				1368	dentry = fhp->fh_dentry;
				1369	inode = dentry->d_inode;
				1370
				1371	err = nfserr_inval;
				1372	if (!inode->i_op \|\| !inode->i_op->readlink)
				1373	goto out;
				1374
				1375	touch_atime(fhp->fh_export->ex_mnt, dentry);
				1376	/* N.B. Why does this call need a get_fs()??
				1377	* Remove the set_fs and watch the fireworks:-) --okir
				1378	*/
				1379
				1380	oldfs = get_fs(); set_fs(KERNEL_DS);
				1381	err = inode->i_op->readlink(dentry, buf, *lenp);
				1382	set_fs(oldfs);
				1383
				1384	if (err < 0)
				1385	goto out_nfserr;
				1386	*lenp = err;
				1387	err = 0;
				1388	out:
				1389	return err;
				1390
				1391	out_nfserr:
				1392	err = nfserrno(err);
				1393	goto out;
				1394	}
				1395
				1396	/*
				1397	* Create a symlink and look up its inode
				1398	* N.B. After this call _both_ fhp and resfhp need an fh_put
				1399	*/
				1400	int
				1401	nfsd_symlink(struct svc_rqst rqstp, struct svc_fh fhp,
				1402	char *fname, int flen,
				1403	char *path, int plen,
				1404	struct svc_fh *resfhp,
				1405	struct iattr *iap)
				1406	{
				1407	struct dentry dentry, dnew;
				1408	int err, cerr;
				1409	umode_t mode;
				1410
				1411	err = nfserr_noent;
				1412	if (!flen \|\| !plen)
				1413	goto out;
				1414	err = nfserr_exist;
				1415	if (isdotent(fname, flen))
				1416	goto out;
				1417
				1418	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
				1419	if (err)
				1420	goto out;
				1421	fh_lock(fhp);
				1422	dentry = fhp->fh_dentry;
				1423	dnew = lookup_one_len(fname, dentry, flen);
				1424	err = PTR_ERR(dnew);
				1425	if (IS_ERR(dnew))
				1426	goto out_nfserr;
				1427
				1428	mode = S_IALLUGO;
				1429	/* Only the MODE ATTRibute is even vaguely meaningful */
				1430	if (iap && (iap->ia_valid & ATTR_MODE))
				1431	mode = iap->ia_mode & S_IALLUGO;
				1432
				1433	if (unlikely(path[plen] != 0)) {
				1434	char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
				1435	if (path_alloced == NULL)
				1436	err = -ENOMEM;
				1437	else {
				1438	strncpy(path_alloced, path, plen);
				1439	path_alloced[plen] = 0;
				1440	err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
				1441	kfree(path_alloced);
				1442	}
				1443	} else
				1444	err = vfs_symlink(dentry->d_inode, dnew, path, mode);
				1445
				1446	if (!err) {
				1447	if (EX_ISSYNC(fhp->fh_export))
				1448	nfsd_sync_dir(dentry);
				1449	} else
				1450	err = nfserrno(err);
				1451	fh_unlock(fhp);
				1452
				1453	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
				1454	dput(dnew);
				1455	if (err==0) err = cerr;
				1456	out:
				1457	return err;
				1458
				1459	out_nfserr:
				1460	err = nfserrno(err);
				1461	goto out;
				1462	}
				1463
				1464	/*
				1465	* Create a hardlink
				1466	* N.B. After this call _both_ ffhp and tfhp need an fh_put
				1467	*/
				1468	int
				1469	nfsd_link(struct svc_rqst rqstp, struct svc_fh ffhp,
				1470	char name, int len, struct svc_fh tfhp)
				1471	{
				1472	struct dentry ddir, dnew, *dold;
				1473	struct inode dirp, dest;
				1474	int err;
				1475
				1476	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
				1477	if (err)
				1478	goto out;
				1479	err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
				1480	if (err)
				1481	goto out;
				1482
				1483	err = nfserr_perm;
				1484	if (!len)
				1485	goto out;
				1486	err = nfserr_exist;
				1487	if (isdotent(name, len))
				1488	goto out;
				1489
				1490	fh_lock(ffhp);
				1491	ddir = ffhp->fh_dentry;
				1492	dirp = ddir->d_inode;
				1493
				1494	dnew = lookup_one_len(name, ddir, len);
				1495	err = PTR_ERR(dnew);
				1496	if (IS_ERR(dnew))
				1497	goto out_nfserr;
				1498
				1499	dold = tfhp->fh_dentry;
				1500	dest = dold->d_inode;
				1501
				1502	err = vfs_link(dold, dirp, dnew);
				1503	if (!err) {
				1504	if (EX_ISSYNC(ffhp->fh_export)) {
				1505	nfsd_sync_dir(ddir);
				1506	write_inode_now(dest, 1);
				1507	}
				1508	} else {
				1509	if (err == -EXDEV && rqstp->rq_vers == 2)
				1510	err = nfserr_acces;
				1511	else
				1512	err = nfserrno(err);
				1513	}
				1514
				1515	fh_unlock(ffhp);
				1516	dput(dnew);
				1517	out:
				1518	return err;
				1519
				1520	out_nfserr:
				1521	err = nfserrno(err);
				1522	goto out;
				1523	}
				1524
				1525	/*
				1526	* Rename a file
				1527	* N.B. After this call _both_ ffhp and tfhp need an fh_put
				1528	*/
				1529	int
				1530	nfsd_rename(struct svc_rqst rqstp, struct svc_fh ffhp, char *fname, int flen,
				1531	struct svc_fh tfhp, char tname, int tlen)
				1532	{
				1533	struct dentry fdentry, tdentry, odentry, ndentry, *trap;
				1534	struct inode fdir, tdir;
				1535	int err;
				1536
				1537	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
				1538	if (err)
				1539	goto out;
				1540	err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE);
				1541	if (err)
				1542	goto out;
				1543
				1544	fdentry = ffhp->fh_dentry;
				1545	fdir = fdentry->d_inode;
				1546
				1547	tdentry = tfhp->fh_dentry;
				1548	tdir = tdentry->d_inode;
				1549
				1550	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
				1551	if (fdir->i_sb != tdir->i_sb)
				1552	goto out;
				1553
				1554	err = nfserr_perm;
				1555	if (!flen \|\| isdotent(fname, flen) \|\| !tlen \|\| isdotent(tname, tlen))
				1556	goto out;
				1557
				1558	/* cannot use fh_lock as we need deadlock protective ordering
				1559	* so do it by hand */
				1560	trap = lock_rename(tdentry, fdentry);
				1561	ffhp->fh_locked = tfhp->fh_locked = 1;
				1562	fill_pre_wcc(ffhp);
				1563	fill_pre_wcc(tfhp);
				1564
				1565	odentry = lookup_one_len(fname, fdentry, flen);
				1566	err = PTR_ERR(odentry);
				1567	if (IS_ERR(odentry))
				1568	goto out_nfserr;
				1569
				1570	err = -ENOENT;
				1571	if (!odentry->d_inode)
				1572	goto out_dput_old;
				1573	err = -EINVAL;
				1574	if (odentry == trap)
				1575	goto out_dput_old;
				1576
				1577	ndentry = lookup_one_len(tname, tdentry, tlen);
				1578	err = PTR_ERR(ndentry);
				1579	if (IS_ERR(ndentry))
				1580	goto out_dput_old;
				1581	err = -ENOTEMPTY;
				1582	if (ndentry == trap)
				1583	goto out_dput_new;
				1584
				1585	#ifdef MSNFS
				1586	if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
				1587	((atomic_read(&odentry->d_count) > 1)
				1588	\|\| (atomic_read(&ndentry->d_count) > 1))) {
				1589	err = nfserr_perm;
				1590	} else
				1591	#endif
				1592	err = vfs_rename(fdir, odentry, tdir, ndentry);
				1593	if (!err && EX_ISSYNC(tfhp->fh_export)) {
				1594	nfsd_sync_dir(tdentry);
				1595	nfsd_sync_dir(fdentry);
				1596	}
				1597
				1598	out_dput_new:
				1599	dput(ndentry);
				1600	out_dput_old:
				1601	dput(odentry);
				1602	out_nfserr:
				1603	if (err)
				1604	err = nfserrno(err);
				1605
				1606	/* we cannot reply on fh_unlock on the two filehandles,
				1607	* as that would do the wrong thing if the two directories
				1608	* were the same, so again we do it by hand
				1609	*/
				1610	fill_post_wcc(ffhp);
				1611	fill_post_wcc(tfhp);
				1612	unlock_rename(tdentry, fdentry);
				1613	ffhp->fh_locked = tfhp->fh_locked = 0;
				1614
				1615	out:
				1616	return err;
				1617	}
				1618
				1619	/*
				1620	* Unlink a file or directory
				1621	* N.B. After this call fhp needs an fh_put
				1622	*/
				1623	int
				1624	nfsd_unlink(struct svc_rqst rqstp, struct svc_fh fhp, int type,
				1625	char *fname, int flen)
				1626	{
				1627	struct dentry dentry, rdentry;
				1628	struct inode *dirp;
				1629	int err;
				1630
				1631	err = nfserr_acces;
				1632	if (!flen \|\| isdotent(fname, flen))
				1633	goto out;
				1634	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
				1635	if (err)
				1636	goto out;
				1637
				1638	fh_lock(fhp);
				1639	dentry = fhp->fh_dentry;
				1640	dirp = dentry->d_inode;
				1641
				1642	rdentry = lookup_one_len(fname, dentry, flen);
				1643	err = PTR_ERR(rdentry);
				1644	if (IS_ERR(rdentry))
				1645	goto out_nfserr;
				1646
				1647	if (!rdentry->d_inode) {
				1648	dput(rdentry);
				1649	err = nfserr_noent;
				1650	goto out;
				1651	}
				1652
				1653	if (!type)
				1654	type = rdentry->d_inode->i_mode & S_IFMT;
				1655
				1656	if (type != S_IFDIR) { /* It's UNLINK */
				1657	#ifdef MSNFS
				1658	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
				1659	(atomic_read(&rdentry->d_count) > 1)) {
				1660	err = nfserr_perm;
				1661	} else
				1662	#endif
				1663	err = vfs_unlink(dirp, rdentry);
				1664	} else { /* It's RMDIR */
				1665	err = vfs_rmdir(dirp, rdentry);
				1666	}
				1667
				1668	dput(rdentry);
				1669
				1670	if (err)
				1671	goto out_nfserr;
				1672	if (EX_ISSYNC(fhp->fh_export))
				1673	nfsd_sync_dir(dentry);
				1674
				1675	out:
				1676	return err;
				1677
				1678	out_nfserr:
				1679	err = nfserrno(err);
				1680	goto out;
				1681	}
				1682
				1683	/*
				1684	* Read entries from a directory.
				1685	* The NFSv3/4 verifier we ignore for now.
				1686	*/
				1687	int
				1688	nfsd_readdir(struct svc_rqst rqstp, struct svc_fh fhp, loff_t *offsetp,
				1689	struct readdir_cd *cdp, encode_dent_fn func)
				1690	{
				1691	int err;
				1692	struct file *file;
				1693	loff_t offset = *offsetp;
				1694
				1695	err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
				1696	if (err)
				1697	goto out;
				1698
				1699	offset = vfs_llseek(file, offset, 0);
				1700	if (offset < 0) {
				1701	err = nfserrno((int)offset);
				1702	goto out_close;
				1703	}
				1704
				1705	/*
				1706	* Read the directory entries. This silly loop is necessary because
				1707	* readdir() is not guaranteed to fill up the entire buffer, but
				1708	* may choose to do less.
				1709	*/
				1710
				1711	do {
				1712	cdp->err = nfserr_eof; /* will be cleared on successful read */
				1713	err = vfs_readdir(file, (filldir_t) func, cdp);
				1714	} while (err >=0 && cdp->err == nfs_ok);
				1715	if (err)
				1716	err = nfserrno(err);
				1717	else
				1718	err = cdp->err;
				1719	*offsetp = vfs_llseek(file, 0, 1);
				1720
				1721	if (err == nfserr_eof \|\| err == nfserr_toosmall)
				1722	err = nfs_ok; /* can still be found in ->err */
				1723	out_close:
				1724	nfsd_close(file);
				1725	out:
				1726	return err;
				1727	}
				1728
				1729	/*
				1730	* Get file system stats
				1731	* N.B. After this call fhp needs an fh_put
				1732	*/
				1733	int
				1734	nfsd_statfs(struct svc_rqst rqstp, struct svc_fh fhp, struct kstatfs *stat)
				1735	{
				1736	int err = fh_verify(rqstp, fhp, 0, MAY_NOP);
				1737	if (!err && vfs_statfs(fhp->fh_dentry->d_inode->i_sb,stat))
				1738	err = nfserr_io;
				1739	return err;
				1740	}
				1741
				1742	/*
				1743	* Check for a user's access permissions to this inode.
				1744	*/
				1745	int
				1746	nfsd_permission(struct svc_export exp, struct dentry dentry, int acc)
				1747	{
				1748	struct inode *inode = dentry->d_inode;
				1749	int err;
				1750
				1751	if (acc == MAY_NOP)
				1752	return 0;
				1753	#if 0
				1754	dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
				1755	acc,
				1756	(acc & MAY_READ)? " read" : "",
				1757	(acc & MAY_WRITE)? " write" : "",
				1758	(acc & MAY_EXEC)? " exec" : "",
				1759	(acc & MAY_SATTR)? " sattr" : "",
				1760	(acc & MAY_TRUNC)? " trunc" : "",
				1761	(acc & MAY_LOCK)? " lock" : "",
				1762	(acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "",
				1763	inode->i_mode,
				1764	IS_IMMUTABLE(inode)? " immut" : "",
				1765	IS_APPEND(inode)? " append" : "",
				1766	IS_RDONLY(inode)? " ro" : "");
				1767	dprintk(" owner %d/%d user %d/%d\n",
				1768	inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
				1769	#endif
				1770
				1771	/* Normally we reject any write/sattr etc access on a read-only file
				1772	* system. But if it is IRIX doing check on write-access for a
				1773	* device special file, we ignore rofs.
				1774	*/
				1775	if (!(acc & MAY_LOCAL_ACCESS))
				1776	if (acc & (MAY_WRITE \| MAY_SATTR \| MAY_TRUNC)) {
				1777	if (EX_RDONLY(exp) \|\| IS_RDONLY(inode))
				1778	return nfserr_rofs;
				1779	if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
				1780	return nfserr_perm;
				1781	}
				1782	if ((acc & MAY_TRUNC) && IS_APPEND(inode))
				1783	return nfserr_perm;
				1784
				1785	if (acc & MAY_LOCK) {
				1786	/* If we cannot rely on authentication in NLM requests,
				1787	* just allow locks, otherwise require read permission, or
				1788	* ownership
				1789	*/
				1790	if (exp->ex_flags & NFSEXP_NOAUTHNLM)
				1791	return 0;
				1792	else
				1793	acc = MAY_READ \| MAY_OWNER_OVERRIDE;
				1794	}
				1795	/*
				1796	* The file owner always gets access permission for accesses that
				1797	* would normally be checked at open time. This is to make
				1798	* file access work even when the client has done a fchmod(fd, 0).
				1799	*
				1800	* However, `cp foo bar' should fail nevertheless when bar is
				1801	* readonly. A sensible way to do this might be to reject all
				1802	* attempts to truncate a read-only file, because a creat() call
				1803	* always implies file truncation.
				1804	* ... but this isn't really fair. A process may reasonably call
				1805	* ftruncate on an open file descriptor on a file with perm 000.
				1806	* We must trust the client to do permission checking - using "ACCESS"
				1807	* with NFSv3.
				1808	*/
				1809	if ((acc & MAY_OWNER_OVERRIDE) &&
				1810	inode->i_uid == current->fsuid)
				1811	return 0;
				1812
				1813	err = permission(inode, acc & (MAY_READ\|MAY_WRITE\|MAY_EXEC), NULL);
				1814
				1815	/* Allow read access to binaries even when mode 111 */
				1816	if (err == -EACCES && S_ISREG(inode->i_mode) &&
				1817	acc == (MAY_READ \| MAY_OWNER_OVERRIDE))
				1818	err = permission(inode, MAY_EXEC, NULL);
				1819
				1820	return err? nfserrno(err) : 0;
				1821	}
				1822
				1823	void
				1824	nfsd_racache_shutdown(void)
				1825	{
				1826	if (!raparm_cache)
				1827	return;
				1828	dprintk("nfsd: freeing readahead buffers.\n");
				1829	kfree(raparml);
				1830	raparm_cache = raparml = NULL;
				1831	}
				1832	/*
				1833	* Initialize readahead param cache
				1834	*/
				1835	int
				1836	nfsd_racache_init(int cache_size)
				1837	{
				1838	int i;
				1839
				1840	if (raparm_cache)
				1841	return 0;
				1842	raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
				1843
				1844	if (raparml != NULL) {
				1845	dprintk("nfsd: allocating %d readahead buffers.\n",
				1846	cache_size);
				1847	memset(raparml, 0, sizeof(struct raparms) * cache_size);
				1848	for (i = 0; i < cache_size - 1; i++) {
				1849	raparml[i].p_next = raparml + i + 1;
				1850	}
				1851	raparm_cache = raparml;
				1852	} else {
				1853	printk(KERN_WARNING
				1854	"nfsd: Could not allocate memory read-ahead cache.\n");
				1855	return -ENOMEM;
				1856	}
				1857	nfsdstats.ra_size = cache_size;
				1858	return 0;
				1859	}