[PATCH] vfs: *at functions: core

Here is a series of patches which introduce in total 13 new system calls
which take a file descriptor/filename pair instead of a single file
name.  These functions, openat etc, have been discussed on numerous
occasions.  They are needed to implement race-free filesystem traversal,
they are necessary to implement a virtual per-thread current working
directory (think multi-threaded backup software), etc.

We have in glibc today implementations of the interfaces which use the
/proc/self/fd magic.  But this code is rather expensive.  Here are some
results (similar to what Jim Meyering posted before).

The test creates a deep directory hierarchy on a tmpfs filesystem.  Then
rm -fr is used to remove all directories.  Without syscall support I get
this:

real    0m31.921s
user    0m0.688s
sys     0m31.234s

With syscall support the results are much better:

real    0m20.699s
user    0m0.536s
sys     0m20.149s

The interfaces are for obvious reasons currently not much used.  But they'll
be used.  coreutils (and Jeff's posixutils) are already using them.
Furthermore, code like ftw/fts in libc (maybe even glob) will also start using
them.  I expect a patch to make follow soon.  Every program which is walking
the filesystem tree will benefit.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@ftp.linux.org.uk>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/fs/compat.c b/fs/compat.c
index 2468ac1..c6ba9de 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -68,10 +68,10 @@
 		tv[0].tv_usec = 0;
 		tv[1].tv_usec = 0;
 	}
-	return do_utimes(filename, t ? tv : NULL);
+	return do_utimes(AT_FDCWD, filename, t ? tv : NULL);
 }
 
-asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t)
+asmlinkage long compat_sys_futimesat(int dfd, char __user *filename, struct compat_timeval __user *t)
 {
 	struct timeval tv[2];
 
@@ -82,14 +82,19 @@
 		    get_user(tv[1].tv_usec, &t[1].tv_usec))
 			return -EFAULT; 
 	} 
-	return do_utimes(filename, t ? tv : NULL);
+	return do_utimes(dfd, filename, t ? tv : NULL);
+}
+
+asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t)
+{
+	return compat_sys_futimesat(AT_FDCWD, filename, t);
 }
 
 asmlinkage long compat_sys_newstat(char __user * filename,
 		struct compat_stat __user *statbuf)
 {
 	struct kstat stat;
-	int error = vfs_stat(filename, &stat);
+	int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
 
 	if (!error)
 		error = cp_compat_stat(&stat, statbuf);
@@ -100,13 +105,34 @@
 		struct compat_stat __user *statbuf)
 {
 	struct kstat stat;
-	int error = vfs_lstat(filename, &stat);
+	int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
 
 	if (!error)
 		error = cp_compat_stat(&stat, statbuf);
 	return error;
 }
 
+asmlinkage long compat_sys_newfstatat(int dfd, char __user *filename,
+		struct compat_stat __user *statbuf, int flag)
+{
+	struct kstat stat;
+	int error = -EINVAL;
+
+	if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+		goto out;
+
+	if (flag & AT_SYMLINK_NOFOLLOW)
+		error = vfs_lstat_fd(dfd, filename, &stat);
+	else
+		error = vfs_stat_fd(dfd, filename, &stat);
+
+	if (!error)
+		error = cp_compat_stat(&stat, statbuf);
+
+out:
+	return error;
+}
+
 asmlinkage long compat_sys_newfstat(unsigned int fd,
 		struct compat_stat __user * statbuf)
 {
@@ -1290,7 +1316,17 @@
 asmlinkage long
 compat_sys_open(const char __user *filename, int flags, int mode)
 {
-	return do_sys_open(filename, flags, mode);
+	return do_sys_open(AT_FDCWD, filename, flags, mode);
+}
+
+/*
+ * Exactly like fs/open.c:sys_openat(), except that it doesn't set the
+ * O_LARGEFILE flag.
+ */
+asmlinkage long
+compat_sys_openat(int dfd, const char __user *filename, int flags, int mode)
+{
+	return do_sys_open(dfd, filename, flags, mode);
 }
 
 /*