[PATCH] vfs: *at functions: core

Here is a series of patches which introduce a total of 13 new system calls
which take a file descriptor/filename pair instead of a single file
name.  These functions (openat etc.) have been discussed on numerous
occasions.  They are needed to implement race-free filesystem traversal,
and they are necessary to implement a virtual per-thread current working
directory (think multi-threaded backup software), etc.

We have in glibc today implementations of the interfaces which use the
/proc/self/fd magic.  But this code is rather expensive.  Here are some
results (similar to what Jim Meyering posted before).

The test creates a deep directory hierarchy on a tmpfs filesystem.  Then
rm -fr is used to remove all directories.  Without syscall support I get
this:

real    0m31.921s
user    0m0.688s
sys     0m31.234s

With syscall support the results are much better:

real    0m20.699s
user    0m0.536s
sys     0m20.149s

For obvious reasons the interfaces are not much used yet, but they will
be.  coreutils (and Jeff's posixutils) are already using them.
Furthermore, code like ftw/fts in libc (maybe even glob) will also start using
them.  I expect a patch to make follow soon.  Every program which is walking
the filesystem tree will benefit.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@ftp.linux.org.uk>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 8a7c821..c52a637 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -23,6 +23,13 @@
 #define DN_ATTRIB	0x00000020	/* File changed attibutes */
 #define DN_MULTISHOT	0x80000000	/* Don't remove notifier */
 
+#define AT_FDCWD		-100    /* Special value used to indicate
+                                           openat should use the current
+                                           working directory. */
+#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
+#define AT_REMOVEDIR		0x200   /* Remove directory instead of
+                                           unlinking file.  */
+
 #ifdef __KERNEL__
 
 #ifndef force_o_largefile
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b77f260..84bb449 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1340,7 +1340,8 @@
 
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
-extern long do_sys_open(const char __user *filename, int flags, int mode);
+extern long do_sys_open(int fdf, const char __user *filename, int flags,
+			int mode);
 extern struct file *filp_open(const char *, int, int);
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
 extern int filp_close(struct file *, fl_owner_t id);
@@ -1479,7 +1480,7 @@
 }
 extern int do_pipe(int *);
 
-extern int open_namei(const char *, int, int, struct nameidata *);
+extern int open_namei(int dfd, const char *, int, int, struct nameidata *);
 extern int may_open(struct nameidata *, int, int);
 
 extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
@@ -1677,6 +1678,8 @@
 
 extern int vfs_stat(char __user *, struct kstat *);
 extern int vfs_lstat(char __user *, struct kstat *);
+extern int vfs_stat_fd(int dfd, char __user *, struct kstat *);
+extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *);
 extern int vfs_fstat(unsigned int, struct kstat *);
 
 extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index b699e42..e669801 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -56,10 +56,11 @@
 #define LOOKUP_ACCESS		(0x0400)
 
 extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
+extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
 #define user_path_walk(name,nd) \
-	__user_walk(name, LOOKUP_FOLLOW, nd)
+	__user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd)
 #define user_path_walk_link(name,nd) \
-	__user_walk(name, 0, nd)
+	__user_walk_fd(AT_FDCWD, name, 0, nd)
 extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
 extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
@@ -67,7 +68,7 @@
 extern void path_release_on_umount(struct nameidata *);
 
 extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags);
-extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags);
+extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags);
 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
 		int (*open)(struct inode *, struct file *));
 extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
diff --git a/include/linux/time.h b/include/linux/time.h
index f2aca7e..614dd84 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -74,7 +74,7 @@
 extern int do_settimeofday(struct timespec *tv);
 extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
 #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
-extern long do_utimes(char __user *filename, struct timeval *times);
+extern long do_utimes(int dfd, char __user *filename, struct timeval *times);
 struct itimerval;
 extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);