fs: Push mnt_want_write() outside of i_mutex
Currently, mnt_want_write() is sometimes called with i_mutex held and sometimes
without it. This isn't really a problem because mnt_want_write() is a
non-blocking operation (essentially has a trylock semantics) but when the
function starts to handle also frozen filesystems, it will get a full lock
semantics and thus proper lock ordering has to be established. So move
all mnt_want_write() calls outside of i_mutex.
One non-trivial case needing conversion is kern_path_create() /
user_path_create() which didn't include mnt_want_write() but now needs to
because it acquires i_mutex. Because there are virtual file systems which
don't bother with freeze / remount-ro protection we actually provide both
versions of the function - one which calls mnt_want_write() and one which does
not.
[AV: scratch the previous, mnt_want_write() has been moved to kern_path_create()
by now]
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/fs/namei.c b/fs/namei.c
index 35291ac..1b46439 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2975,6 +2975,7 @@
{
struct dentry *dentry = ERR_PTR(-EEXIST);
struct nameidata nd;
+ int err2;
int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
if (error)
return ERR_PTR(error);
@@ -2988,6 +2989,8 @@
nd.flags &= ~LOOKUP_PARENT;
nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
+ /* don't fail immediately if it's r/o, at least try to report other errors */
+ err2 = mnt_want_write(nd.path.mnt);
/*
* Do the final lookup.
*/
@@ -3009,9 +3012,10 @@
error = -ENOENT;
goto fail;
}
- error = mnt_want_write(nd.path.mnt);
- if (error)
+ if (unlikely(err2)) {
+ error = err2;
goto fail;
+ }
*path = nd.path;
return dentry;
fail:
@@ -3019,6 +3023,8 @@
dentry = ERR_PTR(error);
unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ if (!err2)
+ mnt_drop_write(nd.path.mnt);
out:
path_put(&nd.path);
return dentry;
@@ -3266,6 +3272,9 @@
}
nd.flags &= ~LOOKUP_PARENT;
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto exit1;
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
@@ -3276,19 +3285,15 @@
error = -ENOENT;
goto exit3;
}
- error = mnt_want_write(nd.path.mnt);
- if (error)
- goto exit3;
error = security_path_rmdir(&nd.path, dentry);
if (error)
- goto exit4;
+ goto exit3;
error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
-exit4:
- mnt_drop_write(nd.path.mnt);
exit3:
dput(dentry);
exit2:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ mnt_drop_write(nd.path.mnt);
exit1:
path_put(&nd.path);
putname(name);
@@ -3355,6 +3360,9 @@
goto exit1;
nd.flags &= ~LOOKUP_PARENT;
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto exit1;
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
@@ -3367,21 +3375,17 @@
if (!inode)
goto slashes;
ihold(inode);
- error = mnt_want_write(nd.path.mnt);
- if (error)
- goto exit2;
error = security_path_unlink(&nd.path, dentry);
if (error)
- goto exit3;
+ goto exit2;
error = vfs_unlink(nd.path.dentry->d_inode, dentry);
-exit3:
- mnt_drop_write(nd.path.mnt);
- exit2:
+exit2:
dput(dentry);
}
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
if (inode)
iput(inode); /* truncate the inode here */
+ mnt_drop_write(nd.path.mnt);
exit1:
path_put(&nd.path);
putname(name);
@@ -3753,6 +3757,10 @@
if (newnd.last_type != LAST_NORM)
goto exit2;
+ error = mnt_want_write(oldnd.path.mnt);
+ if (error)
+ goto exit2;
+
oldnd.flags &= ~LOOKUP_PARENT;
newnd.flags &= ~LOOKUP_PARENT;
newnd.flags |= LOOKUP_RENAME_TARGET;
@@ -3788,23 +3796,19 @@
if (new_dentry == trap)
goto exit5;
- error = mnt_want_write(oldnd.path.mnt);
- if (error)
- goto exit5;
error = security_path_rename(&oldnd.path, old_dentry,
&newnd.path, new_dentry);
if (error)
- goto exit6;
+ goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
-exit6:
- mnt_drop_write(oldnd.path.mnt);
exit5:
dput(new_dentry);
exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
+ mnt_drop_write(oldnd.path.mnt);
exit2:
path_put(&newnd.path);
putname(to);