much milder d_walk() race
d_walk() relies upon the tree not getting rearranged under it without
rename_lock being touched. And we do grab rename_lock around the
places that change the tree topology. Unfortunately, branch reordering
is just as bad from d_walk() POV and we have two places that do it
without touching rename_lock - one in handling of cursors (for ramfs-style
directories) and another in autofs. autofs one is a separate story; this
commit deals with the cursors.
* mark cursor dentries explicitly at allocation time
* make __dentry_kill() leave ->d_child.next pointing to the next
non-cursor sibling, making sure that it won't be moved around unnoticed
before the parent is relocked on ascend-to-parent path in d_walk().
* make d_walk() skip cursors explicitly; strictly speaking it's
not necessary (all callbacks we pass to d_walk() are no-ops on cursors),
but it makes analysis easier.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/fs/dcache.c b/fs/dcache.c
index 817c243..b7eddfd 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -507,6 +507,44 @@
}
EXPORT_SYMBOL(d_drop);
+static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
+{
+ struct dentry *next;
+ /*
+ * Inform d_walk() and shrink_dentry_list() that we are no longer
+ * attached to the dentry tree
+ */
+ dentry->d_flags |= DCACHE_DENTRY_KILLED;
+ if (unlikely(list_empty(&dentry->d_child)))
+ return;
+ __list_del_entry(&dentry->d_child);
+ /*
+ * Cursors can move around the list of children. While we'd been
+ * a normal list member, it didn't matter - ->d_child.next would've
+ * been updated. However, from now on it won't be and for the
+ * things like d_walk() it might end up with a nasty surprise.
+ * Normally d_walk() doesn't care about cursors moving around -
+ * ->d_lock on parent prevents that and since a cursor has no children
+ * of its own, we get through it without ever unlocking the parent.
+ * There is one exception, though - if we ascend from a child that
+ * gets killed as soon as we unlock it, the next sibling is found
+ * using the value left in its ->d_child.next. And if _that_
+ * pointed to a cursor, and cursor got moved (e.g. by lseek())
+ * before d_walk() regains parent->d_lock, we'll end up skipping
+ * everything the cursor had been moved past.
+ *
+ * Solution: make sure that the pointer left behind in ->d_child.next
+ * points to something that won't be moving around. I.e. skip the
+ * cursors.
+ */
+ while (dentry->d_child.next != &parent->d_subdirs) {
+ next = list_entry(dentry->d_child.next, struct dentry, d_child);
+ if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
+ break;
+ dentry->d_child.next = next->d_child.next;
+ }
+}
+
static void __dentry_kill(struct dentry *dentry)
{
struct dentry *parent = NULL;
@@ -532,12 +570,7 @@
}
/* if it was on the hash then remove it */
__d_drop(dentry);
- __list_del_entry(&dentry->d_child);
- /*
- * Inform d_walk() that we are no longer attached to the
- * dentry tree
- */
- dentry->d_flags |= DCACHE_DENTRY_KILLED;
+ dentry_unlist(dentry, parent);
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1203,6 +1236,9 @@
struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
+ if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
+ continue;
+
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
ret = enter(data, dentry);
@@ -1651,6 +1687,16 @@
}
EXPORT_SYMBOL(d_alloc);
+struct dentry *d_alloc_cursor(struct dentry * parent)
+{
+ struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
+ if (dentry) {
+ dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
+ dentry->d_parent = dget(parent);
+ }
+ return dentry;
+}
+
/**
* d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
* @sb: the superblock