sysfs: make directory dentries and inodes reclaimable

This patch makes dentries and inodes for sysfs directories
reclaimable.

* sysfs_notify() is modified to walk sysfs_dirent tree instead of
  dentry tree.

* sysfs_update_file() and sysfs_chmod_file() use sysfs_get_dentry() to
  grab the victim dentry.

* sysfs_rename_dir() and sysfs_move_dir() grab all dentries using
  sysfs_get_dentry() on startup.

* Dentries for all shadowed directories are pinned in memory to serve
  as lookup start point.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 9872112..aee966c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -568,10 +568,10 @@
 	spin_unlock(&dcache_lock);
 	spin_unlock(&sysfs_assoc_lock);
 
-	dput(dentry);
-	/* XXX: unpin if directory, this will go away soon */
-	if (sysfs_type(sd) == SYSFS_DIR)
+	/* dentries for shadowed inodes are pinned, unpin */
+	if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
 		dput(dentry);
+	dput(dentry);
 
 	/* adjust nlink and update timestamp */
 	inode = ilookup(sysfs_sb, sd->s_ino);
@@ -686,69 +686,29 @@
 static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
 		      const char *name, struct sysfs_dirent **p_sd)
 {
-	struct dentry *parent = parent_sd->s_dentry;
-	struct sysfs_addrm_cxt acxt;
-	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
-	struct dentry *dentry;
-	struct inode *inode;
+	struct sysfs_addrm_cxt acxt;
 	struct sysfs_dirent *sd;
 
-	sysfs_addrm_start(&acxt, parent_sd);
-
 	/* allocate */
-	dentry = lookup_one_len(name, parent, strlen(name));
-	if (IS_ERR(dentry)) {
-		error = PTR_ERR(dentry);
-		goto out_finish;
-	}
-
-	error = -EEXIST;
-	if (dentry->d_inode)
-		goto out_dput;
-
-	error = -ENOMEM;
 	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
 	if (!sd)
-		goto out_drop;
+		return -ENOMEM;
 	sd->s_elem.dir.kobj = kobj;
 
-	inode = sysfs_get_inode(sd);
-	if (!inode)
-		goto out_sput;
-
-	if (inode->i_state & I_NEW) {
-		inode->i_op = &sysfs_dir_inode_operations;
-		inode->i_fop = &sysfs_dir_operations;
-		/* directory inodes start off with i_nlink == 2 (for ".") */
-		inc_nlink(inode);
+	/* link in */
+	sysfs_addrm_start(&acxt, parent_sd);
+	if (!sysfs_find_dirent(parent_sd, name)) {
+		sysfs_add_one(&acxt, sd);
+		sysfs_link_sibling(sd);
+	}
+	if (sysfs_addrm_finish(&acxt)) {
+		*p_sd = sd;
+		return 0;
 	}
 
-	/* link in */
-	error = -EEXIST;
-	if (sysfs_find_dirent(parent_sd, name))
-		goto out_iput;
-
-	sysfs_add_one(&acxt, sd);
-	sysfs_link_sibling(sd);
-	sysfs_instantiate(dentry, inode);
-	sysfs_attach_dentry(sd, dentry);
-
-	*p_sd = sd;
-	error = 0;
-	goto out_finish;	/* pin directory dentry in core */
-
- out_iput:
-	iput(inode);
- out_sput:
 	sysfs_put(sd);
- out_drop:
-	d_drop(dentry);
- out_dput:
-	dput(dentry);
- out_finish:
-	sysfs_addrm_finish(&acxt);
-	return error;
+	return -EEXIST;
 }
 
 int sysfs_create_subdir(struct kobject *kobj, const char *name,
@@ -785,6 +745,17 @@
 	return error;
 }
 
+static int sysfs_count_nlink(struct sysfs_dirent *sd)
+{
+	struct sysfs_dirent *child;
+	int nr = 0;
+
+	for (child = sd->s_children; child; child = child->s_sibling)
+		if (sysfs_type(child) == SYSFS_DIR)
+			nr++;
+	return nr + 2;
+}
+
 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 				struct nameidata *nd)
 {
@@ -795,7 +766,7 @@
 	int found = 0;
 
 	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
-		if ((sysfs_type(sd) & SYSFS_NOT_PINNED) &&
+		if (sysfs_type(sd) &&
 		    !strcmp(sd->s_name, dentry->d_name.name)) {
 			found = 1;
 			break;
@@ -816,6 +787,11 @@
 	if (inode->i_state & I_NEW) {
 		/* initialize inode according to type */
 		switch (sysfs_type(sd)) {
+		case SYSFS_DIR:
+			inode->i_op = &sysfs_dir_inode_operations;
+			inode->i_fop = &sysfs_dir_operations;
+			inode->i_nlink = sysfs_count_nlink(sd);
+			break;
 		case SYSFS_KOBJ_ATTR:
 			inode->i_size = PAGE_SIZE;
 			inode->i_fop = &sysfs_file_operations;
@@ -876,7 +852,7 @@
 	while (*pos) {
 		struct sysfs_dirent *sd = *pos;
 
-		if (sysfs_type(sd) && (sysfs_type(sd) & SYSFS_NOT_PINNED)) {
+		if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
 			*pos = sd->s_sibling;
 			sd->s_sibling = NULL;
 			sysfs_remove_one(&acxt, sd);
@@ -912,14 +888,25 @@
 		     const char *new_name)
 {
 	struct sysfs_dirent *sd = kobj->sd;
-	struct dentry *new_parent = new_parent_sd->s_dentry;
-	struct dentry *new_dentry;
-	char *dup_name;
+	struct dentry *new_parent = NULL;
+	struct dentry *old_dentry = NULL, *new_dentry = NULL;
+	const char *dup_name = NULL;
 	int error;
 
-	if (!new_parent_sd)
-		return -EFAULT;
+	/* get dentries */
+	old_dentry = sysfs_get_dentry(sd);
+	if (IS_ERR(old_dentry)) {
+		error = PTR_ERR(old_dentry);
+		goto out_dput;
+	}
 
+	new_parent = sysfs_get_dentry(new_parent_sd);
+	if (IS_ERR(new_parent)) {
+		error = PTR_ERR(new_parent);
+		goto out_dput;
+	}
+
+	/* lock new_parent and get dentry for new name */
 	mutex_lock(&new_parent->d_inode->i_mutex);
 
 	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
@@ -933,14 +920,14 @@
 	 * shadows of the same directory
 	 */
 	error = -EINVAL;
-	if (sd->s_parent->s_dentry->d_inode != new_parent->d_inode ||
+	if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
 	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
-	    new_dentry == sd->s_dentry)
-		goto out_dput;
+	    old_dentry == new_dentry)
+		goto out_unlock;
 
 	error = -EEXIST;
 	if (new_dentry->d_inode)
-		goto out_dput;
+		goto out_unlock;
 
 	/* rename kobject and sysfs_dirent */
 	error = -ENOMEM;
@@ -950,9 +937,9 @@
 
 	error = kobject_set_name(kobj, "%s", new_name);
 	if (error)
-		goto out_free;
+		goto out_drop;
 
-	kfree(sd->s_name);
+	dup_name = sd->s_name;
 	sd->s_name = new_name;
 
 	/* move under the new parent */
@@ -972,45 +959,58 @@
 	error = 0;
 	goto out_unlock;
 
- out_free:
-	kfree(dup_name);
  out_drop:
 	d_drop(new_dentry);
- out_dput:
-	dput(new_dentry);
  out_unlock:
 	mutex_unlock(&new_parent->d_inode->i_mutex);
+ out_dput:
+	kfree(dup_name);
+	dput(new_parent);
+	dput(old_dentry);
+	dput(new_dentry);
 	return error;
 }
 
-int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
+int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 {
-	struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry;
-	struct sysfs_dirent *new_parent_sd, *sd;
+	struct sysfs_dirent *sd = kobj->sd;
+	struct sysfs_dirent *new_parent_sd;
+	struct dentry *old_parent, *new_parent = NULL;
+	struct dentry *old_dentry = NULL, *new_dentry = NULL;
 	int error;
 
-	old_parent_dentry = kobj->parent ?
-		kobj->parent->sd->s_dentry : sysfs_mount->mnt_sb->s_root;
-	new_parent_dentry = new_parent ?
-		new_parent->sd->s_dentry : sysfs_mount->mnt_sb->s_root;
+	BUG_ON(!sd->s_parent);
+	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
 
-	if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
-		return 0;	/* nothing to move */
+	/* get dentries */
+	old_dentry = sysfs_get_dentry(sd);
+	if (IS_ERR(old_dentry)) {
+		error = PTR_ERR(old_dentry);
+		goto out_dput;
+	}
+	old_parent = sd->s_parent->s_dentry;
+
+	new_parent = sysfs_get_dentry(new_parent_sd);
+	if (IS_ERR(new_parent)) {
+		error = PTR_ERR(new_parent);
+		goto out_dput;
+	}
+
+	if (old_parent->d_inode == new_parent->d_inode) {
+		error = 0;
+		goto out_dput;	/* nothing to move */
+	}
 again:
-	mutex_lock(&old_parent_dentry->d_inode->i_mutex);
-	if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
-		mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
+	mutex_lock(&old_parent->d_inode->i_mutex);
+	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
+		mutex_unlock(&old_parent->d_inode->i_mutex);
 		goto again;
 	}
 
-	new_parent_sd = new_parent_dentry->d_fsdata;
-	sd = kobj->sd;
-
-	new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
-				    strlen(kobj->name));
+	new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
 	if (IS_ERR(new_dentry)) {
 		error = PTR_ERR(new_dentry);
-		goto out;
+		goto out_unlock;
 	} else
 		error = 0;
 	d_add(new_dentry, NULL);
@@ -1027,10 +1027,14 @@
 	sysfs_link_sibling(sd);
 
 	mutex_unlock(&sysfs_mutex);
-out:
-	mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
-	mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
 
+ out_unlock:
+	mutex_unlock(&new_parent->d_inode->i_mutex);
+	mutex_unlock(&old_parent->d_inode->i_mutex);
+ out_dput:
+	dput(new_parent);
+	dput(old_dentry);
+	dput(new_dentry);
 	return error;
 }
 
@@ -1191,12 +1195,20 @@
 int sysfs_make_shadowed_dir(struct kobject *kobj,
 	void * (*follow_link)(struct dentry *, struct nameidata *))
 {
+	struct dentry *dentry;
 	struct inode *inode;
 	struct inode_operations *i_op;
 
-	inode = kobj->sd->s_dentry->d_inode;
-	if (inode->i_op != &sysfs_dir_inode_operations)
+	/* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
+	dentry = sysfs_get_dentry(kobj->sd);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	inode = dentry->d_inode;
+	if (inode->i_op != &sysfs_dir_inode_operations) {
+		dput(dentry);
 		return -EINVAL;
+	}
 
 	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
 	if (!i_op)
@@ -1223,17 +1235,23 @@
 
 struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
 {
-	struct dentry *dir = kobj->sd->s_dentry;
-	struct inode *inode = dir->d_inode;
-	struct dentry *parent = dir->d_parent;
-	struct sysfs_dirent *parent_sd = parent->d_fsdata;
-	struct dentry *shadow;
+	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
+	struct dentry *dir, *parent, *shadow;
+	struct inode *inode;
 	struct sysfs_dirent *sd;
 	struct sysfs_addrm_cxt acxt;
 
+	dir = sysfs_get_dentry(kobj->sd);
+	if (IS_ERR(dir)) {
+		sd = (void *)dir;
+		goto out;
+	}
+	parent = dir->d_parent;
+
+	inode = dir->d_inode;
 	sd = ERR_PTR(-EINVAL);
 	if (!sysfs_is_shadowed_inode(inode))
-		goto out;
+		goto out_dput;
 
 	shadow = d_alloc(parent, &dir->d_name);
 	if (!shadow)
@@ -1258,12 +1276,15 @@
 
 	dget(shadow);		/* Extra count - pin the dentry in core */
 
-out:
-	return sd;
-nomem:
+	goto out_dput;
+
+ nomem:
 	dput(shadow);
 	sd = ERR_PTR(-ENOMEM);
-	goto out;
+ out_dput:
+	dput(dir);
+ out:
+	return sd;
 }
 
 /**
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 69bacf1..cc49799 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -362,43 +362,22 @@
 	return POLLERR|POLLPRI;
 }
 
-
-static struct dentry *step_down(struct dentry *dir, const char * name)
+void sysfs_notify(struct kobject *k, char *dir, char *attr)
 {
-	struct dentry * de;
+	struct sysfs_dirent *sd = k->sd;
 
-	if (dir == NULL || dir->d_inode == NULL)
-		return NULL;
+	mutex_lock(&sysfs_mutex);
 
-	mutex_lock(&dir->d_inode->i_mutex);
-	de = lookup_one_len(name, dir, strlen(name));
-	mutex_unlock(&dir->d_inode->i_mutex);
-	dput(dir);
-	if (IS_ERR(de))
-		return NULL;
-	if (de->d_inode == NULL) {
-		dput(de);
-		return NULL;
-	}
-	return de;
-}
-
-void sysfs_notify(struct kobject * k, char *dir, char *attr)
-{
-	struct dentry *de = k->sd->s_dentry;
-	if (de)
-		dget(de);
-	if (de && dir)
-		de = step_down(de, dir);
-	if (de && attr)
-		de = step_down(de, attr);
-	if (de) {
-		struct sysfs_dirent * sd = de->d_fsdata;
-		if (sd)
-			atomic_inc(&sd->s_event);
+	if (sd && dir)
+		sd = sysfs_find_dirent(sd, dir);
+	if (sd && attr)
+		sd = sysfs_find_dirent(sd, attr);
+	if (sd) {
+		atomic_inc(&sd->s_event);
 		wake_up_interruptible(&k->poll);
-		dput(de);
 	}
+
+	mutex_unlock(&sysfs_mutex);
 }
 EXPORT_SYMBOL_GPL(sysfs_notify);
 
@@ -485,30 +464,31 @@
  */
 int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 {
-	struct dentry *dir = kobj->sd->s_dentry;
-	struct dentry * victim;
-	int res = -ENOENT;
+	struct sysfs_dirent *victim_sd = NULL;
+	struct dentry *victim = NULL;
+	int rc;
 
-	mutex_lock(&dir->d_inode->i_mutex);
-	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
-	if (!IS_ERR(victim)) {
-		/* make sure dentry is really there */
-		if (victim->d_inode && 
-		    (victim->d_parent->d_inode == dir->d_inode)) {
-			victim->d_inode->i_mtime = CURRENT_TIME;
-			fsnotify_modify(victim);
-			res = 0;
-		} else
-			d_drop(victim);
-		
-		/**
-		 * Drop the reference acquired from lookup_one_len() above.
-		 */
-		dput(victim);
+	rc = -ENOENT;
+	victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
+	if (!victim_sd)
+		goto out;
+
+	victim = sysfs_get_dentry(victim_sd);
+	if (IS_ERR(victim)) {
+		rc = PTR_ERR(victim);
+		victim = NULL;
+		goto out;
 	}
-	mutex_unlock(&dir->d_inode->i_mutex);
 
-	return res;
+	mutex_lock(&victim->d_inode->i_mutex);
+	victim->d_inode->i_mtime = CURRENT_TIME;
+	fsnotify_modify(victim);
+	mutex_unlock(&victim->d_inode->i_mutex);
+	rc = 0;
+ out:
+	dput(victim);
+	sysfs_put(victim_sd);
+	return rc;
 }
 
 
@@ -521,30 +501,34 @@
  */
 int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 {
-	struct dentry *dir = kobj->sd->s_dentry;
-	struct dentry *victim;
+	struct sysfs_dirent *victim_sd = NULL;
+	struct dentry *victim = NULL;
 	struct inode * inode;
 	struct iattr newattrs;
-	int res = -ENOENT;
+	int rc;
 
-	mutex_lock(&dir->d_inode->i_mutex);
-	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
-	if (!IS_ERR(victim)) {
-		if (victim->d_inode &&
-		    (victim->d_parent->d_inode == dir->d_inode)) {
-			inode = victim->d_inode;
-			mutex_lock(&inode->i_mutex);
-			newattrs.ia_mode = (mode & S_IALLUGO) |
-						(inode->i_mode & ~S_IALLUGO);
-			newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-			res = notify_change(victim, &newattrs);
-			mutex_unlock(&inode->i_mutex);
-		}
-		dput(victim);
+	rc = -ENOENT;
+	victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
+	if (!victim_sd)
+		goto out;
+
+	victim = sysfs_get_dentry(victim_sd);
+	if (IS_ERR(victim)) {
+		rc = PTR_ERR(victim);
+		victim = NULL;
+		goto out;
 	}
-	mutex_unlock(&dir->d_inode->i_mutex);
 
-	return res;
+	inode = victim->d_inode;
+	mutex_lock(&inode->i_mutex);
+	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+	rc = notify_change(victim, &newattrs);
+	mutex_unlock(&inode->i_mutex);
+ out:
+	dput(victim);
+	sysfs_put(victim_sd);
+	return rc;
 }
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 078537e..402cc35 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -24,7 +24,7 @@
 	.drop_inode	= sysfs_delete_inode,
 };
 
-static struct sysfs_dirent sysfs_root = {
+struct sysfs_dirent sysfs_root = {
 	.s_count	= ATOMIC_INIT(1),
 	.s_flags	= SYSFS_ROOT,
 	.s_mode		= S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 72530dc..6a37f23 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -52,6 +52,7 @@
 };
 
 extern struct vfsmount * sysfs_mount;
+extern struct sysfs_dirent sysfs_root;
 extern struct kmem_cache *sysfs_dir_cachep;
 
 extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 4c43030..2f58ca1 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -81,7 +81,6 @@
 #define SYSFS_KOBJ_ATTR 	0x0004
 #define SYSFS_KOBJ_BIN_ATTR	0x0008
 #define SYSFS_KOBJ_LINK 	0x0020
-#define SYSFS_NOT_PINNED	(SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK)
 #define SYSFS_COPY_NAME		(SYSFS_DIR | SYSFS_KOBJ_LINK)
 
 #define SYSFS_FLAG_MASK		~SYSFS_TYPE_MASK