sysfs: implement sysfs_dirent active reference and immediate disconnect

sysfs: implement sysfs_dirent active reference and immediate disconnect

Opening a sysfs node references its associated kobject, so userland
can arbitrarily prolong lifetime of a kobject which complicates
lifetime rules in drivers.  This patch implements active reference and
makes the association between kobject and sysfs immediately breakable.

Now each sysfs_dirent has two reference counts - s_count and s_active.
s_count is a regular reference count which guarantees that the
containing sysfs_dirent is accessible.  As long as s_count reference
is held, all sysfs internal fields in sysfs_dirent are accessible
including s_parent and s_name.

The newly added s_active is active reference count.  This is acquired
by invoking sysfs_get_active() and it's the caller's responsibility to
ensure sysfs_dirent itself is accessible (should be holding s_count
one way or the other).  Dereferencing sysfs_dirent to access objects
out of sysfs proper requires active reference.  This includes access
to the associated kobjects, attributes and ops.

The active references can be drained and denied by calling
sysfs_deactivate().  All active sysfs_dirents must be deactivated
after deletion but before the default reference is dropped.  This
enables immediate disconnect of sysfs nodes.  Once a sysfs_dirent is
deleted, it won't access any entity external to sysfs proper.

Because attr/bin_attr ops access both the node itself and its parent
for kobject, they need to hold active references to both.
sysfs_get/put_active_two() helpers are provided to help grabbing both
references.  Parent's is acquired first and released last.

Unlike other operations, mmapped area lingers on after mmap() is
finished and the module implement implementing it and kobj need to
stay referenced till all the mapped pages are gone.  This is
accomplished by holding one set of active references to the bin_attr
and its parent if there have been any mmap during lifetime of an
openfile.  The references are dropped when the openfile is released.

This change makes sysfs lifetime rules independent from both kobject's
and module's.  It not only fixes several race conditions caused by
sysfs not holding onto the proper module when referencing kobject, but
also helps fixing and simplifying lifetime management in driver model
and drivers by taking sysfs out of the equation.

Please read the following message for more info.

  http://article.gmane.org/gmane.linux.kernel/510293

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 04f6b0e..310430b 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -87,8 +87,8 @@
  */
 static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
 {
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-	struct kobject * kobj = to_kobj(dentry->d_parent);
+	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
 	struct sysfs_ops * ops = buffer->ops;
 	int ret = 0;
 	ssize_t count;
@@ -98,8 +98,15 @@
 	if (!buffer->page)
 		return -ENOMEM;
 
-	buffer->event = atomic_read(&sd->s_event);
-	count = ops->show(kobj, sd->s_elem.attr.attr, buffer->page);
+	/* need attr_sd for attr and ops, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
+
+	buffer->event = atomic_read(&attr_sd->s_event);
+	count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
+
+	sysfs_put_active_two(attr_sd);
+
 	BUG_ON(count > (ssize_t)PAGE_SIZE);
 	if (count >= 0) {
 		buffer->needs_read_fill = 0;
@@ -195,14 +202,23 @@
  *	passing the buffer that we acquired in fill_write_buffer().
  */
 
-static int 
+static int
 flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
 {
 	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
-	struct kobject * kobj = to_kobj(dentry->d_parent);
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
 	struct sysfs_ops * ops = buffer->ops;
+	int rc;
 
-	return ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
+	/* need attr_sd for attr and ops, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
+
+	rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
+
+	sysfs_put_active_two(attr_sd);
+
+	return rc;
 }
 
 
@@ -246,22 +262,22 @@
 
 static int sysfs_open_file(struct inode *inode, struct file *file)
 {
-	struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
 	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct attribute *attr = attr_sd->s_elem.attr.attr;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
 	struct sysfs_buffer_collection *set;
 	struct sysfs_buffer * buffer;
 	struct sysfs_ops * ops = NULL;
-	int error = 0;
+	int error;
 
-	if (!kobj || !attr)
-		goto Einval;
+	/* need attr_sd for attr and ops, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
 
-	/* Grab the module reference for this attribute if we have one */
-	if (!try_module_get(attr->owner)) {
-		error = -ENODEV;
-		goto Done;
-	}
+	/* Grab the module reference for this attribute */
+	error = -ENODEV;
+	if (!try_module_get(attr->owner))
+		goto err_sput;
 
 	/* if the kobject has no ktype, then we assume that it is a subsystem
 	 * itself, and use ops for it.
@@ -276,30 +292,30 @@
 	/* No sysfs operations, either from having no subsystem,
 	 * or the subsystem have no operations.
 	 */
+	error = -EACCES;
 	if (!ops)
-		goto Eaccess;
+		goto err_mput;
 
 	/* make sure we have a collection to add our buffers to */
 	mutex_lock(&inode->i_mutex);
 	if (!(set = inode->i_private)) {
-		if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
-			error = -ENOMEM;
-			goto Done;
-		} else {
+		error = -ENOMEM;
+		if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL)))
+			goto err_mput;
+		else
 			INIT_LIST_HEAD(&set->associates);
-		}
 	}
 	mutex_unlock(&inode->i_mutex);
 
+	error = -EACCES;
+
 	/* File needs write support.
 	 * The inode's perms must say it's ok, 
 	 * and we must have a store method.
 	 */
 	if (file->f_mode & FMODE_WRITE) {
-
 		if (!(inode->i_mode & S_IWUGO) || !ops->store)
-			goto Eaccess;
-
+			goto err_mput;
 	}
 
 	/* File needs read support.
@@ -308,46 +324,45 @@
 	 */
 	if (file->f_mode & FMODE_READ) {
 		if (!(inode->i_mode & S_IRUGO) || !ops->show)
-			goto Eaccess;
+			goto err_mput;
 	}
 
 	/* No error? Great, allocate a buffer for the file, and store it
 	 * it in file->private_data for easy access.
 	 */
+	error = -ENOMEM;
 	buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
-	if (buffer) {
-		INIT_LIST_HEAD(&buffer->associates);
-		init_MUTEX(&buffer->sem);
-		buffer->needs_read_fill = 1;
-		buffer->ops = ops;
-		add_to_collection(buffer, inode);
-		file->private_data = buffer;
-	} else
-		error = -ENOMEM;
-	goto Done;
+	if (!buffer)
+		goto err_mput;
 
- Einval:
-	error = -EINVAL;
-	goto Done;
- Eaccess:
-	error = -EACCES;
+	INIT_LIST_HEAD(&buffer->associates);
+	init_MUTEX(&buffer->sem);
+	buffer->needs_read_fill = 1;
+	buffer->ops = ops;
+	add_to_collection(buffer, inode);
+	file->private_data = buffer;
+
+	/* open succeeded, put active references and pin attr_sd */
+	sysfs_put_active_two(attr_sd);
+	sysfs_get(attr_sd);
+	return 0;
+
+ err_mput:
 	module_put(attr->owner);
- Done:
-	if (error)
-		kobject_put(kobj);
+ err_sput:
+	sysfs_put_active_two(attr_sd);
 	return error;
 }
 
 static int sysfs_release(struct inode * inode, struct file * filp)
 {
-	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
 	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
 	struct attribute *attr = attr_sd->s_elem.attr.attr;
 	struct sysfs_buffer * buffer = filp->private_data;
 
 	if (buffer)
 		remove_from_collection(buffer, inode);
-	kobject_put(kobj);
+	sysfs_put(attr_sd);
 	/* After this point, attr should not be accessed. */
 	module_put(attr->owner);
 
@@ -376,18 +391,25 @@
 static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
 {
 	struct sysfs_buffer * buffer = filp->private_data;
-	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
-	struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata;
-	int res = 0;
+	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+
+	/* need parent for the kobj, grab both */
+	if (!sysfs_get_active_two(attr_sd))
+		goto trigger;
 
 	poll_wait(filp, &kobj->poll, wait);
 
-	if (buffer->event != atomic_read(&sd->s_event)) {
-		res = POLLERR|POLLPRI;
-		buffer->needs_read_fill = 1;
-	}
+	sysfs_put_active_two(attr_sd);
 
-	return res;
+	if (buffer->event != atomic_read(&attr_sd->s_event))
+		goto trigger;
+
+	return 0;
+
+ trigger:
+	buffer->needs_read_fill = 1;
+	return POLLERR|POLLPRI;
 }