ACPI / hotplug: Fix concurrency issues and memory leaks

This changeset is aimed at fixing a few different but related
problems in the ACPI hotplug infrastructure.

First of all, since notify handlers may be run in parallel with
acpi_bus_scan(), acpi_bus_trim() and acpi_bus_hot_remove_device()
and some of them are installed for ACPI handles that have no struct
acpi_device objects attached (i.e. before those objects are created),
those notify handlers have to take acpi_scan_lock to prevent races
from taking place (e.g. a struct acpi_device is found to be present
for the given ACPI handle, but right after that it is removed by
acpi_bus_trim() running in parallel to the given notify handler).
Moreover, since some of them call acpi_bus_scan() and
acpi_bus_trim(), this leads to the conclusion that acpi_scan_lock
should be acquired by the callers of these two funtions rather by
these functions themselves.

For these reasons, make all notify handlers that can handle device
addition and eject events take acpi_scan_lock and remove the
acpi_scan_lock locking from acpi_bus_scan() and acpi_bus_trim().
Accordingly, update all of their users to make sure that they
are always called under acpi_scan_lock.

Furthermore, since eject operations are carried out asynchronously
with respect to the notify events that trigger them, with the help
of acpi_bus_hot_remove_device(), even if notify handlers take the
ACPI scan lock, it still is possible that, for example,
acpi_bus_trim() will run between acpi_bus_hot_remove_device() and
the notify handler that scheduled its execution and that
acpi_bus_trim() will remove the device node passed to
acpi_bus_hot_remove_device() for ejection.  In that case, the struct
acpi_device object obtained by acpi_bus_hot_remove_device() will be
invalid and not-so-funny things will ensue.  To protect agaist that,
make the users of acpi_bus_hot_remove_device() run get_device() on
ACPI device node objects that are about to be passed to it and make
acpi_bus_hot_remove_device() run put_device() on them and check if
their ACPI handles are not NULL (make acpi_device_unregister() clear
the device nodes' ACPI handles for that check to work).

Finally, observe that acpi_os_hotplug_execute() actually can fail,
in which case its caller ought to free memory allocated for the
context object to prevent leaks from happening.  It also needs to
run put_device() on the device node that it ran get_device() on
previously in that case.  Modify the code accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index c7676ee..d16a94e 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -42,6 +42,18 @@
 	struct list_head node;
 };
 
+void acpi_scan_lock_acquire(void)
+{
+	mutex_lock(&acpi_scan_lock);
+}
+EXPORT_SYMBOL_GPL(acpi_scan_lock_acquire);
+
+void acpi_scan_lock_release(void)
+{
+	mutex_unlock(&acpi_scan_lock);
+}
+EXPORT_SYMBOL_GPL(acpi_scan_lock_release);
+
 int acpi_scan_add_handler(struct acpi_scan_handler *handler)
 {
 	if (!handler || !handler->attach)
@@ -95,8 +107,6 @@
 }
 static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL);
 
-static void __acpi_bus_trim(struct acpi_device *start);
-
 /**
  * acpi_bus_hot_remove_device: hot-remove a device and its children
  * @context: struct acpi_eject_event pointer (freed in this func)
@@ -107,7 +117,7 @@
  */
 void acpi_bus_hot_remove_device(void *context)
 {
-	struct acpi_eject_event *ej_event = (struct acpi_eject_event *) context;
+	struct acpi_eject_event *ej_event = context;
 	struct acpi_device *device = ej_event->device;
 	acpi_handle handle = device->handle;
 	acpi_handle temp;
@@ -118,11 +128,19 @@
 
 	mutex_lock(&acpi_scan_lock);
 
+	/* If there is no handle, the device node has been unregistered. */
+	if (!device->handle) {
+		dev_dbg(&device->dev, "ACPI handle missing\n");
+		put_device(&device->dev);
+		goto out;
+	}
+
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 		"Hot-removing device %s...\n", dev_name(&device->dev)));
 
-	__acpi_bus_trim(device);
-	/* Device node has been released. */
+	acpi_bus_trim(device);
+	/* Device node has been unregistered. */
+	put_device(&device->dev);
 	device = NULL;
 
 	if (ACPI_SUCCESS(acpi_get_handle(handle, "_LCK", &temp))) {
@@ -151,6 +169,7 @@
 					  ost_code, NULL);
 	}
 
+ out:
 	mutex_unlock(&acpi_scan_lock);
 	kfree(context);
 	return;
@@ -212,6 +231,7 @@
 		goto err;
 	}
 
+	get_device(&acpi_device->dev);
 	ej_event->device = acpi_device;
 	if (acpi_device->flags.eject_pending) {
 		/* event originated from ACPI eject notification */
@@ -224,7 +244,11 @@
 			ej_event->event, ACPI_OST_SC_EJECT_IN_PROGRESS, NULL);
 	}
 
-	acpi_os_hotplug_execute(acpi_bus_hot_remove_device, (void *)ej_event);
+	status = acpi_os_hotplug_execute(acpi_bus_hot_remove_device, ej_event);
+	if (ACPI_FAILURE(status)) {
+		put_device(&acpi_device->dev);
+		kfree(ej_event);
+	}
 err:
 	return ret;
 }
@@ -779,6 +803,7 @@
 	 * no more references.
 	 */
 	acpi_device_set_power(device, ACPI_STATE_D3_COLD);
+	device->handle = NULL;
 	put_device(&device->dev);
 }
 
@@ -1623,14 +1648,14 @@
  * there has been a real error.  There just have been no suitable ACPI objects
  * in the table trunk from which the kernel could create a device and add an
  * appropriate driver.
+ *
+ * Must be called under acpi_scan_lock.
  */
 int acpi_bus_scan(acpi_handle handle)
 {
 	void *device = NULL;
 	int error = 0;
 
-	mutex_lock(&acpi_scan_lock);
-
 	if (ACPI_SUCCESS(acpi_bus_check_add(handle, 0, NULL, &device)))
 		acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX,
 				    acpi_bus_check_add, NULL, NULL, &device);
@@ -1641,7 +1666,6 @@
 		acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX,
 				    acpi_bus_device_attach, NULL, NULL, NULL);
 
-	mutex_unlock(&acpi_scan_lock);
 	return error;
 }
 EXPORT_SYMBOL(acpi_bus_scan);
@@ -1678,7 +1702,13 @@
 	return AE_OK;
 }
 
-static void __acpi_bus_trim(struct acpi_device *start)
+/**
+ * acpi_bus_trim - Remove ACPI device node and all of its descendants
+ * @start: Root of the ACPI device nodes subtree to remove.
+ *
+ * Must be called under acpi_scan_lock.
+ */
+void acpi_bus_trim(struct acpi_device *start)
 {
 	/*
 	 * Execute acpi_bus_device_detach() as a post-order callback to detach
@@ -1695,13 +1725,6 @@
 			    acpi_bus_remove, NULL, NULL);
 	acpi_bus_remove(start->handle, 0, NULL, NULL);
 }
-
-void acpi_bus_trim(struct acpi_device *start)
-{
-	mutex_lock(&acpi_scan_lock);
-	__acpi_bus_trim(start);
-	mutex_unlock(&acpi_scan_lock);
-}
 EXPORT_SYMBOL_GPL(acpi_bus_trim);
 
 static int acpi_bus_scan_fixed(void)
@@ -1758,23 +1781,27 @@
 	acpi_csrt_init();
 	acpi_container_init();
 
+	mutex_lock(&acpi_scan_lock);
 	/*
 	 * Enumerate devices in the ACPI namespace.
 	 */
 	result = acpi_bus_scan(ACPI_ROOT_OBJECT);
 	if (result)
-		return result;
+		goto out;
 
 	result = acpi_bus_get_device(ACPI_ROOT_OBJECT, &acpi_root);
 	if (result)
-		return result;
+		goto out;
 
 	result = acpi_bus_scan_fixed();
 	if (result) {
 		acpi_device_unregister(acpi_root);
-		return result;
+		goto out;
 	}
 
 	acpi_update_all_gpes();
-	return 0;
+
+ out:
+	mutex_unlock(&acpi_scan_lock);
+	return result;
 }