msm: SSR: Fix problems with concurrent SSRs
If one SSR call comes in and queues a work item, and that work
item starts running before another SSR call comes in, we will end
up with a running work item and a pending work item. The pending
work item will not run until the running work item completes.
With the current code, the running work item will run to
completion and then the pending work item will run and restart
the subsystem again.
This is wrong, since we want to 'short-circuit' the code in this
case and do nothing if multiple SSR calls come in while the
subsystem is in the 'crashed' state. Add state-tracking logic so
that we know which part of the restart process a particular
subsystem is in, and use it to fix this problem.
Change-Id: I3592a0f9ed777251081a30cc0ee0af9e49fff733
CRs-Fixed: 397848
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
diff --git a/arch/arm/mach-msm/subsystem_restart.c b/arch/arm/mach-msm/subsystem_restart.c
index 0318a70..bae1ab0 100644
--- a/arch/arm/mach-msm/subsystem_restart.c
+++ b/arch/arm/mach-msm/subsystem_restart.c
@@ -58,11 +58,13 @@
enum subsys_state {
SUBSYS_OFFLINE,
SUBSYS_ONLINE,
+ SUBSYS_CRASHED,
};
static const char * const subsys_states[] = {
[SUBSYS_OFFLINE] = "OFFLINE",
[SUBSYS_ONLINE] = "ONLINE",
+ [SUBSYS_CRASHED] = "CRASHED",
};
struct subsys_device {
@@ -71,7 +73,7 @@
char wlname[64];
struct work_struct work;
spinlock_t restart_lock;
- int restart_count;
+ bool restarting;
void *notify;
struct device dev;
@@ -107,6 +109,21 @@
return snprintf(buf, PAGE_SIZE, "%s\n", subsys_states[state]);
}
+static void subsys_set_state(struct subsys_device *subsys,
+ enum subsys_state state)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&subsys->restart_lock, flags);
+ if (subsys->state != state) {
+ subsys->state = state;
+ spin_unlock_irqrestore(&subsys->restart_lock, flags);
+ sysfs_notify(&subsys->dev.kobj, NULL, "state");
+ return;
+ }
+ spin_unlock_irqrestore(&subsys->restart_lock, flags);
+}
+
static struct device_attribute subsys_attrs[] = {
__ATTR_RO(name),
__ATTR_RO(state),
@@ -333,6 +350,7 @@
if (dev->desc->shutdown(dev->desc) < 0)
panic("subsys-restart: [%p]: Failed to shutdown %s!",
current, name);
+ subsys_set_state(dev, SUBSYS_OFFLINE);
}
static void subsystem_ramdump(struct subsys_device *dev, void *data)
@@ -351,6 +369,7 @@
pr_info("[%p]: Powering up %s\n", current, name);
if (dev->desc->powerup(dev->desc) < 0)
panic("[%p]: Failed to powerup %s!", current, name);
+ subsys_set_state(dev, SUBSYS_ONLINE);
}
static int __find_subsys(struct device *dev, void *data)
@@ -418,7 +437,10 @@
* Now that we've acquired the shutdown lock, either we're the first to
* restart these subsystems or some other thread is doing the powerup
* sequence for these subsystems. In the latter case, panic and bail
- * out, since a subsystem died in its powerup sequence.
+ * out, since a subsystem died in its powerup sequence. This catches
+ * the case where a subsystem in a restart order isn't the one
+ * who initiated the original restart but has crashed while the restart
+ * order is being rebooted.
*/
if (!mutex_trylock(powerup_lock))
panic("%s[%p]: Subsystem died during powerup!",
@@ -465,32 +487,36 @@
out:
spin_lock_irqsave(&dev->restart_lock, flags);
- dev->restart_count--;
- if (!dev->restart_count)
- wake_unlock(&dev->wake_lock);
+ dev->restarting = false;
+ wake_unlock(&dev->wake_lock);
spin_unlock_irqrestore(&dev->restart_lock, flags);
}
static void __subsystem_restart_dev(struct subsys_device *dev)
{
struct subsys_desc *desc = dev->desc;
+ const char *name = dev->desc->name;
unsigned long flags;
pr_debug("Restarting %s [level=%d]!\n", desc->name, restart_level);
+ /*
+ * We want to allow drivers to call subsystem_restart{_dev}() as many
+ * times as they want up until the point where the subsystem is
+ * shutdown.
+ */
spin_lock_irqsave(&dev->restart_lock, flags);
- if (!dev->restart_count)
- wake_lock(&dev->wake_lock);
- dev->restart_count++;
- spin_unlock_irqrestore(&dev->restart_lock, flags);
-
- if (!queue_work(ssr_wq, &dev->work)) {
- spin_lock_irqsave(&dev->restart_lock, flags);
- dev->restart_count--;
- if (!dev->restart_count)
- wake_unlock(&dev->wake_lock);
- spin_unlock_irqrestore(&dev->restart_lock, flags);
+ if (dev->state != SUBSYS_CRASHED) {
+ if (dev->state == SUBSYS_ONLINE && !dev->restarting) {
+ dev->restarting = true;
+ dev->state = SUBSYS_CRASHED;
+ wake_lock(&dev->wake_lock);
+ queue_work(ssr_wq, &dev->work);
+ } else {
+ panic("Subsystem %s crashed during SSR!", name);
+ }
}
+ spin_unlock_irqrestore(&dev->restart_lock, flags);
}
int subsystem_restart_dev(struct subsys_device *dev)
@@ -644,6 +670,7 @@
subsys->dev.parent = desc->dev;
subsys->dev.bus = &subsys_bus_type;
subsys->dev.release = subsys_device_release;
+ subsys->state = SUBSYS_ONLINE; /* Until proper refcounting appears */
subsys->notify = subsys_notif_add_subsys(desc->name);
subsys->restart_order = update_restart_order(subsys);