core_ctl: Synchronize access to cluster cpu list

The cluster CPU list traversal is not properly protected against
removal of an element by a separate thread. Add proper locking so
that an element cannot be removed while the list is being accessed,
as sketched below.
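
A minimal sketch of the pattern applied in the patch (state_lock and
the list names are those used in core_ctl.c; that sched_isolate_cpu()
should not be called with the spinlock held is my reading of why the
lock is dropped around it):

        struct cpu_data *c, *tmp;
        unsigned long flags;

        spin_lock_irqsave(&state_lock, flags);
        list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
                /* ... skip entries that should stay active ... */
                spin_unlock_irqrestore(&state_lock, flags);
                sched_isolate_cpu(c->cpu); /* slow path, lock dropped */
                spin_lock_irqsave(&state_lock, flags);
        }
        spin_unlock_irqrestore(&state_lock, flags);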

In addition, ensure we do not end up in a livelock, never exiting
the loop because hotplug keeps moving elements to the tail of the
list: bound each traversal at the number of CPUs in the cluster (see
the sketch below).
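
Hotplug removes an entry and re-adds it at the tail of cluster->lru,
so an unbounded walk can keep seeing "fresh" entries forever. Capping
the walk at cluster->num_cpus guarantees termination; a sketch of the
bound used in each loop below:

        unsigned int num_cpus = cluster->num_cpus;

        list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
                /* visit at most num_cpus entries, even if they move */
                if (!num_cpus--)
                        break;
                /* ... */
        }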

Change-Id: Ie98fe48c2f4fdd0244573229b77ee9823df9e214
Signed-off-by: Olav Haugan <ohaugan@codeaurora.org>
diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c
index 0db85a4..ecf6c56 100644
--- a/kernel/sched/core_ctl.c
+++ b/kernel/sched/core_ctl.c
@@ -719,8 +719,18 @@
 static void try_to_isolate(struct cluster_data *cluster, unsigned int need)
 {
 	struct cpu_data *c, *tmp;
+	unsigned long flags;
+	unsigned int num_cpus = cluster->num_cpus;
 
+	/*
+	 * Protect against entry being removed (and added at tail) by other
+	 * thread (hotplug).
+	 */
+	spin_lock_irqsave(&state_lock, flags);
 	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
+		if (!num_cpus--)
+			break;
+
 		if (!is_active(c))
 			continue;
 		if (cluster->active_cpus == need)
@@ -729,6 +739,8 @@
 		if (c->is_busy)
 			continue;
 
+		spin_unlock_irqrestore(&state_lock, flags);
+
 		pr_debug("Trying to isolate CPU%u\n", c->cpu);
 		if (!sched_isolate_cpu(c->cpu)) {
 			c->isolated_by_us = true;
@@ -738,7 +750,9 @@
 			pr_debug("Unable to isolate CPU%u\n", c->cpu);
 		}
 		cluster->active_cpus = get_active_cpu_count(cluster);
+		spin_lock_irqsave(&state_lock, flags);
 	}
+	spin_unlock_irqrestore(&state_lock, flags);
 
 	/*
 	 * If the number of active CPUs is within the limits, then
@@ -747,12 +761,19 @@
 	if (cluster->active_cpus <= cluster->max_cpus)
 		return;
 
+	num_cpus = cluster->num_cpus;
+	spin_lock_irqsave(&state_lock, flags);
 	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
+		if (!num_cpus--)
+			break;
+
 		if (!is_active(c))
 			continue;
 		if (cluster->active_cpus <= cluster->max_cpus)
 			break;
 
+		spin_unlock_irqrestore(&state_lock, flags);
+
 		pr_debug("Trying to isolate CPU%u\n", c->cpu);
 		if (!sched_isolate_cpu(c->cpu)) {
 			c->isolated_by_us = true;
@@ -762,15 +783,28 @@
 			pr_debug("Unable to isolate CPU%u\n", c->cpu);
 		}
 		cluster->active_cpus = get_active_cpu_count(cluster);
+		spin_lock_irqsave(&state_lock, flags);
 	}
+	spin_unlock_irqrestore(&state_lock, flags);
+
 }
 
 static void __try_to_unisolate(struct cluster_data *cluster,
 			       unsigned int need, bool force)
 {
 	struct cpu_data *c, *tmp;
+	unsigned long flags;
+	unsigned int num_cpus = cluster->num_cpus;
 
+	/*
+	 * Protect against entry being removed (and added at tail) by other
+	 * thread (hotplug).
+	 */
+	spin_lock_irqsave(&state_lock, flags);
 	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
+		if (!num_cpus--)
+			break;
+
 		if (!c->isolated_by_us)
 			continue;
 		if ((c->online && !cpu_isolated(c->cpu)) ||
@@ -779,6 +813,8 @@
 		if (cluster->active_cpus == need)
 			break;
 
+		spin_unlock_irqrestore(&state_lock, flags);
+
 		pr_debug("Trying to unisolate CPU%u\n", c->cpu);
 		if (!sched_unisolate_cpu(c->cpu)) {
 			c->isolated_by_us = false;
@@ -787,7 +823,9 @@
 			pr_debug("Unable to unisolate CPU%u\n", c->cpu);
 		}
 		cluster->active_cpus = get_active_cpu_count(cluster);
+		spin_lock_irqsave(&state_lock, flags);
 	}
+	spin_unlock_irqrestore(&state_lock, flags);
 }
 
 static void try_to_unisolate(struct cluster_data *cluster, unsigned int need)