cpuidle: Single/Global registration of idle states

This patch makes the cpuidle_states structure global (single copy)
instead of per-cpu. The statistics needed on per-cpu basis
by the governor are kept per-cpu. This simplifies the cpuidle
subsystem as state registration is done by single cpu only.
Having single copy of cpuidle_states saves memory. Rare case
of asymmetric C-states can be handled within the cpuidle driver
and architectures such as POWER do not have asymmetric C-states.

Having single/global registration of all the idle states,
dynamic C-state transitions on x86 are handled by
the boot cpu. Here, the boot cpu  would disable all the devices,
re-populate the states and later enable all the devices,
irrespective of the cpu that would receive the notification first.

Reference:
https://lkml.org/lkml/2011/4/25/83

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Signed-off-by: Trinabh Gupta <g.trinabh@gmail.com>
Tested-by: Jean Pihet <j-pihet@ti.com>
Reviewed-by: Kevin Hilman <khilman@ti.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index b98c752..24fe3af 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -741,11 +741,13 @@
 /**
  * acpi_idle_enter_c1 - enters an ACPI C1 state-type
  * @dev: the target CPU
+ * @drv: cpuidle driver containing cpuidle state info
  * @index: index of target state
  *
  * This is equivalent to the HALT instruction.
  */
-static int acpi_idle_enter_c1(struct cpuidle_device *dev, int index)
+static int acpi_idle_enter_c1(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
 {
 	ktime_t  kt1, kt2;
 	s64 idle_time;
@@ -787,9 +789,11 @@
 /**
  * acpi_idle_enter_simple - enters an ACPI state without BM handling
  * @dev: the target CPU
+ * @drv: cpuidle driver with cpuidle state information
  * @index: the index of suggested state
  */
-static int acpi_idle_enter_simple(struct cpuidle_device *dev, int index)
+static int acpi_idle_enter_simple(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
 {
 	struct acpi_processor *pr;
 	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
@@ -869,11 +873,13 @@
 /**
  * acpi_idle_enter_bm - enters C3 with proper BM handling
  * @dev: the target CPU
+ * @drv: cpuidle driver containing state data
  * @index: the index of suggested state
  *
  * If BM is detected, the deepest non-C3 idle state is entered instead.
  */
-static int acpi_idle_enter_bm(struct cpuidle_device *dev, int index)
+static int acpi_idle_enter_bm(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
 {
 	struct acpi_processor *pr;
 	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
@@ -896,9 +902,9 @@
 	}
 
 	if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
-		if (dev->safe_state_index >= 0) {
-			return dev->states[dev->safe_state_index].enter(dev,
-						dev->safe_state_index);
+		if (drv->safe_state_index >= 0) {
+			return drv->states[drv->safe_state_index].enter(dev,
+						drv, drv->safe_state_index);
 		} else {
 			local_irq_disable();
 			acpi_safe_halt();
@@ -993,14 +999,15 @@
 };
 
 /**
- * acpi_processor_setup_cpuidle - prepares and configures CPUIDLE
+ * acpi_processor_setup_cpuidle_cx - prepares and configures CPUIDLE
+ * device i.e. per-cpu data
+ *
  * @pr: the ACPI processor
  */
-static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
+static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
 {
 	int i, count = CPUIDLE_DRIVER_STATE_START;
 	struct acpi_processor_cx *cx;
-	struct cpuidle_state *state;
 	struct cpuidle_state_usage *state_usage;
 	struct cpuidle_device *dev = &pr->power.dev;
 
@@ -1012,18 +1019,12 @@
 	}
 
 	dev->cpu = pr->id;
-	dev->safe_state_index = -1;
-	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
-		dev->states[i].name[0] = '\0';
-		dev->states[i].desc[0] = '\0';
-	}
 
 	if (max_cstate == 0)
 		max_cstate = 1;
 
 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
 		cx = &pr->power.states[i];
-		state = &dev->states[count];
 		state_usage = &dev->states_usage[count];
 
 		if (!cx->valid)
@@ -1035,8 +1036,64 @@
 		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
 			continue;
 #endif
+
 		cpuidle_set_statedata(state_usage, cx);
 
+		count++;
+		if (count == CPUIDLE_STATE_MAX)
+			break;
+	}
+
+	dev->state_count = count;
+
+	if (!count)
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * acpi_processor_setup_cpuidle states- prepares and configures cpuidle
+ * global state data i.e. idle routines
+ *
+ * @pr: the ACPI processor
+ */
+static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr)
+{
+	int i, count = CPUIDLE_DRIVER_STATE_START;
+	struct acpi_processor_cx *cx;
+	struct cpuidle_state *state;
+	struct cpuidle_driver *drv = &acpi_idle_driver;
+
+	if (!pr->flags.power_setup_done)
+		return -EINVAL;
+
+	if (pr->flags.power == 0)
+		return -EINVAL;
+
+	drv->safe_state_index = -1;
+	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
+		drv->states[i].name[0] = '\0';
+		drv->states[i].desc[0] = '\0';
+	}
+
+	if (max_cstate == 0)
+		max_cstate = 1;
+
+	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
+		cx = &pr->power.states[i];
+
+		if (!cx->valid)
+			continue;
+
+#ifdef CONFIG_HOTPLUG_CPU
+		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
+		    !pr->flags.has_cst &&
+		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
+			continue;
+#endif
+
+		state = &drv->states[count];
 		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
 		strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
 		state->exit_latency = cx->latency;
@@ -1049,13 +1106,13 @@
 				state->flags |= CPUIDLE_FLAG_TIME_VALID;
 
 			state->enter = acpi_idle_enter_c1;
-			dev->safe_state_index = count;
+			drv->safe_state_index = count;
 			break;
 
 			case ACPI_STATE_C2:
 			state->flags |= CPUIDLE_FLAG_TIME_VALID;
 			state->enter = acpi_idle_enter_simple;
-			dev->safe_state_index = count;
+			drv->safe_state_index = count;
 			break;
 
 			case ACPI_STATE_C3:
@@ -1071,7 +1128,7 @@
 			break;
 	}
 
-	dev->state_count = count;
+	drv->state_count = count;
 
 	if (!count)
 		return -EINVAL;
@@ -1079,7 +1136,7 @@
 	return 0;
 }
 
-int acpi_processor_cst_has_changed(struct acpi_processor *pr)
+int acpi_processor_hotplug(struct acpi_processor *pr)
 {
 	int ret = 0;
 
@@ -1100,7 +1157,7 @@
 	cpuidle_disable_device(&pr->power.dev);
 	acpi_processor_get_power_info(pr);
 	if (pr->flags.power) {
-		acpi_processor_setup_cpuidle(pr);
+		acpi_processor_setup_cpuidle_cx(pr);
 		ret = cpuidle_enable_device(&pr->power.dev);
 	}
 	cpuidle_resume_and_unlock();
@@ -1108,10 +1165,72 @@
 	return ret;
 }
 
+int acpi_processor_cst_has_changed(struct acpi_processor *pr)
+{
+	int cpu;
+	struct acpi_processor *_pr;
+
+	if (disabled_by_idle_boot_param())
+		return 0;
+
+	if (!pr)
+		return -EINVAL;
+
+	if (nocst)
+		return -ENODEV;
+
+	if (!pr->flags.power_setup_done)
+		return -ENODEV;
+
+	/*
+	 * FIXME:  Design the ACPI notification to make it once per
+	 * system instead of once per-cpu.  This condition is a hack
+	 * to make the code that updates C-States be called once.
+	 */
+
+	if (smp_processor_id() == 0 &&
+			cpuidle_get_driver() == &acpi_idle_driver) {
+
+		cpuidle_pause_and_lock();
+		/* Protect against cpu-hotplug */
+		get_online_cpus();
+
+		/* Disable all cpuidle devices */
+		for_each_online_cpu(cpu) {
+			_pr = per_cpu(processors, cpu);
+			if (!_pr || !_pr->flags.power_setup_done)
+				continue;
+			cpuidle_disable_device(&_pr->power.dev);
+		}
+
+		/* Populate Updated C-state information */
+		acpi_processor_setup_cpuidle_states(pr);
+
+		/* Enable all cpuidle devices */
+		for_each_online_cpu(cpu) {
+			_pr = per_cpu(processors, cpu);
+			if (!_pr || !_pr->flags.power_setup_done)
+				continue;
+			acpi_processor_get_power_info(_pr);
+			if (_pr->flags.power) {
+				acpi_processor_setup_cpuidle_cx(_pr);
+				cpuidle_enable_device(&_pr->power.dev);
+			}
+		}
+		put_online_cpus();
+		cpuidle_resume_and_unlock();
+	}
+
+	return 0;
+}
+
+static int acpi_processor_registered;
+
 int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 			      struct acpi_device *device)
 {
 	acpi_status status = 0;
+	int retval;
 	static int first_run;
 
 	if (disabled_by_idle_boot_param())
@@ -1148,9 +1267,26 @@
 	 * platforms that only support C1.
 	 */
 	if (pr->flags.power) {
-		acpi_processor_setup_cpuidle(pr);
-		if (cpuidle_register_device(&pr->power.dev))
-			return -EIO;
+		/* Register acpi_idle_driver if not already registered */
+		if (!acpi_processor_registered) {
+			acpi_processor_setup_cpuidle_states(pr);
+			retval = cpuidle_register_driver(&acpi_idle_driver);
+			if (retval)
+				return retval;
+			printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n",
+					acpi_idle_driver.name);
+		}
+		/* Register per-cpu cpuidle_device. Cpuidle driver
+		 * must already be registered before registering device
+		 */
+		acpi_processor_setup_cpuidle_cx(pr);
+		retval = cpuidle_register_device(&pr->power.dev);
+		if (retval) {
+			if (acpi_processor_registered == 0)
+				cpuidle_unregister_driver(&acpi_idle_driver);
+			return retval;
+		}
+		acpi_processor_registered++;
 	}
 	return 0;
 }
@@ -1161,8 +1297,13 @@
 	if (disabled_by_idle_boot_param())
 		return 0;
 
-	cpuidle_unregister_device(&pr->power.dev);
-	pr->flags.power_setup_done = 0;
+	if (pr->flags.power) {
+		cpuidle_unregister_device(&pr->power.dev);
+		acpi_processor_registered--;
+		if (acpi_processor_registered == 0)
+			cpuidle_unregister_driver(&acpi_idle_driver);
+	}
 
+	pr->flags.power_setup_done = 0;
 	return 0;
 }