cpuidle: Single/Global registration of idle states

This patch makes the cpuidle_states structure global (single copy)
instead of per-cpu. The statistics needed on per-cpu basis
by the governor are kept per-cpu. This simplifies the cpuidle
subsystem as state registration is done by single cpu only.
Having single copy of cpuidle_states saves memory. Rare case
of asymmetric C-states can be handled within the cpuidle driver
and architectures such as POWER do not have asymmetric C-states.

Having single/global registration of all the idle states,
dynamic C-state transitions on x86 are handled by
the boot cpu. Here, the boot cpu  would disable all the devices,
re-populate the states and later enable all the devices,
irrespective of the cpu that would receive the notification first.

Reference:
https://lkml.org/lkml/2011/4/25/83

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Signed-off-by: Trinabh Gupta <g.trinabh@gmail.com>
Tested-by: Jean Pihet <j-pihet@ti.com>
Reviewed-by: Kevin Hilman <khilman@ti.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/arch/arm/mach-at91/cpuidle.c b/arch/arm/mach-at91/cpuidle.c
index 4696a0d..93178f6 100644
--- a/arch/arm/mach-at91/cpuidle.c
+++ b/arch/arm/mach-at91/cpuidle.c
@@ -33,6 +33,7 @@
 
 /* Actual code that puts the SoC in different idle states */
 static int at91_enter_idle(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
 			       int index)
 {
 	struct timeval before, after;
@@ -64,27 +65,29 @@
 static int at91_init_cpuidle(void)
 {
 	struct cpuidle_device *device;
-
-	cpuidle_register_driver(&at91_idle_driver);
+	struct cpuidle_driver *driver = &at91_idle_driver;
 
 	device = &per_cpu(at91_cpuidle_device, smp_processor_id());
 	device->state_count = AT91_MAX_STATES;
+	driver->state_count = AT91_MAX_STATES;
 
 	/* Wait for interrupt state */
-	device->states[0].enter = at91_enter_idle;
-	device->states[0].exit_latency = 1;
-	device->states[0].target_residency = 10000;
-	device->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
-	strcpy(device->states[0].name, "WFI");
-	strcpy(device->states[0].desc, "Wait for interrupt");
+	driver->states[0].enter = at91_enter_idle;
+	driver->states[0].exit_latency = 1;
+	driver->states[0].target_residency = 10000;
+	driver->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
+	strcpy(driver->states[0].name, "WFI");
+	strcpy(driver->states[0].desc, "Wait for interrupt");
 
 	/* Wait for interrupt and RAM self refresh state */
-	device->states[1].enter = at91_enter_idle;
-	device->states[1].exit_latency = 10;
-	device->states[1].target_residency = 10000;
-	device->states[1].flags = CPUIDLE_FLAG_TIME_VALID;
-	strcpy(device->states[1].name, "RAM_SR");
-	strcpy(device->states[1].desc, "WFI and RAM Self Refresh");
+	driver->states[1].enter = at91_enter_idle;
+	driver->states[1].exit_latency = 10;
+	driver->states[1].target_residency = 10000;
+	driver->states[1].flags = CPUIDLE_FLAG_TIME_VALID;
+	strcpy(driver->states[1].name, "RAM_SR");
+	strcpy(driver->states[1].desc, "WFI and RAM Self Refresh");
+
+	cpuidle_register_driver(&at91_idle_driver);
 
 	if (cpuidle_register_device(device)) {
 		printk(KERN_ERR "at91_init_cpuidle: Failed registering\n");
diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c
index f2d2f34..dbeeccd 100644
--- a/arch/arm/mach-davinci/cpuidle.c
+++ b/arch/arm/mach-davinci/cpuidle.c
@@ -78,6 +78,7 @@
 
 /* Actual code that puts the SoC in different idle states */
 static int davinci_enter_idle(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv,
 						int index)
 {
 	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
@@ -109,6 +110,7 @@
 {
 	int ret;
 	struct cpuidle_device *device;
+	struct cpuidle_driver *driver = &davinci_idle_driver;
 	struct davinci_cpuidle_config *pdata = pdev->dev.platform_data;
 
 	device = &per_cpu(davinci_cpuidle_device, smp_processor_id());
@@ -120,32 +122,33 @@
 
 	ddr2_reg_base = pdata->ddr2_ctlr_base;
 
-	ret = cpuidle_register_driver(&davinci_idle_driver);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to register driver\n");
-		return ret;
-	}
-
 	/* Wait for interrupt state */
-	device->states[0].enter = davinci_enter_idle;
-	device->states[0].exit_latency = 1;
-	device->states[0].target_residency = 10000;
-	device->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
-	strcpy(device->states[0].name, "WFI");
-	strcpy(device->states[0].desc, "Wait for interrupt");
+	driver->states[0].enter = davinci_enter_idle;
+	driver->states[0].exit_latency = 1;
+	driver->states[0].target_residency = 10000;
+	driver->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
+	strcpy(driver->states[0].name, "WFI");
+	strcpy(driver->states[0].desc, "Wait for interrupt");
 
 	/* Wait for interrupt and DDR self refresh state */
-	device->states[1].enter = davinci_enter_idle;
-	device->states[1].exit_latency = 10;
-	device->states[1].target_residency = 10000;
-	device->states[1].flags = CPUIDLE_FLAG_TIME_VALID;
-	strcpy(device->states[1].name, "DDR SR");
-	strcpy(device->states[1].desc, "WFI and DDR Self Refresh");
+	driver->states[1].enter = davinci_enter_idle;
+	driver->states[1].exit_latency = 10;
+	driver->states[1].target_residency = 10000;
+	driver->states[1].flags = CPUIDLE_FLAG_TIME_VALID;
+	strcpy(driver->states[1].name, "DDR SR");
+	strcpy(driver->states[1].desc, "WFI and DDR Self Refresh");
 	if (pdata->ddr2_pdown)
 		davinci_states[1].flags |= DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN;
 	cpuidle_set_statedata(&device->states_usage[1], &davinci_states[1]);
 
 	device->state_count = DAVINCI_CPUIDLE_MAX_STATES;
+	driver->state_count = DAVINCI_CPUIDLE_MAX_STATES;
+
+	ret = cpuidle_register_driver(&davinci_idle_driver);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register driver\n");
+		return ret;
+	}
 
 	ret = cpuidle_register_device(device);
 	if (ret) {
diff --git a/arch/arm/mach-exynos4/cpuidle.c b/arch/arm/mach-exynos4/cpuidle.c
index ea026e7..35f6502 100644
--- a/arch/arm/mach-exynos4/cpuidle.c
+++ b/arch/arm/mach-exynos4/cpuidle.c
@@ -16,6 +16,7 @@
 #include <asm/proc-fns.h>
 
 static int exynos4_enter_idle(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
 			      int index);
 
 static struct cpuidle_state exynos4_cpuidle_set[] = {
@@ -37,6 +38,7 @@
 };
 
 static int exynos4_enter_idle(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv,
 			      int index)
 {
 	struct timeval before, after;
@@ -60,22 +62,23 @@
 {
 	int i, max_cpuidle_state, cpu_id;
 	struct cpuidle_device *device;
+	struct cpuidle_driver *drv = &exynos4_idle_driver;
 
+	/* Setup cpuidle driver */
+	drv->state_count = (sizeof(exynos4_cpuidle_set) /
+				       sizeof(struct cpuidle_state));
+	max_cpuidle_state = drv->state_count;
+	for (i = 0; i < max_cpuidle_state; i++) {
+		memcpy(&drv->states[i], &exynos4_cpuidle_set[i],
+				sizeof(struct cpuidle_state));
+	}
 	cpuidle_register_driver(&exynos4_idle_driver);
 
 	for_each_cpu(cpu_id, cpu_online_mask) {
 		device = &per_cpu(exynos4_cpuidle_device, cpu_id);
 		device->cpu = cpu_id;
 
-		device->state_count = (sizeof(exynos4_cpuidle_set) /
-					       sizeof(struct cpuidle_state));
-
-		max_cpuidle_state = device->state_count;
-
-		for (i = 0; i < max_cpuidle_state; i++) {
-			memcpy(&device->states[i], &exynos4_cpuidle_set[i],
-					sizeof(struct cpuidle_state));
-		}
+		device->state_count = drv->state_count;
 
 		if (cpuidle_register_device(device)) {
 			printk(KERN_ERR "CPUidle register device failed\n,");
diff --git a/arch/arm/mach-kirkwood/cpuidle.c b/arch/arm/mach-kirkwood/cpuidle.c
index 358dd80..ffd690d 100644
--- a/arch/arm/mach-kirkwood/cpuidle.c
+++ b/arch/arm/mach-kirkwood/cpuidle.c
@@ -32,6 +32,7 @@
 
 /* Actual code that puts the SoC in different idle states */
 static int kirkwood_enter_idle(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv,
 			       int index)
 {
 	struct timeval before, after;
@@ -68,28 +69,29 @@
 static int kirkwood_init_cpuidle(void)
 {
 	struct cpuidle_device *device;
-
-	cpuidle_register_driver(&kirkwood_idle_driver);
+	struct cpuidle_driver *driver = &kirkwood_idle_driver;
 
 	device = &per_cpu(kirkwood_cpuidle_device, smp_processor_id());
 	device->state_count = KIRKWOOD_MAX_STATES;
+	driver->state_count = KIRKWOOD_MAX_STATES;
 
 	/* Wait for interrupt state */
-	device->states[0].enter = kirkwood_enter_idle;
-	device->states[0].exit_latency = 1;
-	device->states[0].target_residency = 10000;
-	device->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
-	strcpy(device->states[0].name, "WFI");
-	strcpy(device->states[0].desc, "Wait for interrupt");
+	driver->states[0].enter = kirkwood_enter_idle;
+	driver->states[0].exit_latency = 1;
+	driver->states[0].target_residency = 10000;
+	driver->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
+	strcpy(driver->states[0].name, "WFI");
+	strcpy(driver->states[0].desc, "Wait for interrupt");
 
 	/* Wait for interrupt and DDR self refresh state */
-	device->states[1].enter = kirkwood_enter_idle;
-	device->states[1].exit_latency = 10;
-	device->states[1].target_residency = 10000;
-	device->states[1].flags = CPUIDLE_FLAG_TIME_VALID;
-	strcpy(device->states[1].name, "DDR SR");
-	strcpy(device->states[1].desc, "WFI and DDR Self Refresh");
+	driver->states[1].enter = kirkwood_enter_idle;
+	driver->states[1].exit_latency = 10;
+	driver->states[1].target_residency = 10000;
+	driver->states[1].flags = CPUIDLE_FLAG_TIME_VALID;
+	strcpy(driver->states[1].name, "DDR SR");
+	strcpy(driver->states[1].desc, "WFI and DDR Self Refresh");
 
+	cpuidle_register_driver(&kirkwood_idle_driver);
 	if (cpuidle_register_device(device)) {
 		printk(KERN_ERR "kirkwood_init_cpuidle: Failed registering\n");
 		return -EIO;
diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c
index d3fce7b..1fe35c2 100644
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -88,12 +88,14 @@
 /**
  * omap3_enter_idle - Programs OMAP3 to enter the specified state
  * @dev: cpuidle device
+ * @drv: cpuidle driver
  * @index: the index of state to be entered
  *
  * Called from the CPUidle framework to program the device to the
  * specified target state selected by the governor.
  */
 static int omap3_enter_idle(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv,
 				int index)
 {
 	struct omap3_idle_statedata *cx =
@@ -148,6 +150,7 @@
 /**
  * next_valid_state - Find next valid C-state
  * @dev: cpuidle device
+ * @drv: cpuidle driver
  * @index: Index of currently selected c-state
  *
  * If the state corresponding to index is valid, index is returned back
@@ -158,10 +161,11 @@
  * if it satisfies the enable_off_mode condition.
  */
 static int next_valid_state(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
 				int index)
 {
 	struct cpuidle_state_usage *curr_usage = &dev->states_usage[index];
-	struct cpuidle_state *curr = &dev->states[index];
+	struct cpuidle_state *curr = &drv->states[index];
 	struct omap3_idle_statedata *cx = cpuidle_get_statedata(curr_usage);
 	u32 mpu_deepest_state = PWRDM_POWER_RET;
 	u32 core_deepest_state = PWRDM_POWER_RET;
@@ -188,7 +192,7 @@
 
 		/* Reach the current state starting at highest C-state */
 		for (; idx >= 0; idx--) {
-			if (&dev->states[idx] == curr) {
+			if (&drv->states[idx] == curr) {
 				next_index = idx;
 				break;
 			}
@@ -224,12 +228,14 @@
 /**
  * omap3_enter_idle_bm - Checks for any bus activity
  * @dev: cpuidle device
+ * @drv: cpuidle driver
  * @index: array index of target state to be programmed
  *
  * This function checks for any pending activity and then programs
  * the device to the specified or a safer state.
  */
 static int omap3_enter_idle_bm(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv,
 			       int index)
 {
 	int new_state_idx;
@@ -238,7 +244,7 @@
 	int ret;
 
 	if (!omap3_can_sleep()) {
-		new_state_idx = dev->safe_state_index;
+		new_state_idx = drv->safe_state_index;
 		goto select_state;
 	}
 
@@ -248,7 +254,7 @@
 	 */
 	cam_state = pwrdm_read_pwrst(cam_pd);
 	if (cam_state == PWRDM_POWER_ON) {
-		new_state_idx = dev->safe_state_index;
+		new_state_idx = drv->safe_state_index;
 		goto select_state;
 	}
 
@@ -275,10 +281,10 @@
 	if (per_next_state != per_saved_state)
 		pwrdm_set_next_pwrst(per_pd, per_next_state);
 
-	new_state_idx = next_valid_state(dev, index);
+	new_state_idx = next_valid_state(dev, drv, index);
 
 select_state:
-	ret = omap3_enter_idle(dev, new_state_idx);
+	ret = omap3_enter_idle(dev, drv, new_state_idx);
 
 	/* Restore original PER state if it was modified */
 	if (per_next_state != per_saved_state)
@@ -311,22 +317,30 @@
 	.owner = 	THIS_MODULE,
 };
 
-/* Helper to fill the C-state common data and register the driver_data */
-static inline struct omap3_idle_statedata *_fill_cstate(
-					struct cpuidle_device *dev,
+/* Helper to fill the C-state common data*/
+static inline void _fill_cstate(struct cpuidle_driver *drv,
 					int idx, const char *descr)
 {
-	struct omap3_idle_statedata *cx = &omap3_idle_data[idx];
-	struct cpuidle_state *state = &dev->states[idx];
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[idx];
+	struct cpuidle_state *state = &drv->states[idx];
 
 	state->exit_latency	= cpuidle_params_table[idx].exit_latency;
 	state->target_residency	= cpuidle_params_table[idx].target_residency;
 	state->flags		= CPUIDLE_FLAG_TIME_VALID;
 	state->enter		= omap3_enter_idle_bm;
-	cx->valid		= cpuidle_params_table[idx].valid;
 	sprintf(state->name, "C%d", idx + 1);
 	strncpy(state->desc, descr, CPUIDLE_DESC_LEN);
+
+}
+
+/* Helper to register the driver_data */
+static inline struct omap3_idle_statedata *_fill_cstate_usage(
+					struct cpuidle_device *dev,
+					int idx)
+{
+	struct omap3_idle_statedata *cx = &omap3_idle_data[idx];
+	struct cpuidle_state_usage *state_usage = &dev->states_usage[idx];
+
+	cx->valid		= cpuidle_params_table[idx].valid;
 	cpuidle_set_statedata(state_usage, cx);
 
 	return cx;
@@ -341,6 +355,7 @@
 int __init omap3_idle_init(void)
 {
 	struct cpuidle_device *dev;
+	struct cpuidle_driver *drv = &omap3_idle_driver;
 	struct omap3_idle_statedata *cx;
 
 	mpu_pd = pwrdm_lookup("mpu_pwrdm");
@@ -348,45 +363,52 @@
 	per_pd = pwrdm_lookup("per_pwrdm");
 	cam_pd = pwrdm_lookup("cam_pwrdm");
 
-	cpuidle_register_driver(&omap3_idle_driver);
+
+	drv->safe_state_index = -1;
 	dev = &per_cpu(omap3_idle_dev, smp_processor_id());
-	dev->safe_state_index = -1;
 
 	/* C1 . MPU WFI + Core active */
-	cx = _fill_cstate(dev, 0, "MPU ON + CORE ON");
-	(&dev->states[0])->enter = omap3_enter_idle;
-	dev->safe_state_index = 0;
+	_fill_cstate(drv, 0, "MPU ON + CORE ON");
+	(&drv->states[0])->enter = omap3_enter_idle;
+	drv->safe_state_index = 0;
+	cx = _fill_cstate_usage(dev, 0);
 	cx->valid = 1;	/* C1 is always valid */
 	cx->mpu_state = PWRDM_POWER_ON;
 	cx->core_state = PWRDM_POWER_ON;
 
 	/* C2 . MPU WFI + Core inactive */
-	cx = _fill_cstate(dev, 1, "MPU ON + CORE ON");
+	_fill_cstate(drv, 1, "MPU ON + CORE ON");
+	cx = _fill_cstate_usage(dev, 1);
 	cx->mpu_state = PWRDM_POWER_ON;
 	cx->core_state = PWRDM_POWER_ON;
 
 	/* C3 . MPU CSWR + Core inactive */
-	cx = _fill_cstate(dev, 2, "MPU RET + CORE ON");
+	_fill_cstate(drv, 2, "MPU RET + CORE ON");
+	cx = _fill_cstate_usage(dev, 2);
 	cx->mpu_state = PWRDM_POWER_RET;
 	cx->core_state = PWRDM_POWER_ON;
 
 	/* C4 . MPU OFF + Core inactive */
-	cx = _fill_cstate(dev, 3, "MPU OFF + CORE ON");
+	_fill_cstate(drv, 3, "MPU OFF + CORE ON");
+	cx = _fill_cstate_usage(dev, 3);
 	cx->mpu_state = PWRDM_POWER_OFF;
 	cx->core_state = PWRDM_POWER_ON;
 
 	/* C5 . MPU RET + Core RET */
-	cx = _fill_cstate(dev, 4, "MPU RET + CORE RET");
+	_fill_cstate(drv, 4, "MPU RET + CORE RET");
+	cx = _fill_cstate_usage(dev, 4);
 	cx->mpu_state = PWRDM_POWER_RET;
 	cx->core_state = PWRDM_POWER_RET;
 
 	/* C6 . MPU OFF + Core RET */
-	cx = _fill_cstate(dev, 5, "MPU OFF + CORE RET");
+	_fill_cstate(drv, 5, "MPU OFF + CORE RET");
+	cx = _fill_cstate_usage(dev, 5);
 	cx->mpu_state = PWRDM_POWER_OFF;
 	cx->core_state = PWRDM_POWER_RET;
 
 	/* C7 . MPU OFF + Core OFF */
-	cx = _fill_cstate(dev, 6, "MPU OFF + CORE OFF");
+	_fill_cstate(drv, 6, "MPU OFF + CORE OFF");
+	cx = _fill_cstate_usage(dev, 6);
 	/*
 	 * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot
 	 * enable OFF mode in a stable form for previous revisions.
@@ -400,6 +422,9 @@
 	cx->mpu_state = PWRDM_POWER_OFF;
 	cx->core_state = PWRDM_POWER_OFF;
 
+	drv->state_count = OMAP3_NUM_STATES;
+	cpuidle_register_driver(&omap3_idle_driver);
+
 	dev->state_count = OMAP3_NUM_STATES;
 	if (cpuidle_register_device(dev)) {
 		printk(KERN_ERR "%s: CPUidle register device failed\n",
diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c
index 7be50d4c..ad1012ad 100644
--- a/arch/sh/kernel/cpu/shmobile/cpuidle.c
+++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c
@@ -25,6 +25,7 @@
 };
 
 static int cpuidle_sleep_enter(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv,
 				int index)
 {
 	unsigned long allowed_mode = arch_hwblk_sleep_mode();
@@ -64,19 +65,19 @@
 void sh_mobile_setup_cpuidle(void)
 {
 	struct cpuidle_device *dev = &cpuidle_dev;
+	struct cpuidle_driver *drv = &cpuidle_driver;
 	struct cpuidle_state *state;
 	int i;
 
-	cpuidle_register_driver(&cpuidle_driver);
 
 	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
-		dev->states[i].name[0] = '\0';
-		dev->states[i].desc[0] = '\0';
+		drv->states[i].name[0] = '\0';
+		drv->states[i].desc[0] = '\0';
 	}
 
 	i = CPUIDLE_DRIVER_STATE_START;
 
-	state = &dev->states[i++];
+	state = &drv->states[i++];
 	snprintf(state->name, CPUIDLE_NAME_LEN, "C1");
 	strncpy(state->desc, "SuperH Sleep Mode", CPUIDLE_DESC_LEN);
 	state->exit_latency = 1;
@@ -86,10 +87,10 @@
 	state->flags |= CPUIDLE_FLAG_TIME_VALID;
 	state->enter = cpuidle_sleep_enter;
 
-	dev->safe_state_index = i-1;
+	drv->safe_state_index = i-1;
 
 	if (sh_mobile_sleep_supported & SUSP_SH_SF) {
-		state = &dev->states[i++];
+		state = &drv->states[i++];
 		snprintf(state->name, CPUIDLE_NAME_LEN, "C2");
 		strncpy(state->desc, "SuperH Sleep Mode [SF]",
 			CPUIDLE_DESC_LEN);
@@ -102,7 +103,7 @@
 	}
 
 	if (sh_mobile_sleep_supported & SUSP_SH_STANDBY) {
-		state = &dev->states[i++];
+		state = &drv->states[i++];
 		snprintf(state->name, CPUIDLE_NAME_LEN, "C3");
 		strncpy(state->desc, "SuperH Mobile Standby Mode [SF]",
 			CPUIDLE_DESC_LEN);
@@ -114,7 +115,10 @@
 		state->enter = cpuidle_sleep_enter;
 	}
 
+	drv->state_count = i;
 	dev->state_count = i;
 
+	cpuidle_register_driver(&cpuidle_driver);
+
 	cpuidle_register_device(dev);
 }