cpuidle: Move dev->last_residency update to driver enter routine; remove dev->last_state

Cpuidle governor only suggests the state to enter using the
governor->select() interface, but allows the low level driver to
override the recommended state. The actual entered state
may be different because of software or hardware demotion. Software
demotion is done by the back-end cpuidle driver and can be accounted
correctly. Current cpuidle code uses last_state field to capture the
actual state entered and based on that updates the statistics for the
state entered.

Ideally the driver enter routine should update the counters,
and it should return the state actually entered rather than the time
spent there. The generic cpuidle code should simply handle where
the counters live in the sysfs namespace, not updating the counters.

Reference:
https://lkml.org/lkml/2011/3/25/52

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Signed-off-by: Trinabh Gupta <g.trinabh@gmail.com>
Tested-by: Jean Pihet <j-pihet@ti.com>
Reviewed-by: Kevin Hilman <khilman@ti.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/arch/arm/mach-at91/cpuidle.c b/arch/arm/mach-at91/cpuidle.c
index 1cfeac1..4696a0d 100644
--- a/arch/arm/mach-at91/cpuidle.c
+++ b/arch/arm/mach-at91/cpuidle.c
@@ -33,7 +33,7 @@
 
 /* Actual code that puts the SoC in different idle states */
 static int at91_enter_idle(struct cpuidle_device *dev,
-			       struct cpuidle_state *state)
+			       int index)
 {
 	struct timeval before, after;
 	int idle_time;
@@ -41,10 +41,10 @@
 
 	local_irq_disable();
 	do_gettimeofday(&before);
-	if (state == &dev->states[0])
+	if (index == 0)
 		/* Wait for interrupt state */
 		cpu_do_idle();
-	else if (state == &dev->states[1]) {
+	else if (index == 1) {
 		asm("b 1f; .align 5; 1:");
 		asm("mcr p15, 0, r0, c7, c10, 4");	/* drain write buffer */
 		saved_lpr = sdram_selfrefresh_enable();
@@ -55,7 +55,9 @@
 	local_irq_enable();
 	idle_time = (after.tv_sec - before.tv_sec) * USEC_PER_SEC +
 			(after.tv_usec - before.tv_usec);
-	return idle_time;
+
+	dev->last_residency = idle_time;
+	return index;
 }
 
 /* Initialize CPU idle by registering the idle states */
diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c
index bd59f31..ca8582a 100644
--- a/arch/arm/mach-davinci/cpuidle.c
+++ b/arch/arm/mach-davinci/cpuidle.c
@@ -78,9 +78,9 @@
 
 /* Actual code that puts the SoC in different idle states */
 static int davinci_enter_idle(struct cpuidle_device *dev,
-						struct cpuidle_state *state)
+						int index)
 {
-	struct davinci_ops *ops = cpuidle_get_statedata(state);
+	struct davinci_ops *ops = cpuidle_get_statedata(&dev->states[index]);
 	struct timeval before, after;
 	int idle_time;
 
@@ -98,7 +98,10 @@
 	local_irq_enable();
 	idle_time = (after.tv_sec - before.tv_sec) * USEC_PER_SEC +
 			(after.tv_usec - before.tv_usec);
-	return idle_time;
+
+	dev->last_residency = idle_time;
+
+	return index;
 }
 
 static int __init davinci_cpuidle_probe(struct platform_device *pdev)
diff --git a/arch/arm/mach-exynos4/cpuidle.c b/arch/arm/mach-exynos4/cpuidle.c
index bf7e96f..ea026e7 100644
--- a/arch/arm/mach-exynos4/cpuidle.c
+++ b/arch/arm/mach-exynos4/cpuidle.c
@@ -16,7 +16,7 @@
 #include <asm/proc-fns.h>
 
 static int exynos4_enter_idle(struct cpuidle_device *dev,
-			      struct cpuidle_state *state);
+			      int index);
 
 static struct cpuidle_state exynos4_cpuidle_set[] = {
 	[0] = {
@@ -37,7 +37,7 @@
 };
 
 static int exynos4_enter_idle(struct cpuidle_device *dev,
-			      struct cpuidle_state *state)
+			      int index)
 {
 	struct timeval before, after;
 	int idle_time;
@@ -52,7 +52,8 @@
 	idle_time = (after.tv_sec - before.tv_sec) * USEC_PER_SEC +
 		    (after.tv_usec - before.tv_usec);
 
-	return idle_time;
+	dev->last_residency = idle_time;
+	return index;
 }
 
 static int __init exynos4_init_cpuidle(void)
diff --git a/arch/arm/mach-kirkwood/cpuidle.c b/arch/arm/mach-kirkwood/cpuidle.c
index f68d33f..358dd80 100644
--- a/arch/arm/mach-kirkwood/cpuidle.c
+++ b/arch/arm/mach-kirkwood/cpuidle.c
@@ -32,17 +32,17 @@
 
 /* Actual code that puts the SoC in different idle states */
 static int kirkwood_enter_idle(struct cpuidle_device *dev,
-			       struct cpuidle_state *state)
+			       int index)
 {
 	struct timeval before, after;
 	int idle_time;
 
 	local_irq_disable();
 	do_gettimeofday(&before);
-	if (state == &dev->states[0])
+	if (index == 0)
 		/* Wait for interrupt state */
 		cpu_do_idle();
-	else if (state == &dev->states[1]) {
+	else if (index == 1) {
 		/*
 		 * Following write will put DDR in self refresh.
 		 * Note that we have 256 cycles before DDR puts it
@@ -57,7 +57,11 @@
 	local_irq_enable();
 	idle_time = (after.tv_sec - before.tv_sec) * USEC_PER_SEC +
 			(after.tv_usec - before.tv_usec);
-	return idle_time;
+
+	/* Update last residency */
+	dev->last_residency = idle_time;
+
+	return index;
 }
 
 /* Initialize CPU idle by registering the idle states */
diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c
index 4bf6e6e..58425c7 100644
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -88,17 +88,19 @@
 /**
  * omap3_enter_idle - Programs OMAP3 to enter the specified state
  * @dev: cpuidle device
- * @state: The target state to be programmed
+ * @index: the index of state to be entered
  *
  * Called from the CPUidle framework to program the device to the
  * specified target state selected by the governor.
  */
 static int omap3_enter_idle(struct cpuidle_device *dev,
-			struct cpuidle_state *state)
+				int index)
 {
-	struct omap3_idle_statedata *cx = cpuidle_get_statedata(state);
+	struct omap3_idle_statedata *cx =
+			cpuidle_get_statedata(&dev->states[index]);
 	struct timespec ts_preidle, ts_postidle, ts_idle;
 	u32 mpu_state = cx->mpu_state, core_state = cx->core_state;
+	int idle_time;
 
 	/* Used to keep track of the total time in idle */
 	getnstimeofday(&ts_preidle);
@@ -113,7 +115,7 @@
 		goto return_sleep_time;
 
 	/* Deny idle for C1 */
-	if (state == &dev->states[0]) {
+	if (index == 0) {
 		pwrdm_for_each_clkdm(mpu_pd, _cpuidle_deny_idle);
 		pwrdm_for_each_clkdm(core_pd, _cpuidle_deny_idle);
 	}
@@ -122,7 +124,7 @@
 	omap_sram_idle();
 
 	/* Re-allow idle for C1 */
-	if (state == &dev->states[0]) {
+	if (index == 0) {
 		pwrdm_for_each_clkdm(mpu_pd, _cpuidle_allow_idle);
 		pwrdm_for_each_clkdm(core_pd, _cpuidle_allow_idle);
 	}
@@ -134,28 +136,35 @@
 	local_irq_enable();
 	local_fiq_enable();
 
-	return ts_idle.tv_nsec / NSEC_PER_USEC + ts_idle.tv_sec * USEC_PER_SEC;
+	idle_time = ts_idle.tv_nsec / NSEC_PER_USEC + ts_idle.tv_sec * \
+								USEC_PER_SEC;
+
+	/* Update cpuidle counters */
+	dev->last_residency = idle_time;
+
+	return index;
 }
 
 /**
  * next_valid_state - Find next valid C-state
  * @dev: cpuidle device
- * @state: Currently selected C-state
+ * @index: Index of currently selected c-state
  *
- * If the current state is valid, it is returned back to the caller.
- * Else, this function searches for a lower c-state which is still
- * valid.
+ * If the state corresponding to index is valid, index is returned back
+ * to the caller. Else, this function searches for a lower c-state which is
+ * still valid (as defined in omap3_power_states[]) and returns its index.
  *
  * A state is valid if the 'valid' field is enabled and
  * if it satisfies the enable_off_mode condition.
  */
-static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev,
-					      struct cpuidle_state *curr)
+static int next_valid_state(struct cpuidle_device *dev,
+				int index)
 {
-	struct cpuidle_state *next = NULL;
+	struct cpuidle_state *curr = &dev->states[index];
 	struct omap3_idle_statedata *cx = cpuidle_get_statedata(curr);
 	u32 mpu_deepest_state = PWRDM_POWER_RET;
 	u32 core_deepest_state = PWRDM_POWER_RET;
+	int next_index = -1;
 
 	if (enable_off_mode) {
 		mpu_deepest_state = PWRDM_POWER_OFF;
@@ -172,20 +181,20 @@
 	if ((cx->valid) &&
 	    (cx->mpu_state >= mpu_deepest_state) &&
 	    (cx->core_state >= core_deepest_state)) {
-		return curr;
+		return index;
 	} else {
 		int idx = OMAP3_NUM_STATES - 1;
 
 		/* Reach the current state starting at highest C-state */
 		for (; idx >= 0; idx--) {
 			if (&dev->states[idx] == curr) {
-				next = &dev->states[idx];
+				next_index = idx;
 				break;
 			}
 		}
 
 		/* Should never hit this condition */
-		WARN_ON(next == NULL);
+		WARN_ON(next_index == -1);
 
 		/*
 		 * Drop to next valid state.
@@ -197,37 +206,39 @@
 			if ((cx->valid) &&
 			    (cx->mpu_state >= mpu_deepest_state) &&
 			    (cx->core_state >= core_deepest_state)) {
-				next = &dev->states[idx];
+				next_index = idx;
 				break;
 			}
 		}
 		/*
 		 * C1 is always valid.
-		 * So, no need to check for 'next==NULL' outside this loop.
+		 * So, no need to check for 'next_index == -1' outside
+		 * this loop.
 		 */
 	}
 
-	return next;
+	return next_index;
 }
 
 /**
  * omap3_enter_idle_bm - Checks for any bus activity
  * @dev: cpuidle device
- * @state: The target state to be programmed
+ * @index: array index of target state to be programmed
  *
  * This function checks for any pending activity and then programs
  * the device to the specified or a safer state.
  */
 static int omap3_enter_idle_bm(struct cpuidle_device *dev,
-			       struct cpuidle_state *state)
+			       int index)
 {
-	struct cpuidle_state *new_state;
+	struct cpuidle_state *state = &dev->states[index];
+	int new_state_idx;
 	u32 core_next_state, per_next_state = 0, per_saved_state = 0, cam_state;
 	struct omap3_idle_statedata *cx;
 	int ret;
 
 	if (!omap3_can_sleep()) {
-		new_state = dev->safe_state;
+		new_state_idx = dev->safe_state_index;
 		goto select_state;
 	}
 
@@ -237,7 +248,7 @@
 	 */
 	cam_state = pwrdm_read_pwrst(cam_pd);
 	if (cam_state == PWRDM_POWER_ON) {
-		new_state = dev->safe_state;
+		new_state_idx = dev->safe_state_index;
 		goto select_state;
 	}
 
@@ -264,11 +275,10 @@
 	if (per_next_state != per_saved_state)
 		pwrdm_set_next_pwrst(per_pd, per_next_state);
 
-	new_state = next_valid_state(dev, state);
+	new_state_idx = next_valid_state(dev, index);
 
 select_state:
-	dev->last_state = new_state;
-	ret = omap3_enter_idle(dev, new_state);
+	ret = omap3_enter_idle(dev, new_state_idx);
 
 	/* Restore original PER state if it was modified */
 	if (per_next_state != per_saved_state)
@@ -339,11 +349,12 @@
 
 	cpuidle_register_driver(&omap3_idle_driver);
 	dev = &per_cpu(omap3_idle_dev, smp_processor_id());
+	dev->safe_state_index = -1;
 
 	/* C1 . MPU WFI + Core active */
 	cx = _fill_cstate(dev, 0, "MPU ON + CORE ON");
 	(&dev->states[0])->enter = omap3_enter_idle;
-	dev->safe_state = &dev->states[0];
+	dev->safe_state_index = 0;
 	cx->valid = 1;	/* C1 is always valid */
 	cx->mpu_state = PWRDM_POWER_ON;
 	cx->core_state = PWRDM_POWER_ON;
diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c
index e4469e72..7be50d4c 100644
--- a/arch/sh/kernel/cpu/shmobile/cpuidle.c
+++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c
@@ -25,11 +25,11 @@
 };
 
 static int cpuidle_sleep_enter(struct cpuidle_device *dev,
-			       struct cpuidle_state *state)
+				int index)
 {
 	unsigned long allowed_mode = arch_hwblk_sleep_mode();
 	ktime_t before, after;
-	int requested_state = state - &dev->states[0];
+	int requested_state = index;
 	int allowed_state;
 	int k;
 
@@ -46,11 +46,13 @@
 	 */
 	k = min_t(int, allowed_state, requested_state);
 
-	dev->last_state = &dev->states[k];
 	before = ktime_get();
 	sh_mobile_call_standby(cpuidle_mode[k]);
 	after = ktime_get();
-	return ktime_to_ns(ktime_sub(after, before)) >> 10;
+
+	dev->last_residency = (int)ktime_to_ns(ktime_sub(after, before)) >> 10;
+
+	return k;
 }
 
 static struct cpuidle_device cpuidle_dev;
@@ -84,7 +86,7 @@
 	state->flags |= CPUIDLE_FLAG_TIME_VALID;
 	state->enter = cpuidle_sleep_enter;
 
-	dev->safe_state = state;
+	dev->safe_state_index = i-1;
 
 	if (sh_mobile_sleep_supported & SUSP_SH_SF) {
 		state = &dev->states[i++];