cpuidle: Single/Global registration of idle states

This patch makes the cpuidle_states structure global (single copy)
instead of per-cpu. The statistics needed on per-cpu basis
by the governor are kept per-cpu. This simplifies the cpuidle
subsystem as state registration is done by single cpu only.
Having single copy of cpuidle_states saves memory. Rare case
of asymmetric C-states can be handled within the cpuidle driver
and architectures such as POWER do not have asymmetric C-states.

Having single/global registration of all the idle states,
dynamic C-state transitions on x86 are handled by
the boot cpu. Here, the boot cpu  would disable all the devices,
re-populate the states and later enable all the devices,
irrespective of the cpu that would receive the notification first.

Reference:
https://lkml.org/lkml/2011/4/25/83

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Signed-off-by: Trinabh Gupta <g.trinabh@gmail.com>
Tested-by: Jean Pihet <j-pihet@ti.com>
Reviewed-by: Kevin Hilman <khilman@ti.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 3aa8d4c..5be9d59 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -81,7 +81,8 @@
 static unsigned int lapic_timer_reliable_states = (1 << 1);	 /* Default to only C1 */
 
 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
-static int intel_idle(struct cpuidle_device *dev, int index);
+static int intel_idle(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv, int index);
 
 static struct cpuidle_state *cpuidle_state_table;
 
@@ -227,13 +228,15 @@
 /**
  * intel_idle
  * @dev: cpuidle_device
+ * @drv: cpuidle driver
  * @index: index of cpuidle state
  *
  */
-static int intel_idle(struct cpuidle_device *dev, int index)
+static int intel_idle(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
 {
 	unsigned long ecx = 1; /* break on interrupt flag */
-	struct cpuidle_state *state = &dev->states[index];
+	struct cpuidle_state *state = &drv->states[index];
 	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
 	unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage);
 	unsigned int cstate;
@@ -420,6 +423,60 @@
 	return;
 }
 /*
+ * intel_idle_cpuidle_driver_init()
+ * allocate, initialize cpuidle_states
+ */
+static int intel_idle_cpuidle_driver_init(void)
+{
+	int cstate;
+	struct cpuidle_driver *drv = &intel_idle_driver;
+
+	drv->state_count = 1;
+
+	for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
+		int num_substates;
+
+		if (cstate > max_cstate) {
+			printk(PREFIX "max_cstate %d reached\n",
+				max_cstate);
+			break;
+		}
+
+		/* does the state exist in CPUID.MWAIT? */
+		num_substates = (mwait_substates >> ((cstate) * 4))
+					& MWAIT_SUBSTATE_MASK;
+		if (num_substates == 0)
+			continue;
+		/* is the state not enabled? */
+		if (cpuidle_state_table[cstate].enter == NULL) {
+			/* does the driver not know about the state? */
+			if (*cpuidle_state_table[cstate].name == '\0')
+				pr_debug(PREFIX "unaware of model 0x%x"
+					" MWAIT %d please"
+					" contact lenb@kernel.org",
+				boot_cpu_data.x86_model, cstate);
+			continue;
+		}
+
+		if ((cstate > 2) &&
+			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+			mark_tsc_unstable("TSC halts in idle"
+					" states deeper than C2");
+
+		drv->states[drv->state_count] =	/* structure copy */
+			cpuidle_state_table[cstate];
+
+		drv->state_count += 1;
+	}
+
+	if (auto_demotion_disable_flags)
+		smp_call_function(auto_demotion_disable, NULL, 1);
+
+	return 0;
+}
+
+
+/*
  * intel_idle_cpuidle_devices_init()
  * allocate, initialize, register cpuidle_devices
  */
@@ -453,23 +510,9 @@
 				continue;
 			/* is the state not enabled? */
 			if (cpuidle_state_table[cstate].enter == NULL) {
-				/* does the driver not know about the state? */
-				if (*cpuidle_state_table[cstate].name == '\0')
-					pr_debug(PREFIX "unaware of model 0x%x"
-						" MWAIT %d please"
-						" contact lenb@kernel.org",
-					boot_cpu_data.x86_model, cstate);
 				continue;
 			}
 
-			if ((cstate > 2) &&
-				!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
-				mark_tsc_unstable("TSC halts in idle"
-					" states deeper than C2");
-
-			dev->states[dev->state_count] =	/* structure copy */
-				cpuidle_state_table[cstate];
-
 			dev->states_usage[dev->state_count].driver_data =
 				(void *)get_driver_data(cstate);
 
@@ -484,8 +527,6 @@
 			return -EIO;
 		}
 	}
-	if (auto_demotion_disable_flags)
-		smp_call_function(auto_demotion_disable, NULL, 1);
 
 	return 0;
 }
@@ -503,6 +544,7 @@
 	if (retval)
 		return retval;
 
+	intel_idle_cpuidle_driver_init();
 	retval = cpuidle_register_driver(&intel_idle_driver);
 	if (retval) {
 		printk(KERN_DEBUG PREFIX "intel_idle yielding to %s",