intel_pstate: Add support for HWP

Add support for Hardware Managed Performance States (HWP), described in
Volume 3, section 14.4 of the SDM.

With HWP enabled, intel_pstate will no longer be responsible for selecting
P-states for the processor. intel_pstate will continue to register with
the cpufreq core as the scaling driver for CPUs implementing HWP. In HWP
mode intel_pstate provides three functions: reporting frequency to the
cpufreq core, supporting the set_policy() interface from the core, and
maintaining the intel_pstate sysfs interface in
/sys/devices/system/cpu/intel_pstate.  User preferences expressed via
the set_policy() interface or the sysfs interface are forwarded to the
CPU via the HWP MSR interface.

Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 27bb6d3..ba35db0 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -137,6 +137,7 @@
 
 static struct pstate_adjust_policy pid_params;
 static struct pstate_funcs pstate_funcs;
+static int hwp_active;	/* non-zero once HWP has been enabled */
 
 struct perf_limits {
 	int no_turbo;
@@ -244,6 +245,34 @@
 		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
 
+#define PCT_TO_HWP(x) ((x) * 255 / 100)
+/* Write the current perf limits to MSR_HWP_REQUEST on every online CPU. */
+static void intel_pstate_hwp_set(void)
+{
+	int min, max, cpu;
+	u64 value, cap;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
+		min = PCT_TO_HWP(limits.min_perf_pct);
+		value &= ~HWP_MIN_PERF(~0L);
+		value |= HWP_MIN_PERF(min);
+
+		max = PCT_TO_HWP(limits.max_perf_pct);
+		if (limits.no_turbo) {
+			/* Query the CPU being programmed, not the one running us. */
+			rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+			max = HWP_GUARANTEED_PERF(cap);
+		}
+
+		value &= ~HWP_MAX_PERF(~0L);
+		value |= HWP_MAX_PERF(max);
+		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
+	}
+	put_online_cpus();
+}
+
 /************************** debugfs begin ************************/
 static int pid_param_set(void *data, u64 val)
 {
@@ -279,6 +308,8 @@
 	struct dentry *debugfs_parent;
 	int i = 0;
 
+	if (hwp_active)
+		return;
 	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
 	if (IS_ERR_OR_NULL(debugfs_parent))
 		return;
@@ -329,8 +360,12 @@
 		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
 		return -EPERM;
 	}
+
 	limits.no_turbo = clamp_t(int, input, 0, 1);
 
+	if (hwp_active)
+		intel_pstate_hwp_set();
+
 	return count;
 }
 
@@ -348,6 +383,8 @@
 	limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
 	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
 
+	if (hwp_active)
+		intel_pstate_hwp_set();
 	return count;
 }
 
@@ -363,6 +400,8 @@
 	limits.min_perf_pct = clamp_t(int, input, 0 , 100);
 	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
 
+	if (hwp_active)
+		intel_pstate_hwp_set();
 	return count;
 }
 
@@ -395,8 +434,16 @@
 	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
 	BUG_ON(rc);
 }
-
 /************************** sysfs end ************************/
+
+static void intel_pstate_hwp_enable(void)
+{
+	/* Flag HWP mode as active, log it, and write 1 to MSR_PM_ENABLE. */
+	hwp_active += 1;
+	pr_info("intel_pstate HWP enabled\n");
+	wrmsrl(MSR_PM_ENABLE, 0x1);	/* NOTE(review): current CPU only */
+}
+
 static int byt_get_min_pstate(void)
 {
 	u64 value;
@@ -648,6 +695,14 @@
 	cpu->prev_mperf = mperf;
 }
 
+/* Re-arm this CPU's pinned timer to expire 50 ms from now. */
+static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
+{
+	int ticks = msecs_to_jiffies(50);
+
+	mod_timer_pinned(&cpu->timer, jiffies + ticks);
+}
+
 static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
 {
 	int delay;
@@ -694,6 +749,14 @@
 	intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl);
 }
 
+/* HWP-mode timer callback: call intel_pstate_sample(), then re-arm. */
+static void intel_hwp_timer_func(unsigned long __data)
+{
+	/* __data carries the struct cpudata pointer stored in timer.data. */
+	intel_pstate_sample((struct cpudata *)__data);
+	intel_hwp_set_sample_time((struct cpudata *)__data);
+}
+
 static void intel_pstate_timer_func(unsigned long __data)
 {
 	struct cpudata *cpu = (struct cpudata *) __data;
@@ -737,6 +800,11 @@
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
 
+static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
+	ICPU(0x56, core_params),	/* gated on MSR_MISC_PWR_MGMT bit 8 */
+	{}				/* empty entry ends the table */
+};
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
@@ -753,9 +821,14 @@
 	intel_pstate_get_cpu_pstates(cpu);
 
 	init_timer_deferrable(&cpu->timer);
-	cpu->timer.function = intel_pstate_timer_func;
 	cpu->timer.data = (unsigned long)cpu;
 	cpu->timer.expires = jiffies + HZ/100;
+
+	if (!hwp_active)
+		cpu->timer.function = intel_pstate_timer_func;
+	else
+		cpu->timer.function = intel_hwp_timer_func;
+
 	intel_pstate_busy_pid_reset(cpu);
 	intel_pstate_sample(cpu);
 
@@ -792,6 +865,7 @@
 		limits.no_turbo = 0;
 		return 0;
 	}
+
 	limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
 	limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100);
 	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
@@ -801,6 +875,9 @@
 	limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
 	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
 
+	if (hwp_active)
+		intel_pstate_hwp_set();
+
 	return 0;
 }
 
@@ -823,6 +900,9 @@
 	pr_info("intel_pstate CPU %d exiting\n", cpu_num);
 
 	del_timer_sync(&all_cpu_data[cpu_num]->timer);
+	if (hwp_active)
+		return;
+
 	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
 }
 
@@ -866,6 +946,7 @@
 };
 
 static int __initdata no_load;
+static int __initdata no_hwp;	/* set by "intel_pstate=no_hwp" */
 
 static int intel_pstate_msrs_not_valid(void)
 {
@@ -959,6 +1040,15 @@
 {
 	struct acpi_table_header hdr;
 	struct hw_vendor_info *v_info;
+	const struct x86_cpu_id *id;
+	u64 misc_pwr;
+
+	id = x86_match_cpu(intel_pstate_cpu_oob_ids);
+	if (id) {
+		rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
+		if ( misc_pwr & (1 << 8))
+			return true;
+	}
 
 	if (acpi_disabled ||
 	    ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
@@ -982,6 +1072,7 @@
 	int cpu, rc = 0;
 	const struct x86_cpu_id *id;
 	struct cpu_defaults *cpu_info;
+	struct cpuinfo_x86 *c = &boot_cpu_data;
 
 	if (no_load)
 		return -ENODEV;
@@ -1011,6 +1102,9 @@
 	if (!all_cpu_data)
 		return -ENOMEM;
 
+	if (cpu_has(c,X86_FEATURE_HWP) && !no_hwp)
+		intel_pstate_hwp_enable();
+
 	rc = cpufreq_register_driver(&intel_pstate_driver);
 	if (rc)
 		goto out;
@@ -1041,6 +1135,8 @@
 
 	if (!strcmp(str, "disable"))
 		no_load = 1;
+	if (!strcmp(str, "no_hwp"))
+		no_hwp = 1;
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);