intel_pstate: Update frequencies of policy->cpus only from ->set_policy()
The intel_pstate driver calls intel_pstate_hwp_set() from two separate
paths: the ->set_policy() callback and the sysfs update path for the
files in the /sys/devices/system/cpu/intel_pstate/ directory.
While an update to the sysfs path applies to all the CPUs being managed
by the driver (which essentially means all the online CPUs), the update
via the ->set_policy() callback applies to a smaller group of CPUs
managed by the policy for which ->set_policy() is called.
Therefore, intel_pstate_hwp_set() should update the frequencies of only
the CPUs in the policy->cpus mask when it is called from the
->set_policy() callback.
In order to do that, add a parameter (cpumask) to intel_pstate_hwp_set()
and apply the frequency changes only to the concerned CPUs.
For the ->set_policy() path, we are only concerned with policy->cpus, so
the policy->rwsem lock taken by the core prior to calling ->set_policy()
is enough to take care of any races. The larger lock acquired by
get_online_cpus() is required only for updates made via the sysfs files.
Add another routine, intel_pstate_hwp_set_online_cpus(), and call it
from the sysfs update paths.
This also fixes a recently reported lockdep warning, where policy->rwsem
and the lock taken by get_online_cpus() could have been acquired in
either order, causing an ABBA deadlock. The sequence of events leading
to that was:
intel_pstate_init(...)
    ...cpufreq_online(...)
        down_write(&policy->rwsem); // Locks policy->rwsem
        ...
        cpufreq_init_policy(policy);
            ...intel_pstate_hwp_set();
                get_online_cpus(); // Temporarily locks cpu_hotplug.lock
        ...
        up_write(&policy->rwsem);

pm_suspend(...)
    ...disable_nonboot_cpus()
        _cpu_down()
            cpu_hotplug_begin(); // Locks cpu_hotplug.lock
            __cpu_notify(CPU_DOWN_PREPARE, ...);
                ...cpufreq_offline_prepare();
                    down_write(&policy->rwsem); // Locks policy->rwsem
Reported-and-tested-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cd83d47..e856776 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -286,7 +286,7 @@
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
-static void intel_pstate_hwp_set(void)
+static void intel_pstate_hwp_set(const struct cpumask *cpumask)
{
int min, hw_min, max, hw_max, cpu, range, adj_range;
u64 value, cap;
@@ -296,9 +296,7 @@
hw_max = HWP_HIGHEST_PERF(cap);
range = hw_max - hw_min;
- get_online_cpus();
-
- for_each_online_cpu(cpu) {
+ for_each_cpu(cpu, cpumask) {
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
adj_range = limits->min_perf_pct * range / 100;
min = hw_min + adj_range;
@@ -317,7 +315,12 @@
value |= HWP_MAX_PERF(max);
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
+}
+static void intel_pstate_hwp_set_online_cpus(void)
+{
+ get_online_cpus();
+ intel_pstate_hwp_set(cpu_online_mask);
put_online_cpus();
}
@@ -439,7 +442,7 @@
limits->no_turbo = clamp_t(int, input, 0, 1);
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set_online_cpus();
return count;
}
@@ -465,7 +468,7 @@
int_tofp(100));
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set_online_cpus();
return count;
}
@@ -490,7 +493,7 @@
int_tofp(100));
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set_online_cpus();
return count;
}
@@ -1141,7 +1144,7 @@
pr_debug("intel_pstate: set performance\n");
limits = &performance_limits;
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set(policy->cpus);
return 0;
}
@@ -1173,7 +1176,7 @@
int_tofp(100));
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set(policy->cpus);
return 0;
}