blob: 45ab9d1fe011514a873c0be369517f583a28c108 [file] [log] [blame]
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301/*
2 * linux/drivers/thermal/cpu_cooling.c
3 *
4 * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
5 * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
6 *
Viresh Kumar73904cb2014-12-04 09:42:08 +05307 * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org>
8 *
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05309 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2 of the License.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
22 *
23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 */
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053025#include <linux/module.h>
26#include <linux/thermal.h>
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053027#include <linux/cpufreq.h>
28#include <linux/err.h>
Javi Merinoc36cf072015-02-26 19:00:29 +000029#include <linux/pm_opp.h>
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053030#include <linux/slab.h>
31#include <linux/cpu.h>
32#include <linux/cpu_cooling.h>
Lina Iyer97a13ed2016-07-15 14:53:58 -060033#include <linux/sched.h>
Lina Iyer986fde12016-02-23 13:08:31 -070034#include <linux/of_device.h>
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053035
Javi Merino6828a472015-03-02 17:17:20 +000036#include <trace/events/thermal.h>
37
Viresh Kumar07d888d2014-12-04 09:41:49 +053038/*
39 * Cooling state <-> CPUFreq frequency
40 *
41 * Cooling states are translated to frequencies throughout this driver and this
42 * is the relation between them.
43 *
44 * Highest cooling state corresponds to lowest possible frequency.
45 *
46 * i.e.
47 * level 0 --> 1st Max Freq
48 * level 1 --> 2nd Max Freq
49 * ...
 * level n --> core isolated
Viresh Kumar07d888d2014-12-04 09:41:49 +053051 */
52
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053053/**
Javi Merinoc36cf072015-02-26 19:00:29 +000054 * struct power_table - frequency to power conversion
55 * @frequency: frequency in KHz
56 * @power: power in mW
57 *
58 * This structure is built when the cooling device registers and helps
59 * in translating frequency to power and viceversa.
60 */
61struct power_table {
62 u32 frequency;
63 u32 power;
64};
65
66/**
Eduardo Valentin3b3c0742013-04-17 17:11:56 +000067 * struct cpufreq_cooling_device - data for cooling device with cpufreq
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053068 * @id: unique integer value corresponding to each cpufreq_cooling_device
69 * registered.
Eduardo Valentin3b3c0742013-04-17 17:11:56 +000070 * @cool_dev: thermal_cooling_device pointer to keep track of the
71 * registered cooling device.
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053072 * @cpufreq_state: integer value representing the current state of cpufreq
73 * cooling devices.
Viresh Kumar59f0d212015-07-30 12:40:33 +053074 * @clipped_freq: integer value representing the absolute value of the clipped
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053075 * frequency.
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -060076 * @cpufreq_floor_state: integer value representing the frequency floor state
77 * of cpufreq cooling devices.
78 * @floor_freq: integer value representing the absolute value of the floor
79 * frequency.
Lina Iyer97a13ed2016-07-15 14:53:58 -060080 * @max_level: maximum cooling level. [0..max_level-1: <freq>
81 * max_level: Core unavailable]
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053082 * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
Javi Merinofc4de352014-12-15 16:55:52 +000083 * @node: list_head to link all cpufreq_cooling_device together.
Hugh Kang0744f132016-09-07 09:35:39 +090084 * @last_load: load measured by the latest call to cpufreq_get_requested_power()
Javi Merinoc36cf072015-02-26 19:00:29 +000085 * @time_in_idle: previous reading of the absolute time that this cpu was idle
86 * @time_in_idle_timestamp: wall time of the last invocation of
87 * get_cpu_idle_time_us()
88 * @dyn_power_table: array of struct power_table for frequency to power
89 * conversion, sorted in ascending order.
90 * @dyn_power_table_entries: number of entries in the @dyn_power_table array
91 * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
92 * @plat_get_static_power: callback to calculate the static power
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053093 *
Viresh Kumarbeca6052014-12-04 09:41:48 +053094 * This structure is required for keeping information of each registered
95 * cpufreq_cooling_device.
Amit Daniel Kachhap02361412012-08-16 17:11:40 +053096 */
97struct cpufreq_cooling_device {
98 int id;
99 struct thermal_cooling_device *cool_dev;
100 unsigned int cpufreq_state;
Viresh Kumar59f0d212015-07-30 12:40:33 +0530101 unsigned int clipped_freq;
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600102 unsigned int cpufreq_floor_state;
103 unsigned int floor_freq;
Viresh Kumardcc6c7f2014-12-04 09:42:02 +0530104 unsigned int max_level;
Viresh Kumarf6859012014-12-04 09:42:06 +0530105 unsigned int *freq_table; /* In descending order */
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530106 struct cpumask allowed_cpus;
Yadwinder Singh Brar2dcd8512014-11-07 19:12:29 +0530107 struct list_head node;
Javi Merinoc36cf072015-02-26 19:00:29 +0000108 u32 last_load;
109 u64 *time_in_idle;
110 u64 *time_in_idle_timestamp;
111 struct power_table *dyn_power_table;
112 int dyn_power_table_entries;
113 struct device *cpu_dev;
114 get_static_t plat_get_static_power;
Lina Iyer986fde12016-02-23 13:08:31 -0700115 struct cpu_cooling_ops *plat_ops;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530116};
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530117static DEFINE_IDR(cpufreq_idr);
hongbo.zhang160b7d82012-10-30 17:48:59 +0100118static DEFINE_MUTEX(cooling_cpufreq_lock);
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530119
Russell King02373d72015-08-12 15:22:16 +0530120static unsigned int cpufreq_dev_count;
Ram Chandrasekar30ca8042017-06-13 16:05:47 -0600121static int8_t cpuhp_registered;
122static struct work_struct cpuhp_register_work;
123static struct cpumask cpus_pending_online;
124static DEFINE_MUTEX(core_isolate_lock);
Russell King02373d72015-08-12 15:22:16 +0530125
126static DEFINE_MUTEX(cooling_list_lock);
Yadwinder Singh Brar2dcd8512014-11-07 19:12:29 +0530127static LIST_HEAD(cpufreq_dev_list);
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530128
129/**
130 * get_idr - function to get a unique id.
131 * @idr: struct idr * handle used to create a id.
132 * @id: int * value generated by this function.
Eduardo Valentin79491e52013-04-17 17:11:59 +0000133 *
134 * This function will populate @id with an unique
135 * id, using the idr API.
136 *
137 * Return: 0 on success, an error code on failure.
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530138 */
139static int get_idr(struct idr *idr, int *id)
140{
Tejun Heo6deb69f2013-02-27 17:04:46 -0800141 int ret;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530142
143 mutex_lock(&cooling_cpufreq_lock);
Tejun Heo6deb69f2013-02-27 17:04:46 -0800144 ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530145 mutex_unlock(&cooling_cpufreq_lock);
Tejun Heo6deb69f2013-02-27 17:04:46 -0800146 if (unlikely(ret < 0))
147 return ret;
148 *id = ret;
Eduardo Valentin79491e52013-04-17 17:11:59 +0000149
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530150 return 0;
151}
152
153/**
154 * release_idr - function to free the unique id.
155 * @idr: struct idr * handle used for creating the id.
156 * @id: int value representing the unique id.
157 */
158static void release_idr(struct idr *idr, int id)
159{
160 mutex_lock(&cooling_cpufreq_lock);
161 idr_remove(idr, id);
162 mutex_unlock(&cooling_cpufreq_lock);
163}
164
165/* Below code defines functions to be used for cpufreq as cooling device */
166
167/**
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530168 * get_level: Find the level for a particular frequency
Viresh Kumarb9f8b412014-12-04 09:42:05 +0530169 * @cpufreq_dev: cpufreq_dev for which the property is required
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530170 * @freq: Frequency
Eduardo Valentin82b9ee42013-04-17 17:12:00 +0000171 *
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530172 * Return: level on success, THERMAL_CSTATE_INVALID on error.
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530173 */
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530174static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
175 unsigned int freq)
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530176{
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530177 unsigned long level;
Eduardo Valentin79491e52013-04-17 17:11:59 +0000178
Lina Iyer97a13ed2016-07-15 14:53:58 -0600179 for (level = 0; level < cpufreq_dev->max_level; level++) {
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530180 if (freq == cpufreq_dev->freq_table[level])
181 return level;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530182
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530183 if (freq > cpufreq_dev->freq_table[level])
184 break;
Zhang Ruifc35b352013-02-08 13:09:32 +0800185 }
Zhang Ruia1167762014-01-02 11:57:48 +0800186
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530187 return THERMAL_CSTATE_INVALID;
Zhang Ruifc35b352013-02-08 13:09:32 +0800188}
189
Eduardo Valentin44952d32013-04-17 17:12:05 +0000190/**
Viresh Kumar728c03c2014-12-04 09:41:47 +0530191 * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
Eduardo Valentin44952d32013-04-17 17:12:05 +0000192 * @cpu: cpu for which the level is required
193 * @freq: the frequency of interest
194 *
195 * This function will match the cooling level corresponding to the
196 * requested @freq and return it.
197 *
198 * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
199 * otherwise.
200 */
Zhang Rui57df8102013-02-08 14:52:06 +0800201unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
202{
Viresh Kumarb9f8b412014-12-04 09:42:05 +0530203 struct cpufreq_cooling_device *cpufreq_dev;
Zhang Rui57df8102013-02-08 14:52:06 +0800204
Russell King02373d72015-08-12 15:22:16 +0530205 mutex_lock(&cooling_list_lock);
Viresh Kumarb9f8b412014-12-04 09:42:05 +0530206 list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
207 if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
Viresh Kumar7cd7b562017-04-25 15:57:08 +0530208 unsigned long level = get_level(cpufreq_dev, freq);
209
Russell King02373d72015-08-12 15:22:16 +0530210 mutex_unlock(&cooling_list_lock);
Viresh Kumar7cd7b562017-04-25 15:57:08 +0530211 return level;
Viresh Kumarb9f8b412014-12-04 09:42:05 +0530212 }
213 }
Russell King02373d72015-08-12 15:22:16 +0530214 mutex_unlock(&cooling_list_lock);
Eduardo Valentin79491e52013-04-17 17:11:59 +0000215
Viresh Kumarb9f8b412014-12-04 09:42:05 +0530216 pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
217 return THERMAL_CSTATE_INVALID;
Zhang Rui57df8102013-02-08 14:52:06 +0800218}
Eduardo Valentin243dbd92013-04-17 17:11:57 +0000219EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
Zhang Rui57df8102013-02-08 14:52:06 +0800220
Ram Chandrasekar30ca8042017-06-13 16:05:47 -0600221static int cpufreq_hp_offline(unsigned int offline_cpu)
222{
223 struct cpufreq_cooling_device *cpufreq_dev;
224
225 mutex_lock(&cooling_list_lock);
226 list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
227 if (!cpumask_test_cpu(offline_cpu, &cpufreq_dev->allowed_cpus))
228 continue;
229
230 mutex_lock(&core_isolate_lock);
231 if (cpufreq_dev->cpufreq_state == cpufreq_dev->max_level)
232 sched_unisolate_cpu_unlocked(offline_cpu);
233 mutex_unlock(&core_isolate_lock);
234 break;
235 }
236 mutex_unlock(&cooling_list_lock);
237
238 return 0;
239}
240
241static int cpufreq_hp_online(unsigned int online_cpu)
242{
243 struct cpufreq_cooling_device *cpufreq_dev;
244 int ret = 0;
245
246 mutex_lock(&cooling_list_lock);
247 list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
248 if (!cpumask_test_cpu(online_cpu, &cpufreq_dev->allowed_cpus))
249 continue;
250
251 mutex_lock(&core_isolate_lock);
252 if (cpufreq_dev->cpufreq_state == cpufreq_dev->max_level) {
253 cpumask_set_cpu(online_cpu, &cpus_pending_online);
254 ret = NOTIFY_BAD;
255 }
256 mutex_unlock(&core_isolate_lock);
257 break;
258 }
259 mutex_unlock(&cooling_list_lock);
260
261 return ret;
262}
263
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530264/**
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530265 * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
266 * @nb: struct notifier_block * with callback info.
267 * @event: value showing cpufreq event for which this function invoked.
268 * @data: callback-specific data
Eduardo Valentinbab30552013-04-17 17:12:09 +0000269 *
Javi Merino9746b6e2014-06-25 18:11:17 +0100270 * Callback to hijack the notification on cpufreq policy transition.
Eduardo Valentinbab30552013-04-17 17:12:09 +0000271 * Every time there is a change in policy, we will intercept and
272 * update the cpufreq policy with thermal constraints.
273 *
274 * Return: 0 (success)
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530275 */
276static int cpufreq_thermal_notifier(struct notifier_block *nb,
Eduardo Valentin5fda7f62013-04-17 17:12:11 +0000277 unsigned long event, void *data)
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530278{
279 struct cpufreq_policy *policy = data;
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600280 unsigned long clipped_freq, floor_freq;
Yadwinder Singh Brar2dcd8512014-11-07 19:12:29 +0530281 struct cpufreq_cooling_device *cpufreq_dev;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530282
Viresh Kumara24af232015-07-30 12:40:32 +0530283 if (event != CPUFREQ_ADJUST)
Javi Merinoc36cf072015-02-26 19:00:29 +0000284 return NOTIFY_DONE;
Viresh Kumara24af232015-07-30 12:40:32 +0530285
286 mutex_lock(&cooling_list_lock);
287 list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
288 if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus))
289 continue;
290
Viresh Kumar1afb9c52015-07-30 12:40:35 +0530291 /*
292 * policy->max is the maximum allowed frequency defined by user
293 * and clipped_freq is the maximum that thermal constraints
294 * allow.
295 *
296 * If clipped_freq is lower than policy->max, then we need to
297 * readjust policy->max.
298 *
299 * But, if clipped_freq is greater than policy->max, we don't
300 * need to do anything.
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600301 *
302 * Similarly, if policy minimum set by the user is less than
303 * the floor_frequency, then adjust the policy->min.
Viresh Kumar1afb9c52015-07-30 12:40:35 +0530304 */
Viresh Kumarabcbcc22015-07-30 12:40:34 +0530305 clipped_freq = cpufreq_dev->clipped_freq;
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600306 floor_freq = cpufreq_dev->floor_freq;
Viresh Kumara24af232015-07-30 12:40:32 +0530307
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600308 if (policy->max > clipped_freq || policy->min < floor_freq)
309 cpufreq_verify_within_limits(policy, floor_freq,
310 clipped_freq);
Viresh Kumara24af232015-07-30 12:40:32 +0530311 break;
Yadwinder Singh Brar2dcd8512014-11-07 19:12:29 +0530312 }
Viresh Kumara24af232015-07-30 12:40:32 +0530313 mutex_unlock(&cooling_list_lock);
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530314
Javi Merinoc36cf072015-02-26 19:00:29 +0000315 return NOTIFY_OK;
316}
317
318/**
319 * build_dyn_power_table() - create a dynamic power to frequency table
320 * @cpufreq_device: the cpufreq cooling device in which to store the table
321 * @capacitance: dynamic power coefficient for these cpus
322 *
323 * Build a dynamic power to frequency table for this cpu and store it
324 * in @cpufreq_device. This table will be used in cpu_power_to_freq() and
325 * cpu_freq_to_power() to convert between power and frequency
326 * efficiently. Power is stored in mW, frequency in KHz. The
327 * resulting table is in ascending order.
328 *
Javi Merino459ac372015-08-17 19:21:42 +0100329 * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
330 * -ENOMEM if we run out of memory or -EAGAIN if an OPP was
331 * added/enabled while the function was executing.
Javi Merinoc36cf072015-02-26 19:00:29 +0000332 */
333static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
334 u32 capacitance)
335{
336 struct power_table *power_table;
337 struct dev_pm_opp *opp;
338 struct device *dev = NULL;
Javi Merinoeba4f882015-08-17 19:21:43 +0100339 int num_opps = 0, cpu, i, ret = 0;
Javi Merinoc36cf072015-02-26 19:00:29 +0000340 unsigned long freq;
341
Javi Merinoc36cf072015-02-26 19:00:29 +0000342 for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
343 dev = get_cpu_device(cpu);
344 if (!dev) {
345 dev_warn(&cpufreq_device->cool_dev->device,
346 "No cpu device for cpu %d\n", cpu);
347 continue;
348 }
349
350 num_opps = dev_pm_opp_get_opp_count(dev);
Javi Merino459ac372015-08-17 19:21:42 +0100351 if (num_opps > 0)
Javi Merinoc36cf072015-02-26 19:00:29 +0000352 break;
Javi Merino459ac372015-08-17 19:21:42 +0100353 else if (num_opps < 0)
354 return num_opps;
Javi Merinoc36cf072015-02-26 19:00:29 +0000355 }
356
Javi Merino459ac372015-08-17 19:21:42 +0100357 if (num_opps == 0)
358 return -EINVAL;
Javi Merinoc36cf072015-02-26 19:00:29 +0000359
360 power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
Javi Merino459ac372015-08-17 19:21:42 +0100361 if (!power_table)
362 return -ENOMEM;
363
364 rcu_read_lock();
Javi Merinoc36cf072015-02-26 19:00:29 +0000365
366 for (freq = 0, i = 0;
367 opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
368 freq++, i++) {
369 u32 freq_mhz, voltage_mv;
370 u64 power;
371
Javi Merino459ac372015-08-17 19:21:42 +0100372 if (i >= num_opps) {
373 rcu_read_unlock();
Javi Merinoeba4f882015-08-17 19:21:43 +0100374 ret = -EAGAIN;
375 goto free_power_table;
Javi Merino459ac372015-08-17 19:21:42 +0100376 }
377
Javi Merinoc36cf072015-02-26 19:00:29 +0000378 freq_mhz = freq / 1000000;
379 voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
380
381 /*
382 * Do the multiplication with MHz and millivolt so as
383 * to not overflow.
384 */
385 power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
386 do_div(power, 1000000000);
387
388 /* frequency is stored in power_table in KHz */
389 power_table[i].frequency = freq / 1000;
390
391 /* power is stored in mW */
392 power_table[i].power = power;
393 }
394
Javi Merino459ac372015-08-17 19:21:42 +0100395 rcu_read_unlock();
396
Javi Merinoeba4f882015-08-17 19:21:43 +0100397 if (i != num_opps) {
398 ret = PTR_ERR(opp);
399 goto free_power_table;
400 }
Javi Merinoc36cf072015-02-26 19:00:29 +0000401
402 cpufreq_device->cpu_dev = dev;
403 cpufreq_device->dyn_power_table = power_table;
404 cpufreq_device->dyn_power_table_entries = i;
405
Javi Merino459ac372015-08-17 19:21:42 +0100406 return 0;
Javi Merinoeba4f882015-08-17 19:21:43 +0100407
408free_power_table:
409 kfree(power_table);
410
411 return ret;
Javi Merinoc36cf072015-02-26 19:00:29 +0000412}
413
414static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
415 u32 freq)
416{
417 int i;
418 struct power_table *pt = cpufreq_device->dyn_power_table;
419
420 for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
421 if (freq < pt[i].frequency)
422 break;
423
424 return pt[i - 1].power;
425}
426
427static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
428 u32 power)
429{
430 int i;
431 struct power_table *pt = cpufreq_device->dyn_power_table;
432
433 for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
434 if (power < pt[i].power)
435 break;
436
437 return pt[i - 1].frequency;
438}
439
440/**
441 * get_load() - get load for a cpu since last updated
442 * @cpufreq_device: &struct cpufreq_cooling_device for this cpu
443 * @cpu: cpu number
Javi Merinoa53b8392016-02-11 12:00:51 +0000444 * @cpu_idx: index of the cpu in cpufreq_device->allowed_cpus
Javi Merinoc36cf072015-02-26 19:00:29 +0000445 *
446 * Return: The average load of cpu @cpu in percentage since this
447 * function was last called.
448 */
Javi Merinoa53b8392016-02-11 12:00:51 +0000449static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
450 int cpu_idx)
Javi Merinoc36cf072015-02-26 19:00:29 +0000451{
452 u32 load;
453 u64 now, now_idle, delta_time, delta_idle;
454
455 now_idle = get_cpu_idle_time(cpu, &now, 0);
Javi Merinoa53b8392016-02-11 12:00:51 +0000456 delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx];
457 delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx];
Javi Merinoc36cf072015-02-26 19:00:29 +0000458
459 if (delta_time <= delta_idle)
460 load = 0;
461 else
462 load = div64_u64(100 * (delta_time - delta_idle), delta_time);
463
Javi Merinoa53b8392016-02-11 12:00:51 +0000464 cpufreq_device->time_in_idle[cpu_idx] = now_idle;
465 cpufreq_device->time_in_idle_timestamp[cpu_idx] = now;
Javi Merinoc36cf072015-02-26 19:00:29 +0000466
467 return load;
468}
469
470/**
471 * get_static_power() - calculate the static power consumed by the cpus
472 * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev
473 * @tz: thermal zone device in which we're operating
474 * @freq: frequency in KHz
475 * @power: pointer in which to store the calculated static power
476 *
477 * Calculate the static power consumed by the cpus described by
478 * @cpu_actor running at frequency @freq. This function relies on a
479 * platform specific function that should have been provided when the
480 * actor was registered. If it wasn't, the static power is assumed to
481 * be negligible. The calculated static power is stored in @power.
482 *
483 * Return: 0 on success, -E* on failure.
484 */
485static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
486 struct thermal_zone_device *tz, unsigned long freq,
487 u32 *power)
488{
489 struct dev_pm_opp *opp;
490 unsigned long voltage;
491 struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
492 unsigned long freq_hz = freq * 1000;
493
494 if (!cpufreq_device->plat_get_static_power ||
495 !cpufreq_device->cpu_dev) {
496 *power = 0;
497 return 0;
498 }
499
500 rcu_read_lock();
501
502 opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
503 true);
504 voltage = dev_pm_opp_get_voltage(opp);
505
506 rcu_read_unlock();
507
508 if (voltage == 0) {
509 dev_warn_ratelimited(cpufreq_device->cpu_dev,
510 "Failed to get voltage for frequency %lu: %ld\n",
511 freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
512 return -EINVAL;
513 }
514
515 return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
516 voltage, power);
517}
518
519/**
520 * get_dynamic_power() - calculate the dynamic power
521 * @cpufreq_device: &cpufreq_cooling_device for this cdev
522 * @freq: current frequency
523 *
524 * Return: the dynamic power consumed by the cpus described by
525 * @cpufreq_device.
526 */
527static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
528 unsigned long freq)
529{
530 u32 raw_cpu_power;
531
532 raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
533 return (raw_cpu_power * cpufreq_device->last_load) / 100;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530534}
535
Eduardo Valentin1b9e3522013-04-17 17:12:02 +0000536/* cpufreq cooling device callback functions are defined below */
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530537
538/**
539 * cpufreq_get_max_state - callback function to get the max cooling state.
540 * @cdev: thermal cooling device pointer.
541 * @state: fill this variable with the max cooling state.
Eduardo Valentin62c00422013-04-17 17:12:12 +0000542 *
543 * Callback for the thermal cooling device to return the cpufreq
544 * max cooling state.
545 *
546 * Return: 0 on success, an error code otherwise.
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530547 */
548static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
549 unsigned long *state)
550{
hongbo.zhang160b7d82012-10-30 17:48:59 +0100551 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530552
Viresh Kumardcc6c7f2014-12-04 09:42:02 +0530553 *state = cpufreq_device->max_level;
554 return 0;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530555}
556
557/**
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600558 * cpufreq_get_min_state - callback function to get the device floor state.
559 * @cdev: thermal cooling device pointer.
560 * @state: fill this variable with the cooling device floor.
561 *
562 * Callback for the thermal cooling device to return the cpufreq
563 * floor state.
564 *
565 * Return: 0 on success, an error code otherwise.
566 */
567static int cpufreq_get_min_state(struct thermal_cooling_device *cdev,
568 unsigned long *state)
569{
570 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
571
572 *state = cpufreq_device->cpufreq_floor_state;
573
574 return 0;
575}
576
577/**
578 * cpufreq_set_min_state - callback function to set the device floor state.
579 * @cdev: thermal cooling device pointer.
580 * @state: set this variable to the current cooling state.
581 *
582 * Callback for the thermal cooling device to change the cpufreq
583 * floor state.
584 *
585 * Return: 0 on success, an error code otherwise.
586 */
587static int cpufreq_set_min_state(struct thermal_cooling_device *cdev,
588 unsigned long state)
589{
590 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
591 unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
592 unsigned int floor_freq;
593
594 if (state > cpufreq_device->max_level)
595 state = cpufreq_device->max_level;
596
597 if (cpufreq_device->cpufreq_floor_state == state)
598 return 0;
599
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600600 cpufreq_device->cpufreq_floor_state = state;
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600601
Ram Chandrasekar23b2db42017-04-19 13:23:31 -0600602 /*
603 * Check if the device has a platform mitigation function that
604 * can handle the CPU freq mitigation, if not, notify cpufreq
605 * framework.
606 */
607 if (cpufreq_device->plat_ops &&
608 cpufreq_device->plat_ops->floor_limit) {
609 /*
610 * Last level is core isolation so use the frequency
611 * of previous state.
612 */
613 if (state == cpufreq_device->max_level)
614 state--;
615 floor_freq = cpufreq_device->freq_table[state];
616 cpufreq_device->floor_freq = floor_freq;
617 cpufreq_device->plat_ops->floor_limit(cpu, floor_freq);
618 } else {
619 floor_freq = cpufreq_device->freq_table[state];
620 cpufreq_device->floor_freq = floor_freq;
621 cpufreq_update_policy(cpu);
622 }
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600623
624 return 0;
625}
626
627/**
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530628 * cpufreq_get_cur_state - callback function to get the current cooling state.
629 * @cdev: thermal cooling device pointer.
630 * @state: fill this variable with the current cooling state.
Eduardo Valentin36725522013-04-17 17:12:13 +0000631 *
632 * Callback for the thermal cooling device to return the cpufreq
633 * current cooling state.
634 *
635 * Return: 0 on success, an error code otherwise.
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530636 */
637static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
638 unsigned long *state)
639{
hongbo.zhang160b7d82012-10-30 17:48:59 +0100640 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530641
hongbo.zhang160b7d82012-10-30 17:48:59 +0100642 *state = cpufreq_device->cpufreq_state;
Eduardo Valentin79491e52013-04-17 17:11:59 +0000643
hongbo.zhang160b7d82012-10-30 17:48:59 +0100644 return 0;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530645}
646
647/**
648 * cpufreq_set_cur_state - callback function to set the current cooling state.
649 * @cdev: thermal cooling device pointer.
650 * @state: set this variable to the current cooling state.
Eduardo Valentin56e05fdb2013-04-17 17:12:14 +0000651 *
652 * Callback for the thermal cooling device to change the cpufreq
653 * current cooling state.
654 *
655 * Return: 0 on success, an error code otherwise.
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530656 */
657static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
658 unsigned long state)
659{
hongbo.zhang160b7d82012-10-30 17:48:59 +0100660 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
Viresh Kumar5194fe42014-12-04 09:42:00 +0530661 unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
662 unsigned int clip_freq;
Ram Chandrasekar30ca8042017-06-13 16:05:47 -0600663 unsigned long prev_state;
664 struct device *cpu_dev;
665 int ret = 0;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530666
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530667 /* Request state should be less than max_level */
668 if (WARN_ON(state > cpufreq_device->max_level))
669 return -EINVAL;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530670
Viresh Kumar5194fe42014-12-04 09:42:00 +0530671 /* Check if the old cooling action is same as new cooling action */
672 if (cpufreq_device->cpufreq_state == state)
673 return 0;
674
Ram Chandrasekar30ca8042017-06-13 16:05:47 -0600675 mutex_lock(&core_isolate_lock);
676 prev_state = cpufreq_device->cpufreq_state;
Ram Chandrasekarbd080e42017-05-25 15:49:20 -0600677 cpufreq_device->cpufreq_state = state;
Lina Iyer97a13ed2016-07-15 14:53:58 -0600678 /* If state is the last, isolate the CPU */
Ram Chandrasekar30ca8042017-06-13 16:05:47 -0600679 if (state == cpufreq_device->max_level) {
680 if (cpu_online(cpu))
681 sched_isolate_cpu(cpu);
682 mutex_unlock(&core_isolate_lock);
683 return ret;
684 } else if ((prev_state == cpufreq_device->max_level)
685 && (state < cpufreq_device->max_level)) {
686 if (cpumask_test_and_clear_cpu(cpu, &cpus_pending_online)) {
687 cpu_dev = get_cpu_device(cpu);
688 mutex_unlock(&core_isolate_lock);
689 /*
690 * Unlock before calling the device_online.
691 * Else, this will lead to deadlock, since the hp
692 * online callback will be blocked on this mutex.
693 */
694 ret = device_online(cpu_dev);
695 if (ret)
696 pr_err("CPU:%d online error:%d\n", cpu, ret);
697 goto update_frequency;
698 } else
699 sched_unisolate_cpu(cpu);
700 }
701 mutex_unlock(&core_isolate_lock);
702update_frequency:
Viresh Kumar4843c4a2014-12-04 09:42:07 +0530703 clip_freq = cpufreq_device->freq_table[state];
Viresh Kumar59f0d212015-07-30 12:40:33 +0530704 cpufreq_device->clipped_freq = clip_freq;
Viresh Kumar5194fe42014-12-04 09:42:00 +0530705
Lina Iyer986fde12016-02-23 13:08:31 -0700706 /* Check if the device has a platform mitigation function that
707 * can handle the CPU freq mitigation, if not, notify cpufreq
708 * framework.
709 */
710 if (cpufreq_device->plat_ops) {
711 if (cpufreq_device->plat_ops->ceil_limit)
712 cpufreq_device->plat_ops->ceil_limit(cpu,
713 clip_freq);
714 } else {
715 cpufreq_update_policy(cpu);
716 }
Viresh Kumar5194fe42014-12-04 09:42:00 +0530717
718 return 0;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530719}
720
Javi Merinoc36cf072015-02-26 19:00:29 +0000721/**
722 * cpufreq_get_requested_power() - get the current power
723 * @cdev: &thermal_cooling_device pointer
724 * @tz: a valid thermal zone device pointer
725 * @power: pointer in which to store the resulting power
726 *
727 * Calculate the current power consumption of the cpus in milliwatts
728 * and store it in @power. This function should actually calculate
729 * the requested power, but it's hard to get the frequency that
730 * cpufreq would have assigned if there were no thermal limits.
731 * Instead, we calculate the current power on the assumption that the
732 * immediate future will look like the immediate past.
733 *
734 * We use the current frequency and the average load since this
735 * function was last called. In reality, there could have been
736 * multiple opps since this function was last called and that affects
737 * the load calculation. While it's not perfectly accurate, this
738 * simplification is good enough and works. REVISIT this, as more
739 * complex code may be needed if experiments show that it's not
740 * accurate enough.
741 *
742 * Return: 0 on success, -E* if getting the static power failed.
743 */
744static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
745 struct thermal_zone_device *tz,
746 u32 *power)
747{
748 unsigned long freq;
Javi Merino6828a472015-03-02 17:17:20 +0000749 int i = 0, cpu, ret;
Javi Merinoc36cf072015-02-26 19:00:29 +0000750 u32 static_power, dynamic_power, total_load = 0;
751 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
Javi Merino6828a472015-03-02 17:17:20 +0000752 u32 *load_cpu = NULL;
Javi Merinoc36cf072015-02-26 19:00:29 +0000753
Kapileshwar Singhdd658e02015-03-16 12:00:51 +0000754 cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
755
756 /*
757 * All the CPUs are offline, thus the requested power by
758 * the cdev is 0
759 */
760 if (cpu >= nr_cpu_ids) {
761 *power = 0;
762 return 0;
763 }
764
765 freq = cpufreq_quick_get(cpu);
Javi Merinoc36cf072015-02-26 19:00:29 +0000766
Javi Merino6828a472015-03-02 17:17:20 +0000767 if (trace_thermal_power_cpu_get_power_enabled()) {
768 u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
769
Vaishali Thakkara71544c2015-08-19 11:52:19 +0530770 load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
Javi Merino6828a472015-03-02 17:17:20 +0000771 }
772
Javi Merinoc36cf072015-02-26 19:00:29 +0000773 for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
774 u32 load;
775
776 if (cpu_online(cpu))
Javi Merinoa53b8392016-02-11 12:00:51 +0000777 load = get_load(cpufreq_device, cpu, i);
Javi Merinoc36cf072015-02-26 19:00:29 +0000778 else
779 load = 0;
780
781 total_load += load;
Javi Merino6828a472015-03-02 17:17:20 +0000782 if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
783 load_cpu[i] = load;
784
785 i++;
Javi Merinoc36cf072015-02-26 19:00:29 +0000786 }
787
788 cpufreq_device->last_load = total_load;
789
790 dynamic_power = get_dynamic_power(cpufreq_device, freq);
791 ret = get_static_power(cpufreq_device, tz, freq, &static_power);
Javi Merino6828a472015-03-02 17:17:20 +0000792 if (ret) {
Vaishali Thakkara71544c2015-08-19 11:52:19 +0530793 kfree(load_cpu);
Javi Merinoc36cf072015-02-26 19:00:29 +0000794 return ret;
Javi Merino6828a472015-03-02 17:17:20 +0000795 }
796
797 if (load_cpu) {
798 trace_thermal_power_cpu_get_power(
799 &cpufreq_device->allowed_cpus,
800 freq, load_cpu, i, dynamic_power, static_power);
801
Vaishali Thakkara71544c2015-08-19 11:52:19 +0530802 kfree(load_cpu);
Javi Merino6828a472015-03-02 17:17:20 +0000803 }
Javi Merinoc36cf072015-02-26 19:00:29 +0000804
805 *power = static_power + dynamic_power;
806 return 0;
807}
808
809/**
810 * cpufreq_state2power() - convert a cpu cdev state to power consumed
811 * @cdev: &thermal_cooling_device pointer
812 * @tz: a valid thermal zone device pointer
813 * @state: cooling device state to be converted
814 * @power: pointer in which to store the resulting power
815 *
816 * Convert cooling device state @state into power consumption in
817 * milliwatts assuming 100% load. Store the calculated power in
818 * @power.
819 *
820 * Return: 0 on success, -EINVAL if the cooling device state could not
821 * be converted into a frequency or other -E* if there was an error
822 * when calculating the static power.
823 */
824static int cpufreq_state2power(struct thermal_cooling_device *cdev,
825 struct thermal_zone_device *tz,
826 unsigned long state, u32 *power)
827{
828 unsigned int freq, num_cpus;
829 cpumask_t cpumask;
830 u32 static_power, dynamic_power;
831 int ret;
832 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
833
834 cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
835 num_cpus = cpumask_weight(&cpumask);
836
837 /* None of our cpus are online, so no power */
838 if (num_cpus == 0) {
839 *power = 0;
840 return 0;
841 }
842
843 freq = cpufreq_device->freq_table[state];
844 if (!freq)
845 return -EINVAL;
846
847 dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
848 ret = get_static_power(cpufreq_device, tz, freq, &static_power);
849 if (ret)
850 return ret;
851
852 *power = static_power + dynamic_power;
853 return 0;
854}
855
856/**
857 * cpufreq_power2state() - convert power to a cooling device state
858 * @cdev: &thermal_cooling_device pointer
859 * @tz: a valid thermal zone device pointer
860 * @power: power in milliwatts to be converted
861 * @state: pointer in which to store the resulting state
862 *
863 * Calculate a cooling device state for the cpus described by @cdev
864 * that would allow them to consume at most @power mW and store it in
865 * @state. Note that this calculation depends on external factors
866 * such as the cpu load or the current static power. Calling this
867 * function with the same power as input can yield different cooling
868 * device states depending on those external factors.
869 *
870 * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
871 * the calculated frequency could not be converted to a valid state.
872 * The latter should not happen unless the frequencies available to
873 * cpufreq have changed since the initialization of the cpu cooling
874 * device.
875 */
876static int cpufreq_power2state(struct thermal_cooling_device *cdev,
877 struct thermal_zone_device *tz, u32 power,
878 unsigned long *state)
879{
880 unsigned int cpu, cur_freq, target_freq;
881 int ret;
882 s32 dyn_power;
883 u32 last_load, normalised_power, static_power;
884 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
885
886 cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
887
888 /* None of our cpus are online */
889 if (cpu >= nr_cpu_ids)
890 return -ENODEV;
891
892 cur_freq = cpufreq_quick_get(cpu);
893 ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
894 if (ret)
895 return ret;
896
897 dyn_power = power - static_power;
898 dyn_power = dyn_power > 0 ? dyn_power : 0;
899 last_load = cpufreq_device->last_load ?: 1;
900 normalised_power = (dyn_power * 100) / last_load;
901 target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
902
903 *state = cpufreq_cooling_get_level(cpu, target_freq);
904 if (*state == THERMAL_CSTATE_INVALID) {
905 dev_warn_ratelimited(&cdev->device,
906 "Failed to convert %dKHz for cpu %d into a cdev state\n",
907 target_freq, cpu);
908 return -EINVAL;
909 }
910
Javi Merino6828a472015-03-02 17:17:20 +0000911 trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
912 target_freq, *state, power);
Javi Merinoc36cf072015-02-26 19:00:29 +0000913 return 0;
914}
915
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530916/* Bind cpufreq callbacks to thermal cooling device ops */
Brendan Jackmana305a432016-08-17 16:14:59 +0100917
Javi Merinoc36cf072015-02-26 19:00:29 +0000918static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530919 .get_max_state = cpufreq_get_max_state,
920 .get_cur_state = cpufreq_get_cur_state,
921 .set_cur_state = cpufreq_set_cur_state,
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -0600922 .set_min_state = cpufreq_set_min_state,
923 .get_min_state = cpufreq_get_min_state,
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530924};
925
Brendan Jackmana305a432016-08-17 16:14:59 +0100926static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
927 .get_max_state = cpufreq_get_max_state,
928 .get_cur_state = cpufreq_get_cur_state,
929 .set_cur_state = cpufreq_set_cur_state,
930 .get_requested_power = cpufreq_get_requested_power,
931 .state2power = cpufreq_state2power,
932 .power2state = cpufreq_power2state,
933};
934
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530935/* Notifier for cpufreq policy change */
936static struct notifier_block thermal_cpufreq_notifier_block = {
937 .notifier_call = cpufreq_thermal_notifier,
938};
939
Viresh Kumarf6859012014-12-04 09:42:06 +0530940static unsigned int find_next_max(struct cpufreq_frequency_table *table,
941 unsigned int prev_max)
942{
943 struct cpufreq_frequency_table *pos;
944 unsigned int max = 0;
945
946 cpufreq_for_each_valid_entry(pos, table) {
947 if (pos->frequency > max && pos->frequency < prev_max)
948 max = pos->frequency;
949 }
950
951 return max;
952}
953
Ram Chandrasekar30ca8042017-06-13 16:05:47 -0600954static void register_cdev(struct work_struct *work)
955{
956 int ret = 0;
957
958 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
959 "cpu_cooling/no-sched", cpufreq_hp_online,
960 cpufreq_hp_offline);
961 if (ret < 0)
962 pr_err("Error registering for hotpug callback:%d\n", ret);
963}
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530964/**
Eduardo Valentin39d99cf2013-09-12 19:26:45 -0400965 * __cpufreq_cooling_register - helper function to create cpufreq cooling device
966 * @np: a valid struct device_node to the cooling device device tree node
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530967 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
Viresh Kumar405fb822014-12-04 09:41:55 +0530968 * Normally this should be same as cpufreq policy->related_cpus.
Javi Merinoc36cf072015-02-26 19:00:29 +0000969 * @capacitance: dynamic power coefficient for these cpus
970 * @plat_static_func: function to calculate the static power consumed by these
971 * cpus (optional)
Lina Iyer986fde12016-02-23 13:08:31 -0700972 * @plat_mitig_func: function that does the mitigation by changing the
973 * frequencies (Optional). By default, cpufreq framweork will
974 * be notified of the new limits.
Eduardo Valentin12cb08b2013-04-17 17:12:15 +0000975 *
976 * This interface function registers the cpufreq cooling device with the name
977 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
Eduardo Valentin39d99cf2013-09-12 19:26:45 -0400978 * cooling devices. It also gives the opportunity to link the cooling device
979 * with a device tree node, in order to bind it via the thermal DT code.
Eduardo Valentin12cb08b2013-04-17 17:12:15 +0000980 *
981 * Return: a valid struct thermal_cooling_device pointer on success,
982 * on failure, it returns a corresponding ERR_PTR().
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530983 */
Eduardo Valentin39d99cf2013-09-12 19:26:45 -0400984static struct thermal_cooling_device *
985__cpufreq_cooling_register(struct device_node *np,
Javi Merinoc36cf072015-02-26 19:00:29 +0000986 const struct cpumask *clip_cpus, u32 capacitance,
Lina Iyer986fde12016-02-23 13:08:31 -0700987 get_static_t plat_static_func,
988 struct cpu_cooling_ops *plat_ops)
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530989{
Viresh Kumarf8bfc112016-06-03 10:58:47 +0530990 struct cpufreq_policy *policy;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530991 struct thermal_cooling_device *cool_dev;
Viresh Kumar5d3bdb82014-12-04 09:41:52 +0530992 struct cpufreq_cooling_device *cpufreq_dev;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530993 char dev_name[THERMAL_NAME_LENGTH];
Viresh Kumardcc6c7f2014-12-04 09:42:02 +0530994 struct cpufreq_frequency_table *pos, *table;
Javi Merinoc36cf072015-02-26 19:00:29 +0000995 unsigned int freq, i, num_cpus;
Viresh Kumar405fb822014-12-04 09:41:55 +0530996 int ret;
Brendan Jackmana305a432016-08-17 16:14:59 +0100997 struct thermal_cooling_device_ops *cooling_ops;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +0530998
Ram Chandrasekar5682d552017-05-03 16:01:53 -0600999 policy = cpufreq_cpu_get(cpumask_first(clip_cpus));
Viresh Kumarf8bfc112016-06-03 10:58:47 +05301000 if (!policy) {
1001 pr_debug("%s: CPUFreq policy not found\n", __func__);
Eduardo Valentin0f1be512014-12-04 09:41:43 +05301002 return ERR_PTR(-EPROBE_DEFER);
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301003 }
Eduardo Valentin0f1be512014-12-04 09:41:43 +05301004
Viresh Kumarf8bfc112016-06-03 10:58:47 +05301005 table = policy->freq_table;
1006 if (!table) {
1007 pr_debug("%s: CPUFreq table not found\n", __func__);
1008 cool_dev = ERR_PTR(-ENODEV);
1009 goto put_policy;
1010 }
1011
Viresh Kumar98d522f2014-12-04 09:41:50 +05301012 cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
Viresh Kumarf8bfc112016-06-03 10:58:47 +05301013 if (!cpufreq_dev) {
1014 cool_dev = ERR_PTR(-ENOMEM);
1015 goto put_policy;
1016 }
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301017
Javi Merinoc36cf072015-02-26 19:00:29 +00001018 num_cpus = cpumask_weight(clip_cpus);
1019 cpufreq_dev->time_in_idle = kcalloc(num_cpus,
1020 sizeof(*cpufreq_dev->time_in_idle),
1021 GFP_KERNEL);
1022 if (!cpufreq_dev->time_in_idle) {
1023 cool_dev = ERR_PTR(-ENOMEM);
1024 goto free_cdev;
1025 }
1026
1027 cpufreq_dev->time_in_idle_timestamp =
1028 kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
1029 GFP_KERNEL);
1030 if (!cpufreq_dev->time_in_idle_timestamp) {
1031 cool_dev = ERR_PTR(-ENOMEM);
1032 goto free_time_in_idle;
1033 }
1034
Viresh Kumardcc6c7f2014-12-04 09:42:02 +05301035 /* Find max levels */
1036 cpufreq_for_each_valid_entry(pos, table)
1037 cpufreq_dev->max_level++;
1038
Lina Iyer97a13ed2016-07-15 14:53:58 -06001039 /* Last level will indicate the core will be isolated. */
1040 cpufreq_dev->max_level++;
1041 cpufreq_dev->freq_table = kzalloc(sizeof(*cpufreq_dev->freq_table) *
Viresh Kumarf6859012014-12-04 09:42:06 +05301042 cpufreq_dev->max_level, GFP_KERNEL);
1043 if (!cpufreq_dev->freq_table) {
Viresh Kumarf6859012014-12-04 09:42:06 +05301044 cool_dev = ERR_PTR(-ENOMEM);
Javi Merinoc36cf072015-02-26 19:00:29 +00001045 goto free_time_in_idle_timestamp;
Viresh Kumarf6859012014-12-04 09:42:06 +05301046 }
1047
Viresh Kumardcc6c7f2014-12-04 09:42:02 +05301048 /* max_level is an index, not a counter */
1049 cpufreq_dev->max_level--;
1050
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301051 cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
1052
Javi Merinoc36cf072015-02-26 19:00:29 +00001053 if (capacitance) {
Javi Merinoc36cf072015-02-26 19:00:29 +00001054 cpufreq_dev->plat_get_static_power = plat_static_func;
1055
1056 ret = build_dyn_power_table(cpufreq_dev, capacitance);
1057 if (ret) {
1058 cool_dev = ERR_PTR(ret);
1059 goto free_table;
1060 }
Brendan Jackmana305a432016-08-17 16:14:59 +01001061
1062 cooling_ops = &cpufreq_power_cooling_ops;
1063 } else {
1064 cooling_ops = &cpufreq_cooling_ops;
Javi Merinoc36cf072015-02-26 19:00:29 +00001065 }
1066
Lina Iyer986fde12016-02-23 13:08:31 -07001067 cpufreq_dev->plat_ops = plat_ops;
1068
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301069 ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
1070 if (ret) {
Viresh Kumar730abe02014-12-04 09:41:58 +05301071 cool_dev = ERR_PTR(ret);
Javi Merinoeba4f882015-08-17 19:21:43 +01001072 goto free_power_table;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301073 }
1074
Viresh Kumarf6859012014-12-04 09:42:06 +05301075 /* Fill freq-table in descending order of frequencies */
Lina Iyer97a13ed2016-07-15 14:53:58 -06001076 for (i = 0, freq = -1; i < cpufreq_dev->max_level; i++) {
Viresh Kumarf6859012014-12-04 09:42:06 +05301077 freq = find_next_max(table, freq);
1078 cpufreq_dev->freq_table[i] = freq;
1079
1080 /* Warn for duplicate entries */
1081 if (!freq)
1082 pr_warn("%s: table has duplicate entries\n", __func__);
1083 else
1084 pr_debug("%s: freq:%u KHz\n", __func__, freq);
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301085 }
Viresh Kumarf6859012014-12-04 09:42:06 +05301086
Lukasz Lubaf840ab12016-05-31 11:32:02 +01001087 snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
1088 cpufreq_dev->id);
1089
1090 cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
Brendan Jackmana305a432016-08-17 16:14:59 +01001091 cooling_ops);
Lukasz Lubaf840ab12016-05-31 11:32:02 +01001092 if (IS_ERR(cool_dev))
1093 goto remove_idr;
1094
Viresh Kumar59f0d212015-07-30 12:40:33 +05301095 cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
Ram Chandrasekard8e4bf22016-09-21 17:08:06 -06001096 cpufreq_dev->floor_freq =
1097 cpufreq_dev->freq_table[cpufreq_dev->max_level];
1098 cpufreq_dev->cpufreq_floor_state = cpufreq_dev->max_level;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301099 cpufreq_dev->cool_dev = cool_dev;
Viresh Kumar92e615e2014-12-04 09:41:51 +05301100
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301101 mutex_lock(&cooling_cpufreq_lock);
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301102
Russell King02373d72015-08-12 15:22:16 +05301103 mutex_lock(&cooling_list_lock);
1104 list_add(&cpufreq_dev->node, &cpufreq_dev_list);
1105 mutex_unlock(&cooling_list_lock);
1106
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301107 /* Register the notifier for first cpufreq cooling device */
Ram Chandrasekarcbd1a402017-10-27 12:18:49 -06001108 if (!cpufreq_dev_count++ && !cpufreq_dev->plat_ops)
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301109 cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
Eduardo Valentin5fda7f62013-04-17 17:12:11 +00001110 CPUFREQ_POLICY_NOTIFIER);
Ram Chandrasekar30ca8042017-06-13 16:05:47 -06001111 if (!cpuhp_registered) {
1112 cpuhp_registered = 1;
1113 cpumask_clear(&cpus_pending_online);
1114 INIT_WORK(&cpuhp_register_work, register_cdev);
1115 queue_work(system_wq, &cpuhp_register_work);
1116 }
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301117 mutex_unlock(&cooling_cpufreq_lock);
Eduardo Valentin79491e52013-04-17 17:11:59 +00001118
Viresh Kumarf8bfc112016-06-03 10:58:47 +05301119 goto put_policy;
Viresh Kumar730abe02014-12-04 09:41:58 +05301120
1121remove_idr:
1122 release_idr(&cpufreq_idr, cpufreq_dev->id);
Javi Merinoeba4f882015-08-17 19:21:43 +01001123free_power_table:
1124 kfree(cpufreq_dev->dyn_power_table);
Viresh Kumarf6859012014-12-04 09:42:06 +05301125free_table:
1126 kfree(cpufreq_dev->freq_table);
Javi Merinoc36cf072015-02-26 19:00:29 +00001127free_time_in_idle_timestamp:
1128 kfree(cpufreq_dev->time_in_idle_timestamp);
1129free_time_in_idle:
1130 kfree(cpufreq_dev->time_in_idle);
Viresh Kumar730abe02014-12-04 09:41:58 +05301131free_cdev:
1132 kfree(cpufreq_dev);
Viresh Kumarf8bfc112016-06-03 10:58:47 +05301133put_policy:
1134 cpufreq_cpu_put(policy);
Viresh Kumar730abe02014-12-04 09:41:58 +05301135
1136 return cool_dev;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301137}
Eduardo Valentin39d99cf2013-09-12 19:26:45 -04001138
1139/**
1140 * cpufreq_cooling_register - function to create cpufreq cooling device.
1141 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
1142 *
1143 * This interface function registers the cpufreq cooling device with the name
1144 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
1145 * cooling devices.
1146 *
1147 * Return: a valid struct thermal_cooling_device pointer on success,
1148 * on failure, it returns a corresponding ERR_PTR().
1149 */
1150struct thermal_cooling_device *
1151cpufreq_cooling_register(const struct cpumask *clip_cpus)
1152{
Lina Iyer986fde12016-02-23 13:08:31 -07001153 return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL, NULL);
Eduardo Valentin39d99cf2013-09-12 19:26:45 -04001154}
Eduardo Valentin243dbd92013-04-17 17:11:57 +00001155EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301156
1157/**
Eduardo Valentin39d99cf2013-09-12 19:26:45 -04001158 * of_cpufreq_cooling_register - function to create cpufreq cooling device.
1159 * @np: a valid struct device_node to the cooling device device tree node
1160 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
1161 *
1162 * This interface function registers the cpufreq cooling device with the name
1163 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
1164 * cooling devices. Using this API, the cpufreq cooling device will be
1165 * linked to the device tree node provided.
1166 *
1167 * Return: a valid struct thermal_cooling_device pointer on success,
1168 * on failure, it returns a corresponding ERR_PTR().
1169 */
1170struct thermal_cooling_device *
1171of_cpufreq_cooling_register(struct device_node *np,
1172 const struct cpumask *clip_cpus)
1173{
1174 if (!np)
1175 return ERR_PTR(-EINVAL);
1176
Lina Iyer986fde12016-02-23 13:08:31 -07001177 return __cpufreq_cooling_register(np, clip_cpus, 0, NULL, NULL);
Eduardo Valentin39d99cf2013-09-12 19:26:45 -04001178}
1179EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
1180
1181/**
Javi Merinoc36cf072015-02-26 19:00:29 +00001182 * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
1183 * @clip_cpus: cpumask of cpus where the frequency constraints will happen
1184 * @capacitance: dynamic power coefficient for these cpus
1185 * @plat_static_func: function to calculate the static power consumed by these
1186 * cpus (optional)
1187 *
1188 * This interface function registers the cpufreq cooling device with
1189 * the name "thermal-cpufreq-%x". This api can support multiple
1190 * instances of cpufreq cooling devices. Using this function, the
1191 * cooling device will implement the power extensions by using a
1192 * simple cpu power model. The cpus must have registered their OPPs
1193 * using the OPP library.
1194 *
1195 * An optional @plat_static_func may be provided to calculate the
1196 * static power consumed by these cpus. If the platform's static
1197 * power consumption is unknown or negligible, make it NULL.
1198 *
1199 * Return: a valid struct thermal_cooling_device pointer on success,
1200 * on failure, it returns a corresponding ERR_PTR().
1201 */
1202struct thermal_cooling_device *
1203cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
1204 get_static_t plat_static_func)
1205{
1206 return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
Lina Iyer986fde12016-02-23 13:08:31 -07001207 plat_static_func, NULL);
Javi Merinoc36cf072015-02-26 19:00:29 +00001208}
1209EXPORT_SYMBOL(cpufreq_power_cooling_register);
1210
1211/**
Lina Iyer986fde12016-02-23 13:08:31 -07001212 * cpufreq_platform_cooling_register() - create cpufreq cooling device with
1213 * additional platform specific mitigation function.
1214 *
1215 * @clip_cpus: cpumask of cpus where the frequency constraints will happen
1216 * @plat_ops: the platform mitigation functions that will be called insted of
1217 * cpufreq, if provided.
1218 *
1219 * Return: a valid struct thermal_cooling_device pointer on success,
1220 * on failure, it returns a corresponding ERR_PTR().
1221 */
1222struct thermal_cooling_device *
1223cpufreq_platform_cooling_register(const struct cpumask *clip_cpus,
1224 struct cpu_cooling_ops *plat_ops)
1225{
1226 struct device_node *cpu_node;
1227
1228 cpu_node = of_cpu_device_node_get(cpumask_first(clip_cpus));
1229 return __cpufreq_cooling_register(cpu_node, clip_cpus, 0, NULL,
1230 plat_ops);
1231}
1232EXPORT_SYMBOL(cpufreq_platform_cooling_register);
1233
1234/**
Javi Merinoc36cf072015-02-26 19:00:29 +00001235 * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
1236 * @np: a valid struct device_node to the cooling device device tree node
1237 * @clip_cpus: cpumask of cpus where the frequency constraints will happen
1238 * @capacitance: dynamic power coefficient for these cpus
1239 * @plat_static_func: function to calculate the static power consumed by these
1240 * cpus (optional)
1241 *
1242 * This interface function registers the cpufreq cooling device with
1243 * the name "thermal-cpufreq-%x". This api can support multiple
1244 * instances of cpufreq cooling devices. Using this API, the cpufreq
1245 * cooling device will be linked to the device tree node provided.
1246 * Using this function, the cooling device will implement the power
1247 * extensions by using a simple cpu power model. The cpus must have
1248 * registered their OPPs using the OPP library.
1249 *
1250 * An optional @plat_static_func may be provided to calculate the
1251 * static power consumed by these cpus. If the platform's static
1252 * power consumption is unknown or negligible, make it NULL.
1253 *
1254 * Return: a valid struct thermal_cooling_device pointer on success,
1255 * on failure, it returns a corresponding ERR_PTR().
1256 */
1257struct thermal_cooling_device *
1258of_cpufreq_power_cooling_register(struct device_node *np,
1259 const struct cpumask *clip_cpus,
1260 u32 capacitance,
1261 get_static_t plat_static_func)
1262{
1263 if (!np)
1264 return ERR_PTR(-EINVAL);
1265
1266 return __cpufreq_cooling_register(np, clip_cpus, capacitance,
Lina Iyer986fde12016-02-23 13:08:31 -07001267 plat_static_func, NULL);
Javi Merinoc36cf072015-02-26 19:00:29 +00001268}
1269EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
1270
1271/**
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301272 * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
1273 * @cdev: thermal cooling device pointer.
Eduardo Valentin135266b2013-04-17 17:12:16 +00001274 *
1275 * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301276 */
1277void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
1278{
Eduardo Valentin50e66c72013-08-15 10:54:46 -04001279 struct cpufreq_cooling_device *cpufreq_dev;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301280
Eduardo Valentin50e66c72013-08-15 10:54:46 -04001281 if (!cdev)
1282 return;
1283
1284 cpufreq_dev = cdev->devdata;
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301285
1286 /* Unregister the notifier for the last cpufreq cooling device */
Russell King02373d72015-08-12 15:22:16 +05301287 mutex_lock(&cooling_cpufreq_lock);
Ram Chandrasekarcbd1a402017-10-27 12:18:49 -06001288 if (!--cpufreq_dev_count && !cpufreq_dev->plat_ops)
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301289 cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
Eduardo Valentin5fda7f62013-04-17 17:12:11 +00001290 CPUFREQ_POLICY_NOTIFIER);
Russell King02373d72015-08-12 15:22:16 +05301291
1292 mutex_lock(&cooling_list_lock);
1293 list_del(&cpufreq_dev->node);
1294 mutex_unlock(&cooling_list_lock);
1295
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301296 mutex_unlock(&cooling_cpufreq_lock);
hongbo.zhang160b7d82012-10-30 17:48:59 +01001297
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301298 thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
1299 release_idr(&cpufreq_idr, cpufreq_dev->id);
Javi Merinoeba4f882015-08-17 19:21:43 +01001300 kfree(cpufreq_dev->dyn_power_table);
Javi Merinoc36cf072015-02-26 19:00:29 +00001301 kfree(cpufreq_dev->time_in_idle_timestamp);
1302 kfree(cpufreq_dev->time_in_idle);
Viresh Kumarf6859012014-12-04 09:42:06 +05301303 kfree(cpufreq_dev->freq_table);
Amit Daniel Kachhap02361412012-08-16 17:11:40 +05301304 kfree(cpufreq_dev);
1305}
Eduardo Valentin243dbd92013-04-17 17:11:57 +00001306EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);