blob: 246ae147df74f48dbbde8dcfbe8c9116e39265c6 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * drivers/cpufreq/cpufreq_ondemand.c
3 *
4 * Copyright (C) 2001 Russell King
5 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
6 * Jun Nakajima <jun.nakajima@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/kernel.h>
14#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/cpufreq.h>
Andrew Morton138a01282006-06-23 03:31:19 -070017#include <linux/cpu.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/jiffies.h>
19#include <linux/kernel_stat.h>
akpm@osdl.org3fc54d32006-01-13 15:54:22 -080020#include <linux/mutex.h>
venkatesh.pallipadi@intel.com80800912008-08-04 11:59:12 -070021#include <linux/hrtimer.h>
22#include <linux/tick.h>
23#include <linux/ktime.h>
Thomas Renninger9411b4e2009-02-04 11:54:04 +010024#include <linux/sched.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025
/*
 * "dbs" is used throughout this file as shorthand for demand-based
 * switching; it keeps identifiers short.
 */

/* Initial values of the up_threshold / down_differential tunables. */
#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
/*
 * Values installed at init time when get_cpu_idle_time_us() reports
 * usable idle micro accounting.
 */
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL	(3)
#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
/* Range accepted when user space writes up_threshold. */
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default sampling period is LATENCY_MULTIPLIER times
 * the transition latency of the processor. The governor works on any
 * processor with a transition latency <= 10 ms, using an appropriate
 * sampling rate.
 * For CPUs with a transition latency > 10 ms (mostly drivers reporting
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in microseconds.
 */
#define MIN_SAMPLING_RATE_RATIO			(2)

/* Lower bound for the sampling_rate tunable. */
static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER			(1000)
#define MIN_LATENCY_MULTIPLIER			(100)
/* Used as the governor's max_transition_latency. */
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)

static void do_dbs_timer(struct work_struct *work);

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
Linus Torvalds1da177e2005-04-16 15:20:36 -070061
62struct cpu_dbs_info_s {
Venkatesh Pallipadiccb2fe22006-06-28 13:49:52 -070063 cputime64_t prev_cpu_idle;
64 cputime64_t prev_cpu_wall;
venkatesh.pallipadi@intel.com80800912008-08-04 11:59:12 -070065 cputime64_t prev_cpu_nice;
Dave Jones32ee8c32006-02-28 00:43:23 -050066 struct cpufreq_policy *cur_policy;
Dave Jones2b03f892009-01-18 01:43:44 -050067 struct delayed_work work;
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +040068 struct cpufreq_frequency_table *freq_table;
69 unsigned int freq_lo;
70 unsigned int freq_lo_jiffies;
71 unsigned int freq_hi_jiffies;
Venkatesh Pallipadi529af7a2007-02-05 16:12:44 -080072 int cpu;
73 unsigned int enable:1,
Dave Jones2b03f892009-01-18 01:43:44 -050074 sample_type:1;
Linus Torvalds1da177e2005-04-16 15:20:36 -070075};
76static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
77
/* Number of CPUs using this policy. */
static unsigned int dbs_enable;

/*
 * dbs_mutex guards dbs_tuners_ins against concurrent updates coming from
 * different CPUs and guards dbs_enable across governor start/stop. It
 * additionally serializes the governor's limit-change handling with
 * do_dbs_timer(): the timer must not run while the user is changing the
 * governor or its limits.
 */
static DEFINE_MUTEX(dbs_mutex);

/* Workqueue on which the per-CPU sampling work is queued. */
static struct workqueue_struct *kondemand_wq;
Andi Kleen6810b542006-05-08 15:17:31 +020089
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +040090static struct dbs_tuners {
Dave Jones32ee8c32006-02-28 00:43:23 -050091 unsigned int sampling_rate;
Dave Jones32ee8c32006-02-28 00:43:23 -050092 unsigned int up_threshold;
venkatesh.pallipadi@intel.come9d95bf2008-08-04 11:59:10 -070093 unsigned int down_differential;
Dave Jones32ee8c32006-02-28 00:43:23 -050094 unsigned int ignore_nice;
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +040095 unsigned int powersave_bias;
96} dbs_tuners_ins = {
Dave Jones32ee8c32006-02-28 00:43:23 -050097 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
venkatesh.pallipadi@intel.come9d95bf2008-08-04 11:59:10 -070098 .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
Eric Piel9cbad612006-03-10 11:35:27 +020099 .ignore_nice = 0,
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400100 .powersave_bias = 0,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101};
102
venkatesh.pallipadi@intel.com80800912008-08-04 11:59:12 -0700103static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
104 cputime64_t *wall)
Dave Jonesdac1c1a2005-05-31 19:03:49 -0700105{
Venki Pallipadiea487612007-06-20 14:26:24 -0700106 cputime64_t idle_time;
venkatesh.pallipadi@intel.com34305022008-08-04 11:59:09 -0700107 cputime64_t cur_wall_time;
Venki Pallipadiea487612007-06-20 14:26:24 -0700108 cputime64_t busy_time;
Venkatesh Pallipadiccb2fe22006-06-28 13:49:52 -0700109
venkatesh.pallipadi@intel.com34305022008-08-04 11:59:09 -0700110 cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
Venki Pallipadiea487612007-06-20 14:26:24 -0700111 busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
112 kstat_cpu(cpu).cpustat.system);
Venkatesh Pallipadiccb2fe22006-06-28 13:49:52 -0700113
Venki Pallipadiea487612007-06-20 14:26:24 -0700114 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
115 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
116 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
Venkatesh Pallipadi1ca3abd2009-01-23 09:25:02 -0500117 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
Venki Pallipadiea487612007-06-20 14:26:24 -0700118
venkatesh.pallipadi@intel.com34305022008-08-04 11:59:09 -0700119 idle_time = cputime64_sub(cur_wall_time, busy_time);
120 if (wall)
121 *wall = cur_wall_time;
122
Venki Pallipadiea487612007-06-20 14:26:24 -0700123 return idle_time;
Dave Jonesdac1c1a2005-05-31 19:03:49 -0700124}
125
venkatesh.pallipadi@intel.com80800912008-08-04 11:59:12 -0700126static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
127{
128 u64 idle_time = get_cpu_idle_time_us(cpu, wall);
129
130 if (idle_time == -1ULL)
131 return get_cpu_idle_time_jiffy(cpu, wall);
132
venkatesh.pallipadi@intel.com80800912008-08-04 11:59:12 -0700133 return idle_time;
134}
135
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400136/*
137 * Find right freq to be set now with powersave_bias on.
138 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
139 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
140 */
Adrian Bunkb5ecf602006-08-13 23:00:08 +0200141static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
142 unsigned int freq_next,
143 unsigned int relation)
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400144{
145 unsigned int freq_req, freq_reduc, freq_avg;
146 unsigned int freq_hi, freq_lo;
147 unsigned int index = 0;
148 unsigned int jiffies_total, jiffies_hi, jiffies_lo;
149 struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);
150
151 if (!dbs_info->freq_table) {
152 dbs_info->freq_lo = 0;
153 dbs_info->freq_lo_jiffies = 0;
154 return freq_next;
155 }
156
157 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
158 relation, &index);
159 freq_req = dbs_info->freq_table[index].frequency;
160 freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
161 freq_avg = freq_req - freq_reduc;
162
163 /* Find freq bounds for freq_avg in freq_table */
164 index = 0;
165 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
166 CPUFREQ_RELATION_H, &index);
167 freq_lo = dbs_info->freq_table[index].frequency;
168 index = 0;
169 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
170 CPUFREQ_RELATION_L, &index);
171 freq_hi = dbs_info->freq_table[index].frequency;
172
173 /* Find out how long we have to be in hi and lo freqs */
174 if (freq_hi == freq_lo) {
175 dbs_info->freq_lo = 0;
176 dbs_info->freq_lo_jiffies = 0;
177 return freq_lo;
178 }
179 jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
180 jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
181 jiffies_hi += ((freq_hi - freq_lo) / 2);
182 jiffies_hi /= (freq_hi - freq_lo);
183 jiffies_lo = jiffies_total - jiffies_hi;
184 dbs_info->freq_lo = freq_lo;
185 dbs_info->freq_lo_jiffies = jiffies_lo;
186 dbs_info->freq_hi_jiffies = jiffies_hi;
187 return freq_hi;
188}
189
190static void ondemand_powersave_bias_init(void)
191{
192 int i;
193 for_each_online_cpu(i) {
194 struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i);
195 dbs_info->freq_table = cpufreq_frequency_get_table(i);
196 dbs_info->freq_lo = 0;
197 }
198}
199
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200/************************** sysfs interface ************************/
201static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
202{
Thomas Renninger4f4d1ad2009-04-22 13:48:31 +0200203 printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max "
204 "sysfs file is deprecated - used by: %s\n", current->comm);
Thomas Renningercef96152009-04-22 13:48:29 +0200205 return sprintf(buf, "%u\n", -1U);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206}
207
208static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
209{
Thomas Renningercef96152009-04-22 13:48:29 +0200210 return sprintf(buf, "%u\n", min_sampling_rate);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211}
212
Dave Jones32ee8c32006-02-28 00:43:23 -0500213#define define_one_ro(_name) \
214static struct freq_attr _name = \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215__ATTR(_name, 0444, show_##_name, NULL)
216
217define_one_ro(sampling_rate_max);
218define_one_ro(sampling_rate_min);
219
220/* cpufreq_ondemand Governor Tunables */
221#define show_one(file_name, object) \
222static ssize_t show_##file_name \
223(struct cpufreq_policy *unused, char *buf) \
224{ \
225 return sprintf(buf, "%u\n", dbs_tuners_ins.object); \
226}
227show_one(sampling_rate, sampling_rate);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228show_one(up_threshold, up_threshold);
Alexander Clouter001893c2005-12-01 01:09:25 -0800229show_one(ignore_nice_load, ignore_nice);
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400230show_one(powersave_bias, powersave_bias);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231
Dave Jones32ee8c32006-02-28 00:43:23 -0500232static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 const char *buf, size_t count)
234{
235 unsigned int input;
236 int ret;
Venkatesh Pallipadiffac80e2006-06-28 13:52:18 -0700237 ret = sscanf(buf, "%u", &input);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800239 mutex_lock(&dbs_mutex);
Thomas Renninger112124a2009-02-04 11:55:12 +0100240 if (ret != 1) {
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800241 mutex_unlock(&dbs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 return -EINVAL;
243 }
Thomas Renningercef96152009-04-22 13:48:29 +0200244 dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800245 mutex_unlock(&dbs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246
247 return count;
248}
249
Dave Jones32ee8c32006-02-28 00:43:23 -0500250static ssize_t store_up_threshold(struct cpufreq_policy *unused,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 const char *buf, size_t count)
252{
253 unsigned int input;
254 int ret;
Venkatesh Pallipadiffac80e2006-06-28 13:52:18 -0700255 ret = sscanf(buf, "%u", &input);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800257 mutex_lock(&dbs_mutex);
Dave Jones32ee8c32006-02-28 00:43:23 -0500258 if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
Dave Jonesc29f1402005-05-31 19:03:50 -0700259 input < MIN_FREQUENCY_UP_THRESHOLD) {
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800260 mutex_unlock(&dbs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 return -EINVAL;
262 }
263
264 dbs_tuners_ins.up_threshold = input;
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800265 mutex_unlock(&dbs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266
267 return count;
268}
269
Alexander Clouter001893c2005-12-01 01:09:25 -0800270static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
Dave Jones3d5ee9e2005-05-31 19:03:47 -0700271 const char *buf, size_t count)
272{
273 unsigned int input;
274 int ret;
275
276 unsigned int j;
Dave Jones32ee8c32006-02-28 00:43:23 -0500277
Venkatesh Pallipadiffac80e2006-06-28 13:52:18 -0700278 ret = sscanf(buf, "%u", &input);
Dave Jones2b03f892009-01-18 01:43:44 -0500279 if (ret != 1)
Dave Jones3d5ee9e2005-05-31 19:03:47 -0700280 return -EINVAL;
281
Dave Jones2b03f892009-01-18 01:43:44 -0500282 if (input > 1)
Dave Jones3d5ee9e2005-05-31 19:03:47 -0700283 input = 1;
Dave Jones32ee8c32006-02-28 00:43:23 -0500284
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800285 mutex_lock(&dbs_mutex);
Dave Jones2b03f892009-01-18 01:43:44 -0500286 if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800287 mutex_unlock(&dbs_mutex);
Dave Jones3d5ee9e2005-05-31 19:03:47 -0700288 return count;
289 }
290 dbs_tuners_ins.ignore_nice = input;
291
Venkatesh Pallipadiccb2fe22006-06-28 13:49:52 -0700292 /* we need to re-evaluate prev_cpu_idle */
Dave Jonesdac1c1a2005-05-31 19:03:49 -0700293 for_each_online_cpu(j) {
Venkatesh Pallipadiccb2fe22006-06-28 13:49:52 -0700294 struct cpu_dbs_info_s *dbs_info;
295 dbs_info = &per_cpu(cpu_dbs_info, j);
venkatesh.pallipadi@intel.com34305022008-08-04 11:59:09 -0700296 dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
297 &dbs_info->prev_cpu_wall);
Venkatesh Pallipadi1ca3abd2009-01-23 09:25:02 -0500298 if (dbs_tuners_ins.ignore_nice)
299 dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
300
Dave Jones3d5ee9e2005-05-31 19:03:47 -0700301 }
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800302 mutex_unlock(&dbs_mutex);
Dave Jones3d5ee9e2005-05-31 19:03:47 -0700303
304 return count;
305}
306
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400307static ssize_t store_powersave_bias(struct cpufreq_policy *unused,
308 const char *buf, size_t count)
309{
310 unsigned int input;
311 int ret;
312 ret = sscanf(buf, "%u", &input);
313
314 if (ret != 1)
315 return -EINVAL;
316
317 if (input > 1000)
318 input = 1000;
319
320 mutex_lock(&dbs_mutex);
321 dbs_tuners_ins.powersave_bias = input;
322 ondemand_powersave_bias_init();
323 mutex_unlock(&dbs_mutex);
324
325 return count;
326}
327
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328#define define_one_rw(_name) \
329static struct freq_attr _name = \
330__ATTR(_name, 0644, show_##_name, store_##_name)
331
332define_one_rw(sampling_rate);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333define_one_rw(up_threshold);
Alexander Clouter001893c2005-12-01 01:09:25 -0800334define_one_rw(ignore_nice_load);
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400335define_one_rw(powersave_bias);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336
Dave Jones2b03f892009-01-18 01:43:44 -0500337static struct attribute *dbs_attributes[] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 &sampling_rate_max.attr,
339 &sampling_rate_min.attr,
340 &sampling_rate.attr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 &up_threshold.attr,
Alexander Clouter001893c2005-12-01 01:09:25 -0800342 &ignore_nice_load.attr,
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400343 &powersave_bias.attr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 NULL
345};
346
347static struct attribute_group dbs_attr_group = {
348 .attrs = dbs_attributes,
349 .name = "ondemand",
350};
351
352/************************** sysfs end ************************/
353
Venkatesh Pallipadi2f8a8352006-06-28 13:51:19 -0700354static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355{
venkatesh.pallipadi@intel.comc43aa3b2008-08-04 11:59:08 -0700356 unsigned int max_load_freq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357
358 struct cpufreq_policy *policy;
359 unsigned int j;
360
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 if (!this_dbs_info->enable)
362 return;
363
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400364 this_dbs_info->freq_lo = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 policy = this_dbs_info->cur_policy;
Venki Pallipadiea487612007-06-20 14:26:24 -0700366
Dave Jones32ee8c32006-02-28 00:43:23 -0500367 /*
Dave Jonesc29f1402005-05-31 19:03:50 -0700368 * Every sampling_rate, we check, if current idle time is less
369 * than 20% (default), then we try to increase frequency
Venkatesh Pallipadiccb2fe22006-06-28 13:49:52 -0700370 * Every sampling_rate, we look for a the lowest
Dave Jonesc29f1402005-05-31 19:03:50 -0700371 * frequency which can sustain the load while keeping idle time over
372 * 30%. If such a frequency exist, we try to decrease to this frequency.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 *
Dave Jones32ee8c32006-02-28 00:43:23 -0500374 * Any frequency increase takes it to the maximum frequency.
375 * Frequency reduction happens at minimum steps of
376 * 5% (default) of current frequency
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 */
378
venkatesh.pallipadi@intel.comc43aa3b2008-08-04 11:59:08 -0700379 /* Get Absolute Load - in terms of freq */
380 max_load_freq = 0;
381
Rusty Russell835481d2009-01-04 05:18:06 -0800382 for_each_cpu(j, policy->cpus) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 struct cpu_dbs_info_s *j_dbs_info;
venkatesh.pallipadi@intel.comc43aa3b2008-08-04 11:59:08 -0700384 cputime64_t cur_wall_time, cur_idle_time;
385 unsigned int idle_time, wall_time;
386 unsigned int load, load_freq;
387 int freq_avg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 j_dbs_info = &per_cpu(cpu_dbs_info, j);
venkatesh.pallipadi@intel.com34305022008-08-04 11:59:09 -0700390
391 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
392
venkatesh.pallipadi@intel.comc43aa3b2008-08-04 11:59:08 -0700393 wall_time = (unsigned int) cputime64_sub(cur_wall_time,
394 j_dbs_info->prev_cpu_wall);
395 j_dbs_info->prev_cpu_wall = cur_wall_time;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396
venkatesh.pallipadi@intel.comc43aa3b2008-08-04 11:59:08 -0700397 idle_time = (unsigned int) cputime64_sub(cur_idle_time,
398 j_dbs_info->prev_cpu_idle);
399 j_dbs_info->prev_cpu_idle = cur_idle_time;
400
Venkatesh Pallipadi1ca3abd2009-01-23 09:25:02 -0500401 if (dbs_tuners_ins.ignore_nice) {
402 cputime64_t cur_nice;
403 unsigned long cur_nice_jiffies;
404
405 cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
406 j_dbs_info->prev_cpu_nice);
407 /*
408 * Assumption: nice time between sampling periods will
409 * be less than 2^32 jiffies for 32 bit sys
410 */
411 cur_nice_jiffies = (unsigned long)
412 cputime64_to_jiffies64(cur_nice);
413
414 j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
415 idle_time += jiffies_to_usecs(cur_nice_jiffies);
416 }
417
venkatesh.pallipadi@intel.com34305022008-08-04 11:59:09 -0700418 if (unlikely(!wall_time || wall_time < idle_time))
venkatesh.pallipadi@intel.comc43aa3b2008-08-04 11:59:08 -0700419 continue;
venkatesh.pallipadi@intel.comc43aa3b2008-08-04 11:59:08 -0700420
421 load = 100 * (wall_time - idle_time) / wall_time;
422
423 freq_avg = __cpufreq_driver_getavg(policy, j);
424 if (freq_avg <= 0)
425 freq_avg = policy->cur;
426
427 load_freq = load * freq_avg;
428 if (load_freq > max_load_freq)
429 max_load_freq = load_freq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 }
431
Venkatesh Pallipadiccb2fe22006-06-28 13:49:52 -0700432 /* Check for frequency increase */
venkatesh.pallipadi@intel.comc43aa3b2008-08-04 11:59:08 -0700433 if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
Dave Jonesc11420a2005-05-31 19:03:48 -0700434 /* if we are already at full speed then break out early */
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400435 if (!dbs_tuners_ins.powersave_bias) {
436 if (policy->cur == policy->max)
437 return;
Dave Jones32ee8c32006-02-28 00:43:23 -0500438
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400439 __cpufreq_driver_target(policy, policy->max,
440 CPUFREQ_RELATION_H);
441 } else {
442 int freq = powersave_bias_target(policy, policy->max,
443 CPUFREQ_RELATION_H);
444 __cpufreq_driver_target(policy, freq,
445 CPUFREQ_RELATION_L);
446 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 return;
448 }
449
450 /* Check for frequency decrease */
Dave Jonesc29f1402005-05-31 19:03:50 -0700451 /* if we cannot reduce the frequency anymore, break out early */
452 if (policy->cur == policy->min)
453 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454
Dave Jonesc29f1402005-05-31 19:03:50 -0700455 /*
456 * The optimal frequency is the frequency that is the lowest that
457 * can support the current CPU usage without triggering the up
458 * policy. To be safe, we focus 10 points under the threshold.
459 */
venkatesh.pallipadi@intel.come9d95bf2008-08-04 11:59:10 -0700460 if (max_load_freq <
461 (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
462 policy->cur) {
venkatesh.pallipadi@intel.comc43aa3b2008-08-04 11:59:08 -0700463 unsigned int freq_next;
venkatesh.pallipadi@intel.come9d95bf2008-08-04 11:59:10 -0700464 freq_next = max_load_freq /
465 (dbs_tuners_ins.up_threshold -
466 dbs_tuners_ins.down_differential);
Venkatesh Pallipadidfde5d62006-10-03 12:38:45 -0700467
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400468 if (!dbs_tuners_ins.powersave_bias) {
469 __cpufreq_driver_target(policy, freq_next,
470 CPUFREQ_RELATION_L);
471 } else {
472 int freq = powersave_bias_target(policy, freq_next,
473 CPUFREQ_RELATION_L);
474 __cpufreq_driver_target(policy, freq,
475 CPUFREQ_RELATION_L);
476 }
Venkatesh Pallipadiccb2fe22006-06-28 13:49:52 -0700477 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478}
479
David Howellsc4028952006-11-22 14:57:56 +0000480static void do_dbs_timer(struct work_struct *work)
Dave Jones32ee8c32006-02-28 00:43:23 -0500481{
Venkatesh Pallipadi529af7a2007-02-05 16:12:44 -0800482 struct cpu_dbs_info_s *dbs_info =
483 container_of(work, struct cpu_dbs_info_s, work.work);
484 unsigned int cpu = dbs_info->cpu;
485 int sample_type = dbs_info->sample_type;
486
Alexey Starikovskiy1ce28d62006-07-31 22:25:20 +0400487 /* We want all CPUs to do sampling nearly on same jiffy */
488 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
David Howellsc4028952006-11-22 14:57:56 +0000489
Alexey Starikovskiy1ce28d62006-07-31 22:25:20 +0400490 delay -= jiffies % delay;
Venkatesh Pallipadi2f8a8352006-06-28 13:51:19 -0700491
venkatesh.pallipadi@intel.com7d26e2d2009-07-02 17:08:30 -0700492 mutex_lock(&dbs_mutex);
Venkatesh Pallipadi56463b72007-02-05 16:12:45 -0800493
494 if (!dbs_info->enable) {
venkatesh.pallipadi@intel.com7d26e2d2009-07-02 17:08:30 -0700495 mutex_unlock(&dbs_mutex);
Venkatesh Pallipadi56463b72007-02-05 16:12:45 -0800496 return;
497 }
498
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400499 /* Common NORMAL_SAMPLE setup */
David Howellsc4028952006-11-22 14:57:56 +0000500 dbs_info->sample_type = DBS_NORMAL_SAMPLE;
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400501 if (!dbs_tuners_ins.powersave_bias ||
David Howellsc4028952006-11-22 14:57:56 +0000502 sample_type == DBS_NORMAL_SAMPLE) {
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400503 dbs_check_cpu(dbs_info);
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400504 if (dbs_info->freq_lo) {
505 /* Setup timer for SUB_SAMPLE */
David Howellsc4028952006-11-22 14:57:56 +0000506 dbs_info->sample_type = DBS_SUB_SAMPLE;
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400507 delay = dbs_info->freq_hi_jiffies;
508 }
509 } else {
510 __cpufreq_driver_target(dbs_info->cur_policy,
Dave Jones2b03f892009-01-18 01:43:44 -0500511 dbs_info->freq_lo, CPUFREQ_RELATION_H);
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400512 }
Alexey Starikovskiy1ce28d62006-07-31 22:25:20 +0400513 queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
venkatesh.pallipadi@intel.com7d26e2d2009-07-02 17:08:30 -0700514 mutex_unlock(&dbs_mutex);
Dave Jones32ee8c32006-02-28 00:43:23 -0500515}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
Venkatesh Pallipadi529af7a2007-02-05 16:12:44 -0800517static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518{
Alexey Starikovskiy1ce28d62006-07-31 22:25:20 +0400519 /* We want all CPUs to do sampling nearly on same jiffy */
520 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
521 delay -= jiffies % delay;
Venkatesh Pallipadi2f8a8352006-06-28 13:51:19 -0700522
Dave Jonesc18a1482007-02-10 20:03:51 -0500523 dbs_info->enable = 1;
Alexey Starikovskiy05ca0352006-07-31 22:28:12 +0400524 ondemand_powersave_bias_init();
David Howellsc4028952006-11-22 14:57:56 +0000525 dbs_info->sample_type = DBS_NORMAL_SAMPLE;
Venki Pallipadi28287032007-05-08 00:27:47 -0700526 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
Venkatesh Pallipadi529af7a2007-02-05 16:12:44 -0800527 queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
Dave Jones2b03f892009-01-18 01:43:44 -0500528 delay);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529}
530
Linus Torvalds2cd7cbd2006-07-23 12:05:00 -0700531static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532{
Linus Torvalds2cd7cbd2006-07-23 12:05:00 -0700533 dbs_info->enable = 0;
Mathieu Desnoyersb14893a2009-05-17 10:30:45 -0400534 cancel_delayed_work_sync(&dbs_info->work);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535}
536
537static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
538 unsigned int event)
539{
540 unsigned int cpu = policy->cpu;
541 struct cpu_dbs_info_s *this_dbs_info;
542 unsigned int j;
Jeff Garzik914f7c32006-10-20 14:31:00 -0700543 int rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544
545 this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
546
547 switch (event) {
548 case CPUFREQ_GOV_START:
Venkatesh Pallipadiffac80e2006-06-28 13:52:18 -0700549 if ((!cpu_online(cpu)) || (!policy->cur))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550 return -EINVAL;
551
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 if (this_dbs_info->enable) /* Already enabled */
553 break;
Dave Jones32ee8c32006-02-28 00:43:23 -0500554
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800555 mutex_lock(&dbs_mutex);
Venkatesh Pallipadi2f8a8352006-06-28 13:51:19 -0700556 dbs_enable++;
Jeff Garzik914f7c32006-10-20 14:31:00 -0700557
558 rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
559 if (rc) {
Jeff Garzik914f7c32006-10-20 14:31:00 -0700560 dbs_enable--;
561 mutex_unlock(&dbs_mutex);
562 return rc;
563 }
564
Rusty Russell835481d2009-01-04 05:18:06 -0800565 for_each_cpu(j, policy->cpus) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 struct cpu_dbs_info_s *j_dbs_info;
567 j_dbs_info = &per_cpu(cpu_dbs_info, j);
568 j_dbs_info->cur_policy = policy;
Dave Jones32ee8c32006-02-28 00:43:23 -0500569
venkatesh.pallipadi@intel.com34305022008-08-04 11:59:09 -0700570 j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
571 &j_dbs_info->prev_cpu_wall);
Venkatesh Pallipadi1ca3abd2009-01-23 09:25:02 -0500572 if (dbs_tuners_ins.ignore_nice) {
573 j_dbs_info->prev_cpu_nice =
574 kstat_cpu(j).cpustat.nice;
575 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 }
Venkatesh Pallipadi529af7a2007-02-05 16:12:44 -0800577 this_dbs_info->cpu = cpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 /*
579 * Start the timerschedule work, when this governor
580 * is used for first time
581 */
582 if (dbs_enable == 1) {
583 unsigned int latency;
584 /* policy latency is in nS. Convert it to uS first */
Dave Jonesdf8b59b2005-09-20 12:39:35 -0700585 latency = policy->cpuinfo.transition_latency / 1000;
586 if (latency == 0)
587 latency = 1;
Thomas Renningercef96152009-04-22 13:48:29 +0200588 /* Bring kernel and HW constraints together */
589 min_sampling_rate = max(min_sampling_rate,
590 MIN_LATENCY_MULTIPLIER * latency);
591 dbs_tuners_ins.sampling_rate =
592 max(min_sampling_rate,
593 latency * LATENCY_MULTIPLIER);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 }
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800595 mutex_unlock(&dbs_mutex);
venkatesh.pallipadi@intel.com7d26e2d2009-07-02 17:08:30 -0700596
597 dbs_timer_init(this_dbs_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 break;
599
600 case CPUFREQ_GOV_STOP:
Linus Torvalds2cd7cbd2006-07-23 12:05:00 -0700601 dbs_timer_exit(this_dbs_info);
venkatesh.pallipadi@intel.com7d26e2d2009-07-02 17:08:30 -0700602
603 mutex_lock(&dbs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 sysfs_remove_group(&policy->kobj, &dbs_attr_group);
605 dbs_enable--;
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800606 mutex_unlock(&dbs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607
608 break;
609
610 case CPUFREQ_GOV_LIMITS:
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800611 mutex_lock(&dbs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 if (policy->max < this_dbs_info->cur_policy->cur)
Venkatesh Pallipadiffac80e2006-06-28 13:52:18 -0700613 __cpufreq_driver_target(this_dbs_info->cur_policy,
Dave Jones2b03f892009-01-18 01:43:44 -0500614 policy->max, CPUFREQ_RELATION_H);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 else if (policy->min > this_dbs_info->cur_policy->cur)
Venkatesh Pallipadiffac80e2006-06-28 13:52:18 -0700616 __cpufreq_driver_target(this_dbs_info->cur_policy,
Dave Jones2b03f892009-01-18 01:43:44 -0500617 policy->min, CPUFREQ_RELATION_L);
akpm@osdl.org3fc54d32006-01-13 15:54:22 -0800618 mutex_unlock(&dbs_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 break;
620 }
621 return 0;
622}
623
Sven Wegenerc4d14bc2008-09-20 16:50:08 +0200624#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
625static
626#endif
Thomas Renninger1c256242007-10-02 13:28:12 -0700627struct cpufreq_governor cpufreq_gov_ondemand = {
628 .name = "ondemand",
629 .governor = cpufreq_governor_dbs,
630 .max_transition_latency = TRANSITION_LATENCY_LIMIT,
631 .owner = THIS_MODULE,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
634static int __init cpufreq_gov_dbs_init(void)
635{
Akinobu Mita888a7942008-07-14 12:00:45 +0900636 int err;
venkatesh.pallipadi@intel.com80800912008-08-04 11:59:12 -0700637 cputime64_t wall;
Andrea Righi4f6e6b92008-09-18 10:43:40 +0000638 u64 idle_time;
639 int cpu = get_cpu();
venkatesh.pallipadi@intel.com80800912008-08-04 11:59:12 -0700640
Andrea Righi4f6e6b92008-09-18 10:43:40 +0000641 idle_time = get_cpu_idle_time_us(cpu, &wall);
642 put_cpu();
venkatesh.pallipadi@intel.com80800912008-08-04 11:59:12 -0700643 if (idle_time != -1ULL) {
644 /* Idle micro accounting is supported. Use finer thresholds */
645 dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
646 dbs_tuners_ins.down_differential =
647 MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
Thomas Renningercef96152009-04-22 13:48:29 +0200648 /*
649 * In no_hz/micro accounting case we set the minimum frequency
650 * not depending on HZ, but fixed (very low). The deferred
651 * timer might skip some samples if idle/sleeping as needed.
652 */
653 min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
654 } else {
655 /* For correct statistics, we need 10 ticks for each measure */
656 min_sampling_rate =
657 MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
venkatesh.pallipadi@intel.com80800912008-08-04 11:59:12 -0700658 }
Akinobu Mita888a7942008-07-14 12:00:45 +0900659
Venkatesh Pallipadi56463b72007-02-05 16:12:45 -0800660 kondemand_wq = create_workqueue("kondemand");
661 if (!kondemand_wq) {
662 printk(KERN_ERR "Creation of kondemand failed\n");
663 return -EFAULT;
664 }
Akinobu Mita888a7942008-07-14 12:00:45 +0900665 err = cpufreq_register_governor(&cpufreq_gov_ondemand);
666 if (err)
667 destroy_workqueue(kondemand_wq);
668
669 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670}
671
672static void __exit cpufreq_gov_dbs_exit(void)
673{
Thomas Renninger1c256242007-10-02 13:28:12 -0700674 cpufreq_unregister_governor(&cpufreq_gov_ondemand);
Venkatesh Pallipadi56463b72007-02-05 16:12:45 -0800675 destroy_workqueue(kondemand_wq);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676}
677
678
Venkatesh Pallipadiffac80e2006-06-28 13:52:18 -0700679MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
680MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
681MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
Dave Jones2b03f892009-01-18 01:43:44 -0500682 "Low Latency Frequency Transition capable processors");
Venkatesh Pallipadiffac80e2006-06-28 13:52:18 -0700683MODULE_LICENSE("GPL");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
Johannes Weiner69157192008-01-17 15:21:08 -0800685#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
686fs_initcall(cpufreq_gov_dbs_init);
687#else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688module_init(cpufreq_gov_dbs_init);
Johannes Weiner69157192008-01-17 15:21:08 -0800689#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690module_exit(cpufreq_gov_dbs_exit);