/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/timer.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>

#include <trace/events/power.h>

#define MAX_CPUS_PER_GROUP 4

struct cpu_data {
	/* Per CPU data. */
	bool inited;
	bool online;
	bool rejected;
	bool is_busy;
	bool not_preferred;
	unsigned int busy;
	unsigned int cpu;
	struct list_head sib;
	unsigned int first_cpu;

	/* Per cluster data set only on first CPU */
	unsigned int min_cpus;
	unsigned int max_cpus;
	unsigned int offline_delay_ms;
	unsigned int busy_up_thres[MAX_CPUS_PER_GROUP];
	unsigned int busy_down_thres[MAX_CPUS_PER_GROUP];
	unsigned int online_cpus;
	unsigned int avail_cpus;
	unsigned int num_cpus;
	unsigned int need_cpus;
	unsigned int task_thres;
	s64 need_ts;
	struct list_head lru;
	bool pending;
	spinlock_t pending_lock;
	bool is_big_cluster;
	int nrrun;
	bool nrrun_changed;
	struct timer_list timer;
	struct task_struct *hotplug_thread;
	struct kobject kobj;
};

static DEFINE_PER_CPU(struct cpu_data, cpu_state);
static DEFINE_SPINLOCK(state_lock);
static void apply_need(struct cpu_data *f);
static void wake_up_hotplug_thread(struct cpu_data *state);

/* ========================= sysfs interface =========================== */

static ssize_t store_min_cpus(struct cpu_data *state,
			      const char *buf, size_t count)
{
	unsigned int val;

	if (sscanf(buf, "%u\n", &val) != 1)
		return -EINVAL;

	state->min_cpus = min(val, state->max_cpus);
	wake_up_hotplug_thread(state);

	return count;
}

static ssize_t show_min_cpus(struct cpu_data *state, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", state->min_cpus);
}

static ssize_t store_max_cpus(struct cpu_data *state,
			      const char *buf, size_t count)
{
	unsigned int val;

	if (sscanf(buf, "%u\n", &val) != 1)
		return -EINVAL;

	val = min(val, state->num_cpus);
	state->max_cpus = val;
	state->min_cpus = min(state->min_cpus, state->max_cpus);
	wake_up_hotplug_thread(state);

	return count;
}

static ssize_t show_max_cpus(struct cpu_data *state, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", state->max_cpus);
}

static ssize_t store_offline_delay_ms(struct cpu_data *state,
				      const char *buf, size_t count)
{
	unsigned int val;

	if (sscanf(buf, "%u\n", &val) != 1)
		return -EINVAL;

	state->offline_delay_ms = val;
	apply_need(state);

	return count;
}

static ssize_t show_task_thres(struct cpu_data *state, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", state->task_thres);
}

static ssize_t store_task_thres(struct cpu_data *state,
				const char *buf, size_t count)
{
	unsigned int val;

	if (sscanf(buf, "%u\n", &val) != 1)
		return -EINVAL;

	if (val < state->num_cpus)
		return -EINVAL;

	state->task_thres = val;
	apply_need(state);

	return count;
}

static ssize_t show_offline_delay_ms(struct cpu_data *state, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", state->offline_delay_ms);
}

static ssize_t store_busy_up_thres(struct cpu_data *state,
				   const char *buf, size_t count)
{
	unsigned int val[MAX_CPUS_PER_GROUP];
	int ret, i;

	ret = sscanf(buf, "%u %u %u %u\n", &val[0], &val[1], &val[2], &val[3]);
	if (ret != 1 && ret != state->num_cpus)
		return -EINVAL;

	if (ret == 1) {
		for (i = 0; i < state->num_cpus; i++)
			state->busy_up_thres[i] = val[0];
	} else {
		for (i = 0; i < state->num_cpus; i++)
			state->busy_up_thres[i] = val[i];
	}
	apply_need(state);
	return count;
}

static ssize_t show_busy_up_thres(struct cpu_data *state, char *buf)
{
	int i, count = 0;

	for (i = 0; i < state->num_cpus; i++)
		count += snprintf(buf + count, PAGE_SIZE - count, "%u ",
				  state->busy_up_thres[i]);
	count += snprintf(buf + count, PAGE_SIZE - count, "\n");
	return count;
}

static ssize_t store_busy_down_thres(struct cpu_data *state,
				     const char *buf, size_t count)
{
	unsigned int val[MAX_CPUS_PER_GROUP];
	int ret, i;

	ret = sscanf(buf, "%u %u %u %u\n", &val[0], &val[1], &val[2], &val[3]);
	if (ret != 1 && ret != state->num_cpus)
		return -EINVAL;

	if (ret == 1) {
		for (i = 0; i < state->num_cpus; i++)
			state->busy_down_thres[i] = val[0];
	} else {
		for (i = 0; i < state->num_cpus; i++)
			state->busy_down_thres[i] = val[i];
	}
	apply_need(state);
	return count;
}

static ssize_t show_busy_down_thres(struct cpu_data *state, char *buf)
{
	int i, count = 0;

	for (i = 0; i < state->num_cpus; i++)
		count += snprintf(buf + count, PAGE_SIZE - count, "%u ",
				  state->busy_down_thres[i]);
	count += snprintf(buf + count, PAGE_SIZE - count, "\n");
	return count;
}

static ssize_t store_is_big_cluster(struct cpu_data *state,
				    const char *buf, size_t count)
{
	unsigned int val;

	if (sscanf(buf, "%u\n", &val) != 1)
		return -EINVAL;

	state->is_big_cluster = val ? 1 : 0;
	return count;
}

static ssize_t show_is_big_cluster(struct cpu_data *state, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster);
}

static ssize_t show_cpus(struct cpu_data *state, char *buf)
{
	struct cpu_data *c;
	ssize_t count = 0;
	unsigned long flags;

	spin_lock_irqsave(&state_lock, flags);
	list_for_each_entry(c, &state->lru, sib) {
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "CPU%u (%s)\n", c->cpu,
				  c->online ? "Online" : "Offline");
	}
	spin_unlock_irqrestore(&state_lock, flags);
	return count;
}

static ssize_t show_need_cpus(struct cpu_data *state, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", state->need_cpus);
}

static ssize_t show_online_cpus(struct cpu_data *state, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", state->online_cpus);
}

static ssize_t show_global_state(struct cpu_data *state, char *buf)
{
	struct cpu_data *c;
	ssize_t count = 0;
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "CPU%u\n", cpu);
		c = &per_cpu(cpu_state, cpu);
		if (!c->inited)
			continue;
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tCPU: %u\n", c->cpu);
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tOnline: %u\n", c->online);
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tRejected: %u\n", c->rejected);
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tFirst CPU: %u\n", c->first_cpu);
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tBusy%%: %u\n", c->busy);
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tIs busy: %u\n", c->is_busy);
		if (c->cpu != c->first_cpu)
			continue;
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tNr running: %u\n", c->nrrun);
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tAvail CPUs: %u\n", c->avail_cpus);
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tNeed CPUs: %u\n", c->need_cpus);
	}

	return count;
}

static ssize_t store_not_preferred(struct cpu_data *state,
				   const char *buf, size_t count)
{
	struct cpu_data *c;
	unsigned int i, first_cpu;
	unsigned int val[MAX_CPUS_PER_GROUP];
	int ret;

	ret = sscanf(buf, "%u %u %u %u\n", &val[0], &val[1], &val[2], &val[3]);
	if (ret != 1 && ret != state->num_cpus)
		return -EINVAL;

	first_cpu = state->first_cpu;

	for (i = 0; i < state->num_cpus; i++) {
		c = &per_cpu(cpu_state, first_cpu);
		/* A single value applies to every CPU in the group. */
		c->not_preferred = (ret == 1) ? val[0] : val[i];
		first_cpu++;
	}

	return count;
}

static ssize_t show_not_preferred(struct cpu_data *state, char *buf)
{
	struct cpu_data *c;
	ssize_t count = 0;
	unsigned int i, first_cpu;

	first_cpu = state->first_cpu;

	for (i = 0; i < state->num_cpus; i++) {
		c = &per_cpu(cpu_state, first_cpu);
		count += snprintf(buf + count, PAGE_SIZE - count,
				  "\tCPU:%d %u\n", first_cpu, c->not_preferred);
		first_cpu++;
	}

	return count;
}

struct core_ctl_attr {
	struct attribute attr;
	ssize_t (*show)(struct cpu_data *, char *);
	ssize_t (*store)(struct cpu_data *, const char *, size_t count);
};

#define core_ctl_attr_ro(_name) \
static struct core_ctl_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)

#define core_ctl_attr_rw(_name) \
static struct core_ctl_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

core_ctl_attr_rw(min_cpus);
core_ctl_attr_rw(max_cpus);
core_ctl_attr_rw(offline_delay_ms);
core_ctl_attr_rw(busy_up_thres);
core_ctl_attr_rw(busy_down_thres);
core_ctl_attr_rw(task_thres);
core_ctl_attr_rw(is_big_cluster);
core_ctl_attr_ro(cpus);
core_ctl_attr_ro(need_cpus);
core_ctl_attr_ro(online_cpus);
core_ctl_attr_ro(global_state);
core_ctl_attr_rw(not_preferred);

static struct attribute *default_attrs[] = {
	&min_cpus.attr,
	&max_cpus.attr,
	&offline_delay_ms.attr,
	&busy_up_thres.attr,
	&busy_down_thres.attr,
	&task_thres.attr,
	&is_big_cluster.attr,
	&cpus.attr,
	&need_cpus.attr,
	&online_cpus.attr,
	&global_state.attr,
	&not_preferred.attr,
	NULL
};

#define to_cpu_data(k) container_of(k, struct cpu_data, kobj)
#define to_attr(a) container_of(a, struct core_ctl_attr, attr)
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct cpu_data *data = to_cpu_data(kobj);
	struct core_ctl_attr *cattr = to_attr(attr);
	ssize_t ret = -EIO;

	if (cattr->show)
		ret = cattr->show(data, buf);

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct cpu_data *data = to_cpu_data(kobj);
	struct core_ctl_attr *cattr = to_attr(attr);
	ssize_t ret = -EIO;

	if (cattr->store)
		ret = cattr->store(data, buf, count);

	return ret;
}

static const struct sysfs_ops sysfs_ops = {
	.show = show,
	.store = store,
};

static struct kobj_type ktype_core_ctl = {
	.sysfs_ops = &sysfs_ops,
	.default_attrs = default_attrs,
};

/* ==================== runqueue based core count =================== */

#define RQ_AVG_TOLERANCE 2
#define RQ_AVG_DEFAULT_MS 20
#define NR_RUNNING_TOLERANCE 5
static unsigned int rq_avg_period_ms = RQ_AVG_DEFAULT_MS;

static s64 rq_avg_timestamp_ms;
static struct timer_list rq_avg_timer;

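/*
 * Sample the scheduler's running-task averages (at most once per
 * rq_avg_period_ms) and store the per-cluster result in nrrun. When
 * trigger_update is true, re-evaluate the CPU need immediately;
 * otherwise just note that nrrun changed.
 */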
static void update_running_avg(bool trigger_update)
{
	int cpu;
	struct cpu_data *pcpu;
	int avg, iowait_avg, big_avg, old_nrrun;
	s64 now;
	unsigned long flags;

	spin_lock_irqsave(&state_lock, flags);

	now = ktime_to_ms(ktime_get());
	if (now - rq_avg_timestamp_ms < rq_avg_period_ms - RQ_AVG_TOLERANCE) {
		spin_unlock_irqrestore(&state_lock, flags);
		return;
	}
	rq_avg_timestamp_ms = now;
	sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg);

	spin_unlock_irqrestore(&state_lock, flags);

	/*
	 * Round up to the next integer if the average number of running
	 * tasks is within NR_RUNNING_TOLERANCE/100 of the next integer.
	 * If normal rounding up were used, a transient task could trigger
	 * an online event, and by the time the core is onlined the task
	 * has already finished.
	 * Rounding to the closest integer suffers from the same problem,
	 * because the scheduler might only provide running stats once per
	 * jiffy, and a transient task could skew the number for one jiffy.
	 * If core control samples every 2 jiffies, it will observe an
	 * additional 0.5 in the running average, which rounds up to 1 task.
	 */
	avg = (avg + NR_RUNNING_TOLERANCE) / 100;
	big_avg = (big_avg + NR_RUNNING_TOLERANCE) / 100;

	for_each_possible_cpu(cpu) {
		pcpu = &per_cpu(cpu_state, cpu);
		if (!pcpu->inited || pcpu->first_cpu != cpu)
			continue;
		old_nrrun = pcpu->nrrun;
		/*
		 * The big cluster only needs to take care of big tasks, but
		 * if there are not enough big cores, big tasks need to run
		 * on the little cluster as well. Thus, for the little
		 * cluster's runqueue stat, it has to use the overall
		 * runqueue average, or derive how many big tasks would have
		 * to run on little cores. The latter is hard to get, given
		 * that core control reacts much more slowly than the
		 * scheduler and can't predict its behavior.
		 */
		pcpu->nrrun = pcpu->is_big_cluster ? big_avg : avg;
		if (pcpu->nrrun != old_nrrun) {
			if (trigger_update)
				apply_need(pcpu);
			else
				pcpu->nrrun_changed = true;
		}
	}
}

/* adjust needed CPUs based on current runqueue information */
static unsigned int apply_task_need(struct cpu_data *f, unsigned int new_need)
{
	/* Online all cores if there are enough tasks */
	if (f->nrrun >= f->task_thres)
		return f->num_cpus;

	/* only online more cores if there are tasks to run */
	if (f->nrrun > new_need)
		return new_need + 1;

	return new_need;
}

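/* Align the next sample to the start of the next rq_avg_period_ms window. */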
static u64 round_to_nw_start(void)
{
	unsigned long step = msecs_to_jiffies(rq_avg_period_ms);
	u64 jif = get_jiffies_64();

	do_div(jif, step);
	return (jif + 1) * step;
}

static void rq_avg_timer_func(unsigned long not_used)
{
	update_running_avg(true);
	mod_timer(&rq_avg_timer, round_to_nw_start());
}

/* ======================= load based core count ====================== */

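/* Clamp the needed CPU count to the [min_cpus, max_cpus] range. */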
static unsigned int apply_limits(struct cpu_data *f, unsigned int need_cpus)
{
	return min(max(f->min_cpus, need_cpus), f->max_cpus);
}

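/*
 * Recompute how many CPUs this group needs from the per-CPU busy
 * thresholds and the runqueue average, and report whether the limited
 * need changed. A drop in need is only accepted after offline_delay_ms;
 * until then the group timer is armed to re-evaluate later.
 */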
static bool eval_need(struct cpu_data *f)
{
	unsigned long flags;
	struct cpu_data *c;
	unsigned int need_cpus = 0, last_need, thres_idx;
	int ret = 0;
	bool need_flag = false;
	s64 now;

	if (unlikely(!f->inited))
		return 0;

	spin_lock_irqsave(&state_lock, flags);
	thres_idx = f->online_cpus ? f->online_cpus - 1 : 0;
	list_for_each_entry(c, &f->lru, sib) {
		if (c->busy >= f->busy_up_thres[thres_idx])
			c->is_busy = true;
		else if (c->busy < f->busy_down_thres[thres_idx])
			c->is_busy = false;
		need_cpus += c->is_busy;
	}
	need_cpus = apply_task_need(f, need_cpus);
	need_flag = apply_limits(f, need_cpus) != apply_limits(f, f->need_cpus);
	last_need = f->need_cpus;

	now = ktime_to_ms(ktime_get());

	if (need_cpus == last_need) {
		f->need_ts = now;
		spin_unlock_irqrestore(&state_lock, flags);
		return 0;
	}

	if (need_cpus > last_need) {
		ret = 1;
	} else if (need_cpus < last_need) {
		s64 elapsed = now - f->need_ts;

		if (elapsed >= f->offline_delay_ms) {
			ret = 1;
		} else {
			mod_timer(&f->timer, jiffies +
				  msecs_to_jiffies(f->offline_delay_ms));
		}
	}

	if (ret) {
		f->need_ts = now;
		f->need_cpus = need_cpus;
	}

	trace_core_ctl_eval_need(f->cpu, last_need, need_cpus,
				 ret && need_flag);
	spin_unlock_irqrestore(&state_lock, flags);

	return ret && need_flag;
}

static void apply_need(struct cpu_data *f)
{
	if (eval_need(f))
		wake_up_hotplug_thread(f);
}

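/*
 * Record the latest busy (load) percentage reported for a CPU and
 * re-evaluate its group's CPU need if the load or the runqueue
 * average has changed.
 */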
static int core_ctl_set_busy(unsigned int cpu, unsigned int busy)
{
	struct cpu_data *c = &per_cpu(cpu_state, cpu);
	struct cpu_data *f;
	unsigned int old_is_busy = c->is_busy;

	if (!c->inited)
		return 0;
	f = &per_cpu(cpu_state, c->first_cpu);

	update_running_avg(false);
	if (c->busy == busy && !f->nrrun_changed)
		return 0;
	c->busy = busy;
	f->nrrun_changed = false;

	apply_need(f);
	trace_core_ctl_set_busy(cpu, busy, old_is_busy, c->is_busy);
	return 0;
}

/* ========================= core count enforcement ==================== */

/*
 * If the current thread is a hotplug thread, don't attempt to wake up
 * itself or other hotplug threads, because that would deadlock.
 * Instead, schedule a timer to fire at the next timer tick and wake up
 * the thread from there.
 */
static void wake_up_hotplug_thread(struct cpu_data *state)
{
	unsigned long flags;
	int cpu;
	struct cpu_data *pcpu;
	bool no_wakeup = false;

	for_each_possible_cpu(cpu) {
		pcpu = &per_cpu(cpu_state, cpu);
		if (cpu != pcpu->first_cpu)
			continue;
		if (pcpu->hotplug_thread == current) {
			no_wakeup = true;
			break;
		}
	}

	spin_lock_irqsave(&state->pending_lock, flags);
	state->pending = true;
	spin_unlock_irqrestore(&state->pending_lock, flags);

	if (no_wakeup) {
		spin_lock_irqsave(&state_lock, flags);
		mod_timer(&state->timer, jiffies);
		spin_unlock_irqrestore(&state_lock, flags);
	} else {
		wake_up_process(state->hotplug_thread);
	}
}

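/*
 * Group timer handler: re-evaluate the need and, if it changed, mark
 * work pending and wake the group's hotplug thread.
 */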
static void core_ctl_timer_func(unsigned long cpu)
{
	struct cpu_data *state = &per_cpu(cpu_state, cpu);
	unsigned long flags;

	if (eval_need(state)) {
		spin_lock_irqsave(&state->pending_lock, flags);
		state->pending = true;
		spin_unlock_irqrestore(&state->pending_lock, flags);
		wake_up_process(state->hotplug_thread);
	}
}

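/*
 * Helpers to online/offline a single CPU via its CPU device, under the
 * device hotplug lock.
 */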
static int core_ctl_online_core(unsigned int cpu)
{
	int ret;
	struct device *dev;

	lock_device_hotplug();
	dev = get_cpu_device(cpu);
	if (!dev) {
		pr_err("%s: failed to get cpu%d device\n", __func__, cpu);
		ret = -ENODEV;
	} else {
		ret = device_online(dev);
	}
	unlock_device_hotplug();
	return ret;
}

static int core_ctl_offline_core(unsigned int cpu)
{
	int ret;
	struct device *dev;

	lock_device_hotplug();
	dev = get_cpu_device(cpu);
	if (!dev) {
		pr_err("%s: failed to get cpu%d device\n", __func__, cpu);
		ret = -ENODEV;
	} else {
		ret = device_offline(dev);
	}
	unlock_device_hotplug();
	return ret;
}

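/*
 * Bring the group's online CPU count in line with the limited need:
 * offline idle CPUs first (busy CPUs only when above max_cpus), and
 * online preferred CPUs before not_preferred ones, walking the LRU list.
 */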
static void __ref do_hotplug(struct cpu_data *f)
{
	unsigned int need;
	struct cpu_data *c, *tmp;

	need = apply_limits(f, f->need_cpus);
	pr_debug("Trying to adjust group %u to %u\n", f->first_cpu, need);

	if (f->online_cpus > need) {
		list_for_each_entry_safe(c, tmp, &f->lru, sib) {
			if (!c->online)
				continue;

			if (f->online_cpus == need)
				break;

			/* Don't offline busy CPUs. */
			if (c->is_busy)
				continue;

			pr_debug("Trying to Offline CPU%u\n", c->cpu);
			if (core_ctl_offline_core(c->cpu))
				pr_debug("Unable to Offline CPU%u\n", c->cpu);
		}

		/*
		 * If the number of online CPUs is within the limits, then
		 * don't force any busy CPUs offline.
		 */
		if (f->online_cpus <= f->max_cpus)
			return;

		list_for_each_entry_safe(c, tmp, &f->lru, sib) {
			if (!c->online)
				continue;

			if (f->online_cpus <= f->max_cpus)
				break;

			pr_debug("Trying to Offline CPU%u\n", c->cpu);
			if (core_ctl_offline_core(c->cpu))
				pr_debug("Unable to Offline CPU%u\n", c->cpu);
		}
	} else if (f->online_cpus < need) {
		list_for_each_entry_safe(c, tmp, &f->lru, sib) {
			if (c->online || c->rejected || c->not_preferred)
				continue;
			if (f->online_cpus == need)
				break;

			pr_debug("Trying to Online CPU%u\n", c->cpu);
			if (core_ctl_online_core(c->cpu))
				pr_debug("Unable to Online CPU%u\n", c->cpu);
		}

		if (f->online_cpus == need)
			return;

		list_for_each_entry_safe(c, tmp, &f->lru, sib) {
			if (c->online || c->rejected || !c->not_preferred)
				continue;
			if (f->online_cpus == need)
				break;

			pr_debug("Trying to Online CPU%u\n", c->cpu);
			if (core_ctl_online_core(c->cpu))
				pr_debug("Unable to Online CPU%u\n", c->cpu);
		}
	}
}

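/*
 * Per-group hotplug kthread: sleep until work is marked pending, then
 * apply the current need via do_hotplug().
 */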
static int __ref try_hotplug(void *data)
{
	struct cpu_data *f = data;
	unsigned long flags;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		spin_lock_irqsave(&f->pending_lock, flags);
		if (!f->pending) {
			spin_unlock_irqrestore(&f->pending_lock, flags);
			schedule();
			if (kthread_should_stop())
				break;
			spin_lock_irqsave(&f->pending_lock, flags);
		}
		set_current_state(TASK_RUNNING);
		f->pending = false;
		spin_unlock_irqrestore(&f->pending_lock, flags);

		do_hotplug(f);
	}

	return 0;
}

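/*
 * CPU hotplug notifier: keep the group's bookkeeping (online/rejected
 * counts, LRU order) in sync with actual hotplug events and veto
 * onlining of CPUs beyond the current need.
 */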
static int __ref cpu_callback(struct notifier_block *nfb,
			      unsigned long action, void *hcpu)
{
	uint32_t cpu = (uintptr_t)hcpu;
	struct cpu_data *state = &per_cpu(cpu_state, cpu);
	struct cpu_data *f;
	int ret = NOTIFY_OK;
	unsigned long flags;

	/* Don't affect suspend/resume */
	if (action & CPU_TASKS_FROZEN)
		return NOTIFY_OK;

	if (unlikely(!state->inited))
		return NOTIFY_OK;

	f = &per_cpu(cpu_state, state->first_cpu);

	switch (action) {
	case CPU_UP_PREPARE:

		/* If online state of CPU somehow got out of sync, fix it. */
		if (state->online) {
			f->online_cpus--;
			state->online = false;
			pr_warn("CPU%d offline when state is online\n", cpu);
		}

		if (state->rejected) {
			state->rejected = false;
			f->avail_cpus++;
		}

		/*
		 * If a CPU is in the process of coming up, mark it as online
		 * so that there's no race with the hotplug thread bringing
		 * up more CPUs than necessary.
		 */
		if (apply_limits(f, f->need_cpus) <= f->online_cpus) {
			pr_debug("Prevent CPU%d onlining\n", cpu);
			ret = NOTIFY_BAD;
		} else {
			state->online = true;
			f->online_cpus++;
		}
		break;

	case CPU_ONLINE:
		/*
		 * Moving to the end of the list should only happen in
		 * CPU_ONLINE and not on CPU_UP_PREPARE to prevent an
		 * infinite list traversal when thermal (or other entities)
		 * reject trying to online CPUs.
		 */
		spin_lock_irqsave(&state_lock, flags);
		list_del(&state->sib);
		list_add_tail(&state->sib, &f->lru);
		spin_unlock_irqrestore(&state_lock, flags);
		break;

	case CPU_DEAD:
		/* Move a CPU to the end of the LRU when it goes offline. */
		spin_lock_irqsave(&state_lock, flags);
		list_del(&state->sib);
		list_add_tail(&state->sib, &f->lru);
		spin_unlock_irqrestore(&state_lock, flags);

		/* Fall through */

	case CPU_UP_CANCELED:

		/* If online state of CPU somehow got out of sync, fix it. */
		if (!state->online) {
			f->online_cpus++;
			pr_warn("CPU%d online when state is offline\n", cpu);
		}

		if (!state->rejected && action == CPU_UP_CANCELED) {
			state->rejected = true;
			f->avail_cpus--;
		}

		state->online = false;
		state->busy = 0;
		f->online_cpus--;
		break;
	}

	if (f->online_cpus < apply_limits(f, f->need_cpus)
	    && f->online_cpus < f->avail_cpus
	    && action == CPU_DEAD)
		wake_up_hotplug_thread(f);

	return ret;
}

static struct notifier_block __refdata cpu_notifier = {
	.notifier_call = cpu_callback,
};

/* ============================ init code ============================== */

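/*
 * Set up per-cluster state for the CPUs in @mask: defaults, the group
 * timer, the hotplug kthread, and the "core_ctl" sysfs node under the
 * first CPU's device.
 */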
static int group_init(struct cpumask *mask)
{
	struct device *dev;
	unsigned int first_cpu = cpumask_first(mask);
	struct cpu_data *f = &per_cpu(cpu_state, first_cpu);
	struct cpu_data *state;
	unsigned int cpu;
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

	if (likely(f->inited))
		return 0;

	dev = get_cpu_device(first_cpu);
	if (!dev)
		return -ENODEV;

	pr_info("Creating CPU group %d\n", first_cpu);

	f->num_cpus = cpumask_weight(mask);
	if (f->num_cpus > MAX_CPUS_PER_GROUP) {
		pr_err("HW configuration not supported\n");
		return -EINVAL;
	}
	f->min_cpus = 1;
	f->max_cpus = f->num_cpus;
	f->need_cpus = f->num_cpus;
	f->avail_cpus = f->num_cpus;
	f->offline_delay_ms = 100;
	f->task_thres = UINT_MAX;
	f->nrrun = f->num_cpus;
	INIT_LIST_HEAD(&f->lru);
	init_timer(&f->timer);
	spin_lock_init(&f->pending_lock);
	f->timer.function = core_ctl_timer_func;
	f->timer.data = first_cpu;

	for_each_cpu(cpu, mask) {
		pr_info("Init CPU%u state\n", cpu);

		state = &per_cpu(cpu_state, cpu);
		state->cpu = cpu;
		state->first_cpu = first_cpu;

		if (cpu_online(cpu)) {
			f->online_cpus++;
			state->online = true;
		}

		list_add_tail(&state->sib, &f->lru);
	}

	f->hotplug_thread = kthread_run(try_hotplug, (void *) f,
					"core_ctl/%d", first_cpu);
	sched_setscheduler_nocheck(f->hotplug_thread, SCHED_FIFO, &param);

	for_each_cpu(cpu, mask) {
		state = &per_cpu(cpu_state, cpu);
		state->inited = true;
	}

	kobject_init(&f->kobj, &ktype_core_ctl);
	return kobject_add(&f->kobj, &dev->kobj, "core_ctl");
}

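/* Create a CPU group when a cpufreq policy is created for a cluster. */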
static int cpufreq_policy_cb(struct notifier_block *nb, unsigned long val,
			     void *data)
{
	struct cpufreq_policy *policy = data;

	switch (val) {
	case CPUFREQ_CREATE_POLICY:
		group_init(policy->related_cpus);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block cpufreq_pol_nb = {
	.notifier_call = cpufreq_policy_cb,
};

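/* Feed per-CPU load reported by the cpufreq governor into core control. */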
static int cpufreq_gov_cb(struct notifier_block *nb, unsigned long val,
			  void *data)
{
	struct cpufreq_govinfo *info = data;

	switch (val) {
	case CPUFREQ_LOAD_CHANGE:
		core_ctl_set_busy(info->cpu, info->load);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block cpufreq_gov_nb = {
	.notifier_call = cpufreq_gov_cb,
};

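/*
 * Register the hotplug and cpufreq notifiers, initialize groups for the
 * already-online CPUs, and start the deferrable runqueue-average timer.
 */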
static int __init core_ctl_init(void)
{
	struct cpufreq_policy *policy;
	unsigned int cpu;

	register_cpu_notifier(&cpu_notifier);
	cpufreq_register_notifier(&cpufreq_pol_nb, CPUFREQ_POLICY_NOTIFIER);
	cpufreq_register_notifier(&cpufreq_gov_nb, CPUFREQ_GOVINFO_NOTIFIER);
	init_timer_deferrable(&rq_avg_timer);
	rq_avg_timer.function = rq_avg_timer_func;

	get_online_cpus();
	for_each_online_cpu(cpu) {
		policy = cpufreq_cpu_get(cpu);
		if (policy) {
			group_init(policy->related_cpus);
			cpufreq_cpu_put(policy);
		}
	}
	put_online_cpus();
	mod_timer(&rq_avg_timer, round_to_nw_start());
	return 0;
}

late_initcall(core_ctl_init);