#include <linux/cgroup.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <trace/events/sched.h>

#include "sched.h"
#include "tune.h"

#ifdef CONFIG_CGROUP_SCHEDTUNE
bool schedtune_initialized = false;
#endif

unsigned int sysctl_sched_cfs_boost __read_mostly;

extern struct reciprocal_value schedtune_spc_rdiv;
struct target_nrg schedtune_target_nrg;

/* Performance Boost region (B) threshold params */
static int perf_boost_idx;

/* Performance Constraint region (C) threshold params */
static int perf_constrain_idx;

/**
 * Performance-Energy (P-E) Space threshold constants
 */
struct threshold_params {
        int nrg_gain;
        int cap_gain;
};

/*
 * System-specific P-E space threshold constants
 */
static struct threshold_params
threshold_gains[] = {
        { 0, 5 }, /*   < 10% */
        { 1, 5 }, /*   < 20% */
        { 2, 5 }, /*   < 30% */
        { 3, 5 }, /*   < 40% */
        { 4, 5 }, /*   < 50% */
        { 5, 4 }, /*   < 60% */
        { 5, 3 }, /*   < 70% */
        { 5, 2 }, /*   < 80% */
        { 5, 1 }, /*   < 90% */
        { 5, 0 }  /* <= 100% */
};

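/*
 * Example: boost_write() and sysctl_sched_cfs_boost_handler() below map a
 * boost percentage to an index into this table with clamp(boost, 0, 99) / 10.
 * A 25% boost thus selects { 2, 5 }, while a 90% boost selects { 5, 0 }.
 */
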
static int
__schedtune_accept_deltas(int nrg_delta, int cap_delta,
                          int perf_boost_idx, int perf_constrain_idx)
{
        int payoff = -INT_MAX;
        int gain_idx = -1;

        /* Performance Boost (B) region */
        if (nrg_delta >= 0 && cap_delta > 0)
                gain_idx = perf_boost_idx;
        /* Performance Constraint (C) region */
        else if (nrg_delta < 0 && cap_delta <= 0)
                gain_idx = perf_constrain_idx;

        /* Default: reject schedule candidate */
        if (gain_idx == -1)
                return payoff;

        /*
         * Evaluate "Performance Boost" vs "Energy Increase"
         *
         * - Performance Boost (B) region
         *
         *   Condition: nrg_delta > 0 && cap_delta > 0
         *   Payoff criteria:
         *      cap_gain / nrg_gain  < cap_delta / nrg_delta =
         *      cap_gain * nrg_delta < cap_delta * nrg_gain
         *   Note that since both nrg_gain and nrg_delta are positive, the
         *   inequality does not change. Thus:
         *
         *      payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta)
         *
         * - Performance Constraint (C) region
         *
         *   Condition: nrg_delta < 0 && cap_delta < 0
         *   Payoff criteria:
         *      cap_gain / nrg_gain  > cap_delta / nrg_delta =
         *      cap_gain * nrg_delta < cap_delta * nrg_gain
         *   Note that since nrg_gain > 0 while nrg_delta < 0, the
         *   inequality changes. Thus:
         *
         *      payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta)
         *
         * This means that, in the case of the same positive {cap,nrg}_gain
         * values for both the B and C regions, we can use the same payoff
         * formula, where a positive value represents the accept condition.
         */
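        /*
         * Worked example (illustrative numbers only): with threshold_gains
         * entry { nrg_gain = 5, cap_gain = 2 } and deltas cap_delta = 30,
         * nrg_delta = 10, the expression below gives
         * payoff = 30 * 5 - 10 * 2 = 130 > 0, i.e. the candidate is accepted.
         */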
        payoff  = cap_delta * threshold_gains[gain_idx].nrg_gain;
        payoff -= nrg_delta * threshold_gains[gain_idx].cap_gain;

        return payoff;
}

#ifdef CONFIG_CGROUP_SCHEDTUNE

/*
 * EAS scheduler tunables for task groups.
 *
 * When CGroup support is enabled, we have to synchronize two different
 * paths:
 *  - slow path: where CGroups are created/updated/removed
 *  - fast path: where tasks in a CGroup are accounted
 *
 * The slow path tracks (a limited number of) CGroups and maps each on a
 * "boost_group" index. The fastpath accounts tasks currently RUNNABLE on each
 * "boost_group".
 *
 * Once a new CGroup is created, a boost group idx is assigned and the
 * corresponding "boost_group" marked as valid on each CPU.
 * Once a CGroup is released, the corresponding "boost_group" is marked as
 * invalid on each CPU. The CPU boost value (boost_max) is aggregated by
 * considering only valid boost_groups with a non-null tasks counter.
 *
 * .:: Locking strategy
 *
 * The fast path uses a spin lock for each CPU boost_group which protects the
 * tasks counter.
 *
 * The "valid" and "boost" values of each CPU boost_group are instead
 * protected by the RCU lock provided by the CGroups callbacks. Thus, only the
 * slow path can access and modify the boost_group attributes of each CPU.
 * The fast path will catch up with the most recent values at the next
 * scheduling event (i.e. enqueue/dequeue).
 *
 *                                                      |
 *         SLOW PATH                                    | FAST PATH
 *             CGroup add/update/remove                 | Scheduler enqueue/dequeue events
 *                                                      |
 *                                                      |
 *                                                      |     DEFINE_PER_CPU(struct boost_groups)
 *                                                      |     +--------------+----+---+----+----+
 *                                                      |     |  idle        |    |   |    |    |
 *                                                      |     |  boost_max   |    |   |    |    |
 *                                                      |  +---->lock        |    |   |    |    |
 * struct schedtune                 allocated_groups    |  |  |  group[ ]    |    |   |    |    |
 * +------------------------------+        +-------+    |  |  +--+---------+-+----+---+----+----+
 * | idx                          |        |       |    |  |     |  valid  |
 * | boost / prefer_idle          |        |       |    |  |     |  boost  |
 * | perf_{boost/constraints}_idx | <---------+(*) |    |  |     |  tasks  | <------------+
 * | css                          |        +-------+    |  |     +---------+              |
 * +-+----------------------------+        |       |    |  |     |         |              |
 *   ^                                     |       |    |  |     |         |              |
 *   |                                     +-------+    |  |     +---------+              |
 *   |                                     |       |    |  |     |         |              |
 *   |                                     |       |    |  |     |         |              |
 *   |                                     +-------+    |  |     +---------+              |
 *   | zmalloc                             |       |    |  |     |         |              |
 *   |                                     |       |    |  |     |         |              |
 *   |                                     +-------+    |  |     +---------+              |
 *   + BOOSTGROUPS_COUNT                                 |  |  BOOSTGROUPS_COUNT           |
 *     schedtune_boostgroup_init()                       |  +                              |
 *                                                       |  schedtune_{en,de}queue_task()  |
 *                                                       |                                 +
 *                                                       |  schedtune_tasks_update()
 *                                                       |
 */

/* SchedTune tunables for a group of tasks */
struct schedtune {
        /* SchedTune CGroup subsystem */
        struct cgroup_subsys_state css;

        /* Boost group allocated ID */
        int idx;

        /* Boost value for tasks on that SchedTune CGroup */
        int boost;

#ifdef CONFIG_SCHED_WALT
        /* Toggle ability to override sched boost enabled */
        bool sched_boost_no_override;

        /*
         * Controls whether a cgroup is eligible for sched boost or not. This
         * can temporarily be disabled by the kernel based on the no_override
         * flag above.
         */
        bool sched_boost_enabled;

        /*
         * This tracks the default value of sched_boost_enabled and is used
         * to restore the value following any temporary changes to that flag.
         */
        bool sched_boost_enabled_backup;

        /*
         * Controls whether tasks of this cgroup should be colocated with each
         * other and tasks of other cgroups that have the same flag turned on.
         */
        bool colocate;

        /* Controls whether further updates are allowed to the colocate flag */
        bool colocate_update_disabled;
#endif /* CONFIG_SCHED_WALT */

        /* Performance Boost (B) region threshold params */
        int perf_boost_idx;

        /* Performance Constraint (C) region threshold params */
        int perf_constrain_idx;

        /*
         * Hint to bias scheduling of tasks on that SchedTune CGroup
         * towards idle CPUs
         */
        int prefer_idle;
};
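
/*
 * The attributes above are exposed to user space through the cgroup files
 * declared in the files[] array below (boost, prefer_idle and, when
 * CONFIG_SCHED_WALT is enabled, the sched_boost_* and colocate controls).
 */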

static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
{
        return container_of(css, struct schedtune, css);
}

static inline struct schedtune *task_schedtune(struct task_struct *tsk)
{
        return css_st(task_css(tsk, schedtune_cgrp_id));
}

static inline struct schedtune *parent_st(struct schedtune *st)
{
        return css_st(st->css.parent);
}

/*
 * SchedTune root control group
 * The root control group is used to define a system-wide boosting tuning,
 * which is applied to all tasks in the system.
 * Task specific boost tuning could be specified by creating and
 * configuring a child control group under the root one.
 * By default, system-wide boosting is disabled, i.e. no boosting is applied
 * to tasks which are not in a child control group.
 */
static struct schedtune
root_schedtune = {
        .boost = 0,
#ifdef CONFIG_SCHED_WALT
        .sched_boost_no_override = false,
        .sched_boost_enabled = true,
        .sched_boost_enabled_backup = true,
        .colocate = false,
        .colocate_update_disabled = false,
#endif
        .perf_boost_idx = 0,
        .perf_constrain_idx = 0,
        .prefer_idle = 0,
};

int
schedtune_accept_deltas(int nrg_delta, int cap_delta,
                        struct task_struct *task)
{
        struct schedtune *ct;
        int perf_boost_idx;
        int perf_constrain_idx;

        /* Optimal (O) region */
        if (nrg_delta < 0 && cap_delta > 0) {
                trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
                return INT_MAX;
        }

        /* Suboptimal (S) region */
        if (nrg_delta > 0 && cap_delta < 0) {
                trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
                return -INT_MAX;
        }

        /* Get task specific perf Boost/Constraints indexes */
        rcu_read_lock();
        ct = task_schedtune(task);
        perf_boost_idx = ct->perf_boost_idx;
        perf_constrain_idx = ct->perf_constrain_idx;
        rcu_read_unlock();

        return __schedtune_accept_deltas(nrg_delta, cap_delta,
                        perf_boost_idx, perf_constrain_idx);
}

/*
 * Maximum number of boost groups to support
 * When per-task boosting is used we still allow only a limited number of
 * boost groups for two main reasons:
 * 1. on a real system we usually have only a few classes of workloads which
 *    make sense to boost with different values (e.g. background vs foreground
 *    tasks, interactive vs low-priority tasks)
 * 2. a limited number allows for a simpler and more memory/time efficient
 *    implementation especially for the computation of the per-CPU boost
 *    value
 */
#define BOOSTGROUPS_COUNT 5

/* Array of configured boostgroups */
static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
        &root_schedtune,
        NULL,
};

/* SchedTune boost groups
 * Keep track of all the boost groups which impact each CPU, for example when
 * a CPU has two RUNNABLE tasks belonging to two different boost groups and
 * thus likely with different boost values.
 * Since on each system we expect only a limited number of boost groups, here
 * we use a simple array to keep track of the metrics required to compute the
 * maximum per-CPU boosting value.
 */
struct boost_groups {
        /* Maximum boost value for all RUNNABLE tasks on a CPU */
        int boost_max;
        struct {
                /* True when this boost group maps an actual cgroup */
                bool valid;
                /* The boost for tasks on that boost group */
                int boost;
                /* Count of RUNNABLE tasks on that boost group */
                unsigned tasks;
        } group[BOOSTGROUPS_COUNT];
        /* CPU's boost group locking */
        raw_spinlock_t lock;
};

/* Boost groups affecting each CPU in the system */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);

#ifdef CONFIG_SCHED_WALT
static inline void init_sched_boost(struct schedtune *st)
{
        st->sched_boost_no_override = false;
        st->sched_boost_enabled = true;
        st->sched_boost_enabled_backup = st->sched_boost_enabled;
        st->colocate = false;
        st->colocate_update_disabled = false;
}

bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
{
        return task_schedtune(tsk1) == task_schedtune(tsk2);
}

void update_cgroup_boost_settings(void)
{
        int i;

        for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
                if (!allocated_group[i])
                        break;

                if (allocated_group[i]->sched_boost_no_override)
                        continue;

                allocated_group[i]->sched_boost_enabled = false;
        }
}

void restore_cgroup_boost_settings(void)
{
        int i;

        for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
                if (!allocated_group[i])
                        break;

                allocated_group[i]->sched_boost_enabled =
                        allocated_group[i]->sched_boost_enabled_backup;
        }
}

bool task_sched_boost(struct task_struct *p)
{
        struct schedtune *st = task_schedtune(p);

        return st->sched_boost_enabled;
}

static u64
sched_boost_override_read(struct cgroup_subsys_state *css,
                        struct cftype *cft)
{
        struct schedtune *st = css_st(css);

        return st->sched_boost_no_override;
}

static int sched_boost_override_write(struct cgroup_subsys_state *css,
                        struct cftype *cft, u64 override)
{
        struct schedtune *st = css_st(css);

        st->sched_boost_no_override = !!override;

        return 0;
}

#endif /* CONFIG_SCHED_WALT */

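/*
 * Re-evaluate the aggregate boost for @cpu: scan every valid boost group
 * with RUNNABLE tasks on that CPU and cache the maximum (non-negative)
 * boost value in boost_max.
 */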
static void
schedtune_cpu_update(int cpu)
{
        struct boost_groups *bg;
        int boost_max;
        int idx;

        bg = &per_cpu(cpu_boost_groups, cpu);

        /* The root boost group is always active */
        boost_max = bg->group[0].boost;
        for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {

                /* Ignore boost groups not mapping an actual cgroup */
                if (!bg->group[idx].valid)
                        continue;

                /*
                 * A boost group affects a CPU only if it has
                 * RUNNABLE tasks on that CPU
                 */
                if (bg->group[idx].tasks == 0)
                        continue;

                boost_max = max(boost_max, bg->group[idx].boost);
        }

        /*
         * Ensure boost_max is non-negative when all cgroup boost values
         * are negative. Avoids under-accounting of cpu capacity which may
         * cause task stacking and frequency spikes.
         */
        boost_max = max(boost_max, 0);
        bg->boost_max = boost_max;
}

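/*
 * Propagate a new boost value for boost group @idx to every CPU and refresh
 * each CPU's cached boost_max where the change can affect it.
 */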
static int
schedtune_boostgroup_update(int idx, int boost)
{
        struct boost_groups *bg;
        int cur_boost_max;
        int old_boost;
        int cpu;

        /* Update per CPU boost groups */
        for_each_possible_cpu(cpu) {
                bg = &per_cpu(cpu_boost_groups, cpu);

                /* A boost group being updated always maps an active cgroup */
                BUG_ON(!bg->group[idx].valid);

                /*
                 * Keep track of current boost values to compute the per CPU
                 * maximum only when it has been affected by the new value of
                 * the updated boost group
                 */
                cur_boost_max = bg->boost_max;
                old_boost = bg->group[idx].boost;

                /* Update the boost value of this boost group */
                bg->group[idx].boost = boost;

                /* Check if this update increases the current max */
                if (boost > cur_boost_max && bg->group[idx].tasks) {
                        bg->boost_max = boost;
                        trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max);
                        continue;
                }

                /* Check if this update has decreased the current max */
                if (cur_boost_max == old_boost && old_boost > boost) {
                        schedtune_cpu_update(cpu);
                        trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max);
                        continue;
                }

                trace_sched_tune_boostgroup_update(cpu, 0, bg->boost_max);
        }

        return 0;
}

#define ENQUEUE_TASK  1
#define DEQUEUE_TASK -1

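/*
 * Account @task_count (+1 on enqueue, -1 on dequeue) RUNNABLE tasks for
 * boost group @idx on @cpu and refresh the CPU's aggregate boost whenever
 * the group becomes active or idle.
 */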
static inline void
schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
{
        struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
        int tasks = bg->group[idx].tasks + task_count;

        /* Update boosted tasks count while avoiding to make it negative */
        bg->group[idx].tasks = max(0, tasks);

        trace_sched_tune_tasks_update(p, cpu, tasks, idx,
                        bg->group[idx].boost, bg->boost_max);

        /* Boost group activation or deactivation on that RQ */
        if (tasks == 1 || tasks == 0)
                schedtune_cpu_update(cpu);
}

/*
 * NOTE: This function must be called while holding the lock on the CPU RQ
 */
void schedtune_enqueue_task(struct task_struct *p, int cpu)
{
        struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
        unsigned long irq_flags;
        struct schedtune *st;
        int idx;

        if (unlikely(!schedtune_initialized))
                return;

        /*
         * When a task is marked PF_EXITING by do_exit() it's going to be
         * dequeued and enqueued multiple times in the exit path.
         * Thus we avoid any further update, since we do not want to change
         * CPU boosting while the task is exiting.
         */
        if (p->flags & PF_EXITING)
                return;

        /*
         * Boost group accounting is protected by a per-cpu lock and requires
         * interrupts to be disabled to avoid race conditions for example on
         * do_exit()::cgroup_exit() and task migration.
         */
        raw_spin_lock_irqsave(&bg->lock, irq_flags);
        rcu_read_lock();

        st = task_schedtune(p);
        idx = st->idx;

        schedtune_tasks_update(p, cpu, idx, ENQUEUE_TASK);

        rcu_read_unlock();
        raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
}

int schedtune_can_attach(struct cgroup_taskset *tset)
{
        struct task_struct *task;
        struct cgroup_subsys_state *css;
        struct boost_groups *bg;
        struct rq_flags irq_flags;
        unsigned int cpu;
        struct rq *rq;
        int src_bg; /* Source boost group index */
        int dst_bg; /* Destination boost group index */
        int tasks;

        if (unlikely(!schedtune_initialized))
                return 0;

        cgroup_taskset_for_each(task, css, tset) {

                /*
                 * Lock the CPU's RQ the task is enqueued to avoid race
                 * conditions with migration code while the task is being
                 * accounted
                 */
                rq = lock_rq_of(task, &irq_flags);

                if (!task->on_rq) {
                        unlock_rq_of(rq, task, &irq_flags);
                        continue;
                }

                /*
                 * Boost group accounting is protected by a per-cpu lock and
                 * requires interrupts to be disabled to avoid race conditions.
                 */
                cpu = cpu_of(rq);
                bg = &per_cpu(cpu_boost_groups, cpu);
                raw_spin_lock(&bg->lock);

                dst_bg = css_st(css)->idx;
                src_bg = task_schedtune(task)->idx;

                /*
                 * Current task is not changing boostgroup, which can
                 * happen when the new hierarchy is in use.
                 */
                if (unlikely(dst_bg == src_bg)) {
                        raw_spin_unlock(&bg->lock);
                        unlock_rq_of(rq, task, &irq_flags);
                        continue;
                }

                /*
                 * This is the case of a RUNNABLE task which is switching its
                 * current boost group.
                 */

                /* Move task from src to dst boost group */
                tasks = bg->group[src_bg].tasks - 1;
                bg->group[src_bg].tasks = max(0, tasks);
                bg->group[dst_bg].tasks += 1;

                raw_spin_unlock(&bg->lock);
                unlock_rq_of(rq, task, &irq_flags);

                /* Update CPU boost group */
                if (bg->group[src_bg].tasks == 0 || bg->group[dst_bg].tasks == 1)
                        schedtune_cpu_update(task_cpu(task));

        }

        return 0;
}

#ifdef CONFIG_SCHED_WALT
static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css,
                        struct cftype *cft)
{
        struct schedtune *st = css_st(css);

        return st->sched_boost_enabled;
}

static int sched_boost_enabled_write(struct cgroup_subsys_state *css,
                        struct cftype *cft, u64 enable)
{
        struct schedtune *st = css_st(css);

        st->sched_boost_enabled = !!enable;
        st->sched_boost_enabled_backup = st->sched_boost_enabled;

        return 0;
}

static u64 sched_colocate_read(struct cgroup_subsys_state *css,
                        struct cftype *cft)
{
        struct schedtune *st = css_st(css);

        return st->colocate;
}

static int sched_colocate_write(struct cgroup_subsys_state *css,
                        struct cftype *cft, u64 colocate)
{
        struct schedtune *st = css_st(css);

        if (st->colocate_update_disabled)
                return -EPERM;

        st->colocate = !!colocate;
        st->colocate_update_disabled = true;
        return 0;
}

#else /* CONFIG_SCHED_WALT */

static inline void init_sched_boost(struct schedtune *st) { }

#endif /* CONFIG_SCHED_WALT */

void schedtune_cancel_attach(struct cgroup_taskset *tset)
{
        /*
         * This can happen only if the SchedTune controller is mounted with
         * other hierarchies and one of them fails. Since usually SchedTune is
         * mounted on its own hierarchy, for the time being we do not implement
         * a proper rollback mechanism.
         */
        WARN(1, "SchedTune cancel attach not implemented");
}

/*
 * NOTE: This function must be called while holding the lock on the CPU RQ
 */
void schedtune_dequeue_task(struct task_struct *p, int cpu)
{
        struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
        unsigned long irq_flags;
        struct schedtune *st;
        int idx;

        if (unlikely(!schedtune_initialized))
                return;

        /*
         * When a task is marked PF_EXITING by do_exit() it's going to be
         * dequeued and enqueued multiple times in the exit path.
         * Thus we avoid any further update, since we do not want to change
         * CPU boosting while the task is exiting.
         * The last dequeue is already enforced by the do_exit() code path
         * via schedtune_exit_task().
         */
        if (p->flags & PF_EXITING)
                return;

        /*
         * Boost group accounting is protected by a per-cpu lock and requires
         * interrupts to be disabled to avoid race conditions.
         */
        raw_spin_lock_irqsave(&bg->lock, irq_flags);
        rcu_read_lock();

        st = task_schedtune(p);
        idx = st->idx;

        schedtune_tasks_update(p, cpu, idx, DEQUEUE_TASK);

        rcu_read_unlock();
        raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
}

void schedtune_exit_task(struct task_struct *tsk)
{
        struct schedtune *st;
        struct rq_flags irq_flags;
        unsigned int cpu;
        struct rq *rq;
        int idx;

        if (unlikely(!schedtune_initialized))
                return;

        rq = lock_rq_of(tsk, &irq_flags);
        rcu_read_lock();

        cpu = cpu_of(rq);
        st = task_schedtune(tsk);
        idx = st->idx;
        schedtune_tasks_update(tsk, cpu, idx, DEQUEUE_TASK);

        rcu_read_unlock();
        unlock_rq_of(rq, tsk, &irq_flags);
}

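/* Return the aggregate boost currently applied to @cpu. */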
int schedtune_cpu_boost(int cpu)
{
        struct boost_groups *bg;

        bg = &per_cpu(cpu_boost_groups, cpu);
        return bg->boost_max;
}

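/* Return the boost value of the boost group @p currently belongs to. */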
int schedtune_task_boost(struct task_struct *p)
{
        struct schedtune *st;
        int task_boost;

        if (unlikely(!schedtune_initialized))
                return 0;

        /* Get task boost value */
        rcu_read_lock();
        st = task_schedtune(p);
        task_boost = st->boost;
        rcu_read_unlock();

        return task_boost;
}

int schedtune_prefer_idle(struct task_struct *p)
{
        struct schedtune *st;
        int prefer_idle;

        if (unlikely(!schedtune_initialized))
                return 0;

        /* Get prefer_idle value */
        rcu_read_lock();
        st = task_schedtune(p);
        prefer_idle = st->prefer_idle;
        rcu_read_unlock();

        return prefer_idle;
}

static u64
prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
        struct schedtune *st = css_st(css);

        return st->prefer_idle;
}

static int
prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
                  u64 prefer_idle)
{
        struct schedtune *st = css_st(css);

        st->prefer_idle = !!prefer_idle;

        return 0;
}

static s64
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
        struct schedtune *st = css_st(css);

        return st->boost;
}

#ifdef CONFIG_SCHED_WALT
static void schedtune_attach(struct cgroup_taskset *tset)
{
        struct task_struct *task;
        struct cgroup_subsys_state *css;
        struct schedtune *st;
        bool colocate;

        cgroup_taskset_first(tset, &css);
        st = css_st(css);

        colocate = st->colocate;

        cgroup_taskset_for_each(task, css, tset)
                sync_cgroup_colocation(task, colocate);
}
#else
static void schedtune_attach(struct cgroup_taskset *tset)
{
}
#endif

static int
boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
            s64 boost)
{
        struct schedtune *st = css_st(css);
        unsigned threshold_idx;
        int boost_pct;

        if (boost < -100 || boost > 100)
                return -EINVAL;
        boost_pct = boost;

        /*
         * Update threshold params for Performance Boost (B)
         * and Performance Constraint (C) regions.
         * The current implementation uses the same cuts for both
         * B and C regions.
         */
        threshold_idx = clamp(boost_pct, 0, 99) / 10;
        st->perf_boost_idx = threshold_idx;
        st->perf_constrain_idx = threshold_idx;

        st->boost = boost;
        if (css == &root_schedtune.css) {
                sysctl_sched_cfs_boost = boost;
                perf_boost_idx = threshold_idx;
                perf_constrain_idx = threshold_idx;
        }

        /* Update CPU boost */
        schedtune_boostgroup_update(st->idx, st->boost);

        trace_sched_tune_config(st->boost);

        return 0;
}

static struct cftype files[] = {
#ifdef CONFIG_SCHED_WALT
        {
                .name = "sched_boost_no_override",
                .read_u64 = sched_boost_override_read,
                .write_u64 = sched_boost_override_write,
        },
        {
                .name = "sched_boost_enabled",
                .read_u64 = sched_boost_enabled_read,
                .write_u64 = sched_boost_enabled_write,
        },
        {
                .name = "colocate",
                .read_u64 = sched_colocate_read,
                .write_u64 = sched_colocate_write,
        },
#endif
        {
                .name = "boost",
                .read_s64 = boost_read,
                .write_s64 = boost_write,
        },
        {
                .name = "prefer_idle",
                .read_u64 = prefer_idle_read,
                .write_u64 = prefer_idle_write,
        },
        { }     /* terminate */
};

static void
schedtune_boostgroup_init(struct schedtune *st, int idx)
{
        struct boost_groups *bg;
        int cpu;

        /* Initialize per-CPU boost group support */
        for_each_possible_cpu(cpu) {
                bg = &per_cpu(cpu_boost_groups, cpu);
                bg->group[idx].boost = 0;
                bg->group[idx].valid = true;
        }

        /* Keep track of allocated boost groups */
        allocated_group[idx] = st;
        st->idx = idx;
}

static struct cgroup_subsys_state *
schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
{
        struct schedtune *st;
        int idx;

        if (!parent_css)
                return &root_schedtune.css;

        /* Allow only single-level hierarchies */
        if (parent_css != &root_schedtune.css) {
                pr_err("Nested SchedTune boosting groups not allowed\n");
                return ERR_PTR(-ENOMEM);
        }

        /* Allow only a limited number of boosting groups */
        for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
                if (!allocated_group[idx])
                        break;
        if (idx == BOOSTGROUPS_COUNT) {
                pr_err("Trying to create more than %d SchedTune boosting groups\n",
                       BOOSTGROUPS_COUNT);
                return ERR_PTR(-ENOSPC);
        }

        st = kzalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                goto out;

        /* Initialize per-CPU boost group support */
        init_sched_boost(st);
        schedtune_boostgroup_init(st, idx);

        return &st->css;

out:
        return ERR_PTR(-ENOMEM);
}

static void
schedtune_boostgroup_release(struct schedtune *st)
{
        struct boost_groups *bg;
        int cpu;

        /* Reset per-CPU boost group support */
        for_each_possible_cpu(cpu) {
                bg = &per_cpu(cpu_boost_groups, cpu);
                bg->group[st->idx].valid = false;
                bg->group[st->idx].boost = 0;
        }

        /* Keep track of allocated boost groups */
        allocated_group[st->idx] = NULL;
}

static void
schedtune_css_free(struct cgroup_subsys_state *css)
{
        struct schedtune *st = css_st(css);

        /* Release per-CPU boost group support */
        schedtune_boostgroup_release(st);
        kfree(st);
}

struct cgroup_subsys schedtune_cgrp_subsys = {
        .css_alloc      = schedtune_css_alloc,
        .css_free       = schedtune_css_free,
        .allow_attach   = subsys_cgroup_allow_attach,
        .attach         = schedtune_attach,
        .can_attach     = schedtune_can_attach,
        .cancel_attach  = schedtune_cancel_attach,
        .legacy_cftypes = files,
        .early_init     = 1,
};

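/*
 * Initialize the per-CPU boost_groups data and mark the root boost group as
 * valid; called from schedtune_init() when CONFIG_CGROUP_SCHEDTUNE is enabled.
 */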
static inline void
schedtune_init_cgroups(void)
{
        struct boost_groups *bg;
        int cpu;

        /* Initialize the per CPU boost groups */
        for_each_possible_cpu(cpu) {
                bg = &per_cpu(cpu_boost_groups, cpu);
                memset(bg, 0, sizeof(struct boost_groups));
                bg->group[0].valid = true;
                raw_spin_lock_init(&bg->lock);
        }

        pr_info("schedtune: configured to support %d boost groups\n",
                BOOSTGROUPS_COUNT);

        schedtune_initialized = true;
}

#else /* CONFIG_CGROUP_SCHEDTUNE */

int
schedtune_accept_deltas(int nrg_delta, int cap_delta,
                        struct task_struct *task)
{
        /* Optimal (O) region */
        if (nrg_delta < 0 && cap_delta > 0) {
                trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
                return INT_MAX;
        }

        /* Suboptimal (S) region */
        if (nrg_delta > 0 && cap_delta < 0) {
                trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
                return -INT_MAX;
        }

        return __schedtune_accept_deltas(nrg_delta, cap_delta,
                        perf_boost_idx, perf_constrain_idx);
}

#endif /* CONFIG_CGROUP_SCHEDTUNE */

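/*
 * sysctl handler for sysctl_sched_cfs_boost: validate the new global boost
 * value and refresh the Performance Boost/Constraint threshold indexes.
 */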
int
sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
                               void __user *buffer, size_t *lenp,
                               loff_t *ppos)
{
        int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        unsigned threshold_idx;
        int boost_pct;

        if (ret || !write)
                return ret;

        if (sysctl_sched_cfs_boost < -100 || sysctl_sched_cfs_boost > 100)
                return -EINVAL;
        boost_pct = sysctl_sched_cfs_boost;

        /*
         * Update threshold params for Performance Boost (B)
         * and Performance Constraint (C) regions.
         * The current implementation uses the same cuts for both
         * B and C regions.
         */
        threshold_idx = clamp(boost_pct, 0, 99) / 10;
        perf_boost_idx = threshold_idx;
        perf_constrain_idx = threshold_idx;

        return 0;
}

#ifdef CONFIG_SCHED_DEBUG
static void
schedtune_test_nrg(unsigned long delta_pwr)
{
        unsigned long test_delta_pwr;
        unsigned long test_norm_pwr;
        int idx;

        /*
         * Check normalization constants using some constant system
         * energy values
         */
        pr_info("schedtune: verify normalization constants...\n");
        for (idx = 0; idx < 6; ++idx) {
                test_delta_pwr = delta_pwr >> idx;

                /* Normalize on max energy for target platform */
                test_norm_pwr = reciprocal_divide(
                                        test_delta_pwr << SCHED_CAPACITY_SHIFT,
                                        schedtune_target_nrg.rdiv);

                pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n",
                        idx, test_delta_pwr, test_norm_pwr);
        }
}
#else
#define schedtune_test_nrg(delta_pwr)
#endif

/*
 * Compute the min/max power consumption of a cluster and all its CPUs
 */
static void
schedtune_add_cluster_nrg(
                struct sched_domain *sd,
                struct sched_group *sg,
                struct target_nrg *ste)
{
        struct sched_domain *sd2;
        struct sched_group *sg2;

        struct cpumask *cluster_cpus;
        char str[32];

        unsigned long min_pwr;
        unsigned long max_pwr;
        int cpu;

        /* Get Cluster energy using EM data for the first CPU */
        cluster_cpus = sched_group_cpus(sg);
        snprintf(str, 32, "CLUSTER[%*pbl]",
                 cpumask_pr_args(cluster_cpus));

        min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power;
        max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power;
        pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
                str, min_pwr, max_pwr);

        /*
         * Keep track of this cluster's energy in the computation of the
         * overall system energy
         */
        ste->min_power += min_pwr;
        ste->max_power += max_pwr;

        /* Get CPU energy using EM data for each CPU in the group */
        for_each_cpu(cpu, cluster_cpus) {
                /* Get a SD view for the specific CPU */
                for_each_domain(cpu, sd2) {
                        /* Get the CPU group */
                        sg2 = sd2->groups;
                        min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power;
                        max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power;

                        ste->min_power += min_pwr;
                        ste->max_power += max_pwr;

                        snprintf(str, 32, "CPU[%d]", cpu);
                        pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
                                str, min_pwr, max_pwr);

                        /*
                         * Assume we have EM data only at the CPU and
                         * the upper CLUSTER level
                         */
                        break;
                }
        }
}

/*
 * Initialize the constants required to compute normalized energy.
 * The values of these constants depend on the EM data for the specific
 * target system and topology.
 * Thus, this function is expected to be called by the code
 * that binds the EM to the topology information.
 */
static int
schedtune_init(void)
{
        struct target_nrg *ste = &schedtune_target_nrg;
        unsigned long delta_pwr = 0;
        struct sched_domain *sd;
        struct sched_group *sg;

        pr_info("schedtune: init normalization constants...\n");
        ste->max_power = 0;
        ste->min_power = 0;

        rcu_read_lock();

        /*
         * When EAS is in use, we always have a pointer to the highest SD
         * which provides EM data.
         */
        sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask)));
        if (!sd) {
                pr_info("schedtune: no energy model data\n");
                goto nodata;
        }

        sg = sd->groups;
        do {
                schedtune_add_cluster_nrg(sd, sg, ste);
        } while (sg = sg->next, sg != sd->groups);

        rcu_read_unlock();

        pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
                "SYSTEM", ste->min_power, ste->max_power);

        /* Compute normalization constants */
        delta_pwr = ste->max_power - ste->min_power;
        ste->rdiv = reciprocal_value(delta_pwr);
        pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n",
                ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2);

        schedtune_test_nrg(delta_pwr);

#ifdef CONFIG_CGROUP_SCHEDTUNE
        schedtune_init_cgroups();
#else
        pr_info("schedtune: configured to support global boosting only\n");
#endif /* CONFIG_CGROUP_SCHEDTUNE */

        schedtune_spc_rdiv = reciprocal_value(100);

        return 0;

nodata:
        pr_warning("schedtune: disabled!\n");
        rcu_read_unlock();
        return -EINVAL;
}
postcore_initcall(schedtune_init);