
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/u64_stats_sync.h>
#include <linux/sched/deadline.h>
#include <linux/binfmts.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/stop_machine.h>
#include <linux/irq_work.h>
#include <linux/tick.h>
#include <linux/slab.h>

#include "cpupri.h"
#include "cpudeadline.h"
#include "cpuacct.h"

#ifdef CONFIG_SCHED_DEBUG
#define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
#else
#define SCHED_WARN_ON(x) ((void)(x))
#endif

struct rq;
struct cpuidle_state;

/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED 1
#define TASK_ON_RQ_MIGRATING 2

extern __read_mostly int scheduler_running;

extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;

extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);

#ifdef CONFIG_SMP
extern void cpu_load_update_active(struct rq *this_rq);
#else
static inline void cpu_load_update_active(struct rq *this_rq) { }
#endif

/*
 * Helpers for converting nanosecond timing to jiffy resolution
 */
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
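
/*
 * Illustration (values depend on the build's HZ): with HZ == 250 a jiffy
 * is 4,000,000 ns, so NS_TO_JIFFIES(8000000) evaluates to 2.
 */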

/*
 * Increase resolution of nice-level calculations for 64-bit architectures.
 * The extra resolution improves shares distribution and load balancing of
 * low-weight task groups (e.g. nice +19 on an autogroup), deeper taskgroup
 * hierarchies, especially on larger systems. This is not a user-visible change
 * and does not change the user-interface for setting shares/weights.
 *
 * We increase resolution only if we have enough bits to allow this increased
 * resolution (i.e. 64bit). The costs for increasing resolution when 32bit are
 * pretty high and the returns do not justify the increased costs.
 *
 * Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to
 * increase coverage and consistency always enable it on 64bit platforms.
 */
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
# define scale_load_down(w) ((w) >> SCHED_FIXEDPOINT_SHIFT)
#else
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) (w)
# define scale_load_down(w) (w)
#endif

/*
 * Task weight (visible to users) and its load (invisible to users) have
 * independent resolution, but they should be well calibrated. We use
 * scale_load() and scale_load_down(w) to convert between them. The
 * following must be true:
 *
 * scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
 *
 */
#define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT)
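
/*
 * Illustration, assuming SCHED_FIXEDPOINT_SHIFT == 10: on 64-bit kernels
 * NICE_0_LOAD_SHIFT is 20, so NICE_0_LOAD == 1 << 20 and
 * scale_load(sched_prio_to_weight[20]) == scale_load(1024) == NICE_0_LOAD,
 * while scale_load_down(NICE_0_LOAD) gives back the user-visible 1024.
 * On 32-bit kernels both scale_load() and scale_load_down() are identity.
 */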

/*
 * Single value that decides SCHED_DEADLINE internal math precision.
 * 10 -> just above 1us
 * 9  -> just above 0.5us
 */
#define DL_SCALE (10)

/*
 * These are the 'tuning knobs' of the scheduler:
 */

/*
 * Single value that denotes runtime == period, i.e. unlimited time.
 */
#define RUNTIME_INF ((u64)~0ULL)

static inline int idle_policy(int policy)
{
	return policy == SCHED_IDLE;
}
static inline int fair_policy(int policy)
{
	return policy == SCHED_NORMAL || policy == SCHED_BATCH;
}

static inline int rt_policy(int policy)
{
	return policy == SCHED_FIFO || policy == SCHED_RR;
}

static inline int dl_policy(int policy)
{
	return policy == SCHED_DEADLINE;
}
static inline bool valid_policy(int policy)
{
	return idle_policy(policy) || fair_policy(policy) ||
		rt_policy(policy) || dl_policy(policy);
}

static inline int task_has_rt_policy(struct task_struct *p)
{
	return rt_policy(p->policy);
}

static inline int task_has_dl_policy(struct task_struct *p)
{
	return dl_policy(p->policy);
}

/*
 * Tells if entity @a should preempt entity @b.
 */
static inline bool
dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
{
	return dl_time_before(a->deadline, b->deadline);
}

/*
 * This is the priority-queue data structure of the RT scheduling class:
 */
struct rt_prio_array {
	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
	struct list_head queue[MAX_RT_PRIO];
};

struct rt_bandwidth {
	/* nests inside the rq lock: */
	raw_spinlock_t rt_runtime_lock;
	ktime_t rt_period;
	u64 rt_runtime;
	struct hrtimer rt_period_timer;
	unsigned int rt_period_active;
};

void __dl_clear_params(struct task_struct *p);

/*
 * To keep the bandwidth of -deadline tasks and groups under control
 * we need some place where:
 *  - store the maximum -deadline bandwidth of the system (the group);
 *  - cache the fraction of that bandwidth that is currently allocated.
 *
 * This is all done in the data structure below. It is similar to the
 * one used for RT-throttling (rt_bandwidth), with the main difference
 * that, since here we are only interested in admission control, we
 * do not decrease any runtime while the group "executes", nor do we
 * need a timer to replenish it.
 *
 * With respect to SMP, the bandwidth is given on a per-CPU basis,
 * meaning that:
 *  - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU;
 *  - dl_total_bw array contains, in the i-th element, the currently
 *    allocated bandwidth on the i-th CPU.
 * Moreover, groups consume bandwidth on each CPU, while tasks only
 * consume bandwidth on the CPU they're running on.
 * Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw
 * that will be shown the next time the proc or cgroup controls are
 * read. It can in turn be changed by writing to its own control.
 */
struct dl_bandwidth {
	raw_spinlock_t dl_runtime_lock;
	u64 dl_runtime;
	u64 dl_period;
};

static inline int dl_bandwidth_enabled(void)
{
	return sysctl_sched_rt_runtime >= 0;
}

extern struct dl_bw *dl_bw_of(int i);

struct dl_bw {
	raw_spinlock_t lock;
	u64 bw, total_bw;
};

static inline
void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw)
{
	dl_b->total_bw -= tsk_bw;
}

static inline
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw)
{
	dl_b->total_bw += tsk_bw;
}

static inline
bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
{
	return dl_b->bw != -1 &&
	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}
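
/*
 * Example of the admission test above (illustrative numbers): with
 * cpus == 4, a new task's bandwidth new_bw is rejected whenever
 * total_bw - old_bw + new_bw would exceed 4 * bw; dl_b->bw == -1
 * means "no limit" and always admits.
 */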

extern struct mutex sched_domains_mutex;

#ifdef CONFIG_CGROUP_SCHED

#include <linux/cgroup.h>

struct cfs_rq;
struct rt_rq;

extern struct list_head task_groups;

struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
	raw_spinlock_t lock;
	ktime_t period;
	u64 quota, runtime;
	s64 hierarchical_quota;
	u64 runtime_expires;

	int idle, period_active;
	struct hrtimer period_timer, slack_timer;
	struct list_head throttled_cfs_rq;

	/* statistics */
	int nr_periods, nr_throttled;
	u64 throttled_time;
#endif
};

/* task group related information */
struct task_group {
	struct cgroup_subsys_state css;

#ifdef CONFIG_SCHED_HMP
	bool upmigrate_discouraged;
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* schedulable entities of this group on each cpu */
	struct sched_entity **se;
	/* runqueue "owned" by this group on each cpu */
	struct cfs_rq **cfs_rq;
	unsigned long shares;

#ifdef CONFIG_SMP
	/*
	 * load_avg can be heavily contended at clock tick time, so put
	 * it in its own cacheline separated from the fields above which
	 * will also be accessed at each tick.
	 */
	atomic_long_t load_avg ____cacheline_aligned;
#endif
#endif

#ifdef CONFIG_RT_GROUP_SCHED
	struct sched_rt_entity **rt_se;
	struct rt_rq **rt_rq;

	struct rt_bandwidth rt_bandwidth;
#endif

	struct rcu_head rcu;
	struct list_head list;

	struct task_group *parent;
	struct list_head siblings;
	struct list_head children;

#ifdef CONFIG_SCHED_AUTOGROUP
	struct autogroup *autogroup;
#endif

	struct cfs_bandwidth cfs_bandwidth;
};

#ifdef CONFIG_FAIR_GROUP_SCHED
#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD

/*
 * A weight of 0 or 1 can cause arithmetic problems.
 * The weight of a cfs_rq is the sum of the weights of the entities
 * queued on it, so the weight of an entity should not be too large,
 * and neither should the shares value of a task group.
 * (The default weight is 1024 - so there's no practical
 *  limitation from this.)
 */
#define MIN_SHARES (1UL << 1)
#define MAX_SHARES (1UL << 18)
#endif

typedef int (*tg_visitor)(struct task_group *, void *);

extern int walk_tg_tree_from(struct task_group *from,
			     tg_visitor down, tg_visitor up, void *data);

/*
 * Iterate the full tree, calling @down when first entering a node and @up when
 * leaving it for the final time.
 *
 * Caller must hold rcu_lock or sufficient equivalent.
 */
static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
{
	return walk_tg_tree_from(&root_task_group, down, up, data);
}
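
/*
 * Example usage (illustrative only; tg_count is hypothetical): count the
 * task groups in the hierarchy with a down visitor and tg_nop() on the
 * way back up:
 *
 *	static int tg_count(struct task_group *tg, void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;
 *	}
 *
 *	rcu_read_lock();
 *	walk_tg_tree(tg_count, tg_nop, &count);
 *	rcu_read_unlock();
 */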

extern int tg_nop(struct task_group *tg, void *data);

extern void free_fair_sched_group(struct task_group *tg);
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
extern void online_fair_sched_group(struct task_group *tg);
extern void unregister_fair_sched_group(struct task_group *tg);
extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
			struct sched_entity *se, int cpu,
			struct sched_entity *parent);
extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);

extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);

extern void free_rt_sched_group(struct task_group *tg);
extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
		struct sched_rt_entity *rt_se, int cpu,
		struct sched_rt_entity *parent);

extern struct task_group *sched_create_group(struct task_group *parent);
extern void sched_online_group(struct task_group *tg,
			       struct task_group *parent);
extern void sched_destroy_group(struct task_group *tg);
extern void sched_offline_group(struct task_group *tg);

extern void sched_move_task(struct task_struct *tsk);

#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);

#ifdef CONFIG_SMP
extern void set_task_rq_fair(struct sched_entity *se,
			     struct cfs_rq *prev, struct cfs_rq *next);
#else /* !CONFIG_SMP */
static inline void set_task_rq_fair(struct sched_entity *se,
				    struct cfs_rq *prev, struct cfs_rq *next) { }
#endif /* CONFIG_SMP */
#endif /* CONFIG_FAIR_GROUP_SCHED */

extern struct task_group *css_tg(struct cgroup_subsys_state *css);

#else /* CONFIG_CGROUP_SCHED */

struct cfs_bandwidth { };

#endif /* CONFIG_CGROUP_SCHED */

#ifdef CONFIG_SCHED_HMP

struct hmp_sched_stats {
	int nr_big_tasks;
	u64 cumulative_runnable_avg;
	u64 pred_demands_sum;
};

struct sched_cluster {
	struct list_head list;
	struct cpumask cpus;
	int id;
	int max_power_cost;
	int min_power_cost;
	int max_possible_capacity;
	int capacity;
	int efficiency; /* Differentiate cpus with different IPC capability */
	int load_scale_factor;
	unsigned int exec_scale_factor;
	/*
	 * max_freq = user maximum
	 * max_mitigated_freq = thermal defined maximum
	 * max_possible_freq = maximum supported by hardware
	 */
	unsigned int cur_freq, max_freq, max_mitigated_freq, min_freq;
	unsigned int max_possible_freq;
	bool freq_init_done;
	int dstate, dstate_wakeup_latency, dstate_wakeup_energy;
	unsigned int static_cluster_pwr_cost;
	int notifier_sent;
};

extern unsigned long all_cluster_ids[];

static inline int cluster_first_cpu(struct sched_cluster *cluster)
{
	return cpumask_first(&cluster->cpus);
}

struct related_thread_group {
	int id;
	raw_spinlock_t lock;
	struct list_head tasks;
	struct list_head list;
	struct sched_cluster *preferred_cluster;
	struct rcu_head rcu;
	u64 last_update;
	struct group_cpu_time __percpu *cpu_time; /* one per cluster */
};

struct migration_sum_data {
	struct rq *src_rq, *dst_rq;
	struct group_cpu_time *src_cpu_time, *dst_cpu_time;
};

extern struct list_head cluster_head;
extern int num_clusters;
extern struct sched_cluster *sched_cluster[NR_CPUS];

struct cpu_cycle {
	u64 cycles;
	u64 time;
};

#define for_each_sched_cluster(cluster) \
	list_for_each_entry_rcu(cluster, &cluster_head, list)

#endif /* CONFIG_SCHED_HMP */

/* CFS-related fields in a runqueue */
struct cfs_rq {
	struct load_weight load;
	unsigned int nr_running, h_nr_running;

	u64 exec_clock;
	u64 min_vruntime;
#ifndef CONFIG_64BIT
	u64 min_vruntime_copy;
#endif

	struct rb_root tasks_timeline;
	struct rb_node *rb_leftmost;

	/*
	 * 'curr' points to currently running entity on this cfs_rq.
	 * It is set to NULL otherwise (i.e. when none are currently running).
	 */
	struct sched_entity *curr, *next, *last, *skip;

#ifdef CONFIG_SCHED_DEBUG
	unsigned int nr_spread_over;
#endif

#ifdef CONFIG_SMP
	/*
	 * CFS load tracking
	 */
	struct sched_avg avg;
	u64 runnable_load_sum;
	unsigned long runnable_load_avg;
#ifdef CONFIG_FAIR_GROUP_SCHED
	unsigned long tg_load_avg_contrib;
#endif
	atomic_long_t removed_load_avg, removed_util_avg;
#ifndef CONFIG_64BIT
	u64 load_last_update_time_copy;
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	/*
	 * h_load = weight * f(tg)
	 *
	 * Where f(tg) is the recursive weight fraction assigned to
	 * this group.
	 */
	unsigned long h_load;
	u64 last_h_load_update;
	struct sched_entity *h_load_next;
#endif /* CONFIG_FAIR_GROUP_SCHED */
#endif /* CONFIG_SMP */

#ifdef CONFIG_FAIR_GROUP_SCHED
	struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */

	/*
	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
	 * (like users, containers etc.)
	 *
	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
	 * list is used during load balance.
	 */
	int on_list;
	struct list_head leaf_cfs_rq_list;
	struct task_group *tg; /* group that "owns" this runqueue */

#ifdef CONFIG_CFS_BANDWIDTH
#ifdef CONFIG_SCHED_HMP
	struct hmp_sched_stats hmp_stats;
#endif

	int runtime_enabled;
	u64 runtime_expires;
	s64 runtime_remaining;

	u64 throttled_clock, throttled_clock_task;
	u64 throttled_clock_task_time;
	int throttled, throttle_count;
	struct list_head throttled_list;
#endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */
};

static inline int rt_bandwidth_enabled(void)
{
	return sysctl_sched_rt_runtime >= 0;
}

/* RT IPI pull logic requires IRQ_WORK */
#ifdef CONFIG_IRQ_WORK
# define HAVE_RT_PUSH_IPI
#endif

/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
	struct rt_prio_array active;
	unsigned int rt_nr_running;
	unsigned int rr_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
	struct {
		int curr; /* highest queued rt task prio */
#ifdef CONFIG_SMP
		int next; /* next highest */
#endif
	} highest_prio;
#endif
#ifdef CONFIG_SMP
	unsigned long rt_nr_migratory;
	unsigned long rt_nr_total;
	int overloaded;
	struct plist_head pushable_tasks;
#ifdef HAVE_RT_PUSH_IPI
	int push_flags;
	int push_cpu;
	struct irq_work push_work;
	raw_spinlock_t push_lock;
#endif
#endif /* CONFIG_SMP */
	int rt_queued;

	int rt_throttled;
	u64 rt_time;
	u64 rt_runtime;
	/* Nests inside the rq lock: */
	raw_spinlock_t rt_runtime_lock;

#ifdef CONFIG_RT_GROUP_SCHED
	unsigned long rt_nr_boosted;

	struct rq *rq;
	struct task_group *tg;
#endif
};

/* Deadline class' related fields in a runqueue */
struct dl_rq {
	/* runqueue is an rbtree, ordered by deadline */
	struct rb_root rb_root;
	struct rb_node *rb_leftmost;

	unsigned long dl_nr_running;

#ifdef CONFIG_SMP
	/*
	 * Deadline values of the currently executing and the
	 * earliest ready task on this rq. Caching these facilitates
	 * the decision whether or not a ready but not running task
	 * should migrate somewhere else.
	 */
	struct {
		u64 curr;
		u64 next;
	} earliest_dl;

	unsigned long dl_nr_migratory;
	int overloaded;

	/*
	 * Tasks on this rq that can be pushed away. They are kept in
	 * an rb-tree, ordered by tasks' deadlines, with caching
	 * of the leftmost (earliest deadline) element.
	 */
	struct rb_root pushable_dl_tasks_root;
	struct rb_node *pushable_dl_tasks_leftmost;
#else
	struct dl_bw dl_bw;
#endif
};

#ifdef CONFIG_SMP

/*
 * We add the notion of a root-domain which will be used to define per-domain
 * variables. Each exclusive cpuset essentially defines an island domain by
 * fully partitioning the member cpus from any other cpuset. Whenever a new
 * exclusive cpuset is created, we also create and attach a new root-domain
 * object.
 *
 */
struct root_domain {
	atomic_t refcount;
	atomic_t rto_count;
	struct rcu_head rcu;
	cpumask_var_t span;
	cpumask_var_t online;

	/* Indicate more than one runnable task for any CPU */
	bool overload;

	/*
	 * The bit corresponding to a CPU gets set here if such CPU has more
	 * than one runnable -deadline task (as it is below for RT tasks).
	 */
	cpumask_var_t dlo_mask;
	atomic_t dlo_count;
	struct dl_bw dl_bw;
	struct cpudl cpudl;

	/*
	 * The "RT overload" flag: it gets set if a CPU has more than
	 * one runnable RT task.
	 */
	cpumask_var_t rto_mask;
	struct cpupri cpupri;

	unsigned long max_cpu_capacity;
};

extern struct root_domain def_root_domain;

#endif /* CONFIG_SMP */

/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: code that wants to lock multiple runqueues (such as the
 * load balancing or the thread migration code) must acquire the locks in
 * ascending &runqueue order.
 */
struct rq {
	/* runqueue lock: */
	raw_spinlock_t lock;

	/*
	 * nr_running and cpu_load should be in the same cacheline because
	 * remote CPUs use both these fields when doing load calculation.
	 */
	unsigned int nr_running;
#ifdef CONFIG_NUMA_BALANCING
	unsigned int nr_numa_running;
	unsigned int nr_preferred_running;
#endif
	#define CPU_LOAD_IDX_MAX 5
	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
	unsigned long last_load_update_tick;
#endif /* CONFIG_SMP */
	unsigned long nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL
	unsigned long last_sched_tick;
#endif
	/* capture load from *all* tasks on this cpu: */
	struct load_weight load;
	unsigned long nr_load_updates;
	u64 nr_switches;

	struct cfs_rq cfs;
	struct rt_rq rt;
	struct dl_rq dl;

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* list of leaf cfs_rq on this cpu: */
	struct list_head leaf_cfs_rq_list;
#endif /* CONFIG_FAIR_GROUP_SCHED */

	/*
	 * This is part of a global counter where only the total sum
	 * over all CPUs matters. A task can increase this counter on
	 * one CPU and if it got migrated afterwards it may decrease
	 * it on another CPU. Always updated under the runqueue lock:
	 */
	unsigned long nr_uninterruptible;

	struct task_struct *curr, *idle, *stop;
	unsigned long next_balance;
	struct mm_struct *prev_mm;

	unsigned int clock_skip_update;
	u64 clock;
	u64 clock_task;

	atomic_t nr_iowait;

#ifdef CONFIG_SMP
	struct root_domain *rd;
	struct sched_domain *sd;

	unsigned long cpu_capacity;
	unsigned long cpu_capacity_orig;

	struct callback_head *balance_callback;

	unsigned char idle_balance;
	/* For active balancing */
	int active_balance;
	int push_cpu;
	struct task_struct *push_task;
	struct cpu_stop_work active_balance_work;
	/* cpu of this runqueue: */
	int cpu;
	int online;

	struct list_head cfs_tasks;

	u64 rt_avg;
	u64 age_stamp;
	u64 idle_stamp;
	u64 avg_idle;

	/* This is used to determine avg_idle's max value */
	u64 max_idle_balance_cost;
#endif

#ifdef CONFIG_SCHED_HMP
	struct sched_cluster *cluster;
	struct cpumask freq_domain_cpumask;
	struct hmp_sched_stats hmp_stats;

	int cstate, wakeup_latency, wakeup_energy;
	u64 window_start;
	unsigned long hmp_flags;

	u64 cur_irqload;
	u64 avg_irqload;
	u64 irqload_ts;
	unsigned int static_cpu_pwr_cost;
	struct task_struct *ed_task;
	struct cpu_cycle cc;
	u64 old_busy_time, old_busy_time_group;
	u64 old_estimated_time;
	u64 curr_runnable_sum;
	u64 prev_runnable_sum;
	u64 nt_curr_runnable_sum;
	u64 nt_prev_runnable_sum;
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	u64 prev_irq_time;
#endif
#ifdef CONFIG_PARAVIRT
	u64 prev_steal_time;
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
	u64 prev_steal_time_rq;
#endif

	/* calc_load related fields */
	unsigned long calc_load_update;
	long calc_load_active;

#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
	int hrtick_csd_pending;
	struct call_single_data hrtick_csd;
#endif
	struct hrtimer hrtick_timer;
#endif

#ifdef CONFIG_SCHEDSTATS
	/* latency stats */
	struct sched_info rq_sched_info;
	unsigned long long rq_cpu_time;
	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */

	/* sys_sched_yield() stats */
	unsigned int yld_count;

	/* schedule() stats */
	unsigned int sched_count;
	unsigned int sched_goidle;

	/* try_to_wake_up() stats */
	unsigned int ttwu_count;
	unsigned int ttwu_local;
#endif

#ifdef CONFIG_SMP
	struct llist_head wake_list;
#endif

#ifdef CONFIG_CPU_IDLE
	/* Must be inspected within a rcu lock section */
	struct cpuidle_state *idle_state;
#endif
};

static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
	return rq->cpu;
#else
	return 0;
#endif
}


#ifdef CONFIG_SCHED_SMT

extern struct static_key_false sched_smt_present;

extern void __update_idle_core(struct rq *rq);

static inline void update_idle_core(struct rq *rq)
{
	if (static_branch_unlikely(&sched_smt_present))
		__update_idle_core(rq);
}

#else
static inline void update_idle_core(struct rq *rq) { }
#endif

DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() this_cpu_ptr(&runqueues)
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
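
/*
 * Typical usage (illustrative): cpu_rq(cpu) addresses any CPU's runqueue,
 * this_rq() the local one (callers must not migrate while using it), and
 * task_rq(p) expands to cpu_rq(task_cpu(p)).
 */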

static inline u64 __rq_clock_broken(struct rq *rq)
{
	return READ_ONCE(rq->clock);
}

static inline u64 rq_clock(struct rq *rq)
{
	lockdep_assert_held(&rq->lock);
	return rq->clock;
}

static inline u64 rq_clock_task(struct rq *rq)
{
	lockdep_assert_held(&rq->lock);
	return rq->clock_task;
}

#define RQCF_REQ_SKIP 0x01
#define RQCF_ACT_SKIP 0x02

static inline void rq_clock_skip_update(struct rq *rq, bool skip)
{
	lockdep_assert_held(&rq->lock);
	if (skip)
		rq->clock_skip_update |= RQCF_REQ_SKIP;
	else
		rq->clock_skip_update &= ~RQCF_REQ_SKIP;
}

#ifdef CONFIG_NUMA
enum numa_topology_type {
	NUMA_DIRECT,
	NUMA_GLUELESS_MESH,
	NUMA_BACKPLANE,
};
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
#endif

#ifdef CONFIG_NUMA_BALANCING
/* The regions in numa_faults array from task_struct */
enum numa_faults_stats {
	NUMA_MEM = 0,
	NUMA_CPU,
	NUMA_MEMBUF,
	NUMA_CPUBUF
};
extern void sched_setnuma(struct task_struct *p, int node);
extern int migrate_task_to(struct task_struct *p, int cpu);
extern int migrate_swap(struct task_struct *, struct task_struct *);
#endif /* CONFIG_NUMA_BALANCING */

#ifdef CONFIG_SMP

static inline void
queue_balance_callback(struct rq *rq,
		       struct callback_head *head,
		       void (*func)(struct rq *rq))
{
	lockdep_assert_held(&rq->lock);

	if (unlikely(head->next))
		return;

	head->func = (void (*)(struct callback_head *))func;
	head->next = rq->balance_callback;
	rq->balance_callback = head;
}
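
/*
 * Illustrative sketch: a scheduling class queues deferred balancing work
 * from under rq->lock, e.g.
 *
 *	queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu),
 *			       push_rt_tasks);
 *
 * The head/function names here mirror what the RT class does; the queued
 * callbacks are run later, at points where it is safe to drop rq->lock.
 */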

extern void sched_ttwu_pending(void);

#define rcu_dereference_check_sched_domain(p) \
	rcu_dereference_check((p), \
			      lockdep_is_held(&sched_domains_mutex))

/*
 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
 * See detach_destroy_domains: synchronize_sched for details.
 *
 * The domain tree of any CPU may only be accessed from within
 * preempt-disabled sections.
 */
#define for_each_domain(cpu, __sd) \
	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
			__sd; __sd = __sd->parent)

#define for_each_lower_domain(sd) for (; sd; sd = sd->child)

/**
 * highest_flag_domain - Return highest sched_domain containing flag.
 * @cpu:	The cpu whose highest level of sched domain is to
 *		be returned.
 * @flag:	The flag to check for the highest sched_domain
 *		for the given cpu.
 *
 * Returns the highest sched_domain of a cpu which contains the given flag.
 */
static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
{
	struct sched_domain *sd, *hsd = NULL;

	for_each_domain(cpu, sd) {
		if (!(sd->flags & flag))
			break;
		hsd = sd;
	}

	return hsd;
}
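
/*
 * For example (illustrative), the per-cpu sd_llc pointer declared below is
 * derived from highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES): the widest
 * domain that still shares the last-level cache.
 */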

static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
{
	struct sched_domain *sd;

	for_each_domain(cpu, sd) {
		if (sd->flags & flag)
			break;
	}

	return sd;
}

DECLARE_PER_CPU(struct sched_domain *, sd_llc);
DECLARE_PER_CPU(int, sd_llc_size);
DECLARE_PER_CPU(int, sd_llc_id);
DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
DECLARE_PER_CPU(struct sched_domain *, sd_numa);
DECLARE_PER_CPU(struct sched_domain *, sd_asym);

struct sched_group_capacity {
	atomic_t ref;
	/*
	 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
	 * for a single CPU.
	 */
	unsigned int capacity;
	unsigned long next_update;
	int imbalance; /* XXX unrelated to capacity but shared group state */

	unsigned long cpumask[0]; /* iteration mask */
};

struct sched_group {
	struct sched_group *next; /* Must be a circular list */
	atomic_t ref;

	unsigned int group_weight;
	struct sched_group_capacity *sgc;

	/*
	 * The CPUs this group covers.
	 *
	 * NOTE: this field is variable length. (Allocated dynamically
	 * by attaching extra space to the end of the structure,
	 * depending on how many CPUs the kernel has booted up with)
	 */
	unsigned long cpumask[0];
};

static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
{
	return to_cpumask(sg->cpumask);
}

/*
 * cpumask masking which cpus in the group are allowed to iterate up the domain
 * tree.
 */
static inline struct cpumask *sched_group_mask(struct sched_group *sg)
{
	return to_cpumask(sg->sgc->cpumask);
}

/**
 * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
 * @group: The group whose first cpu is to be returned.
 */
static inline unsigned int group_first_cpu(struct sched_group *group)
{
	return cpumask_first(sched_group_cpus(group));
}

extern int group_balance_cpu(struct sched_group *sg);

#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
void register_sched_domain_sysctl(void);
void unregister_sched_domain_sysctl(void);
#else
static inline void register_sched_domain_sysctl(void)
{
}
static inline void unregister_sched_domain_sysctl(void)
{
}
#endif

#else

static inline void sched_ttwu_pending(void) { }

#endif /* CONFIG_SMP */

#include "stats.h"
#include "auto_group.h"

#ifdef CONFIG_SCHED_HMP

#define WINDOW_STATS_RECENT 0
#define WINDOW_STATS_MAX 1
#define WINDOW_STATS_MAX_RECENT_AVG 2
#define WINDOW_STATS_AVG 3
#define WINDOW_STATS_INVALID_POLICY 4

#define MAJOR_TASK_PCT 85
#define SCHED_UPMIGRATE_MIN_NICE 15
#define EXITING_TASK_MARKER 0xdeaddead

#define UP_MIGRATION 1
#define DOWN_MIGRATION 2
#define IRQLOAD_MIGRATION 3

extern struct mutex policy_mutex;
extern unsigned int sched_ravg_window;
extern unsigned int sched_disable_window_stats;
extern unsigned int max_possible_freq;
extern unsigned int min_max_freq;
extern unsigned int pct_task_load(struct task_struct *p);
extern unsigned int max_possible_efficiency;
extern unsigned int min_possible_efficiency;
extern unsigned int max_capacity;
extern unsigned int min_capacity;
extern unsigned int max_load_scale_factor;
extern unsigned int max_possible_capacity;
extern unsigned int min_max_possible_capacity;
extern unsigned int sched_upmigrate;
extern unsigned int sched_downmigrate;
extern unsigned int sched_init_task_load_windows;
extern unsigned int up_down_migrate_scale_factor;
extern unsigned int sysctl_sched_restrict_cluster_spill;
extern unsigned int sched_pred_alert_load;
extern unsigned int sched_major_task_runtime;
extern struct sched_cluster init_cluster;
extern unsigned int __read_mostly sched_short_sleep_task_threshold;
extern unsigned int __read_mostly sched_long_cpu_selection_threshold;
extern unsigned int __read_mostly sched_big_waker_task_load;
extern unsigned int __read_mostly sched_small_wakee_task_load;
extern unsigned int __read_mostly sched_spill_load;
extern unsigned int __read_mostly sched_upmigrate;
extern unsigned int __read_mostly sched_downmigrate;
extern unsigned int __read_mostly sysctl_sched_spill_nr_run;

extern void init_new_task_load(struct task_struct *p);
extern u64 sched_ktime_clock(void);
extern int got_boost_kick(void);
extern int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb);
extern void update_task_ravg(struct task_struct *p, struct rq *rq, int event,
			     u64 wallclock, u64 irqtime);
extern bool early_detection_notify(struct rq *rq, u64 wallclock);
extern void clear_ed_task(struct task_struct *p, struct rq *rq);
extern void fixup_busy_time(struct task_struct *p, int new_cpu);
extern void clear_boost_kick(int cpu);
extern void clear_hmp_request(int cpu);
extern void mark_task_starting(struct task_struct *p);
extern void set_window_start(struct rq *rq);
extern void migrate_sync_cpu(int cpu);
extern void update_cluster_topology(void);
extern void set_task_last_wake(struct task_struct *p, u64 wallclock);
extern void set_task_last_switch_out(struct task_struct *p, u64 wallclock);
extern void init_clusters(void);
extern void reset_cpu_hmp_stats(int cpu, int reset_cra);
extern unsigned int max_task_load(void);
extern void sched_account_irqtime(int cpu, struct task_struct *curr,
				  u64 delta, u64 wallclock);
extern void sched_account_irqstart(int cpu, struct task_struct *curr,
				   u64 wallclock);
extern unsigned int cpu_temp(int cpu);
extern unsigned int nr_eligible_big_tasks(int cpu);
extern void update_up_down_migrate(void);
extern int update_preferred_cluster(struct related_thread_group *grp,
				    struct task_struct *p, u32 old_load);
extern void set_preferred_cluster(struct related_thread_group *grp);
extern void add_new_task_to_grp(struct task_struct *new);

enum sched_boost_type {
	SCHED_BOOST_NONE,
	SCHED_BOOST_ON_BIG,
	SCHED_BOOST_ON_ALL,
};

static inline struct sched_cluster *cpu_cluster(int cpu)
{
	return cpu_rq(cpu)->cluster;
}

static inline int cpu_capacity(int cpu)
{
	return cpu_rq(cpu)->cluster->capacity;
}

static inline int cpu_max_possible_capacity(int cpu)
{
	return cpu_rq(cpu)->cluster->max_possible_capacity;
}

static inline int cpu_load_scale_factor(int cpu)
{
	return cpu_rq(cpu)->cluster->load_scale_factor;
}

static inline int cpu_efficiency(int cpu)
{
	return cpu_rq(cpu)->cluster->efficiency;
}

static inline unsigned int cpu_cur_freq(int cpu)
{
	return cpu_rq(cpu)->cluster->cur_freq;
}

static inline unsigned int cpu_min_freq(int cpu)
{
	return cpu_rq(cpu)->cluster->min_freq;
}

static inline unsigned int cluster_max_freq(struct sched_cluster *cluster)
{
	/*
	 * The governor and the thermal driver don't know each other's
	 * mitigation vote, so struct sched_cluster saves both and the
	 * current cluster fmax is the min() of the two.
	 */
	return min(cluster->max_mitigated_freq, cluster->max_freq);
}

static inline unsigned int cpu_max_freq(int cpu)
{
	return cluster_max_freq(cpu_rq(cpu)->cluster);
}

static inline unsigned int cpu_max_possible_freq(int cpu)
{
	return cpu_rq(cpu)->cluster->max_possible_freq;
}

static inline int same_cluster(int src_cpu, int dst_cpu)
{
	return cpu_rq(src_cpu)->cluster == cpu_rq(dst_cpu)->cluster;
}

static inline int cpu_max_power_cost(int cpu)
{
	return cpu_rq(cpu)->cluster->max_power_cost;
}

static inline u32 cpu_cycles_to_freq(u64 cycles, u32 period)
{
	return div64_u64(cycles, period);
}

static inline bool hmp_capable(void)
{
	return max_possible_capacity != min_max_possible_capacity;
}

/*
 * 'load' is in reference to "best cpu" at its best frequency.
 * Scale that in reference to a given cpu, accounting for how bad it is
 * in reference to "best cpu".
 */
static inline u64 scale_load_to_cpu(u64 task_load, int cpu)
{
	u64 lsf = cpu_load_scale_factor(cpu);

	if (lsf != 1024) {
		task_load *= lsf;
		task_load /= 1024;
	}

	return task_load;
}
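
/*
 * Illustration: on a cluster with load_scale_factor 1536 (a CPU roughly
 * 2/3 as capable as the best one), a task demand of 100 scales to 150;
 * on the best cluster (lsf == 1024) the demand is returned unchanged.
 */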

static inline unsigned int task_load(struct task_struct *p)
{
	return p->ravg.demand;
}

static inline void
inc_cumulative_runnable_avg(struct hmp_sched_stats *stats,
			    struct task_struct *p)
{
	u32 task_load;

	if (sched_disable_window_stats)
		return;

	task_load = sched_disable_window_stats ? 0 : p->ravg.demand;

	stats->cumulative_runnable_avg += task_load;
	stats->pred_demands_sum += p->ravg.pred_demand;
}

static inline void
dec_cumulative_runnable_avg(struct hmp_sched_stats *stats,
			    struct task_struct *p)
{
	u32 task_load;

	if (sched_disable_window_stats)
		return;

	task_load = sched_disable_window_stats ? 0 : p->ravg.demand;

	stats->cumulative_runnable_avg -= task_load;

	BUG_ON((s64)stats->cumulative_runnable_avg < 0);

	stats->pred_demands_sum -= p->ravg.pred_demand;
	BUG_ON((s64)stats->pred_demands_sum < 0);
}

static inline void
fixup_cumulative_runnable_avg(struct hmp_sched_stats *stats,
			      struct task_struct *p, s64 task_load_delta,
			      s64 pred_demand_delta)
{
	if (sched_disable_window_stats)
		return;

	stats->cumulative_runnable_avg += task_load_delta;
	BUG_ON((s64)stats->cumulative_runnable_avg < 0);

	stats->pred_demands_sum += pred_demand_delta;
	BUG_ON((s64)stats->pred_demands_sum < 0);
}

#define pct_to_real(tunable) \
	(div64_u64((u64)tunable * (u64)max_task_load(), 100))

#define real_to_pct(tunable) \
	(div64_u64((u64)tunable * (u64)100, (u64)max_task_load()))
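
/*
 * Illustration: if max_task_load() is 100000, then pct_to_real(85) is
 * 85000 and real_to_pct(85000) is 85.
 */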

#define SCHED_HIGH_IRQ_TIMEOUT 3
static inline u64 sched_irqload(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	s64 delta;

	delta = get_jiffies_64() - rq->irqload_ts;
	/*
	 * Current context can be preempted by irq and rq->irqload_ts can be
	 * updated by irq context so that delta can be negative.
	 * But this is okay and we can safely return as this means there
	 * was a recent irq occurrence.
	 */

	if (delta < SCHED_HIGH_IRQ_TIMEOUT)
		return rq->avg_irqload;
	else
		return 0;
}

static inline int sched_cpu_high_irqload(int cpu)
{
	return sched_irqload(cpu) >= sysctl_sched_cpu_high_irqload;
}

static inline bool task_in_related_thread_group(struct task_struct *p)
{
	return !!(rcu_access_pointer(p->grp) != NULL);
}

static inline
struct related_thread_group *task_related_thread_group(struct task_struct *p)
{
	return rcu_dereference(p->grp);
}

#define PRED_DEMAND_DELTA ((s64)new_pred_demand - p->ravg.pred_demand)

extern void
check_for_freq_change(struct rq *rq, bool check_pred, bool check_groups);

extern void notify_migration(int src_cpu, int dest_cpu,
			     bool src_cpu_dead, struct task_struct *p);

struct group_cpu_time {
	u64 curr_runnable_sum;
	u64 prev_runnable_sum;
	u64 nt_curr_runnable_sum;
	u64 nt_prev_runnable_sum;
	u64 window_start;
};

/* Is frequency of two cpus synchronized with each other? */
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
	struct rq *rq = cpu_rq(src_cpu);

	if (src_cpu == dst_cpu)
		return 1;

	return cpumask_test_cpu(dst_cpu, &rq->freq_domain_cpumask);
}

#define BOOST_KICK 0
#define CPU_RESERVED 1

static inline int is_reserved(int cpu)
{
	struct rq *rq = cpu_rq(cpu);

	return test_bit(CPU_RESERVED, &rq->hmp_flags);
}

static inline int mark_reserved(int cpu)
{
	struct rq *rq = cpu_rq(cpu);

	/* Name boost_flags as hmp_flags? */
	return test_and_set_bit(CPU_RESERVED, &rq->hmp_flags);
}

static inline void clear_reserved(int cpu)
{
	struct rq *rq = cpu_rq(cpu);

	clear_bit(CPU_RESERVED, &rq->hmp_flags);
}

static inline u64 cpu_cravg_sync(int cpu, int sync)
{
	struct rq *rq = cpu_rq(cpu);
	u64 load;

	load = rq->hmp_stats.cumulative_runnable_avg;

	/*
	 * If load is being checked in a sync wakeup environment,
	 * we may want to discount the load of the currently running
	 * task.
	 */
	if (sync && cpu == smp_processor_id()) {
		if (load > rq->curr->ravg.demand)
			load -= rq->curr->ravg.demand;
		else
			load = 0;
	}

	return load;
}
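
/*
 * Illustration: for a sync wakeup handled on this CPU whose current task
 * has a demand of 300 against a cumulative_runnable_avg of 1000, the
 * reported load is 700, since the waker is expected to sleep shortly.
 */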

extern void check_for_migration(struct rq *rq, struct task_struct *p);
extern void pre_big_task_count_change(const struct cpumask *cpus);
extern void post_big_task_count_change(const struct cpumask *cpus);
extern void set_hmp_defaults(void);
extern int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost);
extern unsigned int power_cost(int cpu, u64 demand);
extern void reset_all_window_stats(u64 window_start, unsigned int window_size);
extern void boost_kick(int cpu);
extern int sched_boost(void);
extern int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu,
			      enum sched_boost_type boost_type);
extern enum sched_boost_type sched_boost_type(void);
extern int task_will_fit(struct task_struct *p, int cpu);
extern int group_will_fit(struct sched_cluster *cluster,
			  struct related_thread_group *grp, u64 demand);
extern u64 cpu_load(int cpu);
extern u64 cpu_load_sync(int cpu, int sync);
extern int preferred_cluster(struct sched_cluster *cluster,
			     struct task_struct *p);
extern void inc_nr_big_task(struct hmp_sched_stats *stats,
			    struct task_struct *p);
extern void dec_nr_big_task(struct hmp_sched_stats *stats,
			    struct task_struct *p);
extern void inc_rq_hmp_stats(struct rq *rq,
			     struct task_struct *p, int change_cra);
extern void dec_rq_hmp_stats(struct rq *rq,
			     struct task_struct *p, int change_cra);
extern int is_big_task(struct task_struct *p);
extern int upmigrate_discouraged(struct task_struct *p);
extern struct sched_cluster *rq_cluster(struct rq *rq);
extern int nr_big_tasks(struct rq *rq);
extern void fixup_nr_big_tasks(struct hmp_sched_stats *stats,
			       struct task_struct *p, s64 delta);
extern void reset_task_stats(struct task_struct *p);
extern void reset_cfs_rq_hmp_stats(int cpu, int reset_cra);
extern void inc_hmp_sched_stats_fair(struct rq *rq,
				     struct task_struct *p, int change_cra);
extern u64 cpu_upmigrate_discourage_read_u64(struct cgroup_subsys_state *css,
					     struct cftype *cft);
extern int cpu_upmigrate_discourage_write_u64(struct cgroup_subsys_state *css,
					      struct cftype *cft, u64 upmigrate_discourage);

#else /* CONFIG_SCHED_HMP */

struct hmp_sched_stats;
struct related_thread_group;
struct sched_cluster;

static inline int got_boost_kick(void)
{
	return 0;
}

static inline void update_task_ravg(struct task_struct *p, struct rq *rq,
				    int event, u64 wallclock, u64 irqtime) { }

static inline bool early_detection_notify(struct rq *rq, u64 wallclock)
{
	return 0;
}

static inline void clear_ed_task(struct task_struct *p, struct rq *rq) { }
static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
static inline void clear_boost_kick(int cpu) { }
static inline void clear_hmp_request(int cpu) { }
static inline void mark_task_starting(struct task_struct *p) { }
static inline void set_window_start(struct rq *rq) { }
static inline void migrate_sync_cpu(int cpu) { }
static inline void update_cluster_topology(void) { }
static inline void set_task_last_wake(struct task_struct *p, u64 wallclock) { }
static inline void set_task_last_switch_out(struct task_struct *p,
					    u64 wallclock) { }

static inline int task_will_fit(struct task_struct *p, int cpu)
{
	return 1;
}

static inline unsigned int power_cost(int cpu, u64 demand)
{
	return SCHED_CAPACITY_SCALE;
}

static inline int sched_boost(void)
{
	return 0;
}

static inline int is_big_task(struct task_struct *p)
{
	return 0;
}

static inline int nr_big_tasks(struct rq *rq)
{
	return 0;
}

static inline int is_cpu_throttling_imminent(int cpu)
{
	return 0;
}

static inline int is_task_migration_throttled(struct task_struct *p)
{
	return 0;
}

static inline unsigned int cpu_temp(int cpu)
{
	return 0;
}

static inline void
inc_rq_hmp_stats(struct rq *rq, struct task_struct *p, int change_cra) { }

static inline void
dec_rq_hmp_stats(struct rq *rq, struct task_struct *p, int change_cra) { }

static inline int
preferred_cluster(struct sched_cluster *cluster, struct task_struct *p)
{
	return 1;
}

static inline struct sched_cluster *rq_cluster(struct rq *rq)
{
	return NULL;
}

static inline void init_new_task_load(struct task_struct *p) { }
1545
1546static inline u64 scale_load_to_cpu(u64 load, int cpu)
1547{
1548 return load;
1549}
1550
1551static inline unsigned int nr_eligible_big_tasks(int cpu)
1552{
1553 return 0;
1554}
1555
1556static inline int pct_task_load(struct task_struct *p) { return 0; }
1557
1558static inline int cpu_capacity(int cpu)
1559{
1560 return SCHED_CAPACITY_SCALE;
1561}
1562
1563static inline int same_cluster(int src_cpu, int dst_cpu) { return 1; }
1564
1565static inline void inc_cumulative_runnable_avg(struct hmp_sched_stats *stats,
1566 struct task_struct *p)
1567{
1568}
1569
1570static inline void dec_cumulative_runnable_avg(struct hmp_sched_stats *stats,
1571 struct task_struct *p)
1572{
1573}
1574
1575static inline void sched_account_irqtime(int cpu, struct task_struct *curr,
1576 u64 delta, u64 wallclock)
1577{
1578}
1579
1580static inline void sched_account_irqstart(int cpu, struct task_struct *curr,
1581 u64 wallclock)
1582{
1583}
1584
1585static inline int sched_cpu_high_irqload(int cpu) { return 0; }
1586
1587static inline void set_preferred_cluster(struct related_thread_group *grp) { }
1588
1589static inline bool task_in_related_thread_group(struct task_struct *p)
1590{
1591 return false;
1592}
1593
1594static inline
1595struct related_thread_group *task_related_thread_group(struct task_struct *p)
1596{
1597 return NULL;
1598}
1599
1600static inline u32 task_load(struct task_struct *p) { return 0; }
1601
1602static inline int update_preferred_cluster(struct related_thread_group *grp,
1603 struct task_struct *p, u32 old_load)
1604{
1605 return 0;
1606}
1607
1608static inline void add_new_task_to_grp(struct task_struct *new) {}
1609
1610#define sched_freq_legacy_mode 1
1611#define sched_migration_fixup 0
1612#define PRED_DEMAND_DELTA (0)
1613
1614static inline void
1615check_for_freq_change(struct rq *rq, bool check_pred, bool check_groups) { }
1616
1617static inline void notify_migration(int src_cpu, int dest_cpu,
1618 bool src_cpu_dead, struct task_struct *p) { }
1619
1620static inline int same_freq_domain(int src_cpu, int dst_cpu)
1621{
1622 return 1;
1623}
1624
1625static inline void check_for_migration(struct rq *rq, struct task_struct *p) { }
1626static inline void pre_big_task_count_change(void) { }
1627static inline void post_big_task_count_change(void) { }
1628static inline void set_hmp_defaults(void) { }
1629
1630static inline void clear_reserved(int cpu) { }
1631
1632#define trace_sched_cpu_load(...)
1633#define trace_sched_cpu_load_lb(...)
1634#define trace_sched_cpu_load_cgroup(...)
1635#define trace_sched_cpu_load_wakeup(...)
1636
1637#endif /* CONFIG_SCHED_HMP */
1638
1639/*
1640 * Returns the capacity of the first CPU in the group. This does not play
1641 * well with groups where rq capacities can change independently.
1642 */
1643#define group_rq_capacity(group) cpu_capacity(group_first_cpu(group))
1644
Peter Zijlstra029632f2011-10-25 10:00:11 +02001645#ifdef CONFIG_CGROUP_SCHED
1646
1647/*
1648 * Return the group to which this task belongs.
1649 *
Tejun Heo8af01f52013-08-08 20:11:22 -04001650 * We cannot use task_css() and friends because the cgroup subsystem
1651 * changes that value before the cgroup_subsys::attach() method is called,
1652 * therefore we cannot pin it and might observe the wrong value.
Peter Zijlstra8323f262012-06-22 13:36:05 +02001653 *
1654 * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
1655 * core changes this before calling sched_move_task().
1656 *
1657 * Instead we use a 'copy' which is updated from sched_move_task() while
1658 * holding both task_struct::pi_lock and rq::lock.
Peter Zijlstra029632f2011-10-25 10:00:11 +02001659 */
1660static inline struct task_group *task_group(struct task_struct *p)
1661{
Peter Zijlstra8323f262012-06-22 13:36:05 +02001662 return p->sched_task_group;
Peter Zijlstra029632f2011-10-25 10:00:11 +02001663}
1664
1665/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
1666static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
1667{
1668#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
1669 struct task_group *tg = task_group(p);
1670#endif
1671
1672#ifdef CONFIG_FAIR_GROUP_SCHED
Byungchul Parkad936d82015-10-24 01:16:19 +09001673 set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
Peter Zijlstra029632f2011-10-25 10:00:11 +02001674 p->se.cfs_rq = tg->cfs_rq[cpu];
1675 p->se.parent = tg->se[cpu];
1676#endif
1677
1678#ifdef CONFIG_RT_GROUP_SCHED
1679 p->rt.rt_rq = tg->rt_rq[cpu];
1680 p->rt.parent = tg->rt_se[cpu];
1681#endif
1682}
1683
1684#else /* CONFIG_CGROUP_SCHED */
1685
1686static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
1687static inline struct task_group *task_group(struct task_struct *p)
1688{
1689 return NULL;
1690}
1691
1692#endif /* CONFIG_CGROUP_SCHED */
1693
1694static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1695{
1696 set_task_rq(p, cpu);
1697#ifdef CONFIG_SMP
1698 /*
1699 * After ->cpu is set to a new value, task_rq_lock(p, ...) can be
1700 * successfully executed on another CPU. We must ensure that updates of
1701 * per-task data have been completed by this moment.
1702 */
1703 smp_wmb();
Andy Lutomirskic65eacb2016-09-13 14:29:24 -07001704#ifdef CONFIG_THREAD_INFO_IN_TASK
1705 p->cpu = cpu;
1706#else
Peter Zijlstra029632f2011-10-25 10:00:11 +02001707 task_thread_info(p)->cpu = cpu;
Andy Lutomirskic65eacb2016-09-13 14:29:24 -07001708#endif
Peter Zijlstraac66f542013-10-07 11:29:16 +01001709 p->wake_cpu = cpu;
Peter Zijlstra029632f2011-10-25 10:00:11 +02001710#endif
1711}
1712
1713/*
1714 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
1715 */
1716#ifdef CONFIG_SCHED_DEBUG
Ingo Molnarc5905af2012-02-24 08:31:31 +01001717# include <linux/static_key.h>
Peter Zijlstra029632f2011-10-25 10:00:11 +02001718# define const_debug __read_mostly
1719#else
1720# define const_debug const
1721#endif
1722
1723extern const_debug unsigned int sysctl_sched_features;
1724
1725#define SCHED_FEAT(name, enabled) \
1726 __SCHED_FEAT_##name ,
1727
1728enum {
Peter Zijlstra391e43d2011-11-15 17:14:39 +01001729#include "features.h"
Peter Zijlstraf8b6d1c2011-07-06 14:20:14 +02001730 __SCHED_FEAT_NR,
Peter Zijlstra029632f2011-10-25 10:00:11 +02001731};
1732
1733#undef SCHED_FEAT
1734
Peter Zijlstraf8b6d1c2011-07-06 14:20:14 +02001735#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
Peter Zijlstraf8b6d1c2011-07-06 14:20:14 +02001736#define SCHED_FEAT(name, enabled) \
Ingo Molnarc5905af2012-02-24 08:31:31 +01001737static __always_inline bool static_branch_##name(struct static_key *key) \
Peter Zijlstraf8b6d1c2011-07-06 14:20:14 +02001738{ \
Jason Baron6e76ea82014-07-02 15:52:41 +00001739 return static_key_##enabled(key); \
Peter Zijlstraf8b6d1c2011-07-06 14:20:14 +02001740}
1741
1742#include "features.h"
1743
1744#undef SCHED_FEAT
1745
Ingo Molnarc5905af2012-02-24 08:31:31 +01001746extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
Peter Zijlstraf8b6d1c2011-07-06 14:20:14 +02001747#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
1748#else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
Peter Zijlstra029632f2011-10-25 10:00:11 +02001749#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
Peter Zijlstraf8b6d1c2011-07-06 14:20:14 +02001750#endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
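/*
 * Illustrative sketch (not part of the build): assuming features.h carries an
 * entry such as SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true), the machinery above
 * expands it into the enum value __SCHED_FEAT_GENTLE_FAIR_SLEEPERS and, with
 * jump labels, into a static_branch_GENTLE_FAIR_SLEEPERS() helper, so callers
 * simply test:
 *
 *	if (sched_feat(GENTLE_FAIR_SLEEPERS))
 *		thresh >>= 1;
 *
 * With SCHED_DEBUG && HAVE_JUMP_LABEL this compiles down to a patched jump;
 * otherwise it degrades to a bit test on sysctl_sched_features.
 */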
Peter Zijlstra029632f2011-10-25 10:00:11 +02001751
Srikar Dronamraju2a595722015-08-11 21:54:21 +05301752extern struct static_key_false sched_numa_balancing;
Mel Gormancb251762016-02-05 09:08:36 +00001753extern struct static_key_false sched_schedstats;
Peter Zijlstracbee9f82012-10-25 14:16:43 +02001754
Peter Zijlstra029632f2011-10-25 10:00:11 +02001755static inline u64 global_rt_period(void)
1756{
1757 return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
1758}
1759
1760static inline u64 global_rt_runtime(void)
1761{
1762 if (sysctl_sched_rt_runtime < 0)
1763 return RUNTIME_INF;
1764
1765 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
1766}
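/*
 * Worked example, assuming the usual defaults of sysctl_sched_rt_period ==
 * 1000000us and sysctl_sched_rt_runtime == 950000us: global_rt_period()
 * returns 1000000 * 1000 = 1e9 ns and global_rt_runtime() returns 9.5e8 ns,
 * i.e. RT tasks may consume at most 0.95s of every 1s period before being
 * throttled. Writing -1 to sched_rt_runtime_us yields RUNTIME_INF and
 * disables the throttling entirely.
 */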
1767
Peter Zijlstra029632f2011-10-25 10:00:11 +02001768static inline int task_current(struct rq *rq, struct task_struct *p)
1769{
1770 return rq->curr == p;
1771}
1772
1773static inline int task_running(struct rq *rq, struct task_struct *p)
1774{
1775#ifdef CONFIG_SMP
1776 return p->on_cpu;
1777#else
1778 return task_current(rq, p);
1779#endif
1780}
1781
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04001782static inline int task_on_rq_queued(struct task_struct *p)
1783{
1784 return p->on_rq == TASK_ON_RQ_QUEUED;
1785}
Peter Zijlstra029632f2011-10-25 10:00:11 +02001786
Kirill Tkhaicca26e82014-08-20 13:47:42 +04001787static inline int task_on_rq_migrating(struct task_struct *p)
1788{
1789 return p->on_rq == TASK_ON_RQ_MIGRATING;
1790}
1791
Peter Zijlstra029632f2011-10-25 10:00:11 +02001792#ifndef prepare_arch_switch
1793# define prepare_arch_switch(next) do { } while (0)
1794#endif
Catalin Marinas01f23e12011-11-27 21:43:10 +00001795#ifndef finish_arch_post_lock_switch
1796# define finish_arch_post_lock_switch() do { } while (0)
1797#endif
Peter Zijlstra029632f2011-10-25 10:00:11 +02001798
Peter Zijlstra029632f2011-10-25 10:00:11 +02001799static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
1800{
1801#ifdef CONFIG_SMP
1802 /*
1803 * We can optimise this out completely for !SMP, because the
1804 * SMP rebalancing from interrupt is the only thing that cares
1805 * here.
1806 */
1807 next->on_cpu = 1;
1808#endif
1809}
1810
1811static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
1812{
1813#ifdef CONFIG_SMP
1814 /*
1815 * After ->on_cpu is cleared, the task can be moved to a different CPU.
1816 * We must ensure this doesn't happen until the switch is completely
1817 * finished.
Peter Zijlstra95913d92015-09-29 14:45:09 +02001818 *
Peter Zijlstrab75a2252015-10-06 14:36:17 +02001819 * In particular, the load of prev->state in finish_task_switch() must
1820 * happen before this.
1821 *
Peter Zijlstra1f03e8d2016-04-04 10:57:12 +02001822 * Pairs with the smp_cond_load_acquire() in try_to_wake_up().
Peter Zijlstra029632f2011-10-25 10:00:11 +02001823 */
Peter Zijlstra95913d92015-09-29 14:45:09 +02001824 smp_store_release(&prev->on_cpu, 0);
Peter Zijlstra029632f2011-10-25 10:00:11 +02001825#endif
1826#ifdef CONFIG_DEBUG_SPINLOCK
1827 /* this is a valid case when another task releases the spinlock */
1828 rq->lock.owner = current;
1829#endif
1830 /*
1831 * If we are tracking spinlock dependencies then we have to
1832 * fix up the runqueue lock - which gets 'carried over' from
1833 * prev into current:
1834 */
1835 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
1836
1837 raw_spin_unlock_irq(&rq->lock);
1838}
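/*
 * Illustrative sketch of the release/acquire pairing described above,
 * assuming the smp_cond_load_acquire() based wait loop in try_to_wake_up():
 *
 *	finish_lock_switch():			try_to_wake_up():
 *	  smp_store_release(&prev->on_cpu, 0);	  smp_cond_load_acquire(&p->on_cpu, !VAL);
 *						  ... select CPU, enqueue p ...
 *
 * The pairing guarantees the waker cannot start migrating or enqueueing @prev
 * elsewhere until the switch on this CPU has completely finished, including
 * the load of prev->state in finish_task_switch().
 */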
1839
Li Zefanb13095f2013-03-05 16:06:38 +08001840/*
1841 * wake flags
1842 */
1843#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
1844#define WF_FORK 0x02 /* child wakeup after fork */
1845#define WF_MIGRATED	0x04		/* internal use, task got migrated */
Syed Rameez Mustafadddcab72016-09-07 16:18:27 -07001846#define WF_NO_NOTIFIER 0x08 /* do not notify governor */
Li Zefanb13095f2013-03-05 16:06:38 +08001847
Peter Zijlstra029632f2011-10-25 10:00:11 +02001848/*
1849 * To aid in avoiding the subversion of "niceness" due to uneven distribution
1850 * of tasks with abnormal "nice" values across CPUs, the contribution that
1851 * each task makes to its run queue's load is weighted according to its
1852 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
1853 * scaled version of the new time slice allocation that they receive on time
1854 * slice expiry etc.
1855 */
1856
1857#define WEIGHT_IDLEPRIO 3
1858#define WMULT_IDLEPRIO 1431655765
1859
Andi Kleened82b8a2015-11-29 20:59:43 -08001860extern const int sched_prio_to_weight[40];
1861extern const u32 sched_prio_to_wmult[40];
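/*
 * Worked example, using the standard sched_prio_to_weight[] table where
 * nice 0 maps to a weight of 1024 and each nice level scales the weight by
 * roughly 1.25: a nice 0 task (weight 1024) sharing a CPU with a nice 5 task
 * (weight 335) receives about 1024 / (1024 + 335) ~= 75% of the CPU and the
 * nice 5 task about 25%. sched_prio_to_wmult[] caches 2^32 / weight so the
 * fair class can replace divisions by weight with multiply-and-shift.
 */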
Peter Zijlstra029632f2011-10-25 10:00:11 +02001862
Peter Zijlstraff77e462016-01-18 15:27:07 +01001863/*
1864 * {de,en}queue flags:
1865 *
1866 * DEQUEUE_SLEEP - task is no longer runnable
1867 * ENQUEUE_WAKEUP - task just became runnable
1868 *
1869 * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
1870 * are in a known state which allows modification. Such pairs
1871 * should preserve as much state as possible.
1872 *
1873 * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
1874 * in the runqueue.
1875 *
1876 * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
1877 * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
Peter Zijlstra59efa0b2016-05-10 18:24:37 +02001878 * ENQUEUE_MIGRATED - the task was migrated during wakeup
Peter Zijlstraff77e462016-01-18 15:27:07 +01001879 *
1880 */
1881
1882#define DEQUEUE_SLEEP 0x01
1883#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
1884#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
1885
Peter Zijlstra1de64442015-09-30 17:44:13 +02001886#define ENQUEUE_WAKEUP 0x01
Peter Zijlstraff77e462016-01-18 15:27:07 +01001887#define ENQUEUE_RESTORE 0x02
1888#define ENQUEUE_MOVE 0x04
1889
1890#define ENQUEUE_HEAD 0x08
1891#define ENQUEUE_REPLENISH 0x10
Li Zefanc82ba9f2013-03-05 16:06:55 +08001892#ifdef CONFIG_SMP
Peter Zijlstra59efa0b2016-05-10 18:24:37 +02001893#define ENQUEUE_MIGRATED 0x20
Li Zefanc82ba9f2013-03-05 16:06:55 +08001894#else
Peter Zijlstra59efa0b2016-05-10 18:24:37 +02001895#define ENQUEUE_MIGRATED 0x00
Li Zefanc82ba9f2013-03-05 16:06:55 +08001896#endif
Li Zefanc82ba9f2013-03-05 16:06:55 +08001897
Peter Zijlstra37e117c2014-02-14 12:25:08 +01001898#define RETRY_TASK ((void *)-1UL)
1899
Li Zefanc82ba9f2013-03-05 16:06:55 +08001900struct sched_class {
1901 const struct sched_class *next;
1902
1903 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
1904 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
1905 void (*yield_task) (struct rq *rq);
1906 bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
1907
1908 void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
1909
Peter Zijlstra606dba22012-02-11 06:05:00 +01001910 /*
1911 * It is the responsibility of the pick_next_task() method that will
1912 * return the next task to call put_prev_task() on the @prev task or
1913 * something equivalent.
Peter Zijlstra37e117c2014-02-14 12:25:08 +01001914 *
1915 * May return RETRY_TASK when it finds a higher prio class has runnable
1916 * tasks.
Peter Zijlstra606dba22012-02-11 06:05:00 +01001917 */
1918 struct task_struct * (*pick_next_task) (struct rq *rq,
Peter Zijlstrae7904a22015-08-01 19:25:08 +02001919 struct task_struct *prev,
1920 struct pin_cookie cookie);
Li Zefanc82ba9f2013-03-05 16:06:55 +08001921 void (*put_prev_task) (struct rq *rq, struct task_struct *p);
1922
1923#ifdef CONFIG_SMP
Peter Zijlstraac66f542013-10-07 11:29:16 +01001924 int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
xiaofeng.yan5a4fd032015-09-23 14:55:59 +08001925 void (*migrate_task_rq)(struct task_struct *p);
Li Zefanc82ba9f2013-03-05 16:06:55 +08001926
Li Zefanc82ba9f2013-03-05 16:06:55 +08001927 void (*task_woken) (struct rq *this_rq, struct task_struct *task);
1928
1929 void (*set_cpus_allowed)(struct task_struct *p,
1930 const struct cpumask *newmask);
1931
1932 void (*rq_online)(struct rq *rq);
1933 void (*rq_offline)(struct rq *rq);
1934#endif
1935
1936 void (*set_curr_task) (struct rq *rq);
1937 void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
1938 void (*task_fork) (struct task_struct *p);
Dario Faggiolie6c390f2013-11-07 14:43:35 +01001939 void (*task_dead) (struct task_struct *p);
Li Zefanc82ba9f2013-03-05 16:06:55 +08001940
Kirill Tkhai67dfa1b2014-10-27 17:40:52 +03001941 /*
1942 * The switched_from() call is allowed to drop rq->lock, therefore we
1943 * cannot assume the switched_from/switched_to pair is serialized by
1944 * rq->lock. They are however serialized by p->pi_lock.
1945 */
Li Zefanc82ba9f2013-03-05 16:06:55 +08001946 void (*switched_from) (struct rq *this_rq, struct task_struct *task);
1947 void (*switched_to) (struct rq *this_rq, struct task_struct *task);
1948 void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
1949 int oldprio);
1950
1951 unsigned int (*get_rr_interval) (struct rq *rq,
1952 struct task_struct *task);
1953
Stanislaw Gruszka6e998912014-11-12 16:58:44 +01001954 void (*update_curr) (struct rq *rq);
1955
Vincent Guittotea86cb42016-06-17 13:38:55 +02001956#define TASK_SET_GROUP 0
1957#define TASK_MOVE_GROUP 1
1958
Li Zefanc82ba9f2013-03-05 16:06:55 +08001959#ifdef CONFIG_FAIR_GROUP_SCHED
Vincent Guittotea86cb42016-06-17 13:38:55 +02001960 void (*task_change_group) (struct task_struct *p, int type);
Li Zefanc82ba9f2013-03-05 16:06:55 +08001961#endif
Syed Rameez Mustafadddcab72016-09-07 16:18:27 -07001962#ifdef CONFIG_SCHED_HMP
1963 void (*fixup_hmp_sched_stats)(struct rq *rq, struct task_struct *p,
1964 u32 new_task_load, u32 new_pred_demand);
1965#endif
Li Zefanc82ba9f2013-03-05 16:06:55 +08001966};
Peter Zijlstra029632f2011-10-25 10:00:11 +02001967
Peter Zijlstra3f1d2a32014-02-12 10:49:30 +01001968static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
1969{
1970 prev->sched_class->put_prev_task(rq, prev);
1971}
1972
Peter Zijlstrab2bf6c32016-09-20 22:00:38 +02001973static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
1974{
1975 curr->sched_class->set_curr_task(rq);
1976}
1977
Peter Zijlstra029632f2011-10-25 10:00:11 +02001978#define sched_class_highest (&stop_sched_class)
1979#define for_each_class(class) \
1980 for (class = sched_class_highest; class; class = class->next)
1981
1982extern const struct sched_class stop_sched_class;
Dario Faggioliaab03e02013-11-28 11:14:43 +01001983extern const struct sched_class dl_sched_class;
Peter Zijlstra029632f2011-10-25 10:00:11 +02001984extern const struct sched_class rt_sched_class;
1985extern const struct sched_class fair_sched_class;
1986extern const struct sched_class idle_sched_class;
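/*
 * Illustrative sketch, a simplified form of the core pick loop and assuming
 * the usual class ordering stop -> dl -> rt -> fair -> idle: the scheduler
 * walks the classes from highest to lowest priority and takes the first task
 * offered, restarting from the top when a class reports RETRY_TASK:
 *
 *	again:
 *		for_each_class(class) {
 *			p = class->pick_next_task(rq, prev, cookie);
 *			if (p) {
 *				if (unlikely(p == RETRY_TASK))
 *					goto again;
 *				return p;
 *			}
 *		}
 *
 * The idle class always has a task to offer, so the loop cannot fall through.
 */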
1987
1988
1989#ifdef CONFIG_SMP
1990
Nicolas Pitre63b2ca32014-05-26 18:19:37 -04001991extern void update_group_capacity(struct sched_domain *sd, int cpu);
Li Zefanb7192032013-03-07 10:00:26 +08001992
Daniel Lezcano7caff662014-01-06 12:34:38 +01001993extern void trigger_load_balance(struct rq *rq);
Peter Zijlstra029632f2011-10-25 10:00:11 +02001994
Peter Zijlstrac5b28032015-05-15 17:43:35 +02001995extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
1996
Peter Zijlstra029632f2011-10-25 10:00:11 +02001997#endif
1998
Daniel Lezcano442bf3a2014-09-04 11:32:09 -04001999#ifdef CONFIG_CPU_IDLE
2000static inline void idle_set_state(struct rq *rq,
2001 struct cpuidle_state *idle_state)
2002{
2003 rq->idle_state = idle_state;
2004}
2005
2006static inline struct cpuidle_state *idle_get_state(struct rq *rq)
2007{
Peter Zijlstra9148a3a2016-09-20 22:34:51 +02002008 SCHED_WARN_ON(!rcu_read_lock_held());
Daniel Lezcano442bf3a2014-09-04 11:32:09 -04002009 return rq->idle_state;
2010}
2011#else
2012static inline void idle_set_state(struct rq *rq,
2013 struct cpuidle_state *idle_state)
2014{
2015}
2016
2017static inline struct cpuidle_state *idle_get_state(struct rq *rq)
2018{
2019 return NULL;
2020}
2021#endif
2022
Peter Zijlstra029632f2011-10-25 10:00:11 +02002023extern void sysrq_sched_debug_show(void);
2024extern void sched_init_granularity(void);
2025extern void update_max_interval(void);
Juri Lelli1baca4c2013-11-07 14:43:38 +01002026
2027extern void init_sched_dl_class(void);
Peter Zijlstra029632f2011-10-25 10:00:11 +02002028extern void init_sched_rt_class(void);
2029extern void init_sched_fair_class(void);
2030
Kirill Tkhai88751252014-06-29 00:03:57 +04002031extern void resched_curr(struct rq *rq);
Peter Zijlstra029632f2011-10-25 10:00:11 +02002032extern void resched_cpu(int cpu);
2033
2034extern struct rt_bandwidth def_rt_bandwidth;
2035extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
2036
Dario Faggioli332ac172013-11-07 14:43:45 +01002037extern struct dl_bandwidth def_dl_bandwidth;
2038extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
Dario Faggioliaab03e02013-11-28 11:14:43 +01002039extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
2040
Dario Faggioli332ac172013-11-07 14:43:45 +01002041unsigned long to_ratio(u64 period, u64 runtime);
2042
Yuyang Du540247f2015-07-15 08:04:39 +08002043extern void init_entity_runnable_average(struct sched_entity *se);
Yuyang Du2b8c41d2016-03-30 04:30:56 +08002044extern void post_init_entity_util_avg(struct sched_entity *se);
Alex Shia75cdaa2013-06-20 10:18:47 +08002045
Frederic Weisbecker76d92ac2015-07-17 22:25:49 +02002046#ifdef CONFIG_NO_HZ_FULL
2047extern bool sched_can_stop_tick(struct rq *rq);
2048
2049/*
2050 * Tick may be needed by tasks in the runqueue depending on their policy and
2051 * requirements. If the tick is needed, send the target CPU an IPI to kick it out of
2052 * nohz mode if necessary.
2053 */
2054static inline void sched_update_tick_dependency(struct rq *rq)
2055{
2056 int cpu;
2057
2058 if (!tick_nohz_full_enabled())
2059 return;
2060
2061 cpu = cpu_of(rq);
2062
2063 if (!tick_nohz_full_cpu(cpu))
2064 return;
2065
2066 if (sched_can_stop_tick(rq))
2067 tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
2068 else
2069 tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
2070}
2071#else
2072static inline void sched_update_tick_dependency(struct rq *rq) { }
2073#endif
2074
Kirill Tkhai72465442014-05-09 03:00:14 +04002075static inline void add_nr_running(struct rq *rq, unsigned count)
Peter Zijlstra029632f2011-10-25 10:00:11 +02002076{
Kirill Tkhai72465442014-05-09 03:00:14 +04002077 unsigned prev_nr = rq->nr_running;
2078
Syed Rameez Mustafadddcab72016-09-07 16:18:27 -07002079 sched_update_nr_prod(cpu_of(rq), count, true);
Kirill Tkhai72465442014-05-09 03:00:14 +04002080 rq->nr_running = prev_nr + count;
Frederic Weisbecker9f3660c2013-04-20 14:35:09 +02002081
Kirill Tkhai72465442014-05-09 03:00:14 +04002082 if (prev_nr < 2 && rq->nr_running >= 2) {
Tim Chen4486edd2014-06-23 12:16:49 -07002083#ifdef CONFIG_SMP
2084 if (!rq->rd->overload)
2085 rq->rd->overload = true;
2086#endif
Tim Chen4486edd2014-06-23 12:16:49 -07002087 }
Frederic Weisbecker76d92ac2015-07-17 22:25:49 +02002088
2089 sched_update_tick_dependency(rq);
Peter Zijlstra029632f2011-10-25 10:00:11 +02002090}
2091
Kirill Tkhai72465442014-05-09 03:00:14 +04002092static inline void sub_nr_running(struct rq *rq, unsigned count)
Peter Zijlstra029632f2011-10-25 10:00:11 +02002093{
Syed Rameez Mustafadddcab72016-09-07 16:18:27 -07002094 sched_update_nr_prod(cpu_of(rq), count, false);
Kirill Tkhai72465442014-05-09 03:00:14 +04002095 rq->nr_running -= count;
Frederic Weisbecker76d92ac2015-07-17 22:25:49 +02002096	/* Check if the tick is still needed on this CPU */
2097 sched_update_tick_dependency(rq);
Peter Zijlstra029632f2011-10-25 10:00:11 +02002098}
2099
Frederic Weisbecker265f22a2013-05-03 03:39:05 +02002100static inline void rq_last_tick_reset(struct rq *rq)
2101{
2102#ifdef CONFIG_NO_HZ_FULL
2103 rq->last_sched_tick = jiffies;
2104#endif
2105}
2106
Peter Zijlstra029632f2011-10-25 10:00:11 +02002107extern void update_rq_clock(struct rq *rq);
2108
2109extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
2110extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
2111
2112extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
2113
2114extern const_debug unsigned int sysctl_sched_time_avg;
2115extern const_debug unsigned int sysctl_sched_nr_migrate;
2116extern const_debug unsigned int sysctl_sched_migration_cost;
2117
2118static inline u64 sched_avg_period(void)
2119{
2120 return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
2121}
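/*
 * Worked example, assuming the default sysctl_sched_time_avg of 1000ms:
 * sched_avg_period() returns 1000 * 1e6 / 2 = 5e8 ns, i.e. sched_avg_update()
 * ages rq->rt_avg by halving it once per elapsed half-second period.
 */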
2122
Peter Zijlstra029632f2011-10-25 10:00:11 +02002123#ifdef CONFIG_SCHED_HRTICK
2124
2125/*
2126 * Use hrtick when:
2127 * - enabled by features
2128 * - hrtimer is actually high res
2129 */
2130static inline int hrtick_enabled(struct rq *rq)
2131{
2132 if (!sched_feat(HRTICK))
2133 return 0;
2134 if (!cpu_active(cpu_of(rq)))
2135 return 0;
2136 return hrtimer_is_hres_active(&rq->hrtick_timer);
2137}
2138
2139void hrtick_start(struct rq *rq, u64 delay);
2140
Mike Galbraithb39e66e2011-11-22 15:20:07 +01002141#else
2142
2143static inline int hrtick_enabled(struct rq *rq)
2144{
2145 return 0;
2146}
2147
Peter Zijlstra029632f2011-10-25 10:00:11 +02002148#endif /* CONFIG_SCHED_HRTICK */
2149
2150#ifdef CONFIG_SMP
2151extern void sched_avg_update(struct rq *rq);
Peter Zijlstradfbca412015-03-23 14:19:05 +01002152
2153#ifndef arch_scale_freq_capacity
2154static __always_inline
2155unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
2156{
2157 return SCHED_CAPACITY_SCALE;
2158}
2159#endif
Vincent Guittotb5b48602015-02-27 16:54:08 +01002160
Morten Rasmussen8cd56012015-08-14 17:23:10 +01002161#ifndef arch_scale_cpu_capacity
2162static __always_inline
2163unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
2164{
Dietmar Eggemanne3279a22015-08-15 00:04:41 +01002165 if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
Morten Rasmussen8cd56012015-08-14 17:23:10 +01002166 return sd->smt_gain / sd->span_weight;
2167
2168 return SCHED_CAPACITY_SCALE;
2169}
2170#endif
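/*
 * Worked example for the generic arch_scale_cpu_capacity() above, assuming
 * the default SMT sibling domain with smt_gain == 1178 and two hardware
 * threads (span_weight == 2): each SMT thread reports 1178 / 2 = 589 capacity
 * units instead of the full SCHED_CAPACITY_SCALE (1024), reflecting that two
 * siblings sharing a core are together worth a bit more than one full CPU but
 * individually considerably less.
 */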
2171
Peter Zijlstra029632f2011-10-25 10:00:11 +02002172static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
2173{
Vincent Guittotb5b48602015-02-27 16:54:08 +01002174 rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
Peter Zijlstra029632f2011-10-25 10:00:11 +02002175 sched_avg_update(rq);
2176}
2177#else
2178static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
2179static inline void sched_avg_update(struct rq *rq) { }
2180#endif
2181
Peter Zijlstraeb580752015-07-31 21:28:18 +02002182struct rq_flags {
2183 unsigned long flags;
Peter Zijlstrae7904a22015-08-01 19:25:08 +02002184 struct pin_cookie cookie;
Peter Zijlstraeb580752015-07-31 21:28:18 +02002185};
2186
2187struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
Peter Zijlstra3e71a462016-04-28 16:16:33 +02002188 __acquires(rq->lock);
Peter Zijlstraeb580752015-07-31 21:28:18 +02002189struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
Peter Zijlstra3960c8c2015-02-17 13:22:25 +01002190 __acquires(p->pi_lock)
Peter Zijlstra3e71a462016-04-28 16:16:33 +02002191 __acquires(rq->lock);
Peter Zijlstra3960c8c2015-02-17 13:22:25 +01002192
Peter Zijlstraeb580752015-07-31 21:28:18 +02002193static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
Peter Zijlstra3960c8c2015-02-17 13:22:25 +01002194 __releases(rq->lock)
2195{
Peter Zijlstrae7904a22015-08-01 19:25:08 +02002196 lockdep_unpin_lock(&rq->lock, rf->cookie);
Peter Zijlstra3960c8c2015-02-17 13:22:25 +01002197 raw_spin_unlock(&rq->lock);
2198}
2199
2200static inline void
Peter Zijlstraeb580752015-07-31 21:28:18 +02002201task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
Peter Zijlstra3960c8c2015-02-17 13:22:25 +01002202 __releases(rq->lock)
2203 __releases(p->pi_lock)
2204{
Peter Zijlstrae7904a22015-08-01 19:25:08 +02002205 lockdep_unpin_lock(&rq->lock, rf->cookie);
Peter Zijlstra3960c8c2015-02-17 13:22:25 +01002206 raw_spin_unlock(&rq->lock);
Peter Zijlstraeb580752015-07-31 21:28:18 +02002207 raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
Peter Zijlstra3960c8c2015-02-17 13:22:25 +01002208}
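/*
 * Illustrative usage sketch for the lock helpers above:
 *
 *	struct rq_flags rf;
 *	struct rq *rq;
 *
 *	rq = task_rq_lock(p, &rf);
 *	update_rq_clock(rq);
 *	... inspect or modify p's scheduling state; p cannot change
 *	    runqueues while both p->pi_lock and rq->lock are held ...
 *	task_rq_unlock(rq, p, &rf);
 *
 * The rq_flags cookie restores the saved interrupt state and keeps lockdep's
 * pin of rq->lock balanced across the lock/unlock pair.
 */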
2209
Peter Zijlstra029632f2011-10-25 10:00:11 +02002210#ifdef CONFIG_SMP
2211#ifdef CONFIG_PREEMPT
2212
2213static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
2214
2215/*
2216 * fair double_lock_balance: Safely acquires both rq->locks in a fair
2217 * way at the expense of forcing extra atomic operations in all
2218 * invocations. This assures that the double_lock is acquired using the
2219 * same underlying policy as the spinlock_t on this architecture, which
2220 * reduces latency compared to the unfair variant below. However, it
2221 * also adds more overhead and therefore may reduce throughput.
2222 */
2223static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
2224 __releases(this_rq->lock)
2225 __acquires(busiest->lock)
2226 __acquires(this_rq->lock)
2227{
2228 raw_spin_unlock(&this_rq->lock);
2229 double_rq_lock(this_rq, busiest);
2230
2231 return 1;
2232}
2233
2234#else
2235/*
2236 * Unfair double_lock_balance: Optimizes throughput at the expense of
2237 * latency by eliminating extra atomic operations when the locks are
2238 * already in proper order on entry. This favors lower cpu-ids and will
2239 * grant the double lock to lower cpus over higher ids under contention,
2240 * regardless of entry order into the function.
2241 */
2242static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
2243 __releases(this_rq->lock)
2244 __acquires(busiest->lock)
2245 __acquires(this_rq->lock)
2246{
2247 int ret = 0;
2248
2249 if (unlikely(!raw_spin_trylock(&busiest->lock))) {
2250 if (busiest < this_rq) {
2251 raw_spin_unlock(&this_rq->lock);
2252 raw_spin_lock(&busiest->lock);
2253 raw_spin_lock_nested(&this_rq->lock,
2254 SINGLE_DEPTH_NESTING);
2255 ret = 1;
2256 } else
2257 raw_spin_lock_nested(&busiest->lock,
2258 SINGLE_DEPTH_NESTING);
2259 }
2260 return ret;
2261}
2262
2263#endif /* CONFIG_PREEMPT */
2264
2265/*
2266 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
2267 */
2268static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
2269{
2270 if (unlikely(!irqs_disabled())) {
2271		/* printk() doesn't work well under rq->lock */
2272 raw_spin_unlock(&this_rq->lock);
2273 BUG_ON(1);
2274 }
2275
2276 return _double_lock_balance(this_rq, busiest);
2277}
2278
2279static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
2280 __releases(busiest->lock)
2281{
2282 raw_spin_unlock(&busiest->lock);
2283 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
2284}
2285
Peter Zijlstra74602312013-10-10 20:17:22 +02002286static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
2287{
2288 if (l1 > l2)
2289 swap(l1, l2);
2290
2291 spin_lock(l1);
2292 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2293}
2294
Mike Galbraith60e69ee2014-04-07 10:55:15 +02002295static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
2296{
2297 if (l1 > l2)
2298 swap(l1, l2);
2299
2300 spin_lock_irq(l1);
2301 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2302}
2303
Peter Zijlstra74602312013-10-10 20:17:22 +02002304static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
2305{
2306 if (l1 > l2)
2307 swap(l1, l2);
2308
2309 raw_spin_lock(l1);
2310 raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2311}
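/*
 * Note on the helpers above: ordering the two locks by address makes all
 * concurrent callers agree on a single acquisition order and thus avoids
 * ABBA deadlock. E.g. with locks A < B (by address), one CPU calling
 * double_lock(A, B) and another calling double_lock(B, A) both take A first
 * and B second, so neither can hold one lock while waiting forever for the
 * other.
 */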
2312
Peter Zijlstra029632f2011-10-25 10:00:11 +02002313/*
2314 * double_rq_lock - safely lock two runqueues
2315 *
2316 * Note this does not disable interrupts like task_rq_lock,
2317 * you need to do so manually before calling.
2318 */
2319static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
2320 __acquires(rq1->lock)
2321 __acquires(rq2->lock)
2322{
2323 BUG_ON(!irqs_disabled());
2324 if (rq1 == rq2) {
2325 raw_spin_lock(&rq1->lock);
2326 __acquire(rq2->lock); /* Fake it out ;) */
2327 } else {
2328 if (rq1 < rq2) {
2329 raw_spin_lock(&rq1->lock);
2330 raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
2331 } else {
2332 raw_spin_lock(&rq2->lock);
2333 raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
2334 }
2335 }
2336}
2337
2338/*
2339 * double_rq_unlock - safely unlock two runqueues
2340 *
2341 * Note this does not restore interrupts like task_rq_unlock,
2342 * you need to do so manually after calling.
2343 */
2344static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2345 __releases(rq1->lock)
2346 __releases(rq2->lock)
2347{
2348 raw_spin_unlock(&rq1->lock);
2349 if (rq1 != rq2)
2350 raw_spin_unlock(&rq2->lock);
2351 else
2352 __release(rq2->lock);
2353}
2354
2355#else /* CONFIG_SMP */
2356
2357/*
2358 * double_rq_lock - safely lock two runqueues
2359 *
2360 * Note this does not disable interrupts like task_rq_lock,
2361 * you need to do so manually before calling.
2362 */
2363static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
2364 __acquires(rq1->lock)
2365 __acquires(rq2->lock)
2366{
2367 BUG_ON(!irqs_disabled());
2368 BUG_ON(rq1 != rq2);
2369 raw_spin_lock(&rq1->lock);
2370 __acquire(rq2->lock); /* Fake it out ;) */
2371}
2372
2373/*
2374 * double_rq_unlock - safely unlock two runqueues
2375 *
2376 * Note this does not restore interrupts like task_rq_unlock,
2377 * you need to do so manually after calling.
2378 */
2379static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2380 __releases(rq1->lock)
2381 __releases(rq2->lock)
2382{
2383 BUG_ON(rq1 != rq2);
2384 raw_spin_unlock(&rq1->lock);
2385 __release(rq2->lock);
2386}
2387
2388#endif
2389
2390extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
2391extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
Srikar Dronamraju6b55c962015-06-25 22:51:41 +05302392
2393#ifdef CONFIG_SCHED_DEBUG
Peter Zijlstra029632f2011-10-25 10:00:11 +02002394extern void print_cfs_stats(struct seq_file *m, int cpu);
2395extern void print_rt_stats(struct seq_file *m, int cpu);
Wanpeng Liacb32132014-10-31 06:39:33 +08002396extern void print_dl_stats(struct seq_file *m, int cpu);
Srikar Dronamraju6b55c962015-06-25 22:51:41 +05302397extern void
2398print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
Srikar Dronamraju397f2372015-06-25 22:51:43 +05302399
2400#ifdef CONFIG_NUMA_BALANCING
2401extern void
2402show_numa_stats(struct task_struct *p, struct seq_file *m);
2403extern void
2404print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
2405 unsigned long tpf, unsigned long gsf, unsigned long gpf);
2406#endif /* CONFIG_NUMA_BALANCING */
2407#endif /* CONFIG_SCHED_DEBUG */
Peter Zijlstra029632f2011-10-25 10:00:11 +02002408
2409extern void init_cfs_rq(struct cfs_rq *cfs_rq);
Abel Vesa07c54f72015-03-03 13:50:27 +02002410extern void init_rt_rq(struct rt_rq *rt_rq);
2411extern void init_dl_rq(struct dl_rq *dl_rq);
Peter Zijlstra029632f2011-10-25 10:00:11 +02002412
Ben Segall1ee14e62013-10-16 11:16:12 -07002413extern void cfs_bandwidth_usage_inc(void);
2414extern void cfs_bandwidth_usage_dec(void);
Suresh Siddha1c792db2011-12-01 17:07:32 -08002415
Frederic Weisbecker3451d022011-08-10 23:21:01 +02002416#ifdef CONFIG_NO_HZ_COMMON
Suresh Siddha1c792db2011-12-01 17:07:32 -08002417enum rq_nohz_flag_bits {
2418 NOHZ_TICK_STOPPED,
2419 NOHZ_BALANCE_KICK,
2420};
2421
Syed Rameez Mustafadddcab72016-09-07 16:18:27 -07002422#define NOHZ_KICK_ANY 0
2423#define NOHZ_KICK_RESTRICT 1
2424
Suresh Siddha1c792db2011-12-01 17:07:32 -08002425#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
Thomas Gleixner20a5c8c2016-03-10 12:54:20 +01002426
2427extern void nohz_balance_exit_idle(unsigned int cpu);
2428#else
2429static inline void nohz_balance_exit_idle(unsigned int cpu) { }
Suresh Siddha1c792db2011-12-01 17:07:32 -08002430#endif
Frederic Weisbecker73fbec62012-06-16 15:57:37 +02002431
2432#ifdef CONFIG_IRQ_TIME_ACCOUNTING
Frederic Weisbecker19d23dbf2016-09-26 02:29:20 +02002433struct irqtime {
2434 u64 hardirq_time;
2435 u64 softirq_time;
2436 u64 irq_start_time;
2437 struct u64_stats_sync sync;
2438};
Frederic Weisbecker73fbec62012-06-16 15:57:37 +02002439
Frederic Weisbecker19d23dbf2016-09-26 02:29:20 +02002440DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
Frederic Weisbecker73fbec62012-06-16 15:57:37 +02002441
2442static inline u64 irq_time_read(int cpu)
2443{
Frederic Weisbecker19d23dbf2016-09-26 02:29:20 +02002444 struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
2445 unsigned int seq;
2446 u64 total;
Frederic Weisbecker73fbec62012-06-16 15:57:37 +02002447
2448 do {
Frederic Weisbecker19d23dbf2016-09-26 02:29:20 +02002449 seq = __u64_stats_fetch_begin(&irqtime->sync);
2450 total = irqtime->softirq_time + irqtime->hardirq_time;
2451 } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
Frederic Weisbecker73fbec62012-06-16 15:57:37 +02002452
Frederic Weisbecker19d23dbf2016-09-26 02:29:20 +02002453 return total;
Frederic Weisbecker73fbec62012-06-16 15:57:37 +02002454}
Frederic Weisbecker73fbec62012-06-16 15:57:37 +02002455#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
Rafael J. Wysockiadaf9fc2016-03-10 20:44:47 +01002456
2457#ifdef CONFIG_CPU_FREQ
2458DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
2459
2460/**
2461 * cpufreq_update_util - Take a note about CPU utilization changes.
Rafael J. Wysocki12bde332016-08-10 03:11:17 +02002462 * @rq: Runqueue to carry out the update for.
Rafael J. Wysocki58919e82016-08-16 22:14:55 +02002463 * @flags: Update reason flags.
Rafael J. Wysockiadaf9fc2016-03-10 20:44:47 +01002464 *
Rafael J. Wysocki58919e82016-08-16 22:14:55 +02002465 * This function is called by the scheduler on the CPU whose utilization is
2466 * being updated.
Rafael J. Wysockiadaf9fc2016-03-10 20:44:47 +01002467 *
2468 * It can only be called from RCU-sched read-side critical sections.
Rafael J. Wysockiadaf9fc2016-03-10 20:44:47 +01002469 *
2470 * The way cpufreq is currently arranged requires it to evaluate the CPU
2471 * performance state (frequency/voltage) on a regular basis to prevent it from
2472 * being stuck in a completely inadequate performance level for too long.
2473 * That is not guaranteed to happen if the updates are only triggered from CFS,
2474 * though, because they may not be coming in if RT or deadline tasks are active
2475 * all the time (or there are RT and DL tasks only).
2476 *
2477 * As a workaround for that issue, this function is called by the RT and DL
2478 * sched classes to trigger extra cpufreq updates to prevent it from stalling,
2479 * but that really is a band-aid. Going forward it should be replaced with
2480 * solutions targeted more specifically at RT and DL tasks.
2481 */
Rafael J. Wysocki12bde332016-08-10 03:11:17 +02002482static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
Rafael J. Wysockiadaf9fc2016-03-10 20:44:47 +01002483{
Rafael J. Wysocki58919e82016-08-16 22:14:55 +02002484 struct update_util_data *data;
2485
2486 data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
2487 if (data)
Rafael J. Wysocki12bde332016-08-10 03:11:17 +02002488 data->func(data, rq_clock(rq), flags);
2489}
2490
2491static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
2492{
2493 if (cpu_of(rq) == smp_processor_id())
2494 cpufreq_update_util(rq, flags);
Rafael J. Wysockiadaf9fc2016-03-10 20:44:47 +01002495}
2496#else
Rafael J. Wysocki12bde332016-08-10 03:11:17 +02002497static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
2498static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
Rafael J. Wysockiadaf9fc2016-03-10 20:44:47 +01002499#endif /* CONFIG_CPU_FREQ */
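/*
 * Illustrative sketch of the consumer side, assuming the
 * cpufreq_add_update_util_hook()/cpufreq_remove_update_util_hook() helpers
 * from kernel/sched/cpufreq.c; the governor object and its fields are made
 * up for the example:
 *
 *	static void my_gov_update(struct update_util_data *data, u64 time,
 *				  unsigned int flags)
 *	{
 *		struct my_gov_cpu *gc = container_of(data, struct my_gov_cpu,
 *						     update_util);
 *		... re-evaluate the frequency for gc->cpu from @time/@flags ...
 *	}
 *
 *	cpufreq_add_update_util_hook(cpu, &gc->update_util, my_gov_update);
 *	...
 *	cpufreq_remove_update_util_hook(cpu);
 *
 * Once registered, the cpufreq_update_util() calls issued by the scheduler
 * above reach my_gov_update() through the RCU-protected per-CPU pointer.
 */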
Linus Torvaldsbe53f582016-03-24 09:42:50 -07002500
Rafael J. Wysocki9bdcb442016-04-02 01:09:12 +02002501#ifdef arch_scale_freq_capacity
2502#ifndef arch_scale_freq_invariant
2503#define arch_scale_freq_invariant() (true)
2504#endif
2505#else /* arch_scale_freq_capacity */
2506#define arch_scale_freq_invariant() (false)
2507#endif