/*
 * Copyright (c) 2016-2019, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 *
 * Window Assisted Load Tracking (WALT) implementation credits:
 * Srivatsa Vaddagiri, Steve Muckle, Syed Rameez Mustafa, Joonwoo Park,
 * Pavan Kumar Kondeti, Olav Haugan
 *
 * 2016-03-06: Integration with EAS/refactoring by Vikram Mulukutla
 *             and Todd Kjos
 */

#include <linux/syscore_ops.h>
#include <linux/cpufreq.h>
#include <linux/list_sort.h>
#include <linux/jiffies.h>
#include <linux/sched/core_ctl.h>
#include <trace/events/sched.h>
#include "sched.h"
#include "walt.h"

const char *task_event_names[] = {"PUT_PREV_TASK", "PICK_NEXT_TASK",
				  "TASK_WAKE", "TASK_MIGRATE", "TASK_UPDATE",
				  "IRQ_UPDATE"};

const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP",
				    "RQ_TO_RQ", "GROUP_TO_GROUP"};

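/*
 * SCHED_FREQ_ACCOUNT_WAIT_TIME controls whether time a task spends
 * runnable-but-waiting is charged to the CPU busy counters used for
 * frequency guidance (see account_busy_for_cpu_time() and
 * update_task_pred_demand() below); SCHED_ACCOUNT_WAIT_TIME is the
 * corresponding knob for per-task demand accounting.
 */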
#define SCHED_FREQ_ACCOUNT_WAIT_TIME 0
#define SCHED_ACCOUNT_WAIT_TIME 1

#define EARLY_DETECTION_DURATION 9500000

static ktime_t ktime_last;
static bool sched_ktime_suspended;
static struct cpu_cycle_counter_cb cpu_cycle_counter_cb;
static bool use_cycle_counter;
DEFINE_MUTEX(cluster_lock);
static atomic64_t walt_irq_work_lastq_ws;
u64 walt_load_reported_window;

static struct irq_work walt_cpufreq_irq_work;
static struct irq_work walt_migration_irq_work;

void
walt_fixup_cumulative_runnable_avg(struct rq *rq,
				   struct task_struct *p, u64 new_task_load)
{
	s64 task_load_delta = (s64)new_task_load - task_load(p);
	struct walt_sched_stats *stats = &rq->walt_stats;

	stats->cumulative_runnable_avg += task_load_delta;
	if ((s64)stats->cumulative_runnable_avg < 0)
		panic("cra less than zero: tld: %lld, task_load(p) = %u\n",
		      task_load_delta, task_load(p));

	walt_fixup_cum_window_demand(rq, task_load_delta);
}

u64 sched_ktime_clock(void)
{
	if (unlikely(sched_ktime_suspended))
		return ktime_to_ns(ktime_last);
	return ktime_get_ns();
}

static void sched_resume(void)
{
	sched_ktime_suspended = false;
}

static int sched_suspend(void)
{
	ktime_last = ktime_get();
	sched_ktime_suspended = true;
	return 0;
}

static struct syscore_ops sched_syscore_ops = {
	.resume = sched_resume,
	.suspend = sched_suspend
};

static int __init sched_init_ops(void)
{
	register_syscore_ops(&sched_syscore_ops);
	return 0;
}
late_initcall(sched_init_ops);

static void acquire_rq_locks_irqsave(const cpumask_t *cpus,
				     unsigned long *flags)
{
	int cpu, level = 0;

	local_irq_save(*flags);
	for_each_cpu(cpu, cpus) {
		if (level == 0)
			raw_spin_lock(&cpu_rq(cpu)->lock);
		else
			raw_spin_lock_nested(&cpu_rq(cpu)->lock, level);
		level++;
	}
}

static void release_rq_locks_irqrestore(const cpumask_t *cpus,
					unsigned long *flags)
{
	int cpu;

	for_each_cpu(cpu, cpus)
		raw_spin_unlock(&cpu_rq(cpu)->lock);
	local_irq_restore(*flags);
}

#ifdef CONFIG_HZ_300
/*
 * The tick interval becomes 3333333 ns due to rounding error when
 * HZ=300; six such ticks give roughly the same 20 ms minimum window
 * as the default below.
 */
#define MIN_SCHED_RAVG_WINDOW (3333333 * 6)
#else
/* Min window size (in ns) = 20ms */
#define MIN_SCHED_RAVG_WINDOW 20000000
#endif

/* Max window size (in ns) = 1s */
#define MAX_SCHED_RAVG_WINDOW 1000000000

/* 1 -> use PELT based load stats, 0 -> use window-based load stats */
unsigned int __read_mostly walt_disabled = 0;

__read_mostly unsigned int sysctl_sched_cpu_high_irqload = (10 * NSEC_PER_MSEC);

unsigned int sysctl_sched_walt_rotate_big_tasks;
unsigned int walt_rotation_enabled;

/*
 * sched_window_stats_policy and sched_ravg_hist_size have a 'sysctl' copy
 * associated with them. This is required for atomic update of those variables
 * when being modified via the sysctl interface.
 *
 * IMPORTANT: Initialize both copies to the same value!!
 */

__read_mostly unsigned int sched_ravg_hist_size = 5;
__read_mostly unsigned int sysctl_sched_ravg_hist_size = 5;

static __read_mostly unsigned int sched_io_is_busy = 1;

__read_mostly unsigned int sched_window_stats_policy =
	WINDOW_STATS_MAX_RECENT_AVG;
__read_mostly unsigned int sysctl_sched_window_stats_policy =
	WINDOW_STATS_MAX_RECENT_AVG;

/* Window size (in ns) */
__read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW;

/*
 * An after-boot constant divisor for cpu_util_freq_walt() to apply the load
 * boost.
 */
__read_mostly unsigned int walt_cpu_util_freq_divisor;

/* Initial task load. Newly created tasks are assigned this load. */
unsigned int __read_mostly sysctl_sched_init_task_load_pct = 15;

/*
 * Maximum possible frequency across all cpus. Task demand and cpu
 * capacity (cpu_power) metrics are scaled in reference to it.
 */
unsigned int max_possible_freq = 1;

/*
 * Minimum possible max_freq across all cpus. This will be the same as
 * max_possible_freq on homogeneous systems and could be different from
 * max_possible_freq on heterogeneous systems. min_max_freq is used to derive
 * capacity (cpu_power) of cpus.
 */
unsigned int min_max_freq = 1;

unsigned int max_capacity = 1024; /* max(rq->capacity) */
unsigned int min_capacity = 1024; /* min(rq->capacity) */
unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */
unsigned int
min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */

/* Temporarily disable window-stats activity on all cpus */
unsigned int __read_mostly sched_disable_window_stats;

/*
 * Task load is categorized into buckets for the purpose of top task tracking.
 * The entire range of load from 0 to sched_ravg_window needs to be covered
 * in NUM_LOAD_INDICES number of buckets. Therefore the size of each bucket
 * is given by sched_ravg_window / NUM_LOAD_INDICES. Since the default value
 * of sched_ravg_window is MIN_SCHED_RAVG_WINDOW, use that to compute
 * sched_load_granule.
 */
__read_mostly unsigned int sched_load_granule =
	MIN_SCHED_RAVG_WINDOW / NUM_LOAD_INDICES;
/* Size of bitmaps maintained to track top tasks */
static const unsigned int top_tasks_bitmap_size =
	BITS_TO_LONGS(NUM_LOAD_INDICES + 1) * sizeof(unsigned long);

/*
 * This governs what load needs to be used when reporting CPU busy time
 * to the cpufreq governor.
 */
__read_mostly unsigned int sysctl_sched_freq_reporting_policy;

static int __init set_sched_ravg_window(char *str)
{
	unsigned int window_size;

	get_option(&str, &window_size);

	if (window_size < MIN_SCHED_RAVG_WINDOW ||
	    window_size > MAX_SCHED_RAVG_WINDOW) {
		WARN_ON(1);
		return -EINVAL;
	}

	sched_ravg_window = window_size;
	return 0;
}

early_param("sched_ravg_window", set_sched_ravg_window);

static int __init set_sched_predl(char *str)
{
	unsigned int predl;

	get_option(&str, &predl);
	sched_predl = !!predl;
	return 0;
}
early_param("sched_predl", set_sched_predl);

void inc_rq_walt_stats(struct rq *rq, struct task_struct *p)
{
	inc_nr_big_task(&rq->walt_stats, p);
	walt_inc_cumulative_runnable_avg(rq, p);
}

void dec_rq_walt_stats(struct rq *rq, struct task_struct *p)
{
	dec_nr_big_task(&rq->walt_stats, p);
	walt_dec_cumulative_runnable_avg(rq, p);
}

void fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p,
				   u32 new_task_load, u32 new_pred_demand)
{
	s64 task_load_delta = (s64)new_task_load - task_load(p);
	s64 pred_demand_delta = PRED_DEMAND_DELTA;

	fixup_cumulative_runnable_avg(&rq->walt_stats, task_load_delta,
				      pred_demand_delta);

	walt_fixup_cum_window_demand(rq, task_load_delta);
}

/*
 * Demand aggregation for frequency purpose:
 *
 * CPU demand of tasks from various related groups is aggregated per-cluster and
 * added to the "max_busy_cpu" in that cluster, where max_busy_cpu is determined
 * by just rq->prev_runnable_sum.
 *
 * Some examples follow, which assume:
 *	Cluster0 = CPU0-3, Cluster1 = CPU4-7
 *	One related thread group A that has tasks A0, A1, A2
 *
 *	A->cpu_time[X].curr/prev_sum = counters in which cpu execution stats of
 *	tasks belonging to group A are accumulated when they run on cpu X.
 *
 *	CX->curr/prev_sum = counters in which cpu execution stats of all tasks
 *	not belonging to group A are accumulated when they run on cpu X.
 *
 * Let's say the stats for window M were as below:
 *
 *	C0->prev_sum = 1ms, A->cpu_time[0].prev_sum = 5ms
 *		Task A0 ran 5ms on CPU0
 *		Task B0 ran 1ms on CPU0
 *
 *	C1->prev_sum = 5ms, A->cpu_time[1].prev_sum = 6ms
 *		Task A1 ran 4ms on CPU1
 *		Task A2 ran 2ms on CPU1
 *		Task B1 ran 5ms on CPU1
 *
 *	C2->prev_sum = 0ms, A->cpu_time[2].prev_sum = 0
 *		CPU2 idle
 *
 *	C3->prev_sum = 0ms, A->cpu_time[3].prev_sum = 0
 *		CPU3 idle
 *
 * In this case, CPU1 was the most busy going by just its prev_sum counter.
 * Demand from all group A tasks is added to CPU1. IOW, at the end of window M,
 * the cpu busy time reported to the governor will be:
 *
 *	C0 busy time = 1ms
 *	C1 busy time = 5 + 5 + 6 = 16ms
 */
__read_mostly int sched_freq_aggregate_threshold;

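/*
 * Advance rq->window_start to the start of the window that contains
 * @wallclock and snapshot cumulative_runnable_avg into cum_window_demand
 * at the boundary. Returns the previous window start so the caller can
 * detect whether a rollover actually happened.
 */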
static u64
update_window_start(struct rq *rq, u64 wallclock, int event)
{
	s64 delta;
	int nr_windows;
	u64 old_window_start = rq->window_start;

	delta = wallclock - rq->window_start;
	BUG_ON(delta < 0);
	if (delta < sched_ravg_window)
		return old_window_start;

	nr_windows = div64_u64(delta, sched_ravg_window);
	rq->window_start += (u64)nr_windows * (u64)sched_ravg_window;

	rq->cum_window_demand = rq->walt_stats.cumulative_runnable_avg;

	return old_window_start;
}

int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
{
	unsigned long flags;

	mutex_lock(&cluster_lock);
	if (!cb->get_cpu_cycle_counter) {
		mutex_unlock(&cluster_lock);
		return -EINVAL;
	}

	acquire_rq_locks_irqsave(cpu_possible_mask, &flags);
	cpu_cycle_counter_cb = *cb;
	use_cycle_counter = true;
	release_rq_locks_irqrestore(cpu_possible_mask, &flags);

	mutex_unlock(&cluster_lock);

	return 0;
}

/*
 * Assumes rq_lock is held and wallclock was recorded in the same critical
 * section as this function's invocation.
 */
static inline u64 read_cycle_counter(int cpu, u64 wallclock)
{
	struct rq *rq = cpu_rq(cpu);

	if (rq->last_cc_update != wallclock) {
		rq->cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
		rq->last_cc_update = wallclock;
	}

	return rq->cycles;
}

static void update_task_cpu_cycles(struct task_struct *p, int cpu,
				   u64 wallclock)
{
	if (use_cycle_counter)
		p->cpu_cycles = read_cycle_counter(cpu, wallclock);
}

void clear_ed_task(struct task_struct *p, struct rq *rq)
{
	if (p == rq->ed_task)
		rq->ed_task = NULL;
}

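/*
 * Scan up to the first ten runnable CFS tasks on @rq and mark the first
 * one whose last wakeup was at least EARLY_DETECTION_DURATION ago as the
 * early-detection (ed) task. Only active under big-task rotation or a
 * sched boost policy; returns true when such a task is found.
 */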
bool early_detection_notify(struct rq *rq, u64 wallclock)
{
	struct task_struct *p;
	int loop_max = 10;

	rq->ed_task = NULL;

	if ((!walt_rotation_enabled && sched_boost_policy() ==
			SCHED_BOOST_NONE) || !rq->cfs.h_nr_running)
		return 0;

	list_for_each_entry(p, &rq->cfs_tasks, se.group_node) {
		if (!loop_max)
			break;

		if (wallclock - p->last_wake_ts >= EARLY_DETECTION_DURATION) {
			rq->ed_task = p;
			return 1;
		}

		loop_max--;
	}

	return 0;
}

void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock)
{
	struct rq *rq = cpu_rq(cpu);

	if (!rq->window_start || sched_disable_window_stats)
		return;

	/*
	 * We don't have to note down an irqstart event when cycle
	 * counter is not used.
	 */
	if (!use_cycle_counter)
		return;

	if (is_idle_task(curr)) {
		/* We're here without rq->lock held, IRQ disabled */
		raw_spin_lock(&rq->lock);
		update_task_cpu_cycles(curr, cpu, sched_ktime_clock());
		raw_spin_unlock(&rq->lock);
	}
}

/*
 * Return the total number of tasks "eligible" to run on the highest
 * capacity cpu.
 *
 * This is simply nr_big_tasks for cpus which are not of max_capacity and
 * nr_running for cpus of max_capacity.
 */
unsigned int nr_eligible_big_tasks(int cpu)
{
	struct rq *rq = cpu_rq(cpu);

	if (!is_max_capacity_cpu(cpu))
		return rq->walt_stats.nr_big_tasks;

	return rq->nr_running;
}

void clear_walt_request(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

	clear_boost_kick(cpu);
	clear_reserved(cpu);
	if (rq->push_task) {
		struct task_struct *push_task = NULL;

		raw_spin_lock_irqsave(&rq->lock, flags);
		if (rq->push_task) {
			clear_reserved(rq->push_cpu);
			push_task = rq->push_task;
			rq->push_task = NULL;
		}
		rq->active_balance = 0;
		raw_spin_unlock_irqrestore(&rq->lock, flags);
		if (push_task)
			put_task_struct(push_task);
	}
}

void sched_account_irqtime(int cpu, struct task_struct *curr,
			   u64 delta, u64 wallclock)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags, nr_windows;
	u64 cur_jiffies_ts;

	raw_spin_lock_irqsave(&rq->lock, flags);

	/*
	 * cputime (wallclock) uses sched_clock so use the same here for
	 * consistency.
	 */
	delta += sched_clock() - wallclock;
	cur_jiffies_ts = get_jiffies_64();

	if (is_idle_task(curr))
		update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(),
				 delta);

	nr_windows = cur_jiffies_ts - rq->irqload_ts;

	if (nr_windows) {
		if (nr_windows < 10) {
			/* Decay CPU's irqload by 3/4 for each window. */
			rq->avg_irqload *= (3 * nr_windows);
			rq->avg_irqload = div64_u64(rq->avg_irqload,
						    4 * nr_windows);
		} else {
			rq->avg_irqload = 0;
		}
		rq->avg_irqload += rq->cur_irqload;
		rq->cur_irqload = 0;
	}

	rq->cur_irqload += delta;
	rq->irqload_ts = cur_jiffies_ts;
	raw_spin_unlock_irqrestore(&rq->lock, flags);
}

/*
 * Special case the last index and provide a fast path for index = 0.
 * Note that sched_load_granule can change underneath us if we are not
 * holding any runqueue locks while calling the two functions below.
 */
static u32 top_task_load(struct rq *rq)
{
	int index = rq->prev_top;
	u8 prev = 1 - rq->curr_table;

	if (!index) {
		int msb = NUM_LOAD_INDICES - 1;

		if (!test_bit(msb, rq->top_tasks_bitmap[prev]))
			return 0;
		else
			return sched_load_granule;
	} else if (index == NUM_LOAD_INDICES - 1) {
		return sched_ravg_window;
	} else {
		return (index + 1) * sched_load_granule;
	}
}

u64 freq_policy_load(struct rq *rq)
{
	unsigned int reporting_policy = sysctl_sched_freq_reporting_policy;
	int freq_aggr_thresh = sched_freq_aggregate_threshold;
	struct sched_cluster *cluster = rq->cluster;
	u64 aggr_grp_load = cluster->aggr_grp_load;
	u64 load, tt_load = 0;
	u64 coloc_boost_load = cluster->coloc_boost_load;

	if (rq->ed_task != NULL) {
		load = sched_ravg_window;
		goto done;
	}

	if (aggr_grp_load > freq_aggr_thresh)
		load = rq->prev_runnable_sum + aggr_grp_load;
	else
		load = rq->prev_runnable_sum + rq->grp_time.prev_runnable_sum;

	if (coloc_boost_load)
		load = max_t(u64, load, coloc_boost_load);

	tt_load = top_task_load(rq);
	switch (reporting_policy) {
	case FREQ_REPORT_MAX_CPU_LOAD_TOP_TASK:
		load = max_t(u64, load, tt_load);
		break;
	case FREQ_REPORT_TOP_TASK:
		load = tt_load;
		break;
	case FREQ_REPORT_CPU_LOAD:
		break;
	default:
		break;
	}

done:
	trace_sched_load_to_gov(rq, aggr_grp_load, tt_load, freq_aggr_thresh,
				load, reporting_policy, walt_rotation_enabled,
				sysctl_sched_little_cluster_coloc_fmin_khz,
				coloc_boost_load);
	return load;
}

/*
 * In this function we match the accumulated subtractions with the current
 * and previous windows we are operating with. Ignore any entries where
 * the window start in the load_subtraction struct does not match either
 * the current or the previous window. This could happen whenever CPUs
 * become idle or busy with interrupts disabled for an extended period.
 */
static inline void account_load_subtractions(struct rq *rq)
{
	u64 ws = rq->window_start;
	u64 prev_ws = ws - sched_ravg_window;
	struct load_subtractions *ls = rq->load_subs;
	int i;

	for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
		if (ls[i].window_start == ws) {
			rq->curr_runnable_sum -= ls[i].subs;
			rq->nt_curr_runnable_sum -= ls[i].new_subs;
		} else if (ls[i].window_start == prev_ws) {
			rq->prev_runnable_sum -= ls[i].subs;
			rq->nt_prev_runnable_sum -= ls[i].new_subs;
		}

		ls[i].subs = 0;
		ls[i].new_subs = 0;
	}

	BUG_ON((s64)rq->prev_runnable_sum < 0);
	BUG_ON((s64)rq->curr_runnable_sum < 0);
	BUG_ON((s64)rq->nt_prev_runnable_sum < 0);
	BUG_ON((s64)rq->nt_curr_runnable_sum < 0);
}

static inline void create_subtraction_entry(struct rq *rq, u64 ws, int index)
{
	rq->load_subs[index].window_start = ws;
	rq->load_subs[index].subs = 0;
	rq->load_subs[index].new_subs = 0;
}

static int get_top_index(unsigned long *bitmap, unsigned long old_top)
{
	int index = find_next_bit(bitmap, NUM_LOAD_INDICES, old_top);

	if (index == NUM_LOAD_INDICES)
		return 0;

	return NUM_LOAD_INDICES - 1 - index;
}

static int get_subtraction_index(struct rq *rq, u64 ws)
{
	int i;
	u64 oldest = ULLONG_MAX;
	int oldest_index = 0;

	for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
		u64 entry_ws = rq->load_subs[i].window_start;

		if (ws == entry_ws)
			return i;

		if (entry_ws < oldest) {
			oldest = entry_ws;
			oldest_index = i;
		}
	}

	create_subtraction_entry(rq, ws, oldest_index);
	return oldest_index;
}

static void update_rq_load_subtractions(int index, struct rq *rq,
					u32 sub_load, bool new_task)
{
	rq->load_subs[index].subs += sub_load;
	if (new_task)
		rq->load_subs[index].new_subs += sub_load;
}

void update_cluster_load_subtractions(struct task_struct *p,
				      int cpu, u64 ws, bool new_task)
{
	struct sched_cluster *cluster = cpu_cluster(cpu);
	struct cpumask cluster_cpus = cluster->cpus;
	u64 prev_ws = ws - sched_ravg_window;
	int i;

	cpumask_clear_cpu(cpu, &cluster_cpus);
	raw_spin_lock(&cluster->load_lock);

	for_each_cpu(i, &cluster_cpus) {
		struct rq *rq = cpu_rq(i);
		int index;

		if (p->ravg.curr_window_cpu[i]) {
			index = get_subtraction_index(rq, ws);
			update_rq_load_subtractions(index, rq,
				p->ravg.curr_window_cpu[i], new_task);
			p->ravg.curr_window_cpu[i] = 0;
		}

		if (p->ravg.prev_window_cpu[i]) {
			index = get_subtraction_index(rq, prev_ws);
			update_rq_load_subtractions(index, rq,
				p->ravg.prev_window_cpu[i], new_task);
			p->ravg.prev_window_cpu[i] = 0;
		}
	}

	raw_spin_unlock(&cluster->load_lock);
}

static inline void inter_cluster_migration_fixup
	(struct task_struct *p, int new_cpu, int task_cpu, bool new_task)
{
	struct rq *dest_rq = cpu_rq(new_cpu);
	struct rq *src_rq = cpu_rq(task_cpu);

	if (same_freq_domain(new_cpu, task_cpu))
		return;

	p->ravg.curr_window_cpu[new_cpu] = p->ravg.curr_window;
	p->ravg.prev_window_cpu[new_cpu] = p->ravg.prev_window;

	dest_rq->curr_runnable_sum += p->ravg.curr_window;
	dest_rq->prev_runnable_sum += p->ravg.prev_window;

	src_rq->curr_runnable_sum -= p->ravg.curr_window_cpu[task_cpu];
	src_rq->prev_runnable_sum -= p->ravg.prev_window_cpu[task_cpu];

	if (new_task) {
		dest_rq->nt_curr_runnable_sum += p->ravg.curr_window;
		dest_rq->nt_prev_runnable_sum += p->ravg.prev_window;

		src_rq->nt_curr_runnable_sum -=
			p->ravg.curr_window_cpu[task_cpu];
		src_rq->nt_prev_runnable_sum -=
			p->ravg.prev_window_cpu[task_cpu];
	}

	p->ravg.curr_window_cpu[task_cpu] = 0;
	p->ravg.prev_window_cpu[task_cpu] = 0;

	update_cluster_load_subtractions(p, task_cpu,
			src_rq->window_start, new_task);

	BUG_ON((s64)src_rq->prev_runnable_sum < 0);
	BUG_ON((s64)src_rq->curr_runnable_sum < 0);
	BUG_ON((s64)src_rq->nt_prev_runnable_sum < 0);
	BUG_ON((s64)src_rq->nt_curr_runnable_sum < 0);
}

static u32 load_to_index(u32 load)
{
	u32 index = load / sched_load_granule;

	return min(index, (u32)(NUM_LOAD_INDICES - 1));
}

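/*
 * Move @p's contribution out of @src_rq's and into @dst_rq's top-task
 * tables for both the current and the previous window, keeping the
 * per-table bitmaps and the cached curr_top/prev_top indices consistent.
 */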
static void
migrate_top_tasks(struct task_struct *p, struct rq *src_rq, struct rq *dst_rq)
{
	int index;
	int top_index;
	u32 curr_window = p->ravg.curr_window;
	u32 prev_window = p->ravg.prev_window;
	u8 src = src_rq->curr_table;
	u8 dst = dst_rq->curr_table;
	u8 *src_table;
	u8 *dst_table;

	if (curr_window) {
		src_table = src_rq->top_tasks[src];
		dst_table = dst_rq->top_tasks[dst];
		index = load_to_index(curr_window);
		src_table[index] -= 1;
		dst_table[index] += 1;

		if (!src_table[index])
			__clear_bit(NUM_LOAD_INDICES - index - 1,
				src_rq->top_tasks_bitmap[src]);

		if (dst_table[index] == 1)
			__set_bit(NUM_LOAD_INDICES - index - 1,
				dst_rq->top_tasks_bitmap[dst]);

		if (index > dst_rq->curr_top)
			dst_rq->curr_top = index;

		top_index = src_rq->curr_top;
		if (index == top_index && !src_table[index])
			src_rq->curr_top = get_top_index(
				src_rq->top_tasks_bitmap[src], top_index);
	}

	if (prev_window) {
		src = 1 - src;
		dst = 1 - dst;
		src_table = src_rq->top_tasks[src];
		dst_table = dst_rq->top_tasks[dst];
		index = load_to_index(prev_window);
		src_table[index] -= 1;
		dst_table[index] += 1;

		if (!src_table[index])
			__clear_bit(NUM_LOAD_INDICES - index - 1,
				src_rq->top_tasks_bitmap[src]);

		if (dst_table[index] == 1)
			__set_bit(NUM_LOAD_INDICES - index - 1,
				dst_rq->top_tasks_bitmap[dst]);

		if (index > dst_rq->prev_top)
			dst_rq->prev_top = index;

		top_index = src_rq->prev_top;
		if (index == top_index && !src_table[index])
			src_rq->prev_top = get_top_index(
				src_rq->top_tasks_bitmap[src], top_index);
	}
}

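/*
 * Migrate @p's window-based accounting to @new_cpu: update the ravg
 * counters of the task and of both runqueues, move the curr/prev window
 * contributions (either within the related-thread-group counters or via
 * inter_cluster_migration_fixup()), migrate the top-task entries, and
 * queue the WALT migration irq_work when frequency domains differ.
 */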
void fixup_busy_time(struct task_struct *p, int new_cpu)
{
	struct rq *src_rq = task_rq(p);
	struct rq *dest_rq = cpu_rq(new_cpu);
	u64 wallclock;
	u64 *src_curr_runnable_sum, *dst_curr_runnable_sum;
	u64 *src_prev_runnable_sum, *dst_prev_runnable_sum;
	u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum;
	u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum;
	bool new_task;
	struct related_thread_group *grp;

	if (!p->on_rq && p->state != TASK_WAKING)
		return;

	if (exiting_task(p)) {
		clear_ed_task(p, src_rq);
		return;
	}

	if (p->state == TASK_WAKING)
		double_rq_lock(src_rq, dest_rq);

	if (sched_disable_window_stats)
		goto done;

	wallclock = sched_ktime_clock();

	update_task_ravg(task_rq(p)->curr, task_rq(p),
			 TASK_UPDATE,
			 wallclock, 0);
	update_task_ravg(dest_rq->curr, dest_rq,
			 TASK_UPDATE, wallclock, 0);

	update_task_ravg(p, task_rq(p), TASK_MIGRATE,
			 wallclock, 0);

	update_task_cpu_cycles(p, new_cpu, wallclock);

	/*
	 * When a task is migrating during the wakeup, adjust
	 * the task's contribution towards cumulative window
	 * demand.
	 */
	if (p->state == TASK_WAKING && p->last_sleep_ts >=
				       src_rq->window_start) {
		walt_fixup_cum_window_demand(src_rq, -(s64)p->ravg.demand);
		walt_fixup_cum_window_demand(dest_rq, p->ravg.demand);
	}

	new_task = is_new_task(p);
	/* Protected by rq_lock */
	grp = p->grp;

	/*
	 * For frequency aggregation, we continue to do migration fixups
	 * even for intra-cluster migrations. This is because the aggregated
	 * load has to be reported on a single CPU regardless.
	 */
	if (grp) {
		struct group_cpu_time *cpu_time;

		cpu_time = &src_rq->grp_time;
		src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
		src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
		src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
		src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;

		cpu_time = &dest_rq->grp_time;
		dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
		dst_prev_runnable_sum = &cpu_time->prev_runnable_sum;
		dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
		dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;

		if (p->ravg.curr_window) {
			*src_curr_runnable_sum -= p->ravg.curr_window;
			*dst_curr_runnable_sum += p->ravg.curr_window;
			if (new_task) {
				*src_nt_curr_runnable_sum -=
					p->ravg.curr_window;
				*dst_nt_curr_runnable_sum +=
					p->ravg.curr_window;
			}
		}

		if (p->ravg.prev_window) {
			*src_prev_runnable_sum -= p->ravg.prev_window;
			*dst_prev_runnable_sum += p->ravg.prev_window;
			if (new_task) {
				*src_nt_prev_runnable_sum -=
					p->ravg.prev_window;
				*dst_nt_prev_runnable_sum +=
					p->ravg.prev_window;
			}
		}
	} else {
		inter_cluster_migration_fixup(p, new_cpu,
					      task_cpu(p), new_task);
	}

	migrate_top_tasks(p, src_rq, dest_rq);

	if (!same_freq_domain(new_cpu, task_cpu(p))) {
		src_rq->notif_pending = true;
		dest_rq->notif_pending = true;
		sched_irq_work_queue(&walt_migration_irq_work);
	}

	if (p == src_rq->ed_task) {
		src_rq->ed_task = NULL;
		dest_rq->ed_task = p;
	}

done:
	if (p->state == TASK_WAKING)
		double_rq_unlock(src_rq, dest_rq);
}

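/*
 * Initialise rq->window_start when a CPU first comes up. The first CPU to
 * get here becomes the sync CPU and seeds walt_irq_work_lastq_ws; later
 * CPUs copy the window start from an already-online runqueue under
 * double_rq_lock() and reset their runnable sums.
 */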
void set_window_start(struct rq *rq)
{
	static int sync_cpu_available;

	if (likely(rq->window_start))
		return;

	if (!sync_cpu_available) {
		rq->window_start = 1;
		sync_cpu_available = 1;
		atomic64_set(&walt_irq_work_lastq_ws, rq->window_start);
		walt_load_reported_window =
				atomic64_read(&walt_irq_work_lastq_ws);

	} else {
		struct rq *sync_rq = cpu_rq(cpumask_any(cpu_online_mask));

		raw_spin_unlock(&rq->lock);
		double_rq_lock(rq, sync_rq);
		rq->window_start = sync_rq->window_start;
		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
		raw_spin_unlock(&sync_rq->lock);
	}

	rq->curr->ravg.mark_start = rq->window_start;
}

unsigned int max_possible_efficiency = 1;
unsigned int min_possible_efficiency = UINT_MAX;

#define INC_STEP 8
#define DEC_STEP 2
#define CONSISTENT_THRES 16
#define INC_STEP_BIG 16
/*
 * bucket_increase - update the count of all buckets
 *
 * @buckets: array of buckets tracking busy time of a task
 * @idx: the index of bucket to be incremented
 *
 * Each time a complete window finishes, the count of the bucket that the
 * runtime falls in (@idx) is incremented. The counts of all other buckets
 * are decayed. The rate of increase and decay can differ based on the
 * current count in the bucket.
 */
static inline void bucket_increase(u8 *buckets, int idx)
{
	int i, step;

	for (i = 0; i < NUM_BUSY_BUCKETS; i++) {
		if (idx != i) {
			if (buckets[i] > DEC_STEP)
				buckets[i] -= DEC_STEP;
			else
				buckets[i] = 0;
		} else {
			step = buckets[i] >= CONSISTENT_THRES ?
						INC_STEP_BIG : INC_STEP;
			if (buckets[i] > U8_MAX - step)
				buckets[i] = U8_MAX;
			else
				buckets[i] += step;
		}
	}
}

static inline int busy_to_bucket(u32 normalized_rt)
{
	int bidx;

	bidx = mult_frac(normalized_rt, NUM_BUSY_BUCKETS, max_task_load());
	bidx = min(bidx, NUM_BUSY_BUCKETS - 1);

	/*
	 * Combine the lowest two buckets. The lowest frequency already falls
	 * into the 2nd bucket, so keeping a separate prediction for the
	 * lowest bucket is not useful.
	 */
	if (!bidx)
		bidx++;

	return bidx;
}

/*
 * get_pred_busy - calculate predicted demand for a task on runqueue
 *
 * @rq: runqueue of task p
 * @p: task whose prediction is being updated
 * @start: starting bucket. returned prediction should not be lower than
 *         this bucket.
 * @runtime: runtime of the task. returned prediction should not be lower
 *           than this runtime.
 * Note: @start can be derived from @runtime. It's passed in only to
 *       avoid duplicated calculation in some cases.
 *
 * A new predicted busy time is returned for task @p based on @runtime
 * passed in. The function searches through buckets that represent busy
 * time equal to or bigger than @runtime and attempts to find the bucket
 * to use for prediction. Once found, it searches through historical busy
 * time and returns the latest that falls into the bucket. If no such busy
 * time exists, it returns the middle of that bucket.
 */
static u32 get_pred_busy(struct rq *rq, struct task_struct *p,
			 int start, u32 runtime)
{
	int i;
	u8 *buckets = p->ravg.busy_buckets;
	u32 *hist = p->ravg.sum_history;
	u32 dmin, dmax;
	u64 cur_freq_runtime = 0;
	int first = NUM_BUSY_BUCKETS, final;
	u32 ret = runtime;

	/* skip prediction for new tasks due to lack of history */
	if (unlikely(is_new_task(p)))
		goto out;

	/* find minimal bucket index to pick */
	for (i = start; i < NUM_BUSY_BUCKETS; i++) {
		if (buckets[i]) {
			first = i;
			break;
		}
	}
	/* if no higher buckets are filled, predict runtime */
	if (first >= NUM_BUSY_BUCKETS)
		goto out;

	/* compute the bucket for prediction */
	final = first;

	/* determine demand range for the predicted bucket */
	if (final < 2) {
		/* lowest two buckets are combined */
		dmin = 0;
		final = 1;
	} else {
		dmin = mult_frac(final, max_task_load(), NUM_BUSY_BUCKETS);
	}
	dmax = mult_frac(final + 1, max_task_load(), NUM_BUSY_BUCKETS);

	/*
	 * search through runtime history and return first runtime that falls
	 * into the range of predicted bucket.
	 */
	for (i = 0; i < sched_ravg_hist_size; i++) {
		if (hist[i] >= dmin && hist[i] < dmax) {
			ret = hist[i];
			break;
		}
	}
	/* no historical runtime within bucket found, use average of the bin */
	if (ret < dmin)
		ret = (dmin + dmax) / 2;
	/*
	 * when updating in middle of a window, runtime could be higher
	 * than all recorded history. Always predict at least runtime.
	 */
	ret = max(runtime, ret);
out:
	trace_sched_update_pred_demand(rq, p, runtime,
		mult_frac((unsigned int)cur_freq_runtime, 100,
			  sched_ravg_window), ret);
	return ret;
}

static inline u32 calc_pred_demand(struct rq *rq, struct task_struct *p)
{
	if (p->ravg.pred_demand >= p->ravg.curr_window)
		return p->ravg.pred_demand;

	return get_pred_busy(rq, p, busy_to_bucket(p->ravg.curr_window),
			     p->ravg.curr_window);
}

/*
 * Predictive demand of a task is calculated at window roll-over.
 * If the task's current window busy time exceeds the predicted
 * demand, update it here to reflect the task's needs.
 */
void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
{
	u32 new, old;

	if (!sched_predl)
		return;

	if (is_idle_task(p) || exiting_task(p))
		return;

	if (event != PUT_PREV_TASK && event != TASK_UPDATE &&
			(!SCHED_FREQ_ACCOUNT_WAIT_TIME ||
			 (event != TASK_MIGRATE &&
			 event != PICK_NEXT_TASK)))
		return;

	/*
	 * TASK_UPDATE can be called on a sleeping task, when it is moved
	 * between related groups.
	 */
	if (event == TASK_UPDATE) {
		if (!p->on_rq && !SCHED_FREQ_ACCOUNT_WAIT_TIME)
			return;
	}

	new = calc_pred_demand(rq, p);
	old = p->ravg.pred_demand;

	if (old >= new)
		return;

	if (task_on_rq_queued(p) && (!task_has_dl_policy(p) ||
				!p->dl.dl_throttled))
		p->sched_class->fixup_walt_sched_stats(rq, p,
				p->ravg.demand,
				new);

	p->ravg.pred_demand = new;
}

void clear_top_tasks_bitmap(unsigned long *bitmap)
{
	memset(bitmap, 0, top_tasks_bitmap_size);
	__set_bit(NUM_LOAD_INDICES, bitmap);
}

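/*
 * Refresh the top-task tables for @p on @rq after its current-window
 * contribution changed from @old_curr_window. Handles both in-window
 * updates and the rollover cases (@new_window/@full_window), where the
 * task's curr/prev windows have already been swapped.
 */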
static void update_top_tasks(struct task_struct *p, struct rq *rq,
		u32 old_curr_window, int new_window, bool full_window)
{
	u8 curr = rq->curr_table;
	u8 prev = 1 - curr;
	u8 *curr_table = rq->top_tasks[curr];
	u8 *prev_table = rq->top_tasks[prev];
	int old_index, new_index, update_index;
	u32 curr_window = p->ravg.curr_window;
	u32 prev_window = p->ravg.prev_window;
	bool zero_index_update;

	if (old_curr_window == curr_window && !new_window)
		return;

	old_index = load_to_index(old_curr_window);
	new_index = load_to_index(curr_window);

	if (!new_window) {
		zero_index_update = !old_curr_window && curr_window;
		if (old_index != new_index || zero_index_update) {
			if (old_curr_window)
				curr_table[old_index] -= 1;
			if (curr_window)
				curr_table[new_index] += 1;
			if (new_index > rq->curr_top)
				rq->curr_top = new_index;
		}

		if (!curr_table[old_index])
			__clear_bit(NUM_LOAD_INDICES - old_index - 1,
				rq->top_tasks_bitmap[curr]);

		if (curr_table[new_index] == 1)
			__set_bit(NUM_LOAD_INDICES - new_index - 1,
				rq->top_tasks_bitmap[curr]);

		return;
	}

	/*
	 * The window has rolled over for this task. By the time we get
	 * here, the curr/prev swaps would have already occurred. So we need
	 * to use prev_window for the new index.
	 */
	update_index = load_to_index(prev_window);

	if (full_window) {
		/*
		 * Two cases here. Either 'p' ran for the entire window or
		 * it didn't run at all. In either case there is no entry
		 * in the prev table. If 'p' ran the entire window, we just
		 * need to create a new entry in the prev table. In this case
		 * update_index will correspond to sched_ravg_window
		 * so we can unconditionally update the top index.
		 */
		if (prev_window) {
			prev_table[update_index] += 1;
			rq->prev_top = update_index;
		}

		if (prev_table[update_index] == 1)
			__set_bit(NUM_LOAD_INDICES - update_index - 1,
				rq->top_tasks_bitmap[prev]);
	} else {
		zero_index_update = !old_curr_window && prev_window;
		if (old_index != update_index || zero_index_update) {
			if (old_curr_window)
				prev_table[old_index] -= 1;

			prev_table[update_index] += 1;

			if (update_index > rq->prev_top)
				rq->prev_top = update_index;

			if (!prev_table[old_index])
				__clear_bit(NUM_LOAD_INDICES - old_index - 1,
						rq->top_tasks_bitmap[prev]);

			if (prev_table[update_index] == 1)
				__set_bit(NUM_LOAD_INDICES - update_index - 1,
						rq->top_tasks_bitmap[prev]);
		}
	}

	if (curr_window) {
		curr_table[new_index] += 1;

		if (new_index > rq->curr_top)
			rq->curr_top = new_index;

		if (curr_table[new_index] == 1)
			__set_bit(NUM_LOAD_INDICES - new_index - 1,
				rq->top_tasks_bitmap[curr]);
	}
}

static void rollover_top_tasks(struct rq *rq, bool full_window)
{
	u8 curr_table = rq->curr_table;
	u8 prev_table = 1 - curr_table;
	int curr_top = rq->curr_top;

	clear_top_tasks_table(rq->top_tasks[prev_table]);
	clear_top_tasks_bitmap(rq->top_tasks_bitmap[prev_table]);

	if (full_window) {
		curr_top = 0;
		clear_top_tasks_table(rq->top_tasks[curr_table]);
		clear_top_tasks_bitmap(
				rq->top_tasks_bitmap[curr_table]);
	}

	rq->curr_table = prev_table;
	rq->prev_top = curr_top;
	rq->curr_top = 0;
}

static u32 empty_windows[NR_CPUS];

static void rollover_task_window(struct task_struct *p, bool full_window)
{
	u32 *curr_cpu_windows = empty_windows;
	u32 curr_window;
	int i;

	/* Rollover the sum */
	curr_window = 0;

	if (!full_window) {
		curr_window = p->ravg.curr_window;
		curr_cpu_windows = p->ravg.curr_window_cpu;
	}

	p->ravg.prev_window = curr_window;
	p->ravg.curr_window = 0;

	/* Roll over individual CPU contributions */
	for (i = 0; i < nr_cpu_ids; i++) {
		p->ravg.prev_window_cpu[i] = curr_cpu_windows[i];
		p->ravg.curr_window_cpu[i] = 0;
	}
}

void sched_set_io_is_busy(int val)
{
	sched_io_is_busy = val;
}

static inline int cpu_is_waiting_on_io(struct rq *rq)
{
	if (!sched_io_is_busy)
		return 0;

	return atomic_read(&rq->nr_iowait);
}

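/*
 * Decide whether the elapsed time should be charged to the CPU busy
 * counters for this @event. Idle time only counts when it is irq or
 * iowait time, wakeups are never charged, and wait time is only charged
 * when SCHED_FREQ_ACCOUNT_WAIT_TIME is enabled.
 */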
static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p,
				     u64 irqtime, int event)
{
	if (is_idle_task(p)) {
		/* TASK_WAKE && TASK_MIGRATE is not possible on idle task! */
		if (event == PICK_NEXT_TASK)
			return 0;

		/* PUT_PREV_TASK, TASK_UPDATE && IRQ_UPDATE are left */
		return irqtime || cpu_is_waiting_on_io(rq);
	}

	if (event == TASK_WAKE)
		return 0;

	if (event == PUT_PREV_TASK || event == IRQ_UPDATE)
		return 1;

	/*
	 * TASK_UPDATE can be called on a sleeping task, when it is moved
	 * between related groups.
	 */
	if (event == TASK_UPDATE) {
		if (rq->curr == p)
			return 1;

		return p->on_rq ? SCHED_FREQ_ACCOUNT_WAIT_TIME : 0;
	}

	/* TASK_MIGRATE, PICK_NEXT_TASK left */
	return SCHED_FREQ_ACCOUNT_WAIT_TIME;
}

#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)

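/*
 * Scale a raw wallclock delta by the frequency the CPU was actually
 * running at (derived from the cycle counters) relative to
 * max_possible_freq, and by the cluster's exec_scale_factor (a
 * 1024-based efficiency factor, hence the final >> 10). For example, a
 * task that ran 10 ms at half of max_possible_freq on a cluster with
 * exec_scale_factor == 1024 is accounted as roughly 5 ms of demand.
 */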
static inline u64 scale_exec_time(u64 delta, struct rq *rq)
{
	u32 freq;

	freq = cpu_cycles_to_freq(rq->cc.cycles, rq->cc.time);
	delta = DIV64_U64_ROUNDUP(delta * freq, max_possible_freq);
	delta *= rq->cluster->exec_scale_factor;
	delta >>= 10;

	return delta;
}

/*
 * Convert busy time to its frequency equivalent.
 * Assumes load is scaled to 1024.
 */
static inline unsigned int load_to_freq(struct rq *rq, unsigned int load)
{
	return mult_frac(cpu_max_possible_freq(cpu_of(rq)), load,
			 (unsigned int) capacity_orig_of(cpu_of(rq)));
}

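/*
 * Decide whether the predicted-load (pl) sum warrants poking the cpufreq
 * governor: skip CPUs already running at their highest frequency and only
 * notify when the predicted load exceeds the previously reported busy
 * time by more than the equivalent of 400 MHz.
 */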
bool do_pl_notif(struct rq *rq)
{
	u64 prev = rq->old_busy_time;
	u64 pl = rq->walt_stats.pred_demands_sum;
	int cpu = cpu_of(rq);

	/* If already at max freq, bail out */
	if (capacity_orig_of(cpu) == capacity_curr_of(cpu))
		return false;

	prev = max(prev, rq->old_estimated_time);

	pl = div64_u64(pl, sched_ravg_window >> SCHED_CAPACITY_SHIFT);

	/* 400 MHz filter. */
	return (pl > prev) && (load_to_freq(rq, pl - prev) > 400000);
}

static void rollover_cpu_window(struct rq *rq, bool full_window)
{
	u64 curr_sum = rq->curr_runnable_sum;
	u64 nt_curr_sum = rq->nt_curr_runnable_sum;
	u64 grp_curr_sum = rq->grp_time.curr_runnable_sum;
	u64 grp_nt_curr_sum = rq->grp_time.nt_curr_runnable_sum;

	if (unlikely(full_window)) {
		curr_sum = 0;
		nt_curr_sum = 0;
		grp_curr_sum = 0;
		grp_nt_curr_sum = 0;
	}

	rq->prev_runnable_sum = curr_sum;
	rq->nt_prev_runnable_sum = nt_curr_sum;
	rq->grp_time.prev_runnable_sum = grp_curr_sum;
	rq->grp_time.nt_prev_runnable_sum = grp_nt_curr_sum;

	rq->curr_runnable_sum = 0;
	rq->nt_curr_runnable_sum = 0;
	rq->grp_time.curr_runnable_sum = 0;
	rq->grp_time.nt_curr_runnable_sum = 0;
}

1408/*
1409 * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
1410 */
1411static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001412 int event, u64 wallclock, u64 irqtime)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001413{
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001414 int new_window, full_window = 0;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001415 int p_is_curr_task = (p == rq->curr);
1416 u64 mark_start = p->ravg.mark_start;
1417 u64 window_start = rq->window_start;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001418 u32 window_size = sched_ravg_window;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001419 u64 delta;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001420 u64 *curr_runnable_sum = &rq->curr_runnable_sum;
1421 u64 *prev_runnable_sum = &rq->prev_runnable_sum;
1422 u64 *nt_curr_runnable_sum = &rq->nt_curr_runnable_sum;
1423 u64 *nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;
1424 bool new_task;
1425 struct related_thread_group *grp;
1426 int cpu = rq->cpu;
1427 u32 old_curr_window = p->ravg.curr_window;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001428
1429 new_window = mark_start < window_start;
1430 if (new_window) {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001431 full_window = (window_start - mark_start) >= window_size;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001432 if (p->ravg.active_windows < USHRT_MAX)
1433 p->ravg.active_windows++;
1434 }
1435
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001436 new_task = is_new_task(p);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001437
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001438 /*
1439 * Handle per-task window rollover. We don't care about the idle
1440 * task or exiting tasks.
1441 */
1442 if (!is_idle_task(p) && !exiting_task(p)) {
1443 if (new_window)
1444 rollover_task_window(p, full_window);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001445 }
1446
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001447 if (p_is_curr_task && new_window) {
1448 rollover_cpu_window(rq, full_window);
1449 rollover_top_tasks(rq, full_window);
1450 }
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001451
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001452 if (!account_busy_for_cpu_time(rq, p, irqtime, event))
1453 goto done;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001454
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001455 grp = p->grp;
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05301456 if (grp) {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001457 struct group_cpu_time *cpu_time = &rq->grp_time;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001458
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001459 curr_runnable_sum = &cpu_time->curr_runnable_sum;
1460 prev_runnable_sum = &cpu_time->prev_runnable_sum;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001461
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001462 nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
1463 nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001464 }
1465
1466 if (!new_window) {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001467 /*
1468 * account_busy_for_cpu_time() = 1 so busy time needs
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001469 * to be accounted to the current window. No rollover
1470 * since we didn't start a new window. An example of this is
1471 * when a task starts execution and then sleeps within the
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001472 * same window.
1473 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001474
1475 if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq))
1476 delta = wallclock - mark_start;
1477 else
1478 delta = irqtime;
1479 delta = scale_exec_time(delta, rq);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001480 *curr_runnable_sum += delta;
1481 if (new_task)
1482 *nt_curr_runnable_sum += delta;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001483
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001484 if (!is_idle_task(p) && !exiting_task(p)) {
1485 p->ravg.curr_window += delta;
1486 p->ravg.curr_window_cpu[cpu] += delta;
1487 }
1488
1489 goto done;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001490 }
1491
1492 if (!p_is_curr_task) {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001493 /*
1494 * account_busy_for_cpu_time() = 1 so busy time needs
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001495 * to be accounted to the current window. A new window
1496 * has also started, but p is not the current task, so the
1497 * window is not rolled over - just split up and account
1498 * as necessary into curr and prev. The window is only
1499 * rolled over when a new window is processed for the current
1500 * task.
1501 *
1502 * Irqtime can't be accounted by a task that isn't the
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001503 * currently running task.
1504 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001505
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001506 if (!full_window) {
1507 /*
1508 * A full window hasn't elapsed, account partial
1509 * contribution to previous completed window.
1510 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001511 delta = scale_exec_time(window_start - mark_start, rq);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001512 if (!exiting_task(p)) {
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001513 p->ravg.prev_window += delta;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001514 p->ravg.prev_window_cpu[cpu] += delta;
1515 }
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001516 } else {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001517 /*
1518 * Since at least one full window has elapsed,
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001519 * the contribution to the previous window is the
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001520 * full window (window_size).
1521 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001522 delta = scale_exec_time(window_size, rq);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001523 if (!exiting_task(p)) {
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001524 p->ravg.prev_window = delta;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001525 p->ravg.prev_window_cpu[cpu] = delta;
1526 }
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001527 }
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001528
1529 *prev_runnable_sum += delta;
1530 if (new_task)
1531 *nt_prev_runnable_sum += delta;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001532
1533 /* Account piece of busy time in the current window. */
1534 delta = scale_exec_time(wallclock - window_start, rq);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001535 *curr_runnable_sum += delta;
1536 if (new_task)
1537 *nt_curr_runnable_sum += delta;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001538
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001539 if (!exiting_task(p)) {
1540 p->ravg.curr_window = delta;
1541 p->ravg.curr_window_cpu[cpu] = delta;
1542 }
1543
1544 goto done;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001545 }
1546
1547 if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001548 /*
1549 * account_busy_for_cpu_time() = 1 so busy time needs
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001550 * to be accounted to the current window. A new window
1551 * has started and p is the current task so rollover is
 1552			 * needed. If any of the three conditions checked above is true,
 1553			 * then this busy time can't be accounted as irqtime.
1554 *
1555 * Busy time for the idle task or exiting tasks need not
1556 * be accounted.
1557 *
1558 * An example of this would be a task that starts execution
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001559 * and then sleeps once a new window has begun.
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001560 */
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001561
1562 if (!full_window) {
1563 /*
1564 * A full window hasn't elapsed, account partial
1565 * contribution to previous completed window.
1566 */
1567 delta = scale_exec_time(window_start - mark_start, rq);
1568 if (!is_idle_task(p) && !exiting_task(p)) {
1569 p->ravg.prev_window += delta;
1570 p->ravg.prev_window_cpu[cpu] += delta;
1571 }
1572 } else {
1573 /*
1574 * Since at least one full window has elapsed,
1575 * the contribution to the previous window is the
1576 * full window (window_size).
1577 */
1578 delta = scale_exec_time(window_size, rq);
1579 if (!is_idle_task(p) && !exiting_task(p)) {
1580 p->ravg.prev_window = delta;
1581 p->ravg.prev_window_cpu[cpu] = delta;
1582 }
1583 }
1584
1585 /*
1586 * Rollover is done here by overwriting the values in
1587 * prev_runnable_sum and curr_runnable_sum.
1588 */
1589 *prev_runnable_sum += delta;
1590 if (new_task)
1591 *nt_prev_runnable_sum += delta;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001592
1593 /* Account piece of busy time in the current window. */
1594 delta = scale_exec_time(wallclock - window_start, rq);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001595 *curr_runnable_sum += delta;
1596 if (new_task)
1597 *nt_curr_runnable_sum += delta;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001598
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001599 if (!is_idle_task(p) && !exiting_task(p)) {
1600 p->ravg.curr_window = delta;
1601 p->ravg.curr_window_cpu[cpu] = delta;
1602 }
1603
1604 goto done;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001605 }
1606
1607 if (irqtime) {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001608 /*
1609 * account_busy_for_cpu_time() = 1 so busy time needs
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001610 * to be accounted to the current window. A new window
1611 * has started and p is the current task so rollover is
1612 * needed. The current task must be the idle task because
1613 * irqtime is not accounted for any other task.
1614 *
1615 * Irqtime will be accounted each time we process IRQ activity
1616 * after a period of idleness, so we know the IRQ busy time
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001617 * started at wallclock - irqtime.
1618 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001619
1620 BUG_ON(!is_idle_task(p));
1621 mark_start = wallclock - irqtime;
1622
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001623 /*
1624 * Roll window over. If IRQ busy time was just in the current
1625 * window then that is all that need be accounted.
1626 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001627 if (mark_start > window_start) {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001628 *curr_runnable_sum = scale_exec_time(irqtime, rq);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001629 return;
1630 }
1631
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001632 /*
1633 * The IRQ busy time spanned multiple windows. Process the
1634 * busy time preceding the current window start first.
1635 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001636 delta = window_start - mark_start;
1637 if (delta > window_size)
1638 delta = window_size;
1639 delta = scale_exec_time(delta, rq);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001640 *prev_runnable_sum += delta;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001641
1642 /* Process the remaining IRQ busy time in the current window. */
1643 delta = wallclock - window_start;
1644 rq->curr_runnable_sum = scale_exec_time(delta, rq);
1645
1646 return;
1647 }
1648
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001649done:
1650 if (!is_idle_task(p) && !exiting_task(p))
1651 update_top_tasks(p, rq, old_curr_window,
1652 new_window, full_window);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001653}
1654
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001655
1656static inline u32 predict_and_update_buckets(struct rq *rq,
1657 struct task_struct *p, u32 runtime) {
1658
1659 int bidx;
1660 u32 pred_demand;
1661
Syed Rameez Mustafaf83db1b2017-05-26 16:26:28 -07001662 if (!sched_predl)
1663 return 0;
1664
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001665 bidx = busy_to_bucket(runtime);
1666 pred_demand = get_pred_busy(rq, p, bidx, runtime);
1667 bucket_increase(p->ravg.busy_buckets, bidx);
1668
1669 return pred_demand;
1670}
1671
1672static int
1673account_busy_for_task_demand(struct rq *rq, struct task_struct *p, int event)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001674{
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001675 /*
1676 * No need to bother updating task demand for exiting tasks
1677 * or the idle task.
1678 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001679 if (exiting_task(p) || is_idle_task(p))
1680 return 0;
1681
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001682 /*
1683 * When a task is waking up it is completing a segment of non-busy
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001684 * time. Likewise, if wait time is not treated as busy time, then
1685 * when a task begins to run or is migrated, it is not running and
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001686 * is completing a segment of non-busy time.
1687 */
1688 if (event == TASK_WAKE || (!SCHED_ACCOUNT_WAIT_TIME &&
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001689 (event == PICK_NEXT_TASK || event == TASK_MIGRATE)))
1690 return 0;
1691
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001692 /*
 1693	 * TASK_UPDATE can be called on a sleeping task, when it is moved between
 1694	 * related groups.
1695 */
1696 if (event == TASK_UPDATE) {
1697 if (rq->curr == p)
1698 return 1;
1699
1700 return p->on_rq ? SCHED_ACCOUNT_WAIT_TIME : 0;
1701 }
1702
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001703 return 1;
1704}
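/*
 * Descriptive summary of the checks above: idle and exiting tasks and
 * wakeups never add to demand; PICK_NEXT_TASK and TASK_MIGRATE add to it
 * only when wait time is treated as busy time (SCHED_ACCOUNT_WAIT_TIME);
 * TASK_UPDATE counts for the running task, and for a queued task only when
 * wait time is counted; all remaining events (e.g. PUT_PREV_TASK) count.
 */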
1705
1706/*
 1707 * Called when a new window is starting for a task, to record cpu usage over
1708 * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
1709 * when, say, a real-time task runs without preemption for several windows at a
1710 * stretch.
1711 */
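/*
 * Illustrative example (hypothetical numbers): with sched_ravg_hist_size == 5
 * and sum_history == {10, 8, 6, 4, 2} (most recent first), recording
 * runtime == 12 with samples == 2 shifts the history to {12, 12, 10, 8, 6}.
 * The demand computed below then depends on sched_window_stats_policy:
 * the most recent sample (12), the maximum (12), the integer average (9),
 * or max(average, recent) = 12.
 */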
1712static void update_history(struct rq *rq, struct task_struct *p,
1713 u32 runtime, int samples, int event)
1714{
1715 u32 *hist = &p->ravg.sum_history[0];
1716 int ridx, widx;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001717 u32 max = 0, avg, demand, pred_demand;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001718 u64 sum = 0;
Joonwoo Park84a80882017-02-03 11:15:31 -08001719 u64 prev_demand;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001720
1721 /* Ignore windows where task had no activity */
1722 if (!runtime || is_idle_task(p) || exiting_task(p) || !samples)
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001723 goto done;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001724
Joonwoo Park84a80882017-02-03 11:15:31 -08001725 prev_demand = p->ravg.demand;
1726
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001727 /* Push new 'runtime' value onto stack */
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001728 widx = sched_ravg_hist_size - 1;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001729 ridx = widx - samples;
1730 for (; ridx >= 0; --widx, --ridx) {
1731 hist[widx] = hist[ridx];
1732 sum += hist[widx];
1733 if (hist[widx] > max)
1734 max = hist[widx];
1735 }
1736
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001737 for (widx = 0; widx < samples && widx < sched_ravg_hist_size; widx++) {
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001738 hist[widx] = runtime;
1739 sum += hist[widx];
1740 if (hist[widx] > max)
1741 max = hist[widx];
1742 }
1743
1744 p->ravg.sum = 0;
1745
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001746 if (sched_window_stats_policy == WINDOW_STATS_RECENT) {
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001747 demand = runtime;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001748 } else if (sched_window_stats_policy == WINDOW_STATS_MAX) {
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001749 demand = max;
1750 } else {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001751 avg = div64_u64(sum, sched_ravg_hist_size);
1752 if (sched_window_stats_policy == WINDOW_STATS_AVG)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001753 demand = avg;
1754 else
1755 demand = max(avg, runtime);
1756 }
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001757 pred_demand = predict_and_update_buckets(rq, p, runtime);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001758
1759 /*
1760 * A throttled deadline sched class task gets dequeued without
Pavankumar Kondeti84f72d72017-07-20 11:00:45 +05301761	 * changing p->on_rq. Since the dequeue decrements the walt stats,
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001762	 * avoid decrementing them here again.
Pavankumar Kondeti0cebff02017-07-21 16:28:12 +05301763	 *
 1764	 * When the window is rolled over, the cumulative window demand
 1765	 * is reset to the cumulative runnable average (the contribution from
 1766	 * the tasks on the runqueue). If the current task has already been
 1767	 * dequeued, its demand is not included in the cumulative runnable
 1768	 * average, so add the task demand separately to the cumulative window
 1769	 * demand.
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001770 */
Pavankumar Kondeti0cebff02017-07-21 16:28:12 +05301771 if (!task_has_dl_policy(p) || !p->dl.dl_throttled) {
1772 if (task_on_rq_queued(p))
1773 p->sched_class->fixup_walt_sched_stats(rq, p, demand,
1774 pred_demand);
1775 else if (rq->curr == p)
1776 walt_fixup_cum_window_demand(rq, demand);
1777 }
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001778
1779 p->ravg.demand = demand;
Syed Rameez Mustafaf3f7bf82017-04-11 17:43:48 -07001780 p->ravg.coloc_demand = div64_u64(sum, sched_ravg_hist_size);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001781 p->ravg.pred_demand = pred_demand;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001782
1783done:
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001784 trace_sched_update_history(rq, p, runtime, samples, event);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001785}
1786
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001787static u64 add_to_task_demand(struct rq *rq, struct task_struct *p, u64 delta)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001788{
1789 delta = scale_exec_time(delta, rq);
1790 p->ravg.sum += delta;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001791 if (unlikely(p->ravg.sum > sched_ravg_window))
1792 p->ravg.sum = sched_ravg_window;
1793
1794 return delta;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001795}
1796
1797/*
1798 * Account cpu demand of task and/or update task's cpu demand history
1799 *
1800 * ms = p->ravg.mark_start;
1801 * wc = wallclock
1802 * ws = rq->window_start
1803 *
1804 * Three possibilities:
1805 *
1806 * a) Task event is contained within one window.
1807 * window_start < mark_start < wallclock
1808 *
1809 * ws ms wc
1810 * | | |
1811 * V V V
1812 * |---------------|
1813 *
1814 * In this case, p->ravg.sum is updated *iff* event is appropriate
1815 * (ex: event == PUT_PREV_TASK)
1816 *
1817 * b) Task event spans two windows.
1818 * mark_start < window_start < wallclock
1819 *
1820 * ms ws wc
1821 * | | |
1822 * V V V
1823 * -----|-------------------
1824 *
1825 * In this case, p->ravg.sum is updated with (ws - ms) *iff* event
1826 * is appropriate, then a new window sample is recorded followed
1827 * by p->ravg.sum being set to (wc - ws) *iff* event is appropriate.
1828 *
1829 * c) Task event spans more than two windows.
1830 *
1831 * ms ws_tmp ws wc
1832 * | | | |
1833 * V V V V
1834 * ---|-------|-------|-------|-------|------
1835 * | |
1836 * |<------ nr_full_windows ------>|
1837 *
1838 * In this case, p->ravg.sum is updated with (ws_tmp - ms) first *iff*
1839 * event is appropriate, window sample of p->ravg.sum is recorded,
1840 * 'nr_full_window' samples of window_size is also recorded *iff*
1841 * event is appropriate and finally p->ravg.sum is set to (wc - ws)
1842 * *iff* event is appropriate.
1843 *
1844 * IMPORTANT : Leave p->ravg.mark_start unchanged, as update_cpu_busy_time()
1845 * depends on it!
1846 */
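/*
 * Worked example for case (c), with hypothetical numbers: window_size = 20ms,
 * mark_start 5ms into its window, wallclock 10ms into the current window and
 * three full windows in between. First the 15ms up to ws_tmp is added to
 * p->ravg.sum and one history sample is recorded, then three samples of the
 * scaled full window are recorded, and finally p->ravg.sum restarts with the
 * 10ms elapsed in the current window.
 */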
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001847static u64 update_task_demand(struct task_struct *p, struct rq *rq,
1848 int event, u64 wallclock)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001849{
1850 u64 mark_start = p->ravg.mark_start;
1851 u64 delta, window_start = rq->window_start;
1852 int new_window, nr_full_windows;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001853 u32 window_size = sched_ravg_window;
1854 u64 runtime;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001855
1856 new_window = mark_start < window_start;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001857 if (!account_busy_for_task_demand(rq, p, event)) {
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001858 if (new_window)
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001859 /*
 1860		 * If this time isn't being accounted as
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001861 * busy time, and a new window started, only the
1862 * previous window need be closed out with the
1863 * pre-existing demand. Multiple windows may have
1864 * elapsed, but since empty windows are dropped,
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001865 * it is not necessary to account those.
1866 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001867 update_history(rq, p, p->ravg.sum, 1, event);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001868 return 0;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001869 }
1870
1871 if (!new_window) {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001872 /*
1873 * The simple case - busy time contained within the existing
1874 * window.
1875 */
1876 return add_to_task_demand(rq, p, wallclock - mark_start);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001877 }
1878
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001879 /*
1880 * Busy time spans at least two windows. Temporarily rewind
1881 * window_start to first window boundary after mark_start.
1882 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001883 delta = window_start - mark_start;
1884 nr_full_windows = div64_u64(delta, window_size);
1885 window_start -= (u64)nr_full_windows * (u64)window_size;
1886
1887 /* Process (window_start - mark_start) first */
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001888 runtime = add_to_task_demand(rq, p, window_start - mark_start);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001889
1890 /* Push new sample(s) into task's demand history */
1891 update_history(rq, p, p->ravg.sum, 1, event);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001892 if (nr_full_windows) {
1893 u64 scaled_window = scale_exec_time(window_size, rq);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001894
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001895 update_history(rq, p, scaled_window, nr_full_windows, event);
1896 runtime += nr_full_windows * scaled_window;
1897 }
1898
1899 /*
1900 * Roll window_start back to current to process any remainder
1901 * in current window.
1902 */
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001903 window_start += (u64)nr_full_windows * (u64)window_size;
1904
1905 /* Process (wallclock - window_start) next */
1906 mark_start = window_start;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001907 runtime += add_to_task_demand(rq, p, wallclock - mark_start);
1908
1909 return runtime;
1910}
1911
1912static void
1913update_task_rq_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
1914 u64 wallclock, u64 irqtime)
1915{
1916 u64 cur_cycles;
1917 int cpu = cpu_of(rq);
1918
1919 lockdep_assert_held(&rq->lock);
1920
1921 if (!use_cycle_counter) {
1922 rq->cc.cycles = cpu_cur_freq(cpu);
1923 rq->cc.time = 1;
1924 return;
1925 }
1926
Vikram Mulukutla77ecebb2017-05-30 14:38:55 -07001927 cur_cycles = read_cycle_counter(cpu, wallclock);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001928
1929 /*
 1930	 * If the current task is the idle task and irqtime == 0, the CPU was
 1931	 * indeed idle and its cycle counter was probably not
 1932	 * increasing. We still need an estimated CPU frequency
1933 * for IO wait time accounting. Use the previously
1934 * calculated frequency in such a case.
1935 */
1936 if (!is_idle_task(rq->curr) || irqtime) {
1937 if (unlikely(cur_cycles < p->cpu_cycles))
1938 rq->cc.cycles = cur_cycles + (U64_MAX - p->cpu_cycles);
1939 else
1940 rq->cc.cycles = cur_cycles - p->cpu_cycles;
1941 rq->cc.cycles = rq->cc.cycles * NSEC_PER_MSEC;
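		/*
		 * Note: because the cycle delta is scaled by NSEC_PER_MSEC and
		 * rq->cc.time is in nanoseconds, the rq->cc.cycles / rq->cc.time
		 * ratio works out to cycles per millisecond, i.e. kHz, which is
		 * consistent with the cpu_cur_freq() fallback above.
		 */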
1942
1943 if (event == IRQ_UPDATE && is_idle_task(p))
1944 /*
 1945			 * The time between the idle task's mark_start and IRQ handler
 1946			 * entry is the CPU cycle counter stall period.
1947 * Upon IRQ handler entry sched_account_irqstart()
1948 * replenishes idle task's cpu cycle counter so
1949 * rq->cc.cycles now represents increased cycles during
1950 * IRQ handler rather than time between idle entry and
1951 * IRQ exit. Thus use irqtime as time delta.
1952 */
1953 rq->cc.time = irqtime;
1954 else
1955 rq->cc.time = wallclock - p->ravg.mark_start;
1956 BUG_ON((s64)rq->cc.time < 0);
1957 }
1958
1959 p->cpu_cycles = cur_cycles;
1960
Puja Gupta8a965a22017-11-16 14:18:04 -08001961 trace_sched_get_task_cpu_cycles(cpu, event, rq->cc.cycles, rq->cc.time, p);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001962}
1963
Vikram Mulukutla1abebc22017-05-12 19:11:51 -07001964static inline void run_walt_irq_work(u64 old_window_start, struct rq *rq)
1965{
1966 u64 result;
1967
1968 if (old_window_start == rq->window_start)
1969 return;
1970
Pavankumar Kondeti6737d8c2017-06-01 16:23:16 +05301971 result = atomic64_cmpxchg(&walt_irq_work_lastq_ws, old_window_start,
Vikram Mulukutla1abebc22017-05-12 19:11:51 -07001972 rq->window_start);
1973 if (result == old_window_start)
Maria Yu702cec92019-08-13 17:12:33 +08001974 sched_irq_work_queue(&walt_cpufreq_irq_work);
Vikram Mulukutla1abebc22017-05-12 19:11:51 -07001975}
1976
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001977/* Reflect task activity on its demand and cpu's busy time statistics */
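/*
 * Ordering note: the window is rolled forward first, then CPU cycles, task
 * demand, CPU busy time and predicted demand are updated, and mark_start is
 * finally advanced to wallclock. The cpufreq irq_work is queued afterwards,
 * once per window rollover (see run_walt_irq_work()).
 */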
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001978void update_task_ravg(struct task_struct *p, struct rq *rq, int event,
1979 u64 wallclock, u64 irqtime)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001980{
Pavankumar Kondetid4127502017-07-20 08:56:15 +05301981 u64 old_window_start;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001982
1983 if (!rq->window_start || sched_disable_window_stats ||
1984 p->ravg.mark_start == wallclock)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001985 return;
1986
1987 lockdep_assert_held(&rq->lock);
1988
Vikram Mulukutla1abebc22017-05-12 19:11:51 -07001989 old_window_start = update_window_start(rq, wallclock, event);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001990
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001991 if (!p->ravg.mark_start) {
Vikram Mulukutla77ecebb2017-05-30 14:38:55 -07001992 update_task_cpu_cycles(p, cpu_of(rq), wallclock);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001993 goto done;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001994 }
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001995
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001996 update_task_rq_cpu_cycles(p, rq, event, wallclock, irqtime);
Pavankumar Kondetid4127502017-07-20 08:56:15 +05301997 update_task_demand(p, rq, event, wallclock);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07001998 update_cpu_busy_time(p, rq, event, wallclock, irqtime);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08001999 update_task_pred_demand(rq, p, event);
Pavankumar Kondetie729cba2018-03-13 16:03:15 +05302000
2001 if (exiting_task(p))
2002 goto done;
2003
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002004 trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime,
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05302005 rq->cc.cycles, rq->cc.time, &rq->grp_time);
Vikram Mulukutla0b062902017-03-20 16:27:04 -07002006 trace_sched_update_task_ravg_mini(p, rq, event, wallclock, irqtime,
2007 rq->cc.cycles, rq->cc.time, &rq->grp_time);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002008
Pavankumar Kondetie729cba2018-03-13 16:03:15 +05302009done:
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002010 p->ravg.mark_start = wallclock;
Vikram Mulukutla1abebc22017-05-12 19:11:51 -07002011
2012 run_walt_irq_work(old_window_start, rq);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002013}
2014
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002015u32 sched_get_init_task_load(struct task_struct *p)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002016{
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002017 return p->init_load_pct;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002018}
2019
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002020int sched_set_init_task_load(struct task_struct *p, int init_load_pct)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002021{
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002022 if (init_load_pct < 0 || init_load_pct > 100)
2023 return -EINVAL;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002024
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002025 p->init_load_pct = init_load_pct;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002026
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002027 return 0;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002028}
2029
Pavankumar Kondeti736630c2018-09-20 15:31:36 +05302030void init_new_task_load(struct task_struct *p)
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002031{
2032 int i;
Lingutla Chandrasekhar57eb0712018-03-12 10:20:08 +05302033 u32 init_load_windows;
2034 u32 init_load_pct;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002035
2036 p->init_load_pct = 0;
2037 rcu_assign_pointer(p->grp, NULL);
2038 INIT_LIST_HEAD(&p->grp_list);
2039 memset(&p->ravg, 0, sizeof(struct ravg));
2040 p->cpu_cycles = 0;
2041
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002042 p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
2043 p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
2044
2045 /* Don't have much choice. CPU frequency would be bogus */
2046 BUG_ON(!p->ravg.curr_window_cpu || !p->ravg.prev_window_cpu);
2047
Lingutla Chandrasekhar57eb0712018-03-12 10:20:08 +05302048 if (current->init_load_pct)
2049 init_load_pct = current->init_load_pct;
2050 else
2051 init_load_pct = sysctl_sched_init_task_load_pct;
2052
2053 init_load_windows = div64_u64((u64)init_load_pct *
2054 (u64)sched_ravg_window, 100);
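	/*
	 * e.g. (hypothetical numbers) init_load_pct == 15 with a 20ms
	 * (20000000 ns) window gives init_load_windows == 3000000.
	 */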
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002055
2056 p->ravg.demand = init_load_windows;
Syed Rameez Mustafaf3f7bf82017-04-11 17:43:48 -07002057 p->ravg.coloc_demand = init_load_windows;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002058 p->ravg.pred_demand = 0;
2059 for (i = 0; i < RAVG_HIST_SIZE_MAX; ++i)
2060 p->ravg.sum_history[i] = init_load_windows;
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05302061 p->misfit = false;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002062}
2063
Pavankumar Kondeti4d091222018-01-10 15:15:41 +05302064/*
2065 * kfree() may wakeup kswapd. So this function should NOT be called
2066 * with any CPU's rq->lock acquired.
2067 */
Pavankumar Kondeti708c46b2017-06-12 11:27:27 +05302068void free_task_load_ptrs(struct task_struct *p)
2069{
2070 kfree(p->ravg.curr_window_cpu);
2071 kfree(p->ravg.prev_window_cpu);
2072
2073 /*
2074 * update_task_ravg() can be called for exiting tasks. While the
2075 * function itself ensures correct behavior, the corresponding
2076 * trace event requires that these pointers be NULL.
2077 */
2078 p->ravg.curr_window_cpu = NULL;
2079 p->ravg.prev_window_cpu = NULL;
2080}
2081
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002082void reset_task_stats(struct task_struct *p)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002083{
2084 u32 sum = 0;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002085 u32 *curr_window_ptr = NULL;
2086 u32 *prev_window_ptr = NULL;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002087
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002088 if (exiting_task(p)) {
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002089 sum = EXITING_TASK_MARKER;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002090 } else {
2091 curr_window_ptr = p->ravg.curr_window_cpu;
2092 prev_window_ptr = p->ravg.prev_window_cpu;
2093 memset(curr_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
2094 memset(prev_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
2095 }
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002096
2097 memset(&p->ravg, 0, sizeof(struct ravg));
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002098
2099 p->ravg.curr_window_cpu = curr_window_ptr;
2100 p->ravg.prev_window_cpu = prev_window_ptr;
2101
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002102 /* Retain EXITING_TASK marker */
2103 p->ravg.sum_history[0] = sum;
2104}
2105
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002106void mark_task_starting(struct task_struct *p)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002107{
2108 u64 wallclock;
2109 struct rq *rq = task_rq(p);
2110
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002111 if (!rq->window_start || sched_disable_window_stats) {
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002112 reset_task_stats(p);
2113 return;
2114 }
2115
Pavankumar Kondetifaa04442018-06-25 16:13:39 +05302116 wallclock = sched_ktime_clock();
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002117 p->ravg.mark_start = p->last_wake_ts = wallclock;
Pavankumar Kondeti4e13d112018-01-25 01:12:08 +05302118 p->last_enqueued_ts = wallclock;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002119 p->last_switch_out_ts = 0;
Vikram Mulukutla77ecebb2017-05-30 14:38:55 -07002120 update_task_cpu_cycles(p, cpu_of(rq), wallclock);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002121}
2122
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002123static cpumask_t all_cluster_cpus = CPU_MASK_NONE;
2124DECLARE_BITMAP(all_cluster_ids, NR_CPUS);
2125struct sched_cluster *sched_cluster[NR_CPUS];
2126int num_clusters;
2127
2128struct list_head cluster_head;
2129
2130static void
2131insert_cluster(struct sched_cluster *cluster, struct list_head *head)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002132{
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002133 struct sched_cluster *tmp;
2134 struct list_head *iter = head;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002135
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002136 list_for_each_entry(tmp, head, list) {
2137 if (cluster->max_power_cost < tmp->max_power_cost)
2138 break;
2139 iter = &tmp->list;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002140 }
2141
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002142 list_add(&cluster->list, iter);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002143}
2144
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002145static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002146{
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002147 struct sched_cluster *cluster = NULL;
2148
2149 cluster = kzalloc(sizeof(struct sched_cluster), GFP_ATOMIC);
2150 if (!cluster) {
2151 __WARN_printf("Cluster allocation failed. Possible bad scheduling\n");
2152 return NULL;
2153 }
2154
2155 INIT_LIST_HEAD(&cluster->list);
2156 cluster->max_power_cost = 1;
2157 cluster->min_power_cost = 1;
2158 cluster->capacity = 1024;
2159 cluster->max_possible_capacity = 1024;
2160 cluster->efficiency = 1;
2161 cluster->load_scale_factor = 1024;
2162 cluster->cur_freq = 1;
2163 cluster->max_freq = 1;
2164 cluster->max_mitigated_freq = UINT_MAX;
2165 cluster->min_freq = 1;
2166 cluster->max_possible_freq = 1;
2167 cluster->dstate = 0;
2168 cluster->dstate_wakeup_energy = 0;
2169 cluster->dstate_wakeup_latency = 0;
2170 cluster->freq_init_done = false;
2171
2172 raw_spin_lock_init(&cluster->load_lock);
2173 cluster->cpus = *cpus;
2174 cluster->efficiency = arch_get_cpu_efficiency(cpumask_first(cpus));
2175
2176 if (cluster->efficiency > max_possible_efficiency)
2177 max_possible_efficiency = cluster->efficiency;
2178 if (cluster->efficiency < min_possible_efficiency)
2179 min_possible_efficiency = cluster->efficiency;
2180
2181 cluster->notifier_sent = 0;
2182 return cluster;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002183}
2184
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002185static void add_cluster(const struct cpumask *cpus, struct list_head *head)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002186{
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002187 struct sched_cluster *cluster = alloc_new_cluster(cpus);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002188 int i;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002189
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002190 if (!cluster)
2191 return;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002192
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002193 for_each_cpu(i, cpus)
2194 cpu_rq(i)->cluster = cluster;
2195
2196 insert_cluster(cluster, head);
2197 set_bit(num_clusters, all_cluster_ids);
2198 num_clusters++;
2199}
2200
2201static int compute_max_possible_capacity(struct sched_cluster *cluster)
2202{
2203 int capacity = 1024;
2204
2205 capacity *= capacity_scale_cpu_efficiency(cluster);
2206 capacity >>= 10;
2207
2208 capacity *= (1024 * cluster->max_possible_freq) / min_max_freq;
2209 capacity >>= 10;
2210
2211 return capacity;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002212}
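/*
 * Illustrative (hypothetical) example: if capacity_scale_cpu_efficiency()
 * returns 1536 (1.5x in 1024-based fixed point) and the cluster's
 * max_possible_freq is twice min_max_freq, the result is
 * 1024 * 1536/1024 * 2048/1024 = 3072.
 */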
2213
Pavankumar Kondetif51d5392018-11-28 11:57:29 +05302214void walt_update_min_max_capacity(void)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002215{
2216 unsigned long flags;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002217
Vikram Mulukutlae71a8452017-02-03 12:38:53 -08002218 acquire_rq_locks_irqsave(cpu_possible_mask, &flags);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002219 __update_min_max_capacity();
Vikram Mulukutlae71a8452017-02-03 12:38:53 -08002220 release_rq_locks_irqrestore(cpu_possible_mask, &flags);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002221}
2222
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002223unsigned int max_power_cost = 1;
2224
2225static int
2226compare_clusters(void *priv, struct list_head *a, struct list_head *b)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002227{
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002228 struct sched_cluster *cluster1, *cluster2;
2229 int ret;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002230
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002231 cluster1 = container_of(a, struct sched_cluster, list);
2232 cluster2 = container_of(b, struct sched_cluster, list);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002233
2234 /*
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002235 * Don't assume higher capacity means higher power. If the
 2236	 * power cost is the same, sort the higher capacity cluster before
2237 * the lower capacity cluster to start placing the tasks
2238 * on the higher capacity cluster.
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002239 */
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002240 ret = cluster1->max_power_cost > cluster2->max_power_cost ||
2241 (cluster1->max_power_cost == cluster2->max_power_cost &&
2242 cluster1->max_possible_capacity <
2243 cluster2->max_possible_capacity);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002244
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002245 return ret;
2246}
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002247
Pavankumar Kondetic5927f12017-10-11 12:36:12 +05302248static void sort_clusters(void)
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002249{
2250 struct sched_cluster *cluster;
2251 struct list_head new_head;
2252 unsigned int tmp_max = 1;
2253
2254 INIT_LIST_HEAD(&new_head);
2255
2256 for_each_sched_cluster(cluster) {
2257 cluster->max_power_cost = power_cost(cluster_first_cpu(cluster),
Pavankumar Kondetic5927f12017-10-11 12:36:12 +05302258 true);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002259 cluster->min_power_cost = power_cost(cluster_first_cpu(cluster),
Pavankumar Kondetic5927f12017-10-11 12:36:12 +05302260 false);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002261
2262 if (cluster->max_power_cost > tmp_max)
2263 tmp_max = cluster->max_power_cost;
2264 }
2265 max_power_cost = tmp_max;
2266
2267 move_list(&new_head, &cluster_head, true);
2268
2269 list_sort(NULL, &new_head, compare_clusters);
2270 assign_cluster_ids(&new_head);
2271
2272 /*
2273 * Ensure cluster ids are visible to all CPUs before making
2274 * cluster_head visible.
2275 */
2276 move_list(&cluster_head, &new_head, false);
2277}
2278
Pavankumar Kondeti59dfcb42017-11-17 16:32:07 +05302279int __read_mostly min_power_cpu;
2280
Pavankumar Kondetic5927f12017-10-11 12:36:12 +05302281void walt_sched_energy_populated_callback(void)
2282{
Pavankumar Kondeti649af692017-10-11 14:21:50 +05302283 struct sched_cluster *cluster;
Pavankumar Kondeti0d91fac2017-10-11 14:50:28 +05302284 int prev_max = 0, next_min = 0;
Pavankumar Kondeti649af692017-10-11 14:21:50 +05302285
Pavankumar Kondetic5927f12017-10-11 12:36:12 +05302286 mutex_lock(&cluster_lock);
Pavankumar Kondeti649af692017-10-11 14:21:50 +05302287
2288 if (num_clusters == 1) {
2289 sysctl_sched_is_big_little = 0;
2290 mutex_unlock(&cluster_lock);
2291 return;
2292 }
2293
Pavankumar Kondetic5927f12017-10-11 12:36:12 +05302294 sort_clusters();
Pavankumar Kondeti649af692017-10-11 14:21:50 +05302295
2296 for_each_sched_cluster(cluster) {
2297 if (cluster->min_power_cost > prev_max) {
2298 prev_max = cluster->max_power_cost;
2299 continue;
2300 }
2301 /*
2302 * We assume no overlap in the power curves of
2303 * clusters on a big.LITTLE system.
2304 */
2305 sysctl_sched_is_big_little = 0;
Pavankumar Kondeti0d91fac2017-10-11 14:50:28 +05302306 next_min = cluster->min_power_cost;
2307 }
2308
2309 /*
2310 * Find the OPP at which the lower power cluster
2311 * power is overlapping with the next cluster.
2312 */
2313 if (!sysctl_sched_is_big_little) {
2314 int cpu = cluster_first_cpu(sched_cluster[0]);
2315 struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL1];
2316 int i;
2317
2318 for (i = 1; i < sge->nr_cap_states; i++) {
2319 if (sge->cap_states[i].power >= next_min) {
2320 sched_smp_overlap_capacity =
2321 sge->cap_states[i-1].cap;
2322 break;
2323 }
2324 }
Pavankumar Kondeti59dfcb42017-11-17 16:32:07 +05302325
2326 min_power_cpu = cpu;
Pavankumar Kondeti649af692017-10-11 14:21:50 +05302327 }
2328
Pavankumar Kondetic5927f12017-10-11 12:36:12 +05302329 mutex_unlock(&cluster_lock);
2330}
2331
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002332static void update_all_clusters_stats(void)
2333{
2334 struct sched_cluster *cluster;
2335 u64 highest_mpc = 0, lowest_mpc = U64_MAX;
Vikram Mulukutlae71a8452017-02-03 12:38:53 -08002336 unsigned long flags;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002337
Vikram Mulukutlae71a8452017-02-03 12:38:53 -08002338 acquire_rq_locks_irqsave(cpu_possible_mask, &flags);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002339
2340 for_each_sched_cluster(cluster) {
2341 u64 mpc;
2342
2343 cluster->capacity = compute_capacity(cluster);
2344 mpc = cluster->max_possible_capacity =
2345 compute_max_possible_capacity(cluster);
2346 cluster->load_scale_factor = compute_load_scale_factor(cluster);
2347
2348 cluster->exec_scale_factor =
2349 DIV_ROUND_UP(cluster->efficiency * 1024,
2350 max_possible_efficiency);
2351
2352 if (mpc > highest_mpc)
2353 highest_mpc = mpc;
2354
2355 if (mpc < lowest_mpc)
2356 lowest_mpc = mpc;
2357 }
2358
2359 max_possible_capacity = highest_mpc;
2360 min_max_possible_capacity = lowest_mpc;
2361
2362 __update_min_max_capacity();
2363 sched_update_freq_max_load(cpu_possible_mask);
Vikram Mulukutlae71a8452017-02-03 12:38:53 -08002364 release_rq_locks_irqrestore(cpu_possible_mask, &flags);
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002365}
2366
2367void update_cluster_topology(void)
2368{
2369 struct cpumask cpus = *cpu_possible_mask;
2370 const struct cpumask *cluster_cpus;
2371 struct list_head new_head;
2372 int i;
2373
2374 INIT_LIST_HEAD(&new_head);
2375
2376 for_each_cpu(i, &cpus) {
2377 cluster_cpus = cpu_coregroup_mask(i);
2378 cpumask_or(&all_cluster_cpus, &all_cluster_cpus, cluster_cpus);
2379 cpumask_andnot(&cpus, &cpus, cluster_cpus);
2380 add_cluster(cluster_cpus, &new_head);
2381 }
2382
2383 assign_cluster_ids(&new_head);
2384
2385 /*
2386 * Ensure cluster ids are visible to all CPUs before making
2387 * cluster_head visible.
2388 */
2389 move_list(&cluster_head, &new_head, false);
2390 update_all_clusters_stats();
2391}
2392
2393struct sched_cluster init_cluster = {
2394 .list = LIST_HEAD_INIT(init_cluster.list),
2395 .id = 0,
2396 .max_power_cost = 1,
2397 .min_power_cost = 1,
2398 .capacity = 1024,
2399 .max_possible_capacity = 1024,
2400 .efficiency = 1,
2401 .load_scale_factor = 1024,
2402 .cur_freq = 1,
2403 .max_freq = 1,
2404 .max_mitigated_freq = UINT_MAX,
2405 .min_freq = 1,
2406 .max_possible_freq = 1,
2407 .dstate = 0,
2408 .dstate_wakeup_energy = 0,
2409 .dstate_wakeup_latency = 0,
2410 .exec_scale_factor = 1024,
2411 .notifier_sent = 0,
2412 .wake_up_idle = 0,
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05302413 .aggr_grp_load = 0,
Satya Durga Srinivasu Prabhalac18b4032018-04-17 11:21:23 -07002414 .coloc_boost_load = 0,
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002415};
2416
2417void init_clusters(void)
2418{
2419 bitmap_clear(all_cluster_ids, 0, NR_CPUS);
2420 init_cluster.cpus = *cpu_possible_mask;
2421 raw_spin_lock_init(&init_cluster.load_lock);
2422 INIT_LIST_HEAD(&cluster_head);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002423}
2424
Vikram Mulukutlad0ba1882017-02-03 12:56:26 -08002425static unsigned long cpu_max_table_freq[NR_CPUS];
2426
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002427static int cpufreq_notifier_policy(struct notifier_block *nb,
2428 unsigned long val, void *data)
2429{
2430 struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002431 struct sched_cluster *cluster = NULL;
2432 struct cpumask policy_cluster = *policy->related_cpus;
2433 unsigned int orig_max_freq = 0;
2434 int i, j, update_capacity = 0;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002435
2436 if (val != CPUFREQ_NOTIFY && val != CPUFREQ_REMOVE_POLICY &&
2437 val != CPUFREQ_CREATE_POLICY)
2438 return 0;
2439
2440 if (val == CPUFREQ_REMOVE_POLICY || val == CPUFREQ_CREATE_POLICY) {
Pavankumar Kondetif51d5392018-11-28 11:57:29 +05302441 walt_update_min_max_capacity();
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002442 return 0;
2443 }
2444
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002445 max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq);
2446 if (min_max_freq == 1)
2447 min_max_freq = UINT_MAX;
2448 min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq);
2449 BUG_ON(!min_max_freq);
2450 BUG_ON(!policy->max);
2451
Vikram Mulukutlad0ba1882017-02-03 12:56:26 -08002452 for_each_cpu(i, &policy_cluster)
2453 cpu_max_table_freq[i] = policy->cpuinfo.max_freq;
2454
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002455 for_each_cpu(i, &policy_cluster) {
2456 cluster = cpu_rq(i)->cluster;
2457 cpumask_andnot(&policy_cluster, &policy_cluster,
2458 &cluster->cpus);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002459
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002460 orig_max_freq = cluster->max_freq;
2461 cluster->min_freq = policy->min;
2462 cluster->max_freq = policy->max;
2463 cluster->cur_freq = policy->cur;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002464
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002465 if (!cluster->freq_init_done) {
2466 mutex_lock(&cluster_lock);
2467 for_each_cpu(j, &cluster->cpus)
2468 cpumask_copy(&cpu_rq(j)->freq_domain_cpumask,
2469 policy->related_cpus);
2470 cluster->max_possible_freq = policy->cpuinfo.max_freq;
2471 cluster->max_possible_capacity =
2472 compute_max_possible_capacity(cluster);
2473 cluster->freq_init_done = true;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002474
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002475 sort_clusters();
2476 update_all_clusters_stats();
2477 mutex_unlock(&cluster_lock);
2478 continue;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002479 }
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002480
2481 update_capacity += (orig_max_freq != cluster->max_freq);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002482 }
2483
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002484 if (update_capacity)
2485 update_cpu_cluster_capacity(policy->related_cpus);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002486
2487 return 0;
2488}
2489
2490static struct notifier_block notifier_policy_block = {
2491 .notifier_call = cpufreq_notifier_policy
2492};
2493
Pavankumar Kondeticac747e2017-10-20 09:54:05 +05302494static int cpufreq_notifier_trans(struct notifier_block *nb,
2495 unsigned long val, void *data)
2496{
2497 struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data;
2498 unsigned int cpu = freq->cpu, new_freq = freq->new;
2499 unsigned long flags;
2500 struct sched_cluster *cluster;
2501 struct cpumask policy_cpus = cpu_rq(cpu)->freq_domain_cpumask;
2502 int i, j;
2503
2504 if (val != CPUFREQ_POSTCHANGE)
2505 return NOTIFY_DONE;
2506
2507 if (cpu_cur_freq(cpu) == new_freq)
2508 return NOTIFY_OK;
2509
2510 for_each_cpu(i, &policy_cpus) {
2511 cluster = cpu_rq(i)->cluster;
2512
2513 if (!use_cycle_counter) {
2514 for_each_cpu(j, &cluster->cpus) {
2515 struct rq *rq = cpu_rq(j);
2516
2517 raw_spin_lock_irqsave(&rq->lock, flags);
2518 update_task_ravg(rq->curr, rq, TASK_UPDATE,
Pavankumar Kondetifaa04442018-06-25 16:13:39 +05302519 sched_ktime_clock(), 0);
Pavankumar Kondeticac747e2017-10-20 09:54:05 +05302520 raw_spin_unlock_irqrestore(&rq->lock, flags);
2521 }
2522 }
2523
2524 cluster->cur_freq = new_freq;
2525 cpumask_andnot(&policy_cpus, &policy_cpus, &cluster->cpus);
2526 }
2527
2528 return NOTIFY_OK;
2529}
2530
2531static struct notifier_block notifier_trans_block = {
2532 .notifier_call = cpufreq_notifier_trans
2533};
2534
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002535static int register_walt_callback(void)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002536{
Pavankumar Kondeticac747e2017-10-20 09:54:05 +05302537 int ret;
2538
2539 ret = cpufreq_register_notifier(&notifier_policy_block,
2540 CPUFREQ_POLICY_NOTIFIER);
2541 if (!ret)
2542 ret = cpufreq_register_notifier(&notifier_trans_block,
2543 CPUFREQ_TRANSITION_NOTIFIER);
2544
2545 return ret;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002546}
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002547/*
 2548 * cpufreq callbacks can be registered at core_initcall time or later.
2549 * Any registration done prior to that is "forgotten" by cpufreq. See
2550 * initialization of variable init_cpufreq_transition_notifier_list_called
2551 * for further information.
2552 */
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002553core_initcall(register_walt_callback);
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002554
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002555static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
2556 struct task_struct *p, int event);
2557
2558/*
2559 * Enable colocation and frequency aggregation for all threads in a process.
 2560 * Children inherit the group id from the parent.
2561 */
2562unsigned int __read_mostly sysctl_sched_enable_thread_grouping;
2563
2564/* Maximum allowed threshold before freq aggregation must be enabled */
2565#define MAX_FREQ_AGGR_THRESH 1000
2566
2567struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID];
2568static LIST_HEAD(active_related_thread_groups);
2569DEFINE_RWLOCK(related_thread_group_lock);
2570
2571unsigned int __read_mostly sysctl_sched_freq_aggregate_threshold_pct;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002572
2573/*
2574 * Task groups whose aggregate demand on a cpu is more than
2575 * sched_group_upmigrate need to be up-migrated if possible.
2576 */
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05302577unsigned int __read_mostly sched_group_upmigrate = 20000000;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002578unsigned int __read_mostly sysctl_sched_group_upmigrate_pct = 100;
2579
2580/*
2581 * Task groups, once up-migrated, will need to drop their aggregate
2582 * demand to less than sched_group_downmigrate before they are "down"
2583 * migrated.
2584 */
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05302585unsigned int __read_mostly sched_group_downmigrate = 19000000;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002586unsigned int __read_mostly sysctl_sched_group_downmigrate_pct = 95;
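/*
 * Descriptive note: the two thresholds provide hysteresis. With the defaults
 * above (20ms vs 19ms of window-scaled demand), a group is considered for a
 * higher-capacity cluster once its scaled demand exceeds sched_group_upmigrate
 * and is only moved back down after it drops below sched_group_downmigrate
 * (see group_will_fit()).
 */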
2587
2588static int
2589group_will_fit(struct sched_cluster *cluster, struct related_thread_group *grp,
2590 u64 demand, bool group_boost)
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002591{
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002592 int cpu = cluster_first_cpu(cluster);
2593 int prev_capacity = 0;
2594 unsigned int threshold = sched_group_upmigrate;
2595 u64 load;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002596
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002597 if (cluster->capacity == max_capacity)
2598 return 1;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002599
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002600 if (group_boost)
2601 return 0;
2602
2603 if (!demand)
2604 return 1;
2605
2606 if (grp->preferred_cluster)
2607 prev_capacity = grp->preferred_cluster->capacity;
2608
2609 if (cluster->capacity < prev_capacity)
2610 threshold = sched_group_downmigrate;
2611
2612 load = scale_load_to_cpu(demand, cpu);
2613 if (load < threshold)
2614 return 1;
2615
2616 return 0;
2617}
2618
2619unsigned long __weak arch_get_cpu_efficiency(int cpu)
2620{
2621 return SCHED_CAPACITY_SCALE;
2622}
2623
2624/* Return cluster which can offer required capacity for group */
2625static struct sched_cluster *best_cluster(struct related_thread_group *grp,
2626 u64 total_demand, bool group_boost)
2627{
2628 struct sched_cluster *cluster = NULL;
2629
2630 for_each_sched_cluster(cluster) {
2631 if (group_will_fit(cluster, grp, total_demand, group_boost))
2632 return cluster;
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002633 }
2634
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002635 return sched_cluster[0];
Srivatsa Vaddagiri26c21542016-05-31 09:08:38 -07002636}
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002637
2638int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p)
2639{
2640 struct related_thread_group *grp;
2641 int rc = 1;
2642
2643 rcu_read_lock();
2644
2645 grp = task_related_thread_group(p);
2646 if (grp)
2647 rc = (grp->preferred_cluster == cluster);
2648
2649 rcu_read_unlock();
2650 return rc;
2651}
2652
2653static void _set_preferred_cluster(struct related_thread_group *grp)
2654{
2655 struct task_struct *p;
2656 u64 combined_demand = 0;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002657 bool group_boost = false;
2658 u64 wallclock;
2659
2660 if (list_empty(&grp->tasks))
2661 return;
2662
Pavankumar Kondeti0a713072017-10-11 19:01:42 +05302663 if (!sysctl_sched_is_big_little) {
2664 grp->preferred_cluster = sched_cluster[0];
2665 return;
2666 }
2667
Pavankumar Kondetifaa04442018-06-25 16:13:39 +05302668 wallclock = sched_ktime_clock();
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002669
2670 /*
 2671	 * The wakeup of two or more related tasks could race with each other and
 2672	 * could result in multiple calls to _set_preferred_cluster being issued
 2673	 * at the same time. Avoid the overhead of rechecking the preferred
 2674	 * cluster in such cases.
2675 */
2676 if (wallclock - grp->last_update < sched_ravg_window / 10)
2677 return;
2678
2679 list_for_each_entry(p, &grp->tasks, grp_list) {
Abhijeet Dharmapurikar53ee4232018-06-15 09:34:34 -07002680 if (task_boost_policy(p) == SCHED_BOOST_ON_BIG) {
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002681 group_boost = true;
2682 break;
2683 }
2684
2685 if (p->ravg.mark_start < wallclock -
2686 (sched_ravg_window * sched_ravg_hist_size))
2687 continue;
2688
Syed Rameez Mustafaf3f7bf82017-04-11 17:43:48 -07002689 combined_demand += p->ravg.coloc_demand;
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002690
2691 }
2692
2693 grp->preferred_cluster = best_cluster(grp,
2694 combined_demand, group_boost);
Pavankumar Kondetifaa04442018-06-25 16:13:39 +05302695 grp->last_update = sched_ktime_clock();
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002696 trace_sched_set_preferred_cluster(grp, combined_demand);
2697}
2698
2699void set_preferred_cluster(struct related_thread_group *grp)
2700{
2701 raw_spin_lock(&grp->lock);
2702 _set_preferred_cluster(grp);
2703 raw_spin_unlock(&grp->lock);
2704}
2705
2706int update_preferred_cluster(struct related_thread_group *grp,
2707 struct task_struct *p, u32 old_load)
2708{
2709 u32 new_load = task_load(p);
2710
2711 if (!grp)
2712 return 0;
2713
2714 /*
 2715	 * Update if the task's load has changed significantly or a complete window
 2716	 * has passed since we last updated the preference.
2717 */
2718 if (abs(new_load - old_load) > sched_ravg_window / 4 ||
Pavankumar Kondetifaa04442018-06-25 16:13:39 +05302719 sched_ktime_clock() - grp->last_update > sched_ravg_window)
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08002720 return 1;
2721
2722 return 0;
2723}
2724
2725DEFINE_MUTEX(policy_mutex);
2726
2727#define pct_to_real(tunable) \
2728 (div64_u64((u64)tunable * (u64)max_task_load(), 100))
2729
2730unsigned int update_freq_aggregate_threshold(unsigned int threshold)
2731{
2732 unsigned int old_threshold;
2733
2734 mutex_lock(&policy_mutex);
2735
2736 old_threshold = sysctl_sched_freq_aggregate_threshold_pct;
2737
2738 sysctl_sched_freq_aggregate_threshold_pct = threshold;
2739 sched_freq_aggregate_threshold =
2740 pct_to_real(sysctl_sched_freq_aggregate_threshold_pct);
2741
2742 mutex_unlock(&policy_mutex);
2743
2744 return old_threshold;
2745}
2746
2747#define ADD_TASK 0
2748#define REM_TASK 1
2749
2750#define DEFAULT_CGROUP_COLOC_ID 1
2751
2752static inline struct related_thread_group*
2753lookup_related_thread_group(unsigned int group_id)
2754{
2755 return related_thread_groups[group_id];
2756}
2757
2758int alloc_related_thread_groups(void)
2759{
2760 int i, ret;
2761 struct related_thread_group *grp;
2762
 2763	/* group_id = 0 is invalid as it's the special id used to remove a task from its group. */
2764 for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
2765 grp = kzalloc(sizeof(*grp), GFP_NOWAIT);
2766 if (!grp) {
2767 ret = -ENOMEM;
2768 goto err;
2769 }
2770
2771 grp->id = i;
2772 INIT_LIST_HEAD(&grp->tasks);
2773 INIT_LIST_HEAD(&grp->list);
2774 raw_spin_lock_init(&grp->lock);
2775
2776 related_thread_groups[i] = grp;
2777 }
2778
2779 return 0;
2780
2781err:
2782 for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
2783 grp = lookup_related_thread_group(i);
2784 if (grp) {
2785 kfree(grp);
2786 related_thread_groups[i] = NULL;
2787 } else {
2788 break;
2789 }
2790 }
2791
2792 return ret;
2793}
2794
2795static void remove_task_from_group(struct task_struct *p)
2796{
2797 struct related_thread_group *grp = p->grp;
2798 struct rq *rq;
2799 int empty_group = 1;
2800 struct rq_flags rf;
2801
2802 raw_spin_lock(&grp->lock);
2803
2804 rq = __task_rq_lock(p, &rf);
2805 transfer_busy_time(rq, p->grp, p, REM_TASK);
2806 list_del_init(&p->grp_list);
2807 rcu_assign_pointer(p->grp, NULL);
2808 __task_rq_unlock(rq, &rf);
2809
2810
2811 if (!list_empty(&grp->tasks)) {
2812 empty_group = 0;
2813 _set_preferred_cluster(grp);
2814 }
2815
2816 raw_spin_unlock(&grp->lock);
2817
2818 /* Reserved groups cannot be destroyed */
2819 if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID)
2820 /*
2821 * We test whether grp->list is attached with list_empty()
2822 * hence re-init the list after deletion.
2823 */
2824 list_del_init(&grp->list);
2825}
2826
2827static int
2828add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
2829{
2830 struct rq *rq;
2831 struct rq_flags rf;
2832
2833 raw_spin_lock(&grp->lock);
2834
2835 /*
 2836	 * Change p->grp under rq->lock. This prevents races with read-side
 2837	 * references of p->grp in various hot paths.
2838 */
2839 rq = __task_rq_lock(p, &rf);
2840 transfer_busy_time(rq, grp, p, ADD_TASK);
2841 list_add(&p->grp_list, &grp->tasks);
2842 rcu_assign_pointer(p->grp, grp);
2843 __task_rq_unlock(rq, &rf);
2844
2845 _set_preferred_cluster(grp);
2846
2847 raw_spin_unlock(&grp->lock);
2848
2849 return 0;
2850}
2851
2852void add_new_task_to_grp(struct task_struct *new)
2853{
2854 unsigned long flags;
2855 struct related_thread_group *grp;
2856 struct task_struct *leader = new->group_leader;
2857 unsigned int leader_grp_id = sched_get_group_id(leader);
2858
2859 if (!sysctl_sched_enable_thread_grouping &&
2860 leader_grp_id != DEFAULT_CGROUP_COLOC_ID)
2861 return;
2862
2863 if (thread_group_leader(new))
2864 return;
2865
2866 if (leader_grp_id == DEFAULT_CGROUP_COLOC_ID) {
2867 if (!same_schedtune(new, leader))
2868 return;
2869 }
2870
2871 write_lock_irqsave(&related_thread_group_lock, flags);
2872
2873 rcu_read_lock();
2874 grp = task_related_thread_group(leader);
2875 rcu_read_unlock();
2876
2877 /*
2878 * It's possible that someone already added the new task to the
2879 * group. A leader's thread group is updated prior to calling
2880 * this function. It's also possible that the leader has exited
2881 * the group. In either case, there is nothing else to do.
2882 */
2883 if (!grp || new->grp) {
2884 write_unlock_irqrestore(&related_thread_group_lock, flags);
2885 return;
2886 }
2887
2888 raw_spin_lock(&grp->lock);
2889
2890 rcu_assign_pointer(new->grp, grp);
2891 list_add(&new->grp_list, &grp->tasks);
2892
2893 raw_spin_unlock(&grp->lock);
2894 write_unlock_irqrestore(&related_thread_group_lock, flags);
2895}
2896
2897static int __sched_set_group_id(struct task_struct *p, unsigned int group_id)
2898{
2899 int rc = 0;
2900 unsigned long flags;
2901 struct related_thread_group *grp = NULL;
2902
2903 if (group_id >= MAX_NUM_CGROUP_COLOC_ID)
2904 return -EINVAL;
2905
2906 raw_spin_lock_irqsave(&p->pi_lock, flags);
2907 write_lock(&related_thread_group_lock);
2908
2909 /* Switching from one group to another directly is not permitted */
2910 if ((current != p && p->flags & PF_EXITING) ||
2911 (!p->grp && !group_id) ||
2912 (p->grp && group_id))
2913 goto done;
2914
2915 if (!group_id) {
2916 remove_task_from_group(p);
2917 goto done;
2918 }
2919
2920 grp = lookup_related_thread_group(group_id);
2921 if (list_empty(&grp->list))
2922 list_add(&grp->list, &active_related_thread_groups);
2923
2924 rc = add_task_to_group(p, grp);
2925done:
2926 write_unlock(&related_thread_group_lock);
2927 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2928
2929 return rc;
2930}
2931
2932int sched_set_group_id(struct task_struct *p, unsigned int group_id)
2933{
2934 /* DEFAULT_CGROUP_COLOC_ID is a reserved id */
2935 if (group_id == DEFAULT_CGROUP_COLOC_ID)
2936 return -EINVAL;
2937
2938 return __sched_set_group_id(p, group_id);
2939}
2940
2941unsigned int sched_get_group_id(struct task_struct *p)
2942{
2943 unsigned int group_id;
2944 struct related_thread_group *grp;
2945
2946 rcu_read_lock();
2947 grp = task_related_thread_group(p);
2948 group_id = grp ? grp->id : 0;
2949 rcu_read_unlock();
2950
2951 return group_id;
2952}
2953
2954#if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE)
2955/*
2956 * We create a default colocation group at boot. There is no need to
2957 * synchronize tasks between cgroups at creation time because the
2958 * correct cgroup hierarchy is not available at boot. Therefore cgroup
2959 * colocation is turned off by default even though the colocation group
 2960 * itself has been allocated. Furthermore, this colocation group cannot
 2961 * be destroyed once it has been created. All of this has been done as
 2962 * part of runtime optimizations.
2963 *
2964 * The job of synchronizing tasks to the colocation group is done when
2965 * the colocation flag in the cgroup is turned on.
2966 */
2967static int __init create_default_coloc_group(void)
2968{
2969 struct related_thread_group *grp = NULL;
2970 unsigned long flags;
2971
2972 grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
2973 write_lock_irqsave(&related_thread_group_lock, flags);
2974 list_add(&grp->list, &active_related_thread_groups);
2975 write_unlock_irqrestore(&related_thread_group_lock, flags);
2976
2977 update_freq_aggregate_threshold(MAX_FREQ_AGGR_THRESH);
2978 return 0;
2979}
2980late_initcall(create_default_coloc_group);
2981
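/*
 * Map cgroup-driven colocation onto the reserved default group:
 * insert == true places @p in DEFAULT_CGROUP_COLOC_ID, insert == false
 * removes it again (group id 0).
 */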
2982int sync_cgroup_colocation(struct task_struct *p, bool insert)
2983{
2984 unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0;
2985
2986 return __sched_set_group_id(p, grp_id);
2987}
2988#endif
2989
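/*
 * Recompute capacity and load_scale_factor for every cluster that has at
 * least one CPU in @cpus. Each iteration strips the current cluster's
 * CPUs from the working mask, so every affected cluster is visited once
 * while all rq locks are held.
 */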
Vikram Mulukutlad0ba1882017-02-03 12:56:26 -08002990void update_cpu_cluster_capacity(const cpumask_t *cpus)
2991{
2992 int i;
2993 struct sched_cluster *cluster;
2994 struct cpumask cpumask;
2995 unsigned long flags;
2996
2997 cpumask_copy(&cpumask, cpus);
2998 acquire_rq_locks_irqsave(cpu_possible_mask, &flags);
2999
3000 for_each_cpu(i, &cpumask) {
3001 cluster = cpu_rq(i)->cluster;
3002 cpumask_andnot(&cpumask, &cpumask, &cluster->cpus);
3003
3004 cluster->capacity = compute_capacity(cluster);
3005 cluster->load_scale_factor = compute_load_scale_factor(cluster);
3006 }
3007
3008 __update_min_max_capacity();
3009
3010 release_rq_locks_irqrestore(cpu_possible_mask, &flags);
3011}
3012
3013static unsigned long max_cap[NR_CPUS];
3014static unsigned long thermal_cap_cpu[NR_CPUS];
3015
3016unsigned long thermal_cap(int cpu)
3017{
Pavankumar Kondetif85a9de2018-01-11 13:46:20 +05303018 return thermal_cap_cpu[cpu] ?: SCHED_CAPACITY_SCALE;
Vikram Mulukutlad0ba1882017-02-03 12:56:26 -08003019}
3020
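/*
 * Scale a CPU's maximum capacity by the thermally mitigated maximum
 * frequency. The highest capacity state is read once from the energy
 * model and cached in max_cap[]. Worked example with illustrative
 * numbers (not taken from this file): for a CPU whose highest OPP is
 * 2000000 kHz with max_cap 1024, a thermal limit of 1500000 kHz yields
 * 1500000 * 1024 / 2000000 = 768, i.e. capacity scales linearly with
 * the mitigated maximum frequency.
 */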
3021unsigned long do_thermal_cap(int cpu, unsigned long thermal_max_freq)
3022{
3023 struct sched_domain *sd;
3024 struct sched_group *sg;
3025 struct rq *rq = cpu_rq(cpu);
3026 int nr_cap_states;
3027
3028 if (!max_cap[cpu]) {
3029 rcu_read_lock();
3030 sd = rcu_dereference(per_cpu(sd_ea, cpu));
3031 if (!sd || !sd->groups || !sd->groups->sge ||
3032 !sd->groups->sge->cap_states) {
3033 rcu_read_unlock();
3034 return rq->cpu_capacity_orig;
3035 }
3036 sg = sd->groups;
3037 nr_cap_states = sg->sge->nr_cap_states;
3038 max_cap[cpu] = sg->sge->cap_states[nr_cap_states - 1].cap;
3039 rcu_read_unlock();
3040 }
3041
Vikram Mulukutla17112232017-10-13 18:29:14 -07003042 if (cpu_max_table_freq[cpu])
Vikram Mulukutlad0ba1882017-02-03 12:56:26 -08003043 return div64_ul(thermal_max_freq * max_cap[cpu],
3044 cpu_max_table_freq[cpu]);
Vikram Mulukutla17112232017-10-13 18:29:14 -07003045 else
Vikram Mulukutlad0ba1882017-02-03 12:56:26 -08003046 return rq->cpu_capacity_orig;
Vikram Mulukutlad0ba1882017-02-03 12:56:26 -08003047}
3048
3049static DEFINE_SPINLOCK(cpu_freq_min_max_lock);
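/*
 * Record the mitigated fmax for every cluster covered by @cpus and
 * refresh the per-CPU thermal caps. Cluster capacities are recomputed
 * only when a cluster's mitigated maximum actually changed, which keeps
 * the common no-change path cheap.
 */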
3050void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax)
3051{
3052 struct cpumask cpumask;
3053 struct sched_cluster *cluster;
3054 int i, update_capacity = 0;
3055 unsigned long flags;
3056
3057 spin_lock_irqsave(&cpu_freq_min_max_lock, flags);
3058 cpumask_copy(&cpumask, cpus);
3059
3060 for_each_cpu(i, &cpumask)
3061 thermal_cap_cpu[i] = do_thermal_cap(i, fmax);
3062
3063 for_each_cpu(i, &cpumask) {
3064 cluster = cpu_rq(i)->cluster;
3065 cpumask_andnot(&cpumask, &cpumask, &cluster->cpus);
3066 update_capacity += (cluster->max_mitigated_freq != fmax);
3067 cluster->max_mitigated_freq = fmax;
3068 }
3069 spin_unlock_irqrestore(&cpu_freq_min_max_lock, flags);
3070
3071 if (update_capacity)
3072 update_cpu_cluster_capacity(cpus);
3073}
3074
Vikram Mulukutlac7b54b82017-07-12 11:34:54 -07003075void note_task_waking(struct task_struct *p, u64 wallclock)
3076{
3077 p->last_wake_ts = wallclock;
3078}
3079
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08003080/*
3081 * Task's cpu usage is accounted in:
3082 * rq->curr/prev_runnable_sum, when its ->grp is NULL
3083 * grp->cpu_time[cpu]->curr/prev_runnable_sum, when its ->grp is !NULL
3084 *
3085 * Transfer the task's cpu usage between those counters when it
3086 * transitions between groups.
3087 */
3088static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
3089 struct task_struct *p, int event)
3090{
3091 u64 wallclock;
3092 struct group_cpu_time *cpu_time;
3093 u64 *src_curr_runnable_sum, *dst_curr_runnable_sum;
3094 u64 *src_prev_runnable_sum, *dst_prev_runnable_sum;
3095 u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum;
3096 u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum;
3097 int migrate_type;
3098 int cpu = cpu_of(rq);
3099 bool new_task;
3100 int i;
3101
Pavankumar Kondetifaa04442018-06-25 16:13:39 +05303102 wallclock = sched_ktime_clock();
Joonwoo Parkf7d6cd42017-01-17 15:19:43 -08003103
3104 update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
3105 update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0);
3106 new_task = is_new_task(p);
3107
3108 cpu_time = &rq->grp_time;
3109 if (event == ADD_TASK) {
3110 migrate_type = RQ_TO_GROUP;
3111
3112 src_curr_runnable_sum = &rq->curr_runnable_sum;
3113 dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
3114 src_prev_runnable_sum = &rq->prev_runnable_sum;
3115 dst_prev_runnable_sum = &cpu_time->prev_runnable_sum;
3116
3117 src_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum;
3118 dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
3119 src_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;
3120 dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
3121
3122 *src_curr_runnable_sum -= p->ravg.curr_window_cpu[cpu];
3123 *src_prev_runnable_sum -= p->ravg.prev_window_cpu[cpu];
3124 if (new_task) {
3125 *src_nt_curr_runnable_sum -=
3126 p->ravg.curr_window_cpu[cpu];
3127 *src_nt_prev_runnable_sum -=
3128 p->ravg.prev_window_cpu[cpu];
3129 }
3130
3131 update_cluster_load_subtractions(p, cpu,
3132 rq->window_start, new_task);
3133
3134 } else {
3135 migrate_type = GROUP_TO_RQ;
3136
3137 src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
3138 dst_curr_runnable_sum = &rq->curr_runnable_sum;
3139 src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
3140 dst_prev_runnable_sum = &rq->prev_runnable_sum;
3141
3142 src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
3143 dst_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum;
3144 src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
3145 dst_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;
3146
3147 *src_curr_runnable_sum -= p->ravg.curr_window;
3148 *src_prev_runnable_sum -= p->ravg.prev_window;
3149 if (new_task) {
3150 *src_nt_curr_runnable_sum -= p->ravg.curr_window;
3151 *src_nt_prev_runnable_sum -= p->ravg.prev_window;
3152 }
3153
3154 /*
3155 * Need to reset curr/prev windows for all CPUs, not just the
3156		 * ones in the same cluster. Since inter-cluster migrations
3157		 * did not result in the appropriate bookkeeping, the
3158		 * per-CPU values would be inaccurate.
3159 */
3160 for_each_possible_cpu(i) {
3161 p->ravg.curr_window_cpu[i] = 0;
3162 p->ravg.prev_window_cpu[i] = 0;
3163 }
3164 }
3165
3166 *dst_curr_runnable_sum += p->ravg.curr_window;
3167 *dst_prev_runnable_sum += p->ravg.prev_window;
3168 if (new_task) {
3169 *dst_nt_curr_runnable_sum += p->ravg.curr_window;
3170 *dst_nt_prev_runnable_sum += p->ravg.prev_window;
3171 }
3172
3173 /*
3174	 * When a task enters or exits a group, its curr and prev windows are
3175	 * moved to a single CPU. This behavior might be sub-optimal in the
3176	 * exit case; however, it saves us the overhead of handling
3177	 * inter-cluster migration fixups while the task is part of a related group.
3178 */
3179 p->ravg.curr_window_cpu[cpu] = p->ravg.curr_window;
3180 p->ravg.prev_window_cpu[cpu] = p->ravg.prev_window;
3181
3182 trace_sched_migration_update_sum(p, migrate_type, rq);
3183
3184 BUG_ON((s64)*src_curr_runnable_sum < 0);
3185 BUG_ON((s64)*src_prev_runnable_sum < 0);
3186 BUG_ON((s64)*src_nt_curr_runnable_sum < 0);
3187 BUG_ON((s64)*src_nt_prev_runnable_sum < 0);
3188}
3189
Satya Durga Srinivasu Prabhalac18b4032018-04-17 11:21:23 -07003190unsigned int sysctl_sched_little_cluster_coloc_fmin_khz;
3191static u64 coloc_boost_load;
3192
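/*
 * Translate the sysctl_sched_little_cluster_coloc_fmin_khz tunable into a
 * window-load value for the minimum-capacity cluster:
 *
 *	load = window * capacity(fcpu) * fmin_khz / (1024 * fmax_khz)
 *
 * scaled down to 4/5 (80%) of that. Worked example with purely
 * illustrative numbers: with a 20000000 ns window, a little-CPU capacity
 * of 512, fmin = 900000 kHz and a maximum possible frequency of
 * 1800000 kHz, the intermediate value is
 * 20000000 * 512 * 900000 / (1024 * 1800000) = 5000000, and the final
 * coloc_boost_load is 5000000 * 4 / 5 = 4000000.
 */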
3193void walt_map_freq_to_load(void)
3194{
3195 struct sched_cluster *cluster;
3196
3197 for_each_sched_cluster(cluster) {
3198 if (is_min_capacity_cluster(cluster)) {
3199 int fcpu = cluster_first_cpu(cluster);
3200
3201 coloc_boost_load = div64_u64(
3202 ((u64)sched_ravg_window *
3203 arch_scale_cpu_capacity(NULL, fcpu) *
3204 sysctl_sched_little_cluster_coloc_fmin_khz),
3205 (u64)1024 * cpu_max_possible_freq(fcpu));
3206 coloc_boost_load = div64_u64(coloc_boost_load << 2, 5);
3207 break;
3208 }
3209 }
3210}
3211
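/*
 * Apply coloc_boost_load to the minimum-capacity cluster, but only when
 * the default colocation group exists and prefers a non-minimum-capacity
 * cluster. The boost is skipped entirely when the fmin sysctl is zero or
 * conservative boost is active; walt_irq_work() clears the per-cluster
 * value before this is called.
 */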
3212static void walt_update_coloc_boost_load(void)
3213{
3214 struct related_thread_group *grp;
3215 struct sched_cluster *cluster;
3216
3217 if (!sysctl_sched_little_cluster_coloc_fmin_khz ||
3218 sysctl_sched_boost == CONSERVATIVE_BOOST)
3219 return;
3220
3221 grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
3222 if (!grp || !grp->preferred_cluster ||
3223 is_min_capacity_cluster(grp->preferred_cluster))
3224 return;
3225
3226 for_each_sched_cluster(cluster) {
3227 if (is_min_capacity_cluster(cluster)) {
3228 cluster->coloc_boost_load = coloc_boost_load;
3229 break;
3230 }
3231 }
3232}
3233
3234int sched_little_cluster_coloc_fmin_khz_handler(struct ctl_table *table,
3235 int write, void __user *buffer, size_t *lenp,
3236 loff_t *ppos)
3237{
3238 int ret;
3239 static DEFINE_MUTEX(mutex);
3240
3241 mutex_lock(&mutex);
3242
3243 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
3244 if (ret || !write)
3245 goto done;
3246
3247 walt_map_freq_to_load();
3248
3249done:
3250 mutex_unlock(&mutex);
3251 return ret;
3252}
3253
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05303254/*
3255 * Runs in hard-irq context. This should ideally run just after the latest
3256 * window roll-over.
3257 */
3258void walt_irq_work(struct irq_work *irq_work)
3259{
Syed Rameez Mustafa25de0112017-05-10 12:09:15 -07003260 struct sched_cluster *cluster;
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05303261 struct rq *rq;
3262 int cpu;
Satya Durga Srinivasu Prabhalac18b4032018-04-17 11:21:23 -07003263 u64 wc, total_grp_load = 0;
Vikram Mulukutlaaa817bc2017-06-20 12:16:58 -07003264 int flag = SCHED_CPUFREQ_WALT;
Pavankumar Kondeti4d5dd1c2018-03-19 10:47:09 +05303265 bool is_migration = false;
Joel Fernandes (Google)13947572018-06-18 11:36:21 -07003266 int level = 0;
Vikram Mulukutlaaa817bc2017-06-20 12:16:58 -07003267
3268 /* Am I the window rollover work or the migration work? */
3269 if (irq_work == &walt_migration_irq_work)
Pavankumar Kondeti4d5dd1c2018-03-19 10:47:09 +05303270 is_migration = true;
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05303271
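	/*
	 * Take every possible CPU's rq lock, in cpu_possible_mask order,
	 * using raw_spin_lock_nested() with an increasing subclass so that
	 * lockdep does not complain about nested locks of the same class.
	 */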
Joel Fernandes (Google)13947572018-06-18 11:36:21 -07003272 for_each_cpu(cpu, cpu_possible_mask) {
3273 if (level == 0)
3274 raw_spin_lock(&cpu_rq(cpu)->lock);
3275 else
3276 raw_spin_lock_nested(&cpu_rq(cpu)->lock, level);
3277 level++;
3278 }
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05303279
Pavankumar Kondetifaa04442018-06-25 16:13:39 +05303280 wc = sched_ktime_clock();
Lingutla Chandrasekhard60cdac2018-05-25 15:22:59 +05303281 walt_load_reported_window = atomic64_read(&walt_irq_work_lastq_ws);
Pavankumar Kondetifaa04442018-06-25 16:13:39 +05303282
Syed Rameez Mustafa25de0112017-05-10 12:09:15 -07003283 for_each_sched_cluster(cluster) {
Pavankumar Kondeti07b40c52017-06-23 16:17:20 +05303284 u64 aggr_grp_load = 0;
3285
Syed Rameez Mustafa25de0112017-05-10 12:09:15 -07003286 raw_spin_lock(&cluster->load_lock);
3287
3288 for_each_cpu(cpu, &cluster->cpus) {
3289 rq = cpu_rq(cpu);
3290 if (rq->curr) {
3291 update_task_ravg(rq->curr, rq,
3292 TASK_UPDATE, wc, 0);
3293 account_load_subtractions(rq);
Pavankumar Kondeti07b40c52017-06-23 16:17:20 +05303294 aggr_grp_load += rq->grp_time.prev_runnable_sum;
Syed Rameez Mustafa25de0112017-05-10 12:09:15 -07003295 }
Syed Rameez Mustafa25de0112017-05-10 12:09:15 -07003296 }
3297
Pavankumar Kondeti07b40c52017-06-23 16:17:20 +05303298 cluster->aggr_grp_load = aggr_grp_load;
Satya Durga Srinivasu Prabhalac18b4032018-04-17 11:21:23 -07003299		total_grp_load += aggr_grp_load;
3300 cluster->coloc_boost_load = 0;
Pavankumar Kondeti07b40c52017-06-23 16:17:20 +05303301
Syed Rameez Mustafa25de0112017-05-10 12:09:15 -07003302 raw_spin_unlock(&cluster->load_lock);
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05303303 }
3304
Satya Durga Srinivasu Prabhalac18b4032018-04-17 11:21:23 -07003305 if (total_grp_load)
3306 walt_update_coloc_boost_load();
3307
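	/*
	 * With the per-cluster aggregates updated, kick cpufreq on every
	 * CPU. For the migration work, tag the update so the governor can
	 * tell an inter-cluster migration (or a forced re-evaluation) apart
	 * from a plain window rollover.
	 */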
Pavankumar Kondeti4d5dd1c2018-03-19 10:47:09 +05303308 for_each_sched_cluster(cluster) {
3309 for_each_cpu(cpu, &cluster->cpus) {
3310 int nflag = flag;
3311
3312 rq = cpu_rq(cpu);
3313
3314 if (is_migration) {
3315 if (rq->notif_pending) {
3316 nflag |= SCHED_CPUFREQ_INTERCLUSTER_MIG;
3317 rq->notif_pending = false;
3318 } else {
3319 nflag |= SCHED_CPUFREQ_FORCE_UPDATE;
3320 }
3321 }
3322
3323 cpufreq_update_util(rq, nflag);
3324 }
3325 }
Pavankumar Kondeti07b40c52017-06-23 16:17:20 +05303326
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05303327 for_each_cpu(cpu, cpu_possible_mask)
3328 raw_spin_unlock(&cpu_rq(cpu)->lock);
3329
Pavankumar Kondeti4d5dd1c2018-03-19 10:47:09 +05303330 if (!is_migration)
Vikram Mulukutlaaa817bc2017-06-20 12:16:58 -07003331 core_ctl_check(this_rq()->window_start);
Syed Rameez Mustafa20acfe72017-01-30 09:35:46 +05303332}
Joonwoo Park6f188472017-06-16 11:32:17 -07003333
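/*
 * Enable WALT's big-task rotation only on asymmetric (hmp_capable())
 * systems, when the rotate sysctl is set, no sched_boost is active, and
 * there are at least as many big tasks as possible CPUs.
 */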
Pavankumar Kondeti4e13d112018-01-25 01:12:08 +05303334void walt_rotation_checkpoint(int nr_big)
3335{
3336 if (!hmp_capable())
3337 return;
3338
3339 if (!sysctl_sched_walt_rotate_big_tasks || sched_boost() != NO_BOOST) {
3340 walt_rotation_enabled = 0;
3341 return;
3342 }
3343
3344 walt_rotation_enabled = nr_big >= num_possible_cpus();
3345}
3346
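/*
 * sysctl handler that keeps the absolute group up/down-migrate
 * thresholds in sync with their percentage tunables. A local mutex
 * serializes concurrent writers.
 */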
Joonwoo Park6f188472017-06-16 11:32:17 -07003347int walt_proc_update_handler(struct ctl_table *table, int write,
3348 void __user *buffer, size_t *lenp,
3349 loff_t *ppos)
3350{
3351 int ret;
3352 unsigned int *data = (unsigned int *)table->data;
3353 static DEFINE_MUTEX(mutex);
3354
3355 mutex_lock(&mutex);
3356 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
3357 if (ret || !write) {
3358 mutex_unlock(&mutex);
3359 return ret;
3360 }
3361
3362 if (data == &sysctl_sched_group_upmigrate_pct)
3363 sched_group_upmigrate =
3364 pct_to_real(sysctl_sched_group_upmigrate_pct);
3365 else if (data == &sysctl_sched_group_downmigrate_pct)
3366 sched_group_downmigrate =
3367 pct_to_real(sysctl_sched_group_downmigrate_pct);
3368 else
3369 ret = -EINVAL;
3370 mutex_unlock(&mutex);
3371
3372 return ret;
3373}
Vikram Mulukutlae625d402017-07-17 12:31:52 -07003374
3375void walt_sched_init(struct rq *rq)
3376{
3377 int j;
3378
3379 cpumask_set_cpu(cpu_of(rq), &rq->freq_domain_cpumask);
Vikram Mulukutlaaa817bc2017-06-20 12:16:58 -07003380 init_irq_work(&walt_migration_irq_work, walt_irq_work);
3381 init_irq_work(&walt_cpufreq_irq_work, walt_irq_work);
Pavankumar Kondeti4e13d112018-01-25 01:12:08 +05303382 walt_rotate_work_init();
3383
Pavankumar Kondeti84f72d72017-07-20 11:00:45 +05303384 rq->walt_stats.cumulative_runnable_avg = 0;
Vikram Mulukutlae625d402017-07-17 12:31:52 -07003385 rq->window_start = 0;
3386 rq->cum_window_start = 0;
Pavankumar Kondeti84f72d72017-07-20 11:00:45 +05303387 rq->walt_stats.nr_big_tasks = 0;
3388 rq->walt_flags = 0;
Vikram Mulukutlae625d402017-07-17 12:31:52 -07003389 rq->cur_irqload = 0;
3390 rq->avg_irqload = 0;
3391 rq->irqload_ts = 0;
3392 rq->static_cpu_pwr_cost = 0;
3393 rq->cc.cycles = 1;
3394 rq->cc.time = 1;
3395 rq->cstate = 0;
3396 rq->wakeup_latency = 0;
3397 rq->wakeup_energy = 0;
3398
3399 /*
3400	 * All CPUs are part of the same cluster by default. This avoids the
3401 * need to check for rq->cluster being non-NULL in hot-paths
3402 * like select_best_cpu()
3403 */
3404 rq->cluster = &init_cluster;
3405 rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
3406 rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
3407 memset(&rq->grp_time, 0, sizeof(struct group_cpu_time));
3408 rq->old_busy_time = 0;
3409 rq->old_estimated_time = 0;
3410 rq->old_busy_time_group = 0;
Pavankumar Kondeti84f72d72017-07-20 11:00:45 +05303411 rq->walt_stats.pred_demands_sum = 0;
Vikram Mulukutlae625d402017-07-17 12:31:52 -07003412 rq->ed_task = NULL;
3413 rq->curr_table = 0;
3414 rq->prev_top = 0;
3415 rq->curr_top = 0;
Vikram Mulukutla77ecebb2017-05-30 14:38:55 -07003416 rq->last_cc_update = 0;
3417 rq->cycles = 0;
Vikram Mulukutlae625d402017-07-17 12:31:52 -07003418 for (j = 0; j < NUM_TRACKED_WINDOWS; j++) {
3419 memset(&rq->load_subs[j], 0,
3420 sizeof(struct load_subtractions));
3421 rq->top_tasks[j] = kcalloc(NUM_LOAD_INDICES,
3422 sizeof(u8), GFP_NOWAIT);
3423 /* No other choice */
3424 BUG_ON(!rq->top_tasks[j]);
3425 clear_top_tasks_bitmap(rq->top_tasks_bitmap[j]);
3426 }
3427 rq->cum_window_demand = 0;
Pavankumar Kondeti4d5dd1c2018-03-19 10:47:09 +05303428 rq->notif_pending = false;
Joonwoo Park858d5752017-08-21 12:09:49 -07003429
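	/*
	 * Pre-compute the divisor used when scaling a window's busy time:
	 * (sched_ravg_window >> SCHED_CAPACITY_SHIFT) * 100. With an
	 * illustrative 20 ms window this is (20000000 >> 10) * 100 =
	 * 1953100.
	 */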
3430 walt_cpu_util_freq_divisor =
3431 (sched_ravg_window >> SCHED_CAPACITY_SHIFT) * 100;
Vikram Mulukutlae625d402017-07-17 12:31:52 -07003432}