Merge "Merge remote-tracking branch 'dev/msm-4.9-sched' into msm-4.9"
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 41e9107..62d32bc 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -28,6 +28,8 @@
#ifdef CONFIG_CPU_FREQ
#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
+#define arch_scale_max_freq_capacity cpufreq_scale_max_freq_capacity
+#define arch_scale_min_freq_capacity cpufreq_scale_min_freq_capacity
#endif
#define arch_scale_cpu_capacity scale_cpu_capacity
extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu);
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index dad9fcb..aa8fc3f 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -55,13 +55,7 @@
unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
-#ifdef CONFIG_CPU_FREQ
- unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu);
-
- return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT;
-#else
return per_cpu(cpu_scale, cpu);
-#endif
}
static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
diff --git a/arch/arm64/boot/dts/qcom/sdm845-v2.dtsi b/arch/arm64/boot/dts/qcom/sdm845-v2.dtsi
index 1551952..575cf12 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-v2.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-v2.dtsi
@@ -430,7 +430,7 @@
1766400 160
>;
idle-cost-data = <
- 22 18 14 12
+ 10 8 6 4
>;
};
CPU_COST_1: core-cost1 {
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index b58f429..866c518 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -36,7 +36,10 @@
#ifdef CONFIG_CPU_FREQ
#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
extern unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu);
-extern unsigned long cpufreq_scale_max_freq_capacity(int cpu);
+#define arch_scale_max_freq_capacity cpufreq_scale_max_freq_capacity
+extern unsigned long cpufreq_scale_max_freq_capacity(struct sched_domain *sd, int cpu);
+#define arch_scale_min_freq_capacity cpufreq_scale_min_freq_capacity
+extern unsigned long cpufreq_scale_min_freq_capacity(struct sched_domain *sd, int cpu);
#endif
#define arch_scale_cpu_capacity scale_cpu_capacity
extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu);
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index f2fe96f..e3bc878 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -50,13 +50,7 @@
unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
-#ifdef CONFIG_CPU_FREQ
- unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu);
-
- return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT;
-#else
return per_cpu(cpu_scale, cpu);
-#endif
}
static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index bb540a5..597aa57 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -102,14 +102,6 @@
governor. If unsure have a look at the help section of the
driver. Fallback governor will be the performance governor.
-config CPU_FREQ_DEFAULT_GOV_SCHED
- bool "sched"
- select CPU_FREQ_GOV_SCHED
- help
- Use the CPUfreq governor 'sched' as default. This scales
- cpu frequency using CPU utilization estimates from the
- scheduler.
-
config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
bool "interactive"
select CPU_FREQ_GOV_INTERACTIVE
@@ -238,19 +230,6 @@
If in doubt, say N.
-config CPU_FREQ_GOV_SCHED
- bool "'sched' cpufreq governor"
- depends on CPU_FREQ
- depends on SMP
- select CPU_FREQ_GOV_COMMON
- help
- 'sched' - this governor scales cpu frequency from the
- scheduler as a function of cpu capacity utilization. It does
- not evaluate utilization on a periodic basis (as ondemand
- does) but instead is event-driven by the scheduler.
-
- If in doubt, say N.
-
config CPU_FREQ_GOV_SCHEDUTIL
bool "'schedutil' cpufreq policy governor"
depends on CPU_FREQ && SMP
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ff72d8a..8059ef9 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -325,33 +325,24 @@
*********************************************************************/
static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_PER_CPU(unsigned long, max_freq_cpu);
static DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_PER_CPU(unsigned long, min_freq_scale);
static void
-scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs)
+scale_freq_capacity(const cpumask_t *cpus, unsigned long cur_freq,
+ unsigned long max_freq)
{
- unsigned long cur = freqs ? freqs->new : policy->cur;
- unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max;
- struct cpufreq_cpuinfo *cpuinfo = &policy->cpuinfo;
+ unsigned long scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
int cpu;
- pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n",
- cpumask_pr_args(policy->cpus), cur, policy->max, scale);
-
- for_each_cpu(cpu, policy->cpus)
+ for_each_cpu(cpu, cpus) {
per_cpu(freq_scale, cpu) = scale;
+ per_cpu(max_freq_cpu, cpu) = max_freq;
+ }
- if (freqs)
- return;
-
- scale = (policy->max << SCHED_CAPACITY_SHIFT) / cpuinfo->max_freq;
-
- pr_debug("cpus %*pbl cur max/max freq %u/%u kHz max freq scale %lu\n",
- cpumask_pr_args(policy->cpus), policy->max, cpuinfo->max_freq,
- scale);
-
- for_each_cpu(cpu, policy->cpus)
- per_cpu(max_freq_scale, cpu) = scale;
+ pr_debug("cpus %*pbl cur freq/max freq %lu/%lu kHz freq scale %lu\n",
+ cpumask_pr_args(cpus), cur_freq, max_freq, scale);
}
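
For reference, the scale stored here is plain cur/max in the scheduler's 10-bit fixed point. A minimal standalone sketch of the arithmetic, assuming SCHED_CAPACITY_SHIFT == 10 as in mainline (the demo_* names are illustrative, not part of this patch):

    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10

    /* Same fixed-point ratio scale_freq_capacity() stores in freq_scale. */
    static unsigned long demo_freq_scale(unsigned long cur_khz, unsigned long max_khz)
    {
        return (cur_khz << SCHED_CAPACITY_SHIFT) / max_khz;
    }

    int main(void)
    {
        /* A CPU with a 1766400 kHz hardware max currently at 1209600 kHz. */
        printf("freq_scale = %lu\n", demo_freq_scale(1209600, 1766400));
        /* -> 701, i.e. ~68% of SCHED_CAPACITY_SCALE (1024). */
        return 0;
    }
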
unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu)
@@ -359,11 +350,62 @@
return per_cpu(freq_scale, cpu);
}
-unsigned long cpufreq_scale_max_freq_capacity(int cpu)
+static void
+scale_max_freq_capacity(const cpumask_t *cpus, unsigned long policy_max_freq)
+{
+ unsigned long scale, max_freq;
+ int cpu = cpumask_first(cpus);
+
+ if (cpu >= nr_cpu_ids)
+ return;
+
+ max_freq = per_cpu(max_freq_cpu, cpu);
+
+ if (!max_freq)
+ return;
+
+ scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq;
+
+ for_each_cpu(cpu, cpus)
+ per_cpu(max_freq_scale, cpu) = scale;
+
+ pr_debug("cpus %*pbl policy max freq/max freq %lu/%lu kHz max freq scale %lu\n",
+ cpumask_pr_args(cpus), policy_max_freq, max_freq, scale);
+}
+
+unsigned long cpufreq_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
{
return per_cpu(max_freq_scale, cpu);
}
+static void
+scale_min_freq_capacity(const cpumask_t *cpus, unsigned long policy_min_freq)
+{
+ unsigned long scale, max_freq;
+ int cpu = cpumask_first(cpus);
+
+ if (cpu >= nr_cpu_ids)
+ return;
+
+ max_freq = per_cpu(max_freq_cpu, cpu);
+
+ if (!max_freq)
+ return;
+
+ scale = (policy_min_freq << SCHED_CAPACITY_SHIFT) / max_freq;
+
+ for_each_cpu(cpu, cpus)
+ per_cpu(min_freq_scale, cpu) = scale;
+
+ pr_debug("cpus %*pbl policy min freq/max freq %lu/%lu kHz min freq scale %lu\n",
+ cpumask_pr_args(cpus), policy_min_freq, max_freq, scale);
+}
+
+unsigned long cpufreq_scale_min_freq_capacity(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(min_freq_scale, cpu);
+}
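
Both new helpers normalize a policy limit against the hardware maximum that scale_freq_capacity() cached in max_freq_cpu. A hedged sketch of the two ratios, with assumed SDM845-like frequencies (sample values are hypothetical):

    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10

    /* policy->max or policy->min scaled against the cached hardware max. */
    static unsigned long demo_scale(unsigned long policy_khz, unsigned long hw_max_khz)
    {
        return (policy_khz << SCHED_CAPACITY_SHIFT) / hw_max_khz;
    }

    int main(void)
    {
        unsigned long hw_max = 1766400; /* kHz, as recorded in max_freq_cpu */

        printf("max_freq_scale = %lu\n", demo_scale(1497600, hw_max)); /* -> 868 (thermal cap) */
        printf("min_freq_scale = %lu\n", demo_scale(300000, hw_max));  /* -> 173 (floor) */
        return 0;
    }
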
+
static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs, unsigned int state)
{
@@ -471,7 +513,7 @@
spin_unlock(&policy->transition_lock);
- scale_freq_capacity(policy, freqs);
+ scale_freq_capacity(policy->cpus, freqs->new, policy->cpuinfo.max_freq);
#ifdef CONFIG_SMP
for_each_cpu(cpu, policy->cpus)
trace_cpu_capacity(capacity_curr_of(cpu), cpu);
@@ -2275,7 +2317,8 @@
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_NOTIFY, new_policy);
- scale_freq_capacity(new_policy, NULL);
+ scale_max_freq_capacity(policy->cpus, policy->max);
+ scale_min_freq_capacity(policy->cpus, policy->min);
policy->min = new_policy->min;
policy->max = new_policy->max;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 23beb58..45d5522 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -941,5 +941,6 @@
struct sched_domain;
unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu);
-unsigned long cpufreq_scale_max_freq_capacity(int cpu);
+unsigned long cpufreq_scale_max_freq_capacity(struct sched_domain *sd, int cpu);
+unsigned long cpufreq_scale_min_freq_capacity(struct sched_domain *sd, int cpu);
#endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 3e97574..0f57407 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -21,7 +21,6 @@
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_is_big_little;
extern unsigned int sysctl_sched_sync_hint_enable;
-extern unsigned int sysctl_sched_initial_task_util;
extern unsigned int sysctl_sched_cstate_aware;
extern unsigned int sysctl_sched_capacity_margin;
extern unsigned int sysctl_sched_capacity_margin_down;
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 63f2baf..23a3b9a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -643,160 +643,118 @@
#ifdef CONFIG_SMP
TRACE_EVENT(sched_cpu_util,
- TP_PROTO(struct task_struct *p, int cpu, int task_util, unsigned long curr_util, unsigned long new_cum_util, int sync),
+ TP_PROTO(int cpu),
- TP_ARGS(p, cpu, task_util, curr_util, new_cum_util, sync),
+ TP_ARGS(cpu),
TP_STRUCT__entry(
- __array(char, comm, TASK_COMM_LEN )
- __field(int, pid )
__field(unsigned int, cpu )
- __field(int, task_util )
__field(unsigned int, nr_running )
- __field(long, cpu_util )
+ __field(long, cpu_util )
__field(long, cpu_util_cum )
- __field(long, new_cum_util )
__field(unsigned int, capacity_curr )
__field(unsigned int, capacity )
- __field(unsigned long, curr_util )
- __field(int, sync )
+ __field(unsigned int, capacity_orig )
__field(int, idle_state )
- __field(unsigned int, irqload )
- __field(int, high_irqload )
- __field(int, task_in_cum_demand )
+ __field(u64, irqload )
),
TP_fast_assign(
- memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
- __entry->pid = p->pid;
__entry->cpu = cpu;
- __entry->task_util = task_util;
__entry->nr_running = cpu_rq(cpu)->nr_running;
__entry->cpu_util = cpu_util(cpu);
__entry->cpu_util_cum = cpu_util_cum(cpu, 0);
- __entry->new_cum_util = new_cum_util;
- __entry->task_in_cum_demand = task_in_cum_window_demand(cpu_rq(cpu), p);
__entry->capacity_curr = capacity_curr_of(cpu);
__entry->capacity = capacity_of(cpu);
- __entry->curr_util = curr_util;
- __entry->sync = sync;
+ __entry->capacity_orig = capacity_orig_of(cpu);
__entry->idle_state = idle_get_state_idx(cpu_rq(cpu));
__entry->irqload = sched_irqload(cpu);
- __entry->high_irqload = sched_cpu_high_irqload(cpu);
),
- TP_printk("comm=%s pid=%d cpu=%d task_util=%d nr_running=%d cpu_util=%ld cpu_util_cum=%ld new_cum_util=%ld task_in_cum=%d capacity_curr=%u capacity=%u curr_util=%ld sync=%d idle_state=%d irqload=%u high_irqload=%u",
- __entry->comm, __entry->pid, __entry->cpu, __entry->task_util, __entry->nr_running, __entry->cpu_util, __entry->cpu_util_cum, __entry->new_cum_util, __entry->task_in_cum_demand, __entry->capacity_curr, __entry->capacity, __entry->curr_util, __entry->sync, __entry->idle_state, __entry->irqload, __entry->high_irqload)
+ TP_printk("cpu=%d nr_running=%d cpu_util=%ld cpu_util_cum=%ld capacity_curr=%u capacity=%u capacity_orig=%u idle_state=%d irqload=%llu",
+ __entry->cpu, __entry->nr_running, __entry->cpu_util, __entry->cpu_util_cum, __entry->capacity_curr, __entry->capacity, __entry->capacity_orig, __entry->idle_state, __entry->irqload)
);
-TRACE_EVENT(sched_energy_diff_packing,
+TRACE_EVENT(sched_energy_diff,
- TP_PROTO(struct task_struct *p, unsigned long task_util,
- int targeted_cpus, int nrg_pack, int nrg_spread),
+ TP_PROTO(struct task_struct *p, int prev_cpu, unsigned int prev_energy,
+ int next_cpu, unsigned int next_energy,
+ int backup_cpu, unsigned int backup_energy),
- TP_ARGS(p, task_util, targeted_cpus, nrg_pack, nrg_spread),
+ TP_ARGS(p, prev_cpu, prev_energy, next_cpu, next_energy,
+ backup_cpu, backup_energy),
TP_STRUCT__entry(
- __array(char, comm, TASK_COMM_LEN )
- __field(int, pid )
- __field(unsigned long, task_util )
- __field(int, targeted_cpus )
- __field(int, nrg_pack )
- __field(int, nrg_spread )
+ __field(int, pid )
+ __field(int, prev_cpu )
+ __field(int, prev_energy )
+ __field(int, next_cpu )
+ __field(int, next_energy )
+ __field(int, backup_cpu )
+ __field(int, backup_energy )
),
TP_fast_assign(
- memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
- __entry->task_util = task_util;
- __entry->targeted_cpus = targeted_cpus;
- __entry->nrg_pack = nrg_pack;
- __entry->nrg_spread = nrg_spread;
+ __entry->prev_cpu = prev_cpu;
+ __entry->prev_energy = prev_energy;
+ __entry->next_cpu = next_cpu;
+ __entry->next_energy = next_energy;
+ __entry->backup_cpu = backup_cpu;
+ __entry->backup_energy = backup_energy;
),
- TP_printk("comm=%s pid=%d task_util=%lu targeted_cpus=%d nrg_pack=%d nrg_spread=%d nrg_diff=%d",
- __entry->comm, __entry->pid, __entry->task_util,
- __entry->targeted_cpus, __entry->nrg_pack,
- __entry->nrg_spread, __entry->nrg_pack - __entry->nrg_spread)
+ TP_printk("pid=%d prev_cpu=%d prev_energy=%u next_cpu=%d next_energy=%u backup_cpu=%d backup_energy=%u",
+ __entry->pid, __entry->prev_cpu, __entry->prev_energy,
+ __entry->next_cpu, __entry->next_energy,
+ __entry->backup_cpu, __entry->backup_energy)
);
-DECLARE_EVENT_CLASS(sched_task_util,
+TRACE_EVENT(sched_task_util,
- TP_PROTO(struct task_struct *p, int task_cpu, unsigned long task_util, int nominated_cpu, int target_cpu, int ediff, bool need_idle),
+ TP_PROTO(struct task_struct *p, int next_cpu, int backup_cpu,
+ int target_cpu, bool sync, bool need_idle,
+ bool placement_boost, int rtg_cpu),
- TP_ARGS(p, task_cpu, task_util, nominated_cpu, target_cpu, ediff, need_idle),
+ TP_ARGS(p, next_cpu, backup_cpu, target_cpu, sync, need_idle,
+ placement_boost, rtg_cpu),
TP_STRUCT__entry(
- __array(char, comm, TASK_COMM_LEN )
__field(int, pid )
- __field(int, task_cpu )
- __field(unsigned long, task_util )
- __field(unsigned long, cpu_util_freq )
- __field(int, nominated_cpu )
+ __array(char, comm, TASK_COMM_LEN )
+ __field(unsigned long, util )
+ __field(int, prev_cpu )
+ __field(int, next_cpu )
+ __field(int, backup_cpu )
__field(int, target_cpu )
- __field(int, ediff )
+ __field(bool, sync )
__field(bool, need_idle )
+ __field(bool, placement_boost )
+ __field(int, rtg_cpu )
__field(u64, latency )
),
TP_fast_assign(
- memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
- __entry->task_cpu = task_cpu;
- __entry->task_util = task_util;
- __entry->cpu_util_freq = cpu_util_freq(target_cpu, NULL);
- __entry->nominated_cpu = nominated_cpu;
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->util = task_util(p);
+ __entry->prev_cpu = task_cpu(p);
+ __entry->next_cpu = next_cpu;
+ __entry->backup_cpu = backup_cpu;
__entry->target_cpu = target_cpu;
- __entry->ediff = ediff;
+ __entry->sync = sync;
__entry->need_idle = need_idle;
+ __entry->placement_boost = placement_boost;
+ __entry->rtg_cpu = rtg_cpu;
__entry->latency = p->ravg.mark_start ?
ktime_get_ns() -
p->ravg.mark_start : 0;
),
- TP_printk("comm=%s pid=%d task_cpu=%d task_util=%lu nominated_cpu=%d target_cpu=%d energy_diff=%d need_idle=%d latency=%llu",
- __entry->comm, __entry->pid, __entry->task_cpu, __entry->task_util, __entry->nominated_cpu, __entry->target_cpu, __entry->ediff, __entry->need_idle, __entry->latency)
+ TP_printk("pid=%d comm=%s util=%lu prev_cpu=%d next_cpu=%d backup_cpu=%d target_cpu=%d sync=%d need_idle=%d placement_boost=%d rtg_cpu=%d latency=%llu",
+ __entry->pid, __entry->comm, __entry->util, __entry->prev_cpu, __entry->next_cpu, __entry->backup_cpu, __entry->target_cpu, __entry->sync, __entry->need_idle, __entry->placement_boost, __entry->rtg_cpu, __entry->latency)
);
-DEFINE_EVENT(sched_task_util, sched_task_util_bias_to_waker,
- TP_PROTO(struct task_struct *p, int task_cpu, unsigned long task_util, int nominated_cpu, int target_cpu, int ediff, bool need_idle),
- TP_ARGS(p, task_cpu, task_util, nominated_cpu, target_cpu, ediff, need_idle)
-);
-
-DEFINE_EVENT(sched_task_util, sched_task_util_colocated,
- TP_PROTO(struct task_struct *p, int task_cpu, unsigned long task_util, int nominated_cpu, int target_cpu, int ediff, bool need_idle),
- TP_ARGS(p, task_cpu, task_util, nominated_cpu, target_cpu, ediff, need_idle)
-);
-
-DEFINE_EVENT(sched_task_util, sched_task_util_boosted,
- TP_PROTO(struct task_struct *p, int task_cpu, unsigned long task_util, int nominated_cpu, int target_cpu, int ediff, bool need_idle),
- TP_ARGS(p, task_cpu, task_util, nominated_cpu, target_cpu, ediff, need_idle)
-);
-
-DEFINE_EVENT(sched_task_util, sched_task_util_overutilzed,
- TP_PROTO(struct task_struct *p, int task_cpu, unsigned long task_util, int nominated_cpu, int target_cpu, int ediff, bool need_idle),
- TP_ARGS(p, task_cpu, task_util, nominated_cpu, target_cpu, ediff, need_idle)
-);
-
-DEFINE_EVENT(sched_task_util, sched_task_util_energy_diff,
- TP_PROTO(struct task_struct *p, int task_cpu, unsigned long task_util, int nominated_cpu, int target_cpu, int ediff, bool need_idle),
- TP_ARGS(p, task_cpu, task_util, nominated_cpu, target_cpu, ediff, need_idle)
-);
-
-DEFINE_EVENT(sched_task_util, sched_task_util_energy_aware,
- TP_PROTO(struct task_struct *p, int task_cpu, unsigned long task_util, int nominated_cpu, int target_cpu, int ediff, bool need_idle),
- TP_ARGS(p, task_cpu, task_util, nominated_cpu, target_cpu, ediff, need_idle)
-);
-
-DEFINE_EVENT(sched_task_util, sched_task_util_imbalance,
- TP_PROTO(struct task_struct *p, int task_cpu, unsigned long task_util, int nominated_cpu, int target_cpu, int ediff, bool need_idle),
- TP_ARGS(p, task_cpu, task_util, nominated_cpu, target_cpu, ediff, need_idle)
-);
-
-DEFINE_EVENT(sched_task_util, sched_task_util_need_idle,
- TP_PROTO(struct task_struct *p, int task_cpu, unsigned long task_util, int nominated_cpu, int target_cpu, int ediff, bool need_idle),
- TP_ARGS(p, task_cpu, task_util, nominated_cpu, target_cpu, ediff, need_idle)
-);
#endif
/*
@@ -1433,22 +1391,36 @@
);
#ifdef CONFIG_SMP
+
+#ifdef CONFIG_SCHED_WALT
+extern unsigned int sysctl_sched_use_walt_cpu_util;
+extern unsigned int sysctl_sched_use_walt_task_util;
+extern unsigned int sched_ravg_window;
+extern unsigned int walt_disabled;
+#endif
+
/*
* Tracepoint for accounting sched averages for tasks.
*/
TRACE_EVENT(sched_load_avg_task,
- TP_PROTO(struct task_struct *tsk, struct sched_avg *avg),
- TP_ARGS(tsk, avg),
+
+ TP_PROTO(struct task_struct *tsk, struct sched_avg *avg, void *_ravg),
+
+ TP_ARGS(tsk, avg, _ravg),
+
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, cpu )
__field( unsigned long, load_avg )
__field( unsigned long, util_avg )
+ __field( unsigned long, util_avg_pelt )
+ __field( u32, util_avg_walt )
__field( u64, load_sum )
__field( u32, util_sum )
__field( u32, period_contrib )
),
+
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
@@ -1458,79 +1430,124 @@
__entry->load_sum = avg->load_sum;
__entry->util_sum = avg->util_sum;
__entry->period_contrib = avg->period_contrib;
+ __entry->util_avg_pelt = avg->util_avg;
+ __entry->util_avg_walt = 0;
+#ifdef CONFIG_SCHED_WALT
+ __entry->util_avg_walt = ((struct ravg*)_ravg)->demand /
+ (sched_ravg_window >> SCHED_CAPACITY_SHIFT);
+ if (!walt_disabled && sysctl_sched_use_walt_task_util)
+ __entry->util_avg = __entry->util_avg_walt;
+#endif
),
- TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu util_avg=%lu load_sum=%llu"
+ TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu util_avg=%lu "
+ "util_avg_pelt=%lu util_avg_walt=%u load_sum=%llu"
" util_sum=%u period_contrib=%u",
__entry->comm,
__entry->pid,
__entry->cpu,
__entry->load_avg,
__entry->util_avg,
+ __entry->util_avg_pelt,
+ __entry->util_avg_walt,
(u64)__entry->load_sum,
(u32)__entry->util_sum,
(u32)__entry->period_contrib)
);
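
The WALT columns added to this tracepoint convert a task's windowed demand (in nanoseconds) into the 0..1024 capacity range by dividing by sched_ravg_window >> SCHED_CAPACITY_SHIFT. A worked example, assuming the common 20 ms window (an assumption, not read from this patch):

    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10

    int main(void)
    {
        unsigned long long window = 20000000ULL; /* sched_ravg_window: 20 ms in ns */
        unsigned long long demand = 10000000ULL; /* task ran ~10 ms of the window */

        /* Same conversion as the tracepoint: demand / (window >> 10). */
        printf("util_avg_walt = %llu\n", demand / (window >> SCHED_CAPACITY_SHIFT));
        /* -> 512, i.e. half of SCHED_CAPACITY_SCALE for a 50%-busy task. */
        return 0;
    }
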
+
/*
* Tracepoint for accounting sched averages for cpus.
*/
TRACE_EVENT(sched_load_avg_cpu,
+
TP_PROTO(int cpu, struct cfs_rq *cfs_rq),
+
TP_ARGS(cpu, cfs_rq),
+
TP_STRUCT__entry(
__field( int, cpu )
__field( unsigned long, load_avg )
__field( unsigned long, util_avg )
+ __field( unsigned long, util_avg_pelt )
+ __field( u32, util_avg_walt )
),
+
TP_fast_assign(
__entry->cpu = cpu;
__entry->load_avg = cfs_rq->avg.load_avg;
__entry->util_avg = cfs_rq->avg.util_avg;
+ __entry->util_avg_pelt = cfs_rq->avg.util_avg;
+ __entry->util_avg_walt = 0;
+#ifdef CONFIG_SCHED_WALT
+ __entry->util_avg_walt = div64_ul(cpu_rq(cpu)->prev_runnable_sum,
+ sched_ravg_window >> SCHED_CAPACITY_SHIFT);
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+ __entry->util_avg = __entry->util_avg_walt;
+#endif
),
- TP_printk("cpu=%d load_avg=%lu util_avg=%lu",
- __entry->cpu, __entry->load_avg, __entry->util_avg)
+
+ TP_printk("cpu=%d load_avg=%lu util_avg=%lu "
+ "util_avg_pelt=%lu util_avg_walt=%u",
+ __entry->cpu, __entry->load_avg, __entry->util_avg,
+ __entry->util_avg_pelt, __entry->util_avg_walt)
);
+
/*
* Tracepoint for sched_tune_config settings
*/
TRACE_EVENT(sched_tune_config,
+
TP_PROTO(int boost),
+
TP_ARGS(boost),
+
TP_STRUCT__entry(
__field( int, boost )
),
+
TP_fast_assign(
__entry->boost = boost;
),
+
TP_printk("boost=%d ", __entry->boost)
);
+
/*
* Tracepoint for accounting CPU boosted utilization
*/
TRACE_EVENT(sched_boost_cpu,
+
TP_PROTO(int cpu, unsigned long util, long margin),
+
TP_ARGS(cpu, util, margin),
+
TP_STRUCT__entry(
__field( int, cpu )
__field( unsigned long, util )
__field(long, margin )
),
+
TP_fast_assign(
__entry->cpu = cpu;
__entry->util = util;
__entry->margin = margin;
),
+
TP_printk("cpu=%d util=%lu margin=%ld",
__entry->cpu,
__entry->util,
__entry->margin)
);
+
/*
* Tracepoint for schedtune_tasks_update
*/
TRACE_EVENT(sched_tune_tasks_update,
+
TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx,
int boost, int max_boost),
+
TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost),
+
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
@@ -1540,6 +1557,7 @@
__field( int, boost )
__field( int, max_boost )
),
+
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
@@ -1549,104 +1567,109 @@
__entry->boost = boost;
__entry->max_boost = max_boost;
),
+
TP_printk("pid=%d comm=%s "
"cpu=%d tasks=%d idx=%d boost=%d max_boost=%d",
__entry->pid, __entry->comm,
__entry->cpu, __entry->tasks, __entry->idx,
__entry->boost, __entry->max_boost)
);
+
/*
* Tracepoint for schedtune_boostgroup_update
*/
TRACE_EVENT(sched_tune_boostgroup_update,
+
TP_PROTO(int cpu, int variation, int max_boost),
+
TP_ARGS(cpu, variation, max_boost),
+
TP_STRUCT__entry(
__field( int, cpu )
__field( int, variation )
__field( int, max_boost )
),
+
TP_fast_assign(
__entry->cpu = cpu;
__entry->variation = variation;
__entry->max_boost = max_boost;
),
+
TP_printk("cpu=%d variation=%d max_boost=%d",
__entry->cpu, __entry->variation, __entry->max_boost)
);
+
/*
* Tracepoint for accounting task boosted utilization
*/
TRACE_EVENT(sched_boost_task,
+
TP_PROTO(struct task_struct *tsk, unsigned long util, long margin),
+
TP_ARGS(tsk, util, margin),
+
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( unsigned long, util )
__field( long, margin )
+
),
+
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->util = util;
__entry->margin = margin;
),
+
TP_printk("comm=%s pid=%d util=%lu margin=%ld",
__entry->comm, __entry->pid,
__entry->util,
__entry->margin)
);
+
/*
- * Tracepoint for accounting sched group energy
+ * Tracepoint for find_best_target
*/
-TRACE_EVENT(sched_energy_diff,
- TP_PROTO(struct task_struct *tsk, int scpu, int dcpu, int udelta,
- int nrgb, int nrga, int nrgd, int capb, int capa, int capd,
- int nrgn, int nrgp),
- TP_ARGS(tsk, scpu, dcpu, udelta,
- nrgb, nrga, nrgd, capb, capa, capd,
- nrgn, nrgp),
+TRACE_EVENT(sched_find_best_target,
+
+ TP_PROTO(struct task_struct *tsk, bool prefer_idle,
+ unsigned long min_util, int start_cpu,
+ int best_idle, int best_active, int target),
+
+ TP_ARGS(tsk, prefer_idle, min_util, start_cpu,
+ best_idle, best_active, target),
+
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
- __field( pid_t, pid )
- __field( int, scpu )
- __field( int, dcpu )
- __field( int, udelta )
- __field( int, nrgb )
- __field( int, nrga )
- __field( int, nrgd )
- __field( int, capb )
- __field( int, capa )
- __field( int, capd )
- __field( int, nrgn )
- __field( int, nrgp )
+ __field( pid_t, pid )
+ __field( unsigned long, min_util )
+ __field( bool, prefer_idle )
+ __field( int, start_cpu )
+ __field( int, best_idle )
+ __field( int, best_active )
+ __field( int, target )
),
+
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
- __entry->scpu = scpu;
- __entry->dcpu = dcpu;
- __entry->udelta = udelta;
- __entry->nrgb = nrgb;
- __entry->nrga = nrga;
- __entry->nrgd = nrgd;
- __entry->capb = capb;
- __entry->capa = capa;
- __entry->capd = capd;
- __entry->nrgn = nrgn;
- __entry->nrgp = nrgp;
+ __entry->min_util = min_util;
+ __entry->prefer_idle = prefer_idle;
+ __entry->start_cpu = start_cpu;
+ __entry->best_idle = best_idle;
+ __entry->best_active = best_active;
+ __entry->target = target;
),
- TP_printk("pid=%d comm=%s "
- "src_cpu=%d dst_cpu=%d usage_delta=%d "
- "nrg_before=%d nrg_after=%d nrg_diff=%d "
- "cap_before=%d cap_after=%d cap_delta=%d "
- "nrg_delta=%d nrg_payoff=%d",
+
+ TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d "
+ "best_idle=%d best_active=%d target=%d",
__entry->pid, __entry->comm,
- __entry->scpu, __entry->dcpu, __entry->udelta,
- __entry->nrgb, __entry->nrga, __entry->nrgd,
- __entry->capb, __entry->capa, __entry->capd,
- __entry->nrgn, __entry->nrgp)
+ __entry->prefer_idle, __entry->start_cpu,
+ __entry->best_idle, __entry->best_active,
+ __entry->target)
);
TRACE_EVENT(sched_group_energy,
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 4b87c4e..38cbc0d 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -28,4 +28,3 @@
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
obj-$(CONFIG_SCHED_CORE_CTL) += core_ctl.o
-obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ad2b980..23a7f9c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3233,70 +3233,6 @@
unsigned int capacity_margin_freq = 1280; /* ~20% margin */
-#ifdef CONFIG_CPU_FREQ_GOV_SCHED
-
-static inline
-unsigned long sum_capacity_reqs(unsigned long cfs_cap,
- struct sched_capacity_reqs *scr, int cpu)
-{
- unsigned long total = add_capacity_margin(cfs_cap + scr->rt, cpu);
- return total += scr->dl;
-}
-
-unsigned long boosted_cpu_util(int cpu);
-static void sched_freq_tick_pelt(int cpu)
-{
- unsigned long cpu_utilization = boosted_cpu_util(cpu);
- unsigned long capacity_curr = capacity_curr_of(cpu);
- struct sched_capacity_reqs *scr;
-
- scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
- if (sum_capacity_reqs(cpu_utilization, scr, cpu) < capacity_curr)
- return;
-
- /*
- * To make free room for a task that is building up its "real"
- * utilization and to harm its performance the least, request
- * a jump to a higher OPP as soon as the margin of free capacity
- * is impacted (specified by capacity_margin_freq).
- */
- set_cfs_cpu_capacity(cpu, true, cpu_utilization);
-}
-
-#ifdef CONFIG_SCHED_WALT
-static void sched_freq_tick_walt(int cpu)
-{
- unsigned long cpu_utilization = cpu_util_freq(cpu, NULL);
-
- if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
- return sched_freq_tick_pelt(cpu);
-
- cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE /
- capacity_orig_of(cpu);
- /*
- * It is likely that the load is growing so we
- * keep the added margin in our request as an
- * extra boost.
- */
- set_cfs_cpu_capacity(cpu, true, cpu_utilization);
-
-}
-#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
-#else
-#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
-#endif /* CONFIG_SCHED_WALT */
-
-static void sched_freq_tick(int cpu)
-{
- if (!sched_freq())
- return;
-
- _sched_freq_tick(cpu);
-}
-#else
-static inline void sched_freq_tick(int cpu) { }
-#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
-
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
@@ -3326,7 +3262,6 @@
curr->sched_class->task_tick(rq, curr, 0);
cpu_load_update_active(rq);
calc_global_load_tick(rq);
- sched_freq_tick(cpu);
early_notif = early_detection_notify(rq, wallclock);
if (early_notif)
diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c
deleted file mode 100644
index 843eaa7..0000000
--- a/kernel/sched/cpufreq_sched.c
+++ /dev/null
@@ -1,503 +0,0 @@
-/*
- * Copyright (C) 2015 Michael Turquette <mturquette@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/cpufreq.h>
-#include <linux/module.h>
-#include <linux/kthread.h>
-#include <linux/percpu.h>
-#include <linux/irq_work.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/cpufreq_sched.h>
-
-#include "sched.h"
-
-#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */
-#define THROTTLE_UP_NSEC 500000 /* 500us default */
-
-struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
-static bool __read_mostly cpufreq_driver_slow;
-
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
-static struct cpufreq_governor cpufreq_gov_sched;
-#endif
-
-static DEFINE_PER_CPU(unsigned long, enabled);
-DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
-
-struct gov_tunables {
- struct gov_attr_set attr_set;
- unsigned int up_throttle_nsec;
- unsigned int down_throttle_nsec;
-};
-
-/**
- * gov_data - per-policy data internal to the governor
- * @up_throttle: next throttling period expiry if increasing OPP
- * @down_throttle: next throttling period expiry if decreasing OPP
- * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP
- * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP
- * @task: worker thread for dvfs transition that may block/sleep
- * @irq_work: callback used to wake up worker thread
- * @requested_freq: last frequency requested by the sched governor
- *
- * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
- * per-policy instance of it is created when the cpufreq_sched governor receives
- * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data
- * member of struct cpufreq_policy.
- *
- * Readers of this data must call down_read(policy->rwsem). Writers must
- * call down_write(policy->rwsem).
- */
-struct gov_data {
- ktime_t up_throttle;
- ktime_t down_throttle;
- struct gov_tunables *tunables;
- struct list_head tunables_hook;
- struct task_struct *task;
- struct irq_work irq_work;
- unsigned int requested_freq;
-};
-
-static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
- unsigned int freq)
-{
- struct gov_data *gd = policy->governor_data;
-
- /* avoid race with cpufreq_sched_stop */
- if (!down_write_trylock(&policy->rwsem))
- return;
-
- __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
-
- gd->up_throttle = ktime_add_ns(ktime_get(),
- gd->tunables->up_throttle_nsec);
- gd->down_throttle = ktime_add_ns(ktime_get(),
- gd->tunables->down_throttle_nsec);
- up_write(&policy->rwsem);
-}
-
-static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
-{
- ktime_t now = ktime_get();
-
- ktime_t throttle = gd->requested_freq < cur_freq ?
- gd->down_throttle : gd->up_throttle;
-
- if (ktime_after(now, throttle))
- return false;
-
- while (1) {
- int usec_left = ktime_to_ns(ktime_sub(throttle, now));
-
- usec_left /= NSEC_PER_USEC;
- trace_cpufreq_sched_throttled(usec_left);
- usleep_range(usec_left, usec_left + 100);
- now = ktime_get();
- if (ktime_after(now, throttle))
- return true;
- }
-}
-
-/*
- * we pass in struct cpufreq_policy. This is safe because changing out the
- * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
- * which tears down all of the data structures and __cpufreq_governor(policy,
- * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
- * new policy pointer
- */
-static int cpufreq_sched_thread(void *data)
-{
- struct sched_param param;
- struct cpufreq_policy *policy;
- struct gov_data *gd;
- unsigned int new_request = 0;
- unsigned int last_request = 0;
- int ret;
-
- policy = (struct cpufreq_policy *) data;
- gd = policy->governor_data;
-
- param.sched_priority = 50;
- ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
- if (ret) {
- pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
- do_exit(-EINVAL);
- } else {
- pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
- __func__, gd->task->pid);
- }
-
- do {
- new_request = gd->requested_freq;
- if (new_request == last_request) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (kthread_should_stop())
- break;
- schedule();
- } else {
- /*
- * if the frequency thread sleeps while waiting to be
- * unthrottled, start over to check for a newer request
- */
- if (finish_last_request(gd, policy->cur))
- continue;
- last_request = new_request;
- cpufreq_sched_try_driver_target(policy, new_request);
- }
- } while (!kthread_should_stop());
-
- return 0;
-}
-
-static void cpufreq_sched_irq_work(struct irq_work *irq_work)
-{
- struct gov_data *gd;
-
- gd = container_of(irq_work, struct gov_data, irq_work);
- if (!gd)
- return;
-
- wake_up_process(gd->task);
-}
-
-static void update_fdomain_capacity_request(int cpu)
-{
- unsigned int freq_new, index_new, cpu_tmp;
- struct cpufreq_policy *policy;
- struct gov_data *gd;
- unsigned long capacity = 0;
-
- /*
- * Avoid grabbing the policy if possible. A test is still
- * required after locking the CPU's policy to avoid racing
- * with the governor changing.
- */
- if (!per_cpu(enabled, cpu))
- return;
-
- policy = cpufreq_cpu_get(cpu);
- if (IS_ERR_OR_NULL(policy))
- return;
-
- if (policy->governor != &cpufreq_gov_sched ||
- !policy->governor_data)
- goto out;
-
- gd = policy->governor_data;
-
- /* find max capacity requested by cpus in this policy */
- for_each_cpu(cpu_tmp, policy->cpus) {
- struct sched_capacity_reqs *scr;
-
- scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
- capacity = max(capacity, scr->total);
- }
-
- /* Convert the new maximum capacity request into a cpu frequency */
- freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
- index_new = cpufreq_frequency_table_target(policy, freq_new, CPUFREQ_RELATION_L);
- freq_new = policy->freq_table[index_new].frequency;
-
- if (freq_new > policy->max)
- freq_new = policy->max;
-
- if (freq_new < policy->min)
- freq_new = policy->min;
-
- trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
- gd->requested_freq);
- if (freq_new == gd->requested_freq)
- goto out;
-
- gd->requested_freq = freq_new;
-
- /*
- * Throttling is not yet supported on platforms with fast cpufreq
- * drivers.
- */
- if (cpufreq_driver_slow)
- irq_work_queue_on(&gd->irq_work, cpu);
- else
- cpufreq_sched_try_driver_target(policy, freq_new);
-
-out:
- cpufreq_cpu_put(policy);
-}
-
-void update_cpu_capacity_request(int cpu, bool request)
-{
- unsigned long new_capacity;
- struct sched_capacity_reqs *scr;
-
- /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
- lockdep_assert_held(&cpu_rq(cpu)->lock);
-
- scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
-
-#ifdef CONFIG_SCHED_WALT
- if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
- new_capacity = scr->cfs + scr->rt;
-#endif
- new_capacity = scr->cfs;
- new_capacity = new_capacity * capacity_margin_freq
- / SCHED_CAPACITY_SCALE;
- new_capacity += scr->dl;
-
- if (new_capacity == scr->total)
- return;
-
- trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);
-
- scr->total = new_capacity;
- if (request)
- update_fdomain_capacity_request(cpu);
-}
-
-static inline void set_sched_freq(void)
-{
- static_key_slow_inc(&__sched_freq);
-}
-
-static inline void clear_sched_freq(void)
-{
- static_key_slow_dec(&__sched_freq);
-}
-
-/* Tunables */
-static struct gov_tunables *global_tunables;
-
-static inline struct gov_tunables *to_tunables(struct gov_attr_set *attr_set)
-{
- return container_of(attr_set, struct gov_tunables, attr_set);
-}
-
-static ssize_t up_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
-{
- struct gov_tunables *tunables = to_tunables(attr_set);
-
- return sprintf(buf, "%u\n", tunables->up_throttle_nsec);
-}
-
-static ssize_t up_throttle_nsec_store(struct gov_attr_set *attr_set,
- const char *buf, size_t count)
-{
- struct gov_tunables *tunables = to_tunables(attr_set);
- int ret;
- long unsigned int val;
-
- ret = kstrtoul(buf, 0, &val);
- if (ret < 0)
- return ret;
- tunables->up_throttle_nsec = val;
- return count;
-}
-
-static ssize_t down_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
-{
- struct gov_tunables *tunables = to_tunables(attr_set);
-
- return sprintf(buf, "%u\n", tunables->down_throttle_nsec);
-}
-
-static ssize_t down_throttle_nsec_store(struct gov_attr_set *attr_set,
- const char *buf, size_t count)
-{
- struct gov_tunables *tunables = to_tunables(attr_set);
- int ret;
- long unsigned int val;
-
- ret = kstrtoul(buf, 0, &val);
- if (ret < 0)
- return ret;
- tunables->down_throttle_nsec = val;
- return count;
-}
-
-static struct governor_attr up_throttle_nsec = __ATTR_RW(up_throttle_nsec);
-static struct governor_attr down_throttle_nsec = __ATTR_RW(down_throttle_nsec);
-
-static struct attribute *schedfreq_attributes[] = {
- &up_throttle_nsec.attr,
- &down_throttle_nsec.attr,
- NULL
-};
-
-static struct kobj_type tunables_ktype = {
- .default_attrs = schedfreq_attributes,
- .sysfs_ops = &governor_sysfs_ops,
-};
-
-static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
-{
- struct gov_data *gd;
- int cpu;
- int rc;
-
- for_each_cpu(cpu, policy->cpus)
- memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
- sizeof(struct sched_capacity_reqs));
-
- gd = kzalloc(sizeof(*gd), GFP_KERNEL);
- if (!gd)
- return -ENOMEM;
-
- policy->governor_data = gd;
-
- if (!global_tunables) {
- gd->tunables = kzalloc(sizeof(*gd->tunables), GFP_KERNEL);
- if (!gd->tunables)
- goto free_gd;
-
- gd->tunables->up_throttle_nsec =
- policy->cpuinfo.transition_latency ?
- policy->cpuinfo.transition_latency :
- THROTTLE_UP_NSEC;
- gd->tunables->down_throttle_nsec =
- THROTTLE_DOWN_NSEC;
-
- rc = kobject_init_and_add(&gd->tunables->attr_set.kobj,
- &tunables_ktype,
- get_governor_parent_kobj(policy),
- "%s", cpufreq_gov_sched.name);
- if (rc)
- goto free_tunables;
-
- gov_attr_set_init(&gd->tunables->attr_set,
- &gd->tunables_hook);
-
- pr_debug("%s: throttle_threshold = %u [ns]\n",
- __func__, gd->tunables->up_throttle_nsec);
-
- if (!have_governor_per_policy())
- global_tunables = gd->tunables;
- } else {
- gd->tunables = global_tunables;
- gov_attr_set_get(&global_tunables->attr_set,
- &gd->tunables_hook);
- }
-
- policy->governor_data = gd;
- if (cpufreq_driver_is_slow()) {
- cpufreq_driver_slow = true;
- gd->task = kthread_create(cpufreq_sched_thread, policy,
- "kschedfreq:%d",
- cpumask_first(policy->related_cpus));
- if (IS_ERR_OR_NULL(gd->task)) {
- pr_err("%s: failed to create kschedfreq thread\n",
- __func__);
- goto free_tunables;
- }
- get_task_struct(gd->task);
- kthread_bind_mask(gd->task, policy->related_cpus);
- wake_up_process(gd->task);
- init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
- }
-
- set_sched_freq();
-
- return 0;
-
-free_tunables:
- kfree(gd->tunables);
-free_gd:
- policy->governor_data = NULL;
- kfree(gd);
- return -ENOMEM;
-}
-
-static void cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
-{
- unsigned int count;
- struct gov_data *gd = policy->governor_data;
-
- clear_sched_freq();
- if (cpufreq_driver_slow) {
- kthread_stop(gd->task);
- put_task_struct(gd->task);
- }
-
- count = gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook);
- if (!count) {
- if (!have_governor_per_policy())
- global_tunables = NULL;
- kfree(gd->tunables);
- }
-
- policy->governor_data = NULL;
-
- kfree(gd);
-}
-
-static int cpufreq_sched_start(struct cpufreq_policy *policy)
-{
- int cpu;
-
- for_each_cpu(cpu, policy->cpus)
- per_cpu(enabled, cpu) = 1;
-
- return 0;
-}
-
-static void cpufreq_sched_limits(struct cpufreq_policy *policy)
-{
- unsigned int clamp_freq;
- struct gov_data *gd = policy->governor_data;;
-
- pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
- policy->cpu, policy->min, policy->max,
- policy->cur);
-
- clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);
-
- if (policy->cur != clamp_freq)
- __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
-}
-
-static void cpufreq_sched_stop(struct cpufreq_policy *policy)
-{
- int cpu;
-
- for_each_cpu(cpu, policy->cpus)
- per_cpu(enabled, cpu) = 0;
-}
-
-
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
-static
-#endif
-struct cpufreq_governor cpufreq_gov_sched = {
- .name = "sched",
- .init = cpufreq_sched_policy_init,
- .exit = cpufreq_sched_policy_exit,
- .start = cpufreq_sched_start,
- .stop = cpufreq_sched_stop,
- .limits = cpufreq_sched_limits,
- .owner = THIS_MODULE,
-};
-
-static int __init cpufreq_sched_init(void)
-{
- int cpu;
-
- for_each_cpu(cpu, cpu_possible_mask)
- per_cpu(enabled, cpu) = 0;
- return cpufreq_register_governor(&cpufreq_gov_sched);
-}
-
-#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
-struct cpufreq_governor *cpufreq_default_governor(void)
-{
- return &cpufreq_gov_sched;
-}
-#endif
-
-/* Try to make this the default governor */
-fs_initcall(cpufreq_sched_init);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 32b67eb..53974cc 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -19,10 +19,6 @@
#include "sched.h"
#include "tune.h"
-#ifdef CONFIG_SCHED_WALT
-unsigned long boosted_cpu_util(int cpu);
-#endif
-
#define SUGOV_KTHREAD_PRIORITY 50
struct sugov_tunables {
@@ -182,8 +178,7 @@
*util = min(rq->cfs.avg.util_avg, cfs_max);
*max = cfs_max;
- *util = cpu_util_freq(cpu, &loadcpu->walt_load);
- *util = boosted_cpu_util(cpu);
+ *util = boosted_cpu_util(cpu, &loadcpu->walt_load);
}
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
old mode 100755
new mode 100644
index 1cb03a4..75ea4ff
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -87,7 +87,6 @@
unsigned int sysctl_sched_is_big_little = 1;
unsigned int sysctl_sched_sync_hint_enable = 1;
-unsigned int sysctl_sched_initial_task_util = 0;
unsigned int sysctl_sched_cstate_aware = 1;
DEFINE_PER_CPU_READ_MOSTLY(int, sched_load_boost);
@@ -165,6 +164,7 @@
*/
unsigned int sysctl_sched_capacity_margin = 1078; /* ~5% margin */
unsigned int sysctl_sched_capacity_margin_down = 1205; /* ~15% margin */
+#define capacity_margin sysctl_sched_capacity_margin
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
@@ -288,10 +288,6 @@
return mul_u64_u32_shr(delta_exec, fact, shift);
}
-#ifdef CONFIG_SMP
-static int active_load_balance_cpu_stop(void *data);
-#endif
-
const struct sched_class fair_sched_class;
/**************************************************************
@@ -792,9 +788,7 @@
/*
* At this point, util_avg won't be used in select_task_rq_fair anyway
*/
- sa->util_avg = sched_freq() ?
- sysctl_sched_initial_task_util :
- 0;
+ sa->util_avg = 0;
sa->util_sum = 0;
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
}
@@ -3379,6 +3373,7 @@
struct rq *rq = rq_of(cfs_rq);
int cpu = cpu_of(rq);
int decayed;
+ void *ptr = NULL;
/*
* Track task load average for carrying it to new CPU after migrated, and
@@ -3396,8 +3391,12 @@
if (decayed && (flags & UPDATE_TG))
update_tg_load_avg(cfs_rq, 0);
- if (entity_is_task(se))
- trace_sched_load_avg_task(task_of(se), &se->avg);
+ if (entity_is_task(se)) {
+#ifdef CONFIG_SCHED_WALT
+ ptr = (void *)&(task_of(se)->ravg);
+#endif
+ trace_sched_load_avg_task(task_of(se), &se->avg, ptr);
+ }
}
/**
@@ -4851,24 +4850,6 @@
#ifdef CONFIG_SMP
static unsigned long capacity_orig_of(int cpu);
static unsigned long cpu_util(int cpu);
-unsigned long boosted_cpu_util(int cpu);
-#else
-#define boosted_cpu_util(cpu) cpu_util_freq(cpu)
-#endif
-
-#ifdef CONFIG_SMP
-static void update_capacity_of(int cpu)
-{
- unsigned long req_cap;
-
- if (!sched_freq())
- return;
-
- /* Convert scale-invariant capacity to cpu. */
- req_cap = boosted_cpu_util(cpu);
- req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
- set_cfs_cpu_capacity(cpu, true, req_cap);
-}
#endif
/*
@@ -4883,7 +4864,6 @@
struct sched_entity *se = &p->se;
#ifdef CONFIG_SMP
int task_new = flags & ENQUEUE_WAKEUP_NEW;
- int task_wakeup = flags & ENQUEUE_WAKEUP;
#endif
#ifdef CONFIG_SCHED_WALT
@@ -4961,16 +4941,6 @@
rq->rd->overutilized = true;
trace_sched_overutilized(true);
}
-
- /*
- * We want to potentially trigger a freq switch
- * request only for tasks that are waking up; this is
- * because we get here also during load balancing, but
- * in these cases it seems wise to trigger as single
- * request after load balancing is done.
- */
- if (task_new || task_wakeup)
- update_capacity_of(cpu_of(rq));
}
#endif /* CONFIG_SMP */
@@ -5048,13 +5018,6 @@
*/
schedtune_dequeue_task(p, cpu_of(rq));
- if (!se) {
- if (rq->cfs.nr_running)
- update_capacity_of(cpu_of(rq));
- else if (sched_freq())
- set_cfs_cpu_capacity(cpu_of(rq), false, 0);
- }
-
#endif /* CONFIG_SMP */
hrtick_update(rq);
@@ -5527,6 +5490,15 @@
}
/*
+ * Externally visible function. Let's keep the one above
+ * so that the check is inlined/optimized in the sched paths.
+ */
+bool sched_is_energy_aware(void)
+{
+ return energy_aware();
+}
+
+/*
* Returns the current capacity of cpu after applying both
* cpu and freq scaling.
*/
@@ -5538,44 +5510,88 @@
}
/*
- * Externally visible function. Let's keep the one above
- * so that the check is inlined/optimized in the sched paths.
+ * Returns the current capacity of cpu after applying both
+ * cpu and min freq scaling.
*/
-bool sched_is_energy_aware(void)
+unsigned long capacity_min_of(int cpu)
{
- return energy_aware();
+ if (!sched_feat(MIN_CAPACITY_CAPPING))
+ return 0;
+ return arch_scale_cpu_capacity(NULL, cpu) *
+ arch_scale_min_freq_capacity(NULL, cpu)
+ >> SCHED_CAPACITY_SHIFT;
}
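
capacity_min_of() multiplies two 0..1024 fixed-point factors, so a single right shift renormalizes the product. A small standalone sketch under those assumptions (the sample values are hypothetical):

    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10

    int main(void)
    {
        unsigned long cpu_cap = 1024;       /* arch_scale_cpu_capacity() */
        unsigned long min_freq_scale = 173; /* arch_scale_min_freq_capacity(), e.g. the floor above */

        /* Same product as capacity_min_of() when MIN_CAPACITY_CAPPING is set. */
        printf("capacity_min = %lu\n",
               cpu_cap * min_freq_scale >> SCHED_CAPACITY_SHIFT);
        /* -> 173: even an otherwise idle CPU is charged at least its floor OPP. */
        return 0;
    }
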
+/*
+ * CPU candidates.
+ *
+ * These are labels to reference CPU candidates for an energy_diff.
+ * Currently we support only two possible candidates: the task's previous CPU
+ * and another candidate CPU.
+ * More advanced/aggressive EAS selection policies can consider more
+ * candidates.
+ */
+#define EAS_CPU_PRV 0
+#define EAS_CPU_NXT 1
+#define EAS_CPU_BKP 2
+#define EAS_CPU_CNT 3
+
+/*
+ * energy_diff - supports the computation of the estimated energy impact of
+ * moving a "task"'s "util_delta" between different CPU candidates.
+ */
struct energy_env {
+ /* Utilization to move */
+ struct task_struct *p;
+ int util_delta;
+
+ /* Mask of CPUs candidates to evaluate */
+ cpumask_t cpus_mask;
+
+ /* CPU candidates to evaluate */
+ struct {
+
+ /* CPU ID, must be in cpus_mask */
+ int cpu_id;
+
+ /*
+ * Index (into sched_group_energy::cap_states) of the OPP the
+ * CPU needs to run at if the task is placed on it.
+ * This includes both the active and blocked load due to
+ * other tasks on this CPU, as well as the task's own
+ * utilization.
+ */
+ int cap_idx;
+ int cap;
+
+ /* Estimated system energy */
+ unsigned int energy;
+
+ /* Estimated energy variation wrt EAS_CPU_PRV */
+ int nrg_delta;
+
+ } cpu[EAS_CPU_CNT];
+
+ /*
+ * Index (into energy_env::cpu) of the most energy efficient CPU for
+ * the specified task (energy_env::p)
+ */
+ int next_idx;
+
+ /* Support data */
struct sched_group *sg_top;
struct sched_group *sg_cap;
- int cap_idx;
- int util_delta;
- int src_cpu;
- int dst_cpu;
- int energy;
- int payoff;
- int sync_cpu;
- unsigned long curr_util;
- struct task_struct *task;
- struct {
- int before;
- int after;
- int delta;
- int diff;
- } nrg;
- struct {
- int before;
- int after;
- int delta;
- } cap;
+ struct sched_group *sg;
};
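
The reworked energy_env carries a fixed candidate array indexed by the EAS_CPU_* labels above. As a hedged illustration of how a wake-up path might populate it (the demo_ struct below mirrors only a few of the fields; the real caller is not part of this hunk):

    #include <stdio.h>

    /* Illustrative only: a trimmed-down mirror of the energy_env above. */
    struct demo_energy_env {
        int util_delta;
        struct { int cpu_id; unsigned int energy; int nrg_delta; } cpu[3];
        int next_idx;
    };

    enum { DEMO_CPU_PRV, DEMO_CPU_NXT, DEMO_CPU_BKP };

    int main(void)
    {
        struct demo_energy_env eenv = { .util_delta = 200 };

        eenv.cpu[DEMO_CPU_PRV].cpu_id = 0;  /* task's previous CPU: baseline */
        eenv.cpu[DEMO_CPU_NXT].cpu_id = 4;  /* preferred target */
        eenv.cpu[DEMO_CPU_BKP].cpu_id = -1; /* no backup candidate: skipped */
        eenv.next_idx = DEMO_CPU_PRV;       /* stay put unless a move saves energy */

        printf("evaluating %d candidates\n", 3);
        return 0;
    }
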
+static int cpu_util_wake(int cpu, struct task_struct *p);
+
/*
* __cpu_norm_util() returns the cpu util relative to a specific capacity,
- * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for
- * energy calculations. Using the scale-invariant util returned by
- * cpu_util() and approximating scale-invariant util by:
+ * i.e. its busy ratio, in the range [0..SCHED_LOAD_SCALE], which is useful for
+ * energy calculations.
+ *
+ * Since util is a scale-invariant utilization defined as:
*
* util ~ (curr_freq/max_freq)*1024 * capacity_orig/1024 * running_time/time
*
@@ -5585,83 +5601,41 @@
*
* norm_util = running_time/time ~ util/capacity
*/
-static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta)
+static unsigned long __cpu_norm_util(unsigned long util, unsigned long capacity)
{
- int util = cpu_util_cum(cpu, delta);
-
if (util >= capacity)
return SCHED_CAPACITY_SCALE;
- return DIV_ROUND_UP(util << SCHED_CAPACITY_SHIFT, capacity);
+ return (util << SCHED_CAPACITY_SHIFT)/capacity;
}
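
__cpu_norm_util() is now a pure busy ratio, saturated at SCHED_CAPACITY_SCALE; note the new version truncates where the old DIV_ROUND_UP() rounded up. Worked numbers in a standalone sketch:

    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10
    #define SCHED_CAPACITY_SCALE (1UL << SCHED_CAPACITY_SHIFT)

    static unsigned long demo_cpu_norm_util(unsigned long util, unsigned long capacity)
    {
        if (util >= capacity)
            return SCHED_CAPACITY_SCALE;
        return (util << SCHED_CAPACITY_SHIFT) / capacity;
    }

    int main(void)
    {
        /* util 256 on a 512-capacity OPP -> 50% busy -> 512/1024. */
        printf("norm = %lu\n", demo_cpu_norm_util(256, 512));
        /* Saturates once util reaches the OPP's capacity. */
        printf("norm = %lu\n", demo_cpu_norm_util(600, 512)); /* -> 1024 */
        return 0;
    }
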
-static inline bool
-bias_to_waker_cpu(struct task_struct *p, int cpu, struct cpumask *rtg_target)
+static unsigned long group_max_util(struct energy_env *eenv, int cpu_idx)
{
- int rtg_target_cpu = rtg_target ? cpumask_first(rtg_target) : cpu;
-
- return cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) &&
- cpu_active(cpu) && !cpu_isolated(cpu) &&
- capacity_orig_of(cpu) >= capacity_orig_of(rtg_target_cpu) &&
- task_fits_max(p, cpu);
-}
-
-static int calc_util_delta(struct energy_env *eenv, int cpu)
-{
-#ifdef CONFIG_SCHED_WALT
- if (cpu == eenv->src_cpu) {
- if (!walt_disabled && sysctl_sched_use_walt_task_util &&
- !task_in_cum_window_demand(cpu_rq(cpu), eenv->task)) {
- if (eenv->util_delta == 0)
- /*
- * energy before - calculate energy cost when
- * the new task is placed onto src_cpu. The
- * task is not on a runqueue so its util is not
- * in the WALT's cr_avg as it's discounted when
- * it slept last time. Hence return task's util
- * as delta to calculate energy cost of src_cpu
- * as if the new task on it.
- */
- return task_util(eenv->task);
- /*
- * energy after - WALT's cr_avg already doesn't have the
- * new task's util accounted in. Thus return 0 delta to
- * calculate energy cost of the src_cpu without the
- * task's util.
- */
- return 0;
- }
- /*
- * Task is already on a runqueue for example while load
- * balancing. WALT's cpu util already accounted the task's
- * util. return 0 delta for energy before so energy calculation
- * to be done with the task's util accounted, return -task_util
- * for energy after so the calculation to be doen with
- * discounted task's util.
- */
- return -eenv->util_delta;
- }
-#else
- if (cpu == eenv->src_cpu)
- return -eenv->util_delta;
-#endif
- if (cpu == eenv->dst_cpu)
- return eenv->util_delta;
- return 0;
-}
-
-static
-unsigned long group_max_util(struct energy_env *eenv)
-{
- int i, delta;
unsigned long max_util = 0;
+ unsigned long util;
+ int cpu;
- for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) {
- delta = calc_util_delta(eenv, i);
- /* substract sync_cpu's rq->curr util to discount its cost */
- if (eenv->sync_cpu == i)
- delta -= eenv->curr_util;
- max_util = max(max_util, cpu_util_cum(i, delta));
+ for_each_cpu(cpu, sched_group_cpus(eenv->sg_cap)) {
+ util = cpu_util_wake(cpu, eenv->p);
+
+ /*
+ * If we are looking at the target CPU specified by the eenv,
+ * then we should add the (estimated) utilization of the task
+ * assuming we will wake it up on that CPU.
+ */
+ if (unlikely(cpu == eenv->cpu[cpu_idx].cpu_id))
+ util += eenv->util_delta;
+
+ max_util = max(max_util, util);
+
+ /*
+ * Take into account any minimum frequency imposed
+ * elsewhere, which limits the energy states available.
+ * If the MIN_CAPACITY_CAPPING feature is not enabled,
+ * capacity_min_of() will return 0 (not capped).
+ */
+ max_util = max(max_util, capacity_min_of(cpu));
+
}
return max_util;
@@ -5669,60 +5643,64 @@
/*
* group_norm_util() returns the approximated group util relative to its
- * current capacity (busy ratio) in the range [0..SCHED_LOAD_SCALE] for use in
- * energy calculations. Since task executions may or may not overlap in time in
- * the group the true normalized util is between max(cpu_norm_util(i)) and
- * sum(cpu_norm_util(i)) when iterating over all cpus in the group, i. The
- * latter is used as the estimate as it leads to a more pessimistic energy
+ * current capacity (busy ratio), in the range [0..SCHED_LOAD_SCALE], for use
+ * in energy calculations.
+ *
+ * Since task executions may or may not overlap in time in the group the true
+ * normalized util is between MAX(cpu_norm_util(i)) and SUM(cpu_norm_util(i))
+ * when iterating over all CPUs in the group.
+ * The latter estimate is used as it leads to a more pessimistic energy
* estimate (more busy).
*/
static unsigned
-long group_norm_util(struct energy_env *eenv, struct sched_group *sg)
+long group_norm_util(struct energy_env *eenv, int cpu_idx)
{
- int i, delta;
- unsigned long util_sum = 0;
- unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
+ unsigned long capacity = eenv->cpu[cpu_idx].cap;
+ unsigned long util, util_sum = 0;
+ int cpu;
- for_each_cpu(i, sched_group_cpus(sg)) {
- delta = calc_util_delta(eenv, i);
- /* substract sync_cpu's rq->curr util to discount its cost */
- if (eenv->sync_cpu == i)
- delta -= eenv->curr_util;
- util_sum += __cpu_norm_util(i, capacity, delta);
+ for_each_cpu(cpu, sched_group_cpus(eenv->sg)) {
+ util = cpu_util_wake(cpu, eenv->p);
+
+ /*
+ * If we are looking at the target CPU specified by the eenv,
+ * then we should add the (estimated) utilization of the task
+ * assuming we will wake it up on that CPU.
+ */
+ if (unlikely(cpu == eenv->cpu[cpu_idx].cpu_id))
+ util += eenv->util_delta;
+
+ util_sum += __cpu_norm_util(util, capacity);
}
- if (util_sum > SCHED_CAPACITY_SCALE)
- return SCHED_CAPACITY_SCALE;
- return util_sum;
+ return min_t(unsigned long, util_sum, SCHED_CAPACITY_SCALE);
}
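
group_norm_util() sums these per-CPU ratios — the pessimistic no-overlap estimate described in the comment — and clamps the sum at SCHED_CAPACITY_SCALE. A sketch with hypothetical utilizations:

    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10
    #define SCHED_CAPACITY_SCALE (1UL << SCHED_CAPACITY_SHIFT)

    int main(void)
    {
        /* Two CPUs in the group, both judged at capacity 512 for the chosen OPP. */
        unsigned long utils[] = { 300, 350 };
        unsigned long capacity = 512, util_sum = 0;
        unsigned int i;

        for (i = 0; i < 2; i++)
            util_sum += (utils[i] << SCHED_CAPACITY_SHIFT) / capacity;

        /* SUM(cpu_norm_util) may exceed 1024; clamp like min_t() does. */
        if (util_sum > SCHED_CAPACITY_SCALE)
            util_sum = SCHED_CAPACITY_SCALE;
        printf("sg_util = %lu\n", util_sum); /* 600 + 700 -> 1024 (clamped) */
        return 0;
    }
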
-static int __find_new_capacity(unsigned long util,
- const struct sched_group_energy const *sge)
+static int find_new_capacity(struct energy_env *eenv, int cpu_idx)
{
- int idx;
+ const struct sched_group_energy *sge = eenv->sg->sge;
+ int idx, max_idx = sge->nr_cap_states - 1;
+ unsigned long util = group_max_util(eenv, cpu_idx);
+
+ /* default is max_cap if we don't find a match */
+ eenv->cpu[cpu_idx].cap_idx = max_idx;
+ eenv->cpu[cpu_idx].cap = sge->cap_states[max_idx].cap;
for (idx = 0; idx < sge->nr_cap_states; idx++) {
- if (sge->cap_states[idx].cap >= util)
- return idx;
+ if (sge->cap_states[idx].cap >= util) {
+ /* Keep track of SG's capacity */
+ eenv->cpu[cpu_idx].cap_idx = idx;
+ eenv->cpu[cpu_idx].cap = sge->cap_states[idx].cap;
+ break;
+ }
}
- return (sge->nr_cap_states - 1);
+ return eenv->cpu[cpu_idx].cap_idx;
}
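
find_new_capacity() scans cap_states in ascending capacity order and keeps the first OPP that covers the group's max util, falling back to the top state. A sketch with a hypothetical three-OPP table:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical cap_states, ascending as in sched_group_energy. */
        unsigned long caps[] = { 430, 871, 1024 };
        unsigned long util = 500;
        int idx, pick = 2; /* default: max_idx, as in the kernel code */

        for (idx = 0; idx < 3; idx++) {
            if (caps[idx] >= util) { /* first OPP that fits */
                pick = idx;
                break;
            }
        }
        printf("cap_idx=%d cap=%lu\n", pick, caps[pick]); /* -> 1, 871 */
        return 0;
    }
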
-static int find_new_capacity(struct energy_env *eenv,
- const struct sched_group_energy const *sge)
+static int group_idle_state(struct energy_env *eenv, int cpu_idx)
{
- int idx;
- unsigned long util = group_max_util(eenv);
-
- idx = __find_new_capacity(util, sge);
- eenv->cap_idx = idx;
-
- return idx;
-}
-
-static int group_idle_state(struct energy_env *eenv, struct sched_group *sg)
-{
+ struct sched_group *sg = eenv->sg;
int i, state = INT_MAX;
int src_in_grp, dst_in_grp;
long grp_util = 0;
@@ -5731,31 +5709,29 @@
for_each_cpu(i, sched_group_cpus(sg))
state = min(state, idle_get_state_idx(cpu_rq(i)));
- if (unlikely(state == INT_MAX))
- return -EINVAL;
-
/* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */
state++;
- /*
- * Try to estimate if a deeper idle state is
- * achievable when we move the task.
- */
- for_each_cpu(i, sched_group_cpus(sg))
- grp_util += cpu_util(i);
-
- src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg));
- dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg));
+ src_in_grp = cpumask_test_cpu(eenv->cpu[EAS_CPU_PRV].cpu_id,
+ sched_group_cpus(sg));
+ dst_in_grp = cpumask_test_cpu(eenv->cpu[cpu_idx].cpu_id,
+ sched_group_cpus(sg));
if (src_in_grp == dst_in_grp) {
/* both CPUs under consideration are in the same group or not in
* either group, migration should leave idle state the same.
*/
goto end;
}
- /* add or remove util as appropriate to indicate what group util
- * will be (worst case - no concurrent execution) after moving the task
+
+ /*
+ * Try to estimate if a deeper idle state is
+ * achievable when we move the task.
*/
- grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta;
+ for_each_cpu(i, sched_group_cpus(sg)) {
+ grp_util += cpu_util_wake(i, eenv->p);
+ if (unlikely(i == eenv->cpu[cpu_idx].cpu_id))
+ grp_util += eenv->util_delta;
+ }
if (grp_util <=
((long)sg->sgc->max_capacity * (int)sg->group_weight)) {
@@ -5789,22 +5765,65 @@
}
/*
- * sched_group_energy(): Computes the absolute energy consumption of cpus
- * belonging to the sched_group including shared resources shared only by
- * members of the group. Iterates over all cpus in the hierarchy below the
- * sched_group starting from the bottom working it's way up before going to
- * the next cpu until all cpus are covered at all levels. The current
- * implementation is likely to gather the same util statistics multiple times.
- * This can probably be done in a faster but more complex way.
- * Note: sched_group_energy() may fail when racing with sched_domain updates.
+ * calc_sg_energy: compute energy for the eenv's SG (i.e. eenv->sg).
+ *
+ * This works in iterations to compute the SG's energy for each CPU
+ * candidate defined by the energy_env's cpu array.
+ *
+ * NOTE: in the following computations for busy_energy and idle_energy we do
+ * not shift by SCHED_CAPACITY_SHIFT in order to reduce rounding errors.
+ * The required scaling will be performed just one time, by the calling
+ * functions, once we have accumulated the contributions for all the SGs.
*/
-static int sched_group_energy(struct energy_env *eenv)
+static void calc_sg_energy(struct energy_env *eenv)
{
- struct sched_domain *sd;
- int cpu;
- u64 total_energy = 0;
+ struct sched_group *sg = eenv->sg;
+ int busy_energy, idle_energy;
+ unsigned int busy_power;
+ unsigned int idle_power;
+ unsigned long sg_util;
+ int cap_idx, idle_idx;
+ int total_energy = 0;
+ int cpu_idx;
+
+ for (cpu_idx = EAS_CPU_PRV; cpu_idx < EAS_CPU_CNT; ++cpu_idx) {
+
+ if (eenv->cpu[cpu_idx].cpu_id == -1)
+ continue;
+ /* Compute ACTIVE energy */
+ cap_idx = find_new_capacity(eenv, cpu_idx);
+ busy_power = sg->sge->cap_states[cap_idx].power;
+ /*
+ * in order to calculate cpu_norm_util, we need to know which
+ * capacity level the group will be at, so calculate that first
+ */
+ sg_util = group_norm_util(eenv, cpu_idx);
+
+ busy_energy = sg_util * busy_power;
+
+ /* Compute IDLE energy */
+ idle_idx = group_idle_state(eenv, cpu_idx);
+ idle_power = sg->sge->idle_states[idle_idx].power;
+
+ idle_energy = SCHED_CAPACITY_SCALE - sg_util;
+ idle_energy *= idle_power;
+
+ total_energy = busy_energy + idle_energy;
+ eenv->cpu[cpu_idx].energy += total_energy;
+ }
+}
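
A worked example of the unscaled energy term computed above, with made-up numbers (sg_util = 512, i.e. half of SCHED_CAPACITY_SCALE; busy_power = 400; idle_power = 20):

	#define SCHED_CAPACITY_SCALE	1024UL

	/* Sketch: one SG's (unscaled) energy contribution, as computed above. */
	static long sg_energy(unsigned long sg_util, unsigned int busy_power,
			      unsigned int idle_power)
	{
		long busy_energy = sg_util * busy_power;	/* 512 * 400 = 204800 */
		long idle_energy = (SCHED_CAPACITY_SCALE - sg_util) * idle_power;
							/* 512 * 20 = 10240 */

		/* The caller shifts once at the end: 215040 >> 10 = 210. */
		return busy_energy + idle_energy;
	}
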
+
+/*
+ * compute_energy() computes the absolute variation in energy consumption by
+ * moving eenv.util_delta from EAS_CPU_PRV to EAS_CPU_NXT.
+ *
+ * NOTE: compute_energy() may fail when racing with sched_domain updates, in
+ * which case we abort by returning -EINVAL.
+ */
+static int compute_energy(struct energy_env *eenv)
+{
struct cpumask visit_cpus;
- struct sched_group *sg;
int cpu_count;
WARN_ON(!eenv->sg_top->sge);
@@ -5824,8 +5843,8 @@
while (!cpumask_empty(&visit_cpus)) {
struct sched_group *sg_shared_cap = NULL;
-
- cpu = cpumask_first(&visit_cpus);
+ int cpu = cpumask_first(&visit_cpus);
+ struct sched_domain *sd;
/*
* Is the group utilization affected by cpus outside this
@@ -5834,69 +5853,29 @@
* when we took visit_cpus.
*/
sd = rcu_dereference(per_cpu(sd_scs, cpu));
-
if (sd && sd->parent)
sg_shared_cap = sd->parent->groups;
for_each_domain(cpu, sd) {
- sg = sd->groups;
+ struct sched_group *sg = sd->groups;
/* Has this sched_domain already been visited? */
if (sd->child && group_first_cpu(sg) != cpu)
break;
do {
- unsigned long group_util;
- int sg_busy_energy, sg_idle_energy;
- int cap_idx, idle_idx;
-
+ eenv->sg_cap = sg;
if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
eenv->sg_cap = sg_shared_cap;
- else
- eenv->sg_cap = sg;
- cap_idx = find_new_capacity(eenv, sg->sge);
+ /*
+ * Compute the energy for all the candidate
+ * CPUs in the current visited SG.
+ */
+ eenv->sg = sg;
+ calc_sg_energy(eenv);
- if (sg->group_weight == 1) {
- /* Remove capacity of src CPU (before task move) */
- if (eenv->util_delta == 0 &&
- cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) {
- eenv->cap.before = sg->sge->cap_states[cap_idx].cap;
- eenv->cap.delta -= eenv->cap.before;
- }
- /* Add capacity of dst CPU (after task move) */
- if (eenv->util_delta != 0 &&
- cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) {
- eenv->cap.after = sg->sge->cap_states[cap_idx].cap;
- eenv->cap.delta += eenv->cap.after;
- }
- }
-
- idle_idx = group_idle_state(eenv, sg);
- if (unlikely(idle_idx < 0))
- return idle_idx;
-
- if (idle_idx > sg->sge->nr_idle_states - 1)
- idle_idx = sg->sge->nr_idle_states - 1;
-
- group_util = group_norm_util(eenv, sg);
- sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power);
-
- if (idle_idx == 0)
- sg_idle_energy = ((SCHED_CAPACITY_SCALE - group_util)
- * sg->sge->cap_states[cap_idx].power);
- else
- sg_idle_energy = ((SCHED_CAPACITY_SCALE - group_util)
- * sg->sge->idle_states[idle_idx].power);
-
- total_energy += sg_busy_energy + sg_idle_energy;
-
- trace_sched_group_energy(group_first_cpu(sg),
- group_util, total_energy,
- sg_busy_energy, sg_idle_energy,
- idle_idx,
- sg->sge->cap_states[eenv->cap_idx].cap);
-
+ /* remove CPUs we have just visited */
if (!sd->child) {
/*
* cpu_count here is the number of
@@ -5937,7 +5916,6 @@
continue;
}
- eenv->energy = total_energy >> SCHED_CAPACITY_SHIFT;
return 0;
}
@@ -5947,166 +5925,106 @@
}
/*
- * energy_diff(): Estimate the energy impact of changing the utilization
- * distribution. eenv specifies the change: utilisation amount, source, and
- * destination cpu. Source or destination cpu may be -1 in which case the
- * utilization is removed from or added to the system (e.g. task wake-up). If
- * both are specified, the utilization is migrated.
+ * select_energy_cpu_idx(): estimate the energy impact of changing the
+ * utilization distribution.
+ *
+ * The eenv parameter specifies the changes: utilisation amount and a pair of
+ * possible CPU candidates (the previous CPU and a different target CPU).
+ *
+ * This function returns the index of a CPU candidate specified by the
+ * energy_env which corresponds to the first CPU saving energy.
+ * Thus, 0 (EAS_CPU_PRV) means that none of the CPU candidates is more energy
+ * efficient than running on prev_cpu. This is also the value returned in case
+ * of abort due to error conditions during the computations.
+ * A value greater than zero means that the first energy-efficient CPU is the
+ * one represented by eenv->cpu[eenv->next_idx].cpu_id.
*/
-static inline int __energy_diff(struct energy_env *eenv)
+static inline int select_energy_cpu_idx(struct energy_env *eenv)
{
struct sched_domain *sd;
struct sched_group *sg;
- int sd_cpu = -1, energy_before = 0, energy_after = 0;
- int diff, margin;
+ int sd_cpu = -1;
+ int cpu_idx;
+ int margin;
- struct energy_env eenv_before = {
- .util_delta = 0,
- .src_cpu = eenv->src_cpu,
- .dst_cpu = eenv->dst_cpu,
- .nrg = { 0, 0, 0, 0},
- .cap = { 0, 0, 0 },
- .task = eenv->task,
- .sync_cpu = eenv->sync_cpu,
- };
-
- if (eenv->src_cpu == eenv->dst_cpu)
- return 0;
-
- sd_cpu = (eenv->src_cpu != -1) ? eenv->src_cpu : eenv->dst_cpu;
+ sd_cpu = eenv->cpu[EAS_CPU_PRV].cpu_id;
sd = rcu_dereference(per_cpu(sd_ea, sd_cpu));
-
if (!sd)
- return 0; /* Error */
+ return EAS_CPU_PRV;
+
+ cpumask_clear(&eenv->cpus_mask);
+ for (cpu_idx = EAS_CPU_PRV; cpu_idx < EAS_CPU_CNT; ++cpu_idx) {
+ int cpu = eenv->cpu[cpu_idx].cpu_id;
+
+ if (cpu < 0)
+ continue;
+ cpumask_set_cpu(cpu, &eenv->cpus_mask);
+ }
sg = sd->groups;
-
do {
- if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) {
- eenv_before.sg_top = eenv->sg_top = sg;
+ /* Skip SGs which do not contain a candidate CPU */
+ if (!cpumask_intersects(&eenv->cpus_mask, sched_group_cpus(sg)))
+ continue;
- if (sched_group_energy(&eenv_before))
- return 0; /* Invalid result abort */
- energy_before += eenv_before.energy;
+ eenv->sg_top = sg;
+ /* energy is unscaled to reduce rounding errors */
+ if (compute_energy(eenv) == -EINVAL)
+ return EAS_CPU_PRV;
- /* Keep track of SRC cpu (before) capacity */
- eenv->cap.before = eenv_before.cap.before;
- eenv->cap.delta = eenv_before.cap.delta;
-
- if (sched_group_energy(eenv))
- return 0; /* Invalid result abort */
- energy_after += eenv->energy;
- }
} while (sg = sg->next, sg != sd->groups);
- eenv->nrg.before = energy_before;
- eenv->nrg.after = energy_after;
- eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before;
- eenv->payoff = 0;
-#ifndef CONFIG_SCHED_TUNE
- trace_sched_energy_diff(eenv->task,
- eenv->src_cpu, eenv->dst_cpu, eenv->util_delta,
- eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff,
- eenv->cap.before, eenv->cap.after, eenv->cap.delta,
- eenv->nrg.delta, eenv->payoff);
-#endif
+ /* Scale energy before comparisons */
+ for (cpu_idx = EAS_CPU_PRV; cpu_idx < EAS_CPU_CNT; ++cpu_idx)
+ eenv->cpu[cpu_idx].energy >>= SCHED_CAPACITY_SHIFT;
+
/*
- * Dead-zone margin preventing too many migrations.
+ * Compute the dead-zone margin used to prevent too many task
+ * migrations with negligible energy savings.
+ * An energy saving is considered meaningful if it reduces the energy
+ * consumption of the EAS_CPU_PRV CPU candidate by at least ~1.56%.
*/
+ margin = eenv->cpu[EAS_CPU_PRV].energy >> 6;
- margin = eenv->nrg.before >> 6; /* ~1.56% */
+ /*
+ * By default the EAS_CPU_PRV CPU is considered the most energy
+ * efficient, with a 0 energy variation.
+ */
+ eenv->next_idx = EAS_CPU_PRV;
- diff = eenv->nrg.after - eenv->nrg.before;
-
- eenv->nrg.diff = (abs(diff) < margin) ? 0 : eenv->nrg.diff;
-
- return eenv->nrg.diff;
-}
-
-#ifdef CONFIG_SCHED_TUNE
-
-struct target_nrg schedtune_target_nrg;
-extern bool schedtune_initialized;
-/*
- * System energy normalization
- * Returns the normalized value, in the range [0..SCHED_CAPACITY_SCALE],
- * corresponding to the specified energy variation.
- */
-static inline int
-normalize_energy(int energy_diff)
-{
- u32 normalized_nrg;
-
- /* during early setup, we don't know the extents */
- if (unlikely(!schedtune_initialized))
- return energy_diff < 0 ? -1 : 1 ;
-
-#ifdef CONFIG_SCHED_DEBUG
- {
- int max_delta;
-
- /* Check for boundaries */
- max_delta = schedtune_target_nrg.max_power;
- max_delta -= schedtune_target_nrg.min_power;
- WARN_ON(abs(energy_diff) >= max_delta);
+ trace_sched_energy_diff(eenv->p, eenv->cpu[EAS_CPU_PRV].cpu_id,
+ eenv->cpu[EAS_CPU_PRV].energy,
+ eenv->cpu[EAS_CPU_NXT].cpu_id,
+ eenv->cpu[EAS_CPU_NXT].energy,
+ eenv->cpu[EAS_CPU_BKP].cpu_id,
+ eenv->cpu[EAS_CPU_BKP].energy);
+ /*
+ * Compare the other CPU candidates to find a CPU which can be
+ * more energy efficient than EAS_CPU_PRV.
+ */
+ for (cpu_idx = EAS_CPU_NXT; cpu_idx < EAS_CPU_CNT; ++cpu_idx) {
+ /* Skip invalid CPU candidates */
+ if (eenv->cpu[cpu_idx].cpu_id < 0)
+ continue;
+ /* Compute energy delta wrt EAS_CPU_PRV */
+ eenv->cpu[cpu_idx].nrg_delta =
+ eenv->cpu[cpu_idx].energy -
+ eenv->cpu[EAS_CPU_PRV].energy;
+ /* filter energy variations within the dead-zone margin */
+ if (abs(eenv->cpu[cpu_idx].nrg_delta) < margin)
+ eenv->cpu[cpu_idx].nrg_delta = 0;
+ /* update the scheduling candidate with min(nrg_delta) */
+ if (eenv->cpu[cpu_idx].nrg_delta <
+ eenv->cpu[eenv->next_idx].nrg_delta) {
+ eenv->next_idx = cpu_idx;
+ if (sched_feat(FBT_STRICT_ORDER))
+ break;
+ }
}
-#endif
- /* Do scaling using positive numbers to increase the range */
- normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff;
-
- /* Scale by energy magnitude */
- normalized_nrg <<= SCHED_CAPACITY_SHIFT;
-
- /* Normalize on max energy for target platform */
- normalized_nrg = reciprocal_divide(
- normalized_nrg, schedtune_target_nrg.rdiv);
-
- return (energy_diff < 0) ? -normalized_nrg : normalized_nrg;
+ return eenv->next_idx;
}
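
The dead-zone logic above is a single integer shift: a saving must exceed 1/64 (~1.56%) of the previous CPU's energy to count at all. A tiny sketch of the filter:

	#include <stdlib.h>	/* abs() */

	/* Zero out energy deltas that fall inside the ~1.56% dead-zone. */
	static int filter_nrg_delta(int nrg_delta, int prv_energy)
	{
		int margin = prv_energy >> 6;	/* 1/64 of prev CPU's energy */

		return (abs(nrg_delta) < margin) ? 0 : nrg_delta;
	}

	/* e.g. prv_energy = 1000 -> margin = 15; a delta of -12 is treated as 0 */
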
-static inline int
-energy_diff(struct energy_env *eenv)
-{
- int boost = schedtune_task_boost(eenv->task);
- int nrg_delta;
-
- /* Conpute "absolute" energy diff */
- __energy_diff(eenv);
-
- /* Return energy diff when boost margin is 0 */
- if (boost == 0)
- return eenv->nrg.diff;
-
- /* Compute normalized energy diff */
- nrg_delta = normalize_energy(eenv->nrg.diff);
- eenv->nrg.delta = nrg_delta;
-
- eenv->payoff = schedtune_accept_deltas(
- eenv->nrg.delta,
- eenv->cap.delta,
- eenv->task);
-
- trace_sched_energy_diff(eenv->task,
- eenv->src_cpu, eenv->dst_cpu, eenv->util_delta,
- eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff,
- eenv->cap.before, eenv->cap.after, eenv->cap.delta,
- eenv->nrg.delta, eenv->payoff);
-
- /*
- * When SchedTune is enabled, the energy_diff() function will return
- * the computed energy payoff value. Since the energy_diff() return
- * value is expected to be negative by its callers, this evaluation
- * function return a negative value each time the evaluation return a
- * positive payoff, which is the condition for the acceptance of
- * a scheduling decision
- */
- return -eenv->payoff;
-}
-#else /* CONFIG_SCHED_TUNE */
-#define energy_diff(eenv) __energy_diff(eenv)
-#endif
-
/*
* Detect M:N waker/wakee relationships via a switching-frequency heuristic.
*
@@ -6203,7 +6121,7 @@
return 1;
}
-static inline unsigned long boosted_task_util(struct task_struct *task);
+static inline unsigned long boosted_task_util(struct task_struct *p);
static inline bool __task_fits(struct task_struct *p, int cpu, int util)
{
@@ -6234,20 +6152,14 @@
return __task_fits(p, cpu, 0);
}
-static inline bool task_fits_spare(struct task_struct *p, int cpu)
+bool __cpu_overutilized(int cpu, int delta)
{
- return __task_fits(p, cpu, cpu_util(cpu));
-}
-
-bool __cpu_overutilized(int cpu, unsigned long util)
-{
- return (capacity_orig_of(cpu) * 1024 <
- util * sysctl_sched_capacity_margin);
+ return (capacity_of(cpu) * 1024) < ((cpu_util(cpu) + delta) * capacity_margin);
}
bool cpu_overutilized(int cpu)
{
- return __cpu_overutilized(cpu, cpu_util(cpu));
+ return __cpu_overutilized(cpu, 0);
}
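
The test is a fixed-point comparison against a capacity margin. Assuming capacity_margin = 1280 (a common choice, ~25% headroom; the actual value is defined outside this hunk), a sketch with a worked example:

	/* Assumed margin: 1280/1024 ~= 1.25, i.e. keep ~25% headroom. */
	#define CAPACITY_MARGIN	1280UL

	static int overutilized(unsigned long capacity, unsigned long util, int delta)
	{
		return capacity * 1024 < (util + delta) * CAPACITY_MARGIN;
	}

	/* capacity = 1024, util = 800, delta = 50:
	 * 1048576 < 850 * 1280 = 1088000 -> overutilized
	 */
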
#ifdef CONFIG_SCHED_TUNE
@@ -6292,16 +6204,16 @@
}
static inline long
-schedtune_task_margin(struct task_struct *task)
+schedtune_task_margin(struct task_struct *p)
{
- int boost = schedtune_task_boost(task);
+ int boost = schedtune_task_boost(p);
unsigned long util;
long margin;
if (boost == 0)
return 0;
- util = task_util(task);
+ util = task_util(p);
margin = schedtune_margin(util, boost);
return margin;
@@ -6316,7 +6228,7 @@
}
static inline int
-schedtune_task_margin(struct task_struct *task)
+schedtune_task_margin(struct task_struct *p)
{
return 0;
}
@@ -6324,9 +6236,9 @@
#endif /* CONFIG_SCHED_TUNE */
unsigned long
-boosted_cpu_util(int cpu)
+boosted_cpu_util(int cpu, struct sched_walt_cpu_load *walt_load)
{
- unsigned long util = cpu_util_freq(cpu, NULL);
+ unsigned long util = cpu_util_freq(cpu, walt_load);
long margin = schedtune_cpu_margin(util, cpu);
trace_sched_boost_cpu(cpu, util, margin);
@@ -6335,41 +6247,48 @@
}
static inline unsigned long
-boosted_task_util(struct task_struct *task)
+boosted_task_util(struct task_struct *p)
{
- unsigned long util = task_util(task);
- long margin = schedtune_task_margin(task);
+ unsigned long util = task_util(p);
+ long margin = schedtune_task_margin(p);
- trace_sched_boost_task(task, util, margin);
+ trace_sched_boost_task(p, util, margin);
return util + margin;
}
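
For context, schedtune's task margin (computed by schedtune_margin(), outside this hunk) is commonly boost% of the headroom above the task's utilization; treat the exact formula in this sketch as an assumption:

	#define SCHED_CAPACITY_SCALE	1024UL

	/* Assumed margin formula: boost% of the headroom above util. */
	static unsigned long boosted_util(unsigned long util, int boost_pct)
	{
		long margin = 0;

		if (boost_pct > 0)
			margin = (long)(SCHED_CAPACITY_SCALE - util) * boost_pct / 100;
		return util + margin;	/* util = 400, boost = 10% -> 400 + 62 = 462 */
	}
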
+static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
+{
+ return capacity_orig_of(cpu) - cpu_util_wake(cpu, p);
+}
+
/*
* find_idlest_group finds and returns the least busy CPU group within the
* domain.
+ *
+ * Assumes p is allowed on at least one CPU in sd.
*/
static struct sched_group *
find_idlest_group(struct sched_domain *sd, struct task_struct *p,
int this_cpu, int sd_flag)
{
struct sched_group *idlest = NULL, *group = sd->groups;
- struct sched_group *fit_group = NULL, *spare_group = NULL;
- unsigned long min_load = ULONG_MAX, this_load = 0;
- unsigned long fit_capacity = ULONG_MAX;
- unsigned long max_spare_capacity;
-
+ struct sched_group *most_spare_sg = NULL;
+ unsigned long min_runnable_load = ULONG_MAX;
+ unsigned long this_runnable_load = ULONG_MAX;
+ unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX;
+ unsigned long most_spare = 0, this_spare = 0;
int load_idx = sd->forkexec_idx;
- int imbalance = 100 + (sd->imbalance_pct-100)/2;
-
- max_spare_capacity = sysctl_sched_capacity_margin -
- SCHED_CAPACITY_SCALE;
+ int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
+ unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
+ (sd->imbalance_pct-100) / 100;
if (sd_flag & SD_BALANCE_WAKE)
load_idx = sd->wake_idx;
do {
- unsigned long load, avg_load, spare_capacity;
+ unsigned long load, avg_load, runnable_load;
+ unsigned long spare_cap, max_spare_cap;
int local_group;
int i;
@@ -6381,8 +6300,13 @@
local_group = cpumask_test_cpu(this_cpu,
sched_group_cpus(group));
- /* Tally up the load of all CPUs in the group */
+ /*
+ * Tally up the load of all CPUs in the group and find
+ * the group containing the CPU with the most spare capacity.
+ */
avg_load = 0;
+ runnable_load = 0;
+ max_spare_cap = 0;
for_each_cpu(i, sched_group_cpus(group)) {
/* Bias balancing toward cpus of our domain */
@@ -6391,55 +6315,85 @@
else
load = target_load(i, load_idx);
- avg_load += load;
+ runnable_load += load;
- /*
- * Look for most energy-efficient group that can fit
- * that can fit the task.
- */
- if (capacity_of(i) < fit_capacity && task_fits_spare(p, i)) {
- fit_capacity = capacity_of(i);
- fit_group = group;
- }
+ avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
- /*
- * Look for group which has most spare capacity on a
- * single cpu.
- */
- spare_capacity = capacity_of(i) - cpu_util(i);
- if (spare_capacity > max_spare_capacity) {
- max_spare_capacity = spare_capacity;
- spare_group = group;
- }
+ spare_cap = capacity_spare_wake(i, p);
+
+ if (spare_cap > max_spare_cap)
+ max_spare_cap = spare_cap;
}
/* Adjust by relative CPU capacity of the group */
- avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity;
+ avg_load = (avg_load * SCHED_CAPACITY_SCALE) /
+ group->sgc->capacity;
+ runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) /
+ group->sgc->capacity;
if (local_group) {
- this_load = avg_load;
- } else if (avg_load < min_load) {
- min_load = avg_load;
- idlest = group;
+ this_runnable_load = runnable_load;
+ this_avg_load = avg_load;
+ this_spare = max_spare_cap;
+ } else {
+ if (min_runnable_load > (runnable_load + imbalance)) {
+ /*
+ * The runnable load is significantly smaller,
+ * so we can pick this new CPU
+ */
+ min_runnable_load = runnable_load;
+ min_avg_load = avg_load;
+ idlest = group;
+ } else if ((runnable_load < (min_runnable_load + imbalance)) &&
+ (100*min_avg_load > imbalance_scale*avg_load)) {
+ /*
+ * The runnable loads are close, so we take
+ * blocked load into account through avg_load,
+ * which is blocked + runnable load
+ */
+ min_avg_load = avg_load;
+ idlest = group;
+ }
+
+ if (most_spare < max_spare_cap) {
+ most_spare = max_spare_cap;
+ most_spare_sg = group;
+ }
}
} while (group = group->next, group != sd->groups);
- if (fit_group)
- return fit_group;
+ /*
+ * The cross-over point between using spare capacity and least load
+ * is too conservative for high utilization tasks on partially
+ * utilized systems if we require spare_capacity > task_util(p),
+ * so we allow for some task stuffing by using
+ * spare_capacity > task_util(p)/2.
+ * Spare capacity can't be used for fork because the utilization has
+ * not been set yet: the task needs a rq to initialize its utilization.
+ */
+ if (sd_flag & SD_BALANCE_FORK)
+ goto skip_spare;
- if (spare_group)
- return spare_group;
+ if (this_spare > task_util(p) / 2 &&
+ imbalance_scale*this_spare > 100*most_spare)
+ return NULL;
+ else if (most_spare > task_util(p) / 2)
+ return most_spare_sg;
- if (!idlest || 100*this_load < imbalance*min_load)
+skip_spare:
+ if (!idlest ||
+ (min_runnable_load > (this_runnable_load + imbalance)) ||
+ ((this_runnable_load < (min_runnable_load + imbalance)) &&
+ (100*this_avg_load < imbalance_scale*min_avg_load)))
return NULL;
return idlest;
}
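
find_idlest_group() now uses two distinct thresholds: imbalance_scale, a percentage used to compare average loads, and imbalance, an absolute runnable-load slack derived from NICE_0_LOAD. A small sketch with typical numbers (assuming NICE_0_LOAD = 1024 and sd->imbalance_pct = 125):

	#include <stdio.h>

	#define NICE_0_LOAD	1024UL

	int main(void)
	{
		unsigned int imbalance_pct = 125;	/* typical sd->imbalance_pct */
		int imbalance_scale = 100 + (imbalance_pct - 100) / 2;
		unsigned long imbalance = NICE_0_LOAD * (imbalance_pct - 100) / 100;

		/* prints: imbalance_scale=112 imbalance=256 */
		printf("imbalance_scale=%d imbalance=%lu\n",
		       imbalance_scale, imbalance);
		return 0;
	}
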
/*
- * find_idlest_cpu - find the idlest cpu among the cpus in group.
+ * find_idlest_group_cpu - find the idlest cpu among the cpus in group.
*/
static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
{
unsigned long load, min_load = ULONG_MAX;
unsigned int min_exit_latency = UINT_MAX;
@@ -6454,7 +6408,7 @@
/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
- if (task_fits_spare(p, i)) {
+ if (idle_cpu(i)) {
struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
if (idle && idle->exit_latency < min_exit_latency) {
@@ -6466,8 +6420,7 @@
min_exit_latency = idle->exit_latency;
latest_idle_timestamp = rq->idle_stamp;
shallowest_idle_cpu = i;
- } else if (idle_cpu(i) &&
- (!idle || idle->exit_latency == min_exit_latency) &&
+ } else if ((!idle || idle->exit_latency == min_exit_latency) &&
rq->idle_stamp > latest_idle_timestamp) {
/*
* If equal or no active idle state, then
@@ -6476,13 +6429,6 @@
*/
latest_idle_timestamp = rq->idle_stamp;
shallowest_idle_cpu = i;
- } else if (shallowest_idle_cpu == -1) {
- /*
- * If we haven't found an idle CPU yet
- * pick a non-idle one that can fit the task as
- * fallback.
- */
- shallowest_idle_cpu = i;
}
} else if (shallowest_idle_cpu == -1) {
load = weighted_cpuload(i);
@@ -6496,6 +6442,68 @@
return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
}
+static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
+ int cpu, int prev_cpu, int sd_flag)
+{
+ int wu = sd_flag & SD_BALANCE_WAKE;
+ int cas_cpu = -1;
+ int new_cpu = cpu;
+
+ if (wu) {
+ schedstat_inc(p->se.statistics.nr_wakeups_cas_attempts);
+ schedstat_inc(this_rq()->eas_stats.cas_attempts);
+ }
+
+ if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
+ return prev_cpu;
+
+ while (sd) {
+ struct sched_group *group;
+ struct sched_domain *tmp;
+ int weight;
+
+ if (wu)
+ schedstat_inc(sd->eas_stats.cas_attempts);
+
+ if (!(sd->flags & sd_flag)) {
+ sd = sd->child;
+ continue;
+ }
+
+ group = find_idlest_group(sd, p, cpu, sd_flag);
+ if (!group) {
+ sd = sd->child;
+ continue;
+ }
+
+ new_cpu = find_idlest_group_cpu(group, p, cpu);
+ if (new_cpu == cpu) {
+ /* Now try balancing at a lower domain level of cpu */
+ sd = sd->child;
+ continue;
+ }
+
+ /* Now try balancing at a lower domain level of new_cpu */
+ cpu = cas_cpu = new_cpu;
+ weight = sd->span_weight;
+ sd = NULL;
+ for_each_domain(cpu, tmp) {
+ if (weight <= tmp->span_weight)
+ break;
+ if (tmp->flags & sd_flag)
+ sd = tmp;
+ }
+ /* while loop will break here if sd == NULL */
+ }
+
+ if (wu && (cas_cpu >= 0)) {
+ schedstat_inc(p->se.statistics.nr_wakeups_cas_count);
+ schedstat_inc(this_rq()->eas_stats.cas_count);
+ }
+
+ return new_cpu;
+}
+
#ifdef CONFIG_SCHED_SMT
static inline void set_idle_cores(int cpu, int val)
@@ -6571,7 +6579,7 @@
idle = false;
}
- if (!cpu_isolated(cpu) && idle)
+ if (idle)
return core;
}
@@ -6593,8 +6601,6 @@
for_each_cpu(cpu, cpu_smt_mask(target)) {
if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
continue;
- if (cpu_isolated(cpu))
- continue;
if (idle_cpu(cpu))
return cpu;
}
@@ -6647,8 +6653,6 @@
for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
continue;
- if (cpu_isolated(cpu))
- continue;
if (idle_cpu(cpu))
break;
}
@@ -6677,7 +6681,7 @@
schedstat_inc(this_rq()->eas_stats.sis_attempts);
if (!sysctl_sched_cstate_aware) {
- if (idle_cpu(target) && !cpu_isolated(target)) {
+ if (idle_cpu(target)) {
schedstat_inc(p->se.statistics.nr_wakeups_sis_idle);
schedstat_inc(this_rq()->eas_stats.sis_idle);
return target;
@@ -6686,8 +6690,7 @@
/*
* If the prevous cpu is cache affine and idle, don't be stupid.
*/
- if (i != target && cpus_share_cache(i, target) &&
- idle_cpu(i) && !cpu_isolated(i)) {
+ if (i != target && cpus_share_cache(i, target) && idle_cpu(i)) {
schedstat_inc(p->se.statistics.nr_wakeups_sis_cache_affine);
schedstat_inc(this_rq()->eas_stats.sis_cache_affine);
return i;
@@ -6728,9 +6731,6 @@
unsigned long new_usage = boosted_task_util(p);
unsigned long capacity_orig = capacity_orig_of(i);
- if (cpu_isolated(i))
- continue;
-
if (new_usage > capacity_orig || !idle_cpu(i))
goto next;
@@ -6750,9 +6750,6 @@
}
} else {
for_each_cpu(i, sched_group_cpus(sg)) {
- if (cpu_isolated(i))
- continue;
-
if (i == target || !idle_cpu(i))
goto next;
}
@@ -6780,463 +6777,654 @@
}
/*
- * Should task be woken to any available idle cpu?
- *
- * Waking tasks to idle cpu has mixed implications on both performance and
- * power. In many cases, scheduler can't estimate correctly impact of using idle
- * cpus on either performance or power. PF_WAKE_UP_IDLE allows external kernel
- * module to pass a strong hint to scheduler that the task in question should be
- * woken to idle cpu, generally to improve performance.
+ * cpu_util_wake: Compute cpu utilization with any contributions from
+ * the waking task p removed. check_for_migration() looks for a better CPU of
+ * rq->curr. For that case we should return the cpu util with contributions
+ * from the currently running task p removed.
*/
+static int cpu_util_wake(int cpu, struct task_struct *p)
+{
+ unsigned long util, capacity;
+
+#ifdef CONFIG_SCHED_WALT
+ /*
+ * WALT does not decay idle tasks in the same manner
+ * as PELT, so it makes little sense to subtract task
+ * utilization from cpu utilization. Instead just use
+ * cpu_util for this case.
+ */
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util &&
+ p->state == TASK_WAKING)
+ return cpu_util(cpu);
+#endif
+ /* Task has no contribution or is new */
+ if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
+ return cpu_util(cpu);
+
+ capacity = capacity_orig_of(cpu);
+ util = max_t(long, cpu_util(cpu) - task_util(p), 0);
+
+ return (util >= capacity) ? capacity : util;
+}
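
The subtraction above is deliberately clamped on both sides; the same arithmetic in isolation:

	/* Remove a task's utilization from a CPU's, clamped to [0, capacity]. */
	static unsigned long util_wake(unsigned long cpu_util,
				       unsigned long task_util,
				       unsigned long capacity)
	{
		long util = (long)cpu_util - (long)task_util;

		if (util < 0)
			util = 0;
		return ((unsigned long)util >= capacity) ? capacity
							 : (unsigned long)util;
	}
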
+
+struct find_best_target_env {
+ struct cpumask *rtg_target;
+ bool need_idle;
+ bool placement_boost;
+ bool avoid_prev_cpu;
+};
+
+static bool is_packing_eligible(struct task_struct *p, int target_cpu,
+ struct find_best_target_env *fbt_env,
+ unsigned int target_cpus_count)
+{
+ unsigned long tutil, estimated_capacity;
+
+ if (fbt_env->placement_boost || fbt_env->need_idle)
+ return false;
+
+ if (target_cpus_count != 1)
+ return true;
+
+ if (task_in_cum_window_demand(cpu_rq(target_cpu), p))
+ tutil = 0;
+ else
+ tutil = task_util(p);
+
+ estimated_capacity = cpu_util_cum(target_cpu, tutil);
+ estimated_capacity = add_capacity_margin(estimated_capacity,
+ target_cpu);
+
+ /*
+ * If there is only one active CPU and it is already above its current
+ * capacity, avoid placing additional task on the CPU.
+ */
+ return (estimated_capacity <= capacity_curr_of(target_cpu));
+}
+
+static inline bool skip_sg(struct task_struct *p, struct sched_group *sg,
+ struct cpumask *rtg_target)
+{
+ int fcpu = group_first_cpu(sg);
+
+ /* Are all CPUs isolated in this group? */
+ if (!sg->group_weight)
+ return true;
+
+ if (!task_fits_max(p, fcpu))
+ return true;
+
+ if (rtg_target && !cpumask_test_cpu(fcpu, rtg_target))
+ return true;
+
+ return false;
+}
+
+static int start_cpu(bool boosted)
+{
+ struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+ int start_cpu;
+
+ start_cpu = boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu;
+
+ return walt_start_cpu(start_cpu);
+}
+
+static inline int find_best_target(struct task_struct *p, int *backup_cpu,
+ bool boosted, bool prefer_idle,
+ struct find_best_target_env *fbt_env)
+{
+ unsigned long min_util = boosted_task_util(p);
+ unsigned long target_capacity = ULONG_MAX;
+ unsigned long min_wake_util = ULONG_MAX;
+ unsigned long target_max_spare_cap = 0;
+ unsigned long target_util = ULONG_MAX;
+ unsigned long best_active_util = ULONG_MAX;
+ unsigned long target_idle_max_spare_cap = 0;
+ int best_idle_cstate = INT_MAX;
+ struct sched_domain *sd;
+ struct sched_group *sg;
+ int best_active_cpu = -1;
+ int best_idle_cpu = -1;
+ int target_cpu = -1;
+ int cpu, i;
+ unsigned int active_cpus_count = 0;
+
+ *backup_cpu = -1;
+
+ schedstat_inc(p->se.statistics.nr_wakeups_fbt_attempts);
+ schedstat_inc(this_rq()->eas_stats.fbt_attempts);
+
+ /* Find start CPU based on boost value */
+ cpu = start_cpu(boosted);
+ if (cpu < 0) {
+ schedstat_inc(p->se.statistics.nr_wakeups_fbt_no_cpu);
+ schedstat_inc(this_rq()->eas_stats.fbt_no_cpu);
+ return -1;
+ }
+
+ /* Find SD for the start CPU */
+ sd = rcu_dereference(per_cpu(sd_ea, cpu));
+ if (!sd) {
+ schedstat_inc(p->se.statistics.nr_wakeups_fbt_no_sd);
+ schedstat_inc(this_rq()->eas_stats.fbt_no_sd);
+ return -1;
+ }
+
+ /* Scan CPUs in all SDs */
+ sg = sd->groups;
+ do {
+ cpumask_t search_cpus;
+ bool do_rotate = false, avoid_prev_cpu = false;
+
+ if (skip_sg(p, sg, fbt_env->rtg_target))
+ continue;
+
+ cpumask_copy(&search_cpus, tsk_cpus_allowed(p));
+ cpumask_and(&search_cpus, &search_cpus, sched_group_cpus(sg));
+ i = find_first_cpu_bit(p, &search_cpus, sg, &avoid_prev_cpu,
+ &do_rotate, &first_cpu_bit_env);
+ if (do_rotate)
+ fbt_env->avoid_prev_cpu = avoid_prev_cpu;
+
+retry:
+ while ((i = cpumask_next(i, &search_cpus)) < nr_cpu_ids) {
+ unsigned long capacity_curr = capacity_curr_of(i);
+ unsigned long capacity_orig = capacity_orig_of(i);
+ unsigned long wake_util, new_util, min_capped_util;
+
+ cpumask_clear_cpu(i, &search_cpus);
+ if (avoid_prev_cpu && i == task_cpu(p))
+ continue;
+
+ if (!cpu_online(i) || cpu_isolated(i) || is_reserved(i))
+ continue;
+
+ if (walt_cpu_high_irqload(i))
+ continue;
+
+ trace_sched_cpu_util(i);
+
+ /*
+ * p's blocked utilization is still accounted for on prev_cpu
+ * so prev_cpu will receive a negative bias due to the double
+ * accounting. However, the blocked utilization may be zero.
+ */
+ wake_util = cpu_util_wake(i, p);
+ new_util = wake_util + task_util(p);
+
+ /*
+ * Ensure minimum capacity to grant the required boost.
+ * The target CPU can already be at a capacity level higher
+ * than the one required to boost the task.
+ */
+ new_util = max(min_util, new_util);
+
+ /*
+ * Include minimum capacity constraint:
+ * new_util contains the required utilization including
+ * boost. min_capped_util also takes into account a
+ * minimum capacity cap imposed on the CPU by external
+ * actors.
+ */
+ min_capped_util = max(new_util, capacity_min_of(i));
+
+ if (new_util > capacity_orig)
+ continue;
+
+ /*
+ * Case A) Latency sensitive tasks
+ *
+ * Unconditionally favoring tasks that prefer idle CPU to
+ * improve latency.
+ *
+ * Looking for:
+ * - an idle CPU, whatever its idle_state is, since
+ * the first CPUs we explore are more likely to be
+ * reserved for latency sensitive tasks.
+ * - a non idle CPU where the task fits in its current
+ * capacity and has the maximum spare capacity.
+ * - a non idle CPU with lower contention from other
+ * tasks and running at the lowest possible OPP.
+ *
+ * The last two goals try to favor a non idle CPU
+ * where the task can run as if it is "almost alone".
+ * A maximum spare capacity CPU is favoured since
+ * the task already fits into that CPU's capacity
+ * without waiting for an OPP chance.
+ *
+ * The following code path is the only one in the CPUs
+ * exploration loop which is always used by
+ * prefer_idle tasks. It exits the loop with either a
+ * best_active_cpu or a target_cpu which should
+ * represent an optimal choice for latency sensitive
+ * tasks.
+ */
+ if (prefer_idle) {
+
+ /*
+ * Case A.1: IDLE CPU
+ * Return the first IDLE CPU we find.
+ */
+ if (idle_cpu(i)) {
+ schedstat_inc(p->se.statistics.nr_wakeups_fbt_pref_idle);
+ schedstat_inc(this_rq()->eas_stats.fbt_pref_idle);
+
+ trace_sched_find_best_target(p,
+ prefer_idle, min_util,
+ cpu, best_idle_cpu,
+ best_active_cpu, i);
+
+ return i;
+ }
+
+ /*
+ * Case A.2: Target ACTIVE CPU
+ * Favor CPUs with max spare capacity.
+ */
+ if ((capacity_curr > new_util) &&
+ (capacity_orig - new_util > target_max_spare_cap)) {
+ target_max_spare_cap = capacity_orig - new_util;
+ target_cpu = i;
+ continue;
+ }
+ if (target_cpu != -1)
+ continue;
+
+ /*
+ * Case A.3: Backup ACTIVE CPU
+ * Favor CPUs with:
+ * - lower utilization due to other tasks
+ * - lower utilization with the task in
+ */
+ if (wake_util > min_wake_util)
+ continue;
+ if (new_util > best_active_util)
+ continue;
+ min_wake_util = wake_util;
+ best_active_util = new_util;
+ best_active_cpu = i;
+ continue;
+ }
+
+ /*
+ * Favor CPUs with smaller capacity for Non latency
+ * sensitive tasks.
+ */
+ if (capacity_orig > target_capacity)
+ continue;
+
+ /*
+ * Case B) Non latency sensitive tasks on IDLE CPUs.
+ *
+ * Find an optimal backup IDLE CPU for non latency
+ * sensitive tasks.
+ *
+ * Looking for:
+ * - minimizing the capacity_orig,
+ * i.e. preferring LITTLE CPUs
+ * - favoring shallowest idle states
+ * i.e. avoid to wakeup deep-idle CPUs
+ *
+ * The following code path is used by non latency
+ * sensitive tasks if IDLE CPUs are available. If at
+ * least one such CPU is available, it sets
+ * best_idle_cpu to the most suitable idle CPU to be
+ * selected.
+ *
+ * If idle CPUs are available, favour these CPUs to
+ * improve performance by spreading tasks.
+ * Indeed, the energy_diff() computed by the caller
+ * will take care of minimizing the energy
+ * consumption without affecting performance.
+ */
+ if (idle_cpu(i)) {
+ int idle_idx = idle_get_state_idx(cpu_rq(i));
+
+ /* Favor CPUs that won't end up running at a
+ * high OPP.
+ */
+ if ((capacity_orig - min_capped_util) <
+ target_idle_max_spare_cap)
+ continue;
+
+ /*
+ * Skip CPUs in deeper idle state, but only
+ * if they are also less energy efficient.
+ * IOW, prefer a deep IDLE LITTLE CPU vs a
+ * shallow idle big CPU.
+ */
+ if (sysctl_sched_cstate_aware &&
+ best_idle_cstate <= idle_idx)
+ continue;
+
+ /* Keep track of best idle CPU */
+ target_capacity = capacity_orig;
+ target_idle_max_spare_cap = capacity_orig -
+ min_capped_util;
+ best_idle_cstate = idle_idx;
+ best_idle_cpu = i;
+ continue;
+ }
+
+ /*
+ * Case C) Non latency sensitive tasks on ACTIVE CPUs.
+ *
+ * Pack tasks in the most energy efficient capacities.
+ *
+ * This task packing strategy prefers more energy
+ * efficient CPUs (i.e. pack on smaller maximum
+ * capacity CPUs) while also trying to spread tasks to
+ * run them all at the lower OPP.
+ *
+ * This assumes for example that it's more energy
+ * efficient to run two tasks on two CPUs at a lower
+ * OPP than packing both on a single CPU but running
+ * that CPU at a higher OPP.
+ *
+ * Thus, this case keeps track of the CPU with the
+ * smallest maximum capacity and highest spare maximum
+ * capacity.
+ */
+
+ active_cpus_count++;
+
+ /* Favor CPUs with maximum spare capacity */
+ if ((capacity_orig - min_capped_util) <
+ target_max_spare_cap)
+ continue;
+
+ target_max_spare_cap = capacity_orig - min_capped_util;
+ target_capacity = capacity_orig;
+ target_util = new_util;
+ target_cpu = i;
+ }
+
+ if (do_rotate) {
+ /*
+ * We started iteration somewhere in the middle of
+ * cpumask. Iterate once again from bit 0 to the
+ * previous starting point bit.
+ */
+ do_rotate = false;
+ i = -1;
+ goto retry;
+ }
+
+ if (!sysctl_sched_is_big_little && !prefer_idle) {
+
+ /*
+ * If we find an idle CPU in the primary cluster,
+ * stop the search. We select this idle CPU or
+ * the active CPU (if there is one), whichever
+ * saves more energy.
+ */
+ if (best_idle_cpu != -1)
+ break;
+
+ if (fbt_env->placement_boost) {
+ target_capacity = ULONG_MAX;
+ continue;
+ }
+
+ /*
+ * If we found an active CPU and its utilization
+ * is below the minimum packing threshold (overlap),
+ * no need to search further. Otherwise reset
+ * the target_capacity and continue the search.
+ */
+ if (target_cpu != -1 && target_util <
+ sched_smp_overlap_capacity)
+ break;
+
+ target_capacity = ULONG_MAX;
+ }
+ } while (sg = sg->next, sg != sd->groups);
+
+ if (best_idle_cpu != -1 && !is_packing_eligible(p, target_cpu, fbt_env,
+ active_cpus_count)) {
+ if (target_cpu == task_cpu(p))
+ fbt_env->avoid_prev_cpu = true;
+
+ target_cpu = best_idle_cpu;
+ best_idle_cpu = -1;
+ }
+
+ /*
+ * For non latency sensitive tasks, cases B and C in the previous loop,
+ * we pick the best IDLE CPU only if we were not able to find a target
+ * ACTIVE CPU.
+ *
+ * Policy priorities:
+ *
+ * - prefer_idle tasks:
+ *
+ * a) IDLE CPU available, we return immediately
+ * b) ACTIVE CPU where task fits and has the biggest maximum spare
+ * capacity (i.e. target_cpu)
+ * c) ACTIVE CPU with less contention due to other tasks
+ * (i.e. best_active_cpu)
+ *
+ * - NON prefer_idle tasks:
+ *
+ * a) ACTIVE CPU: target_cpu
+ * b) IDLE CPU: best_idle_cpu
+ */
+ if (target_cpu == -1)
+ target_cpu = prefer_idle
+ ? best_active_cpu
+ : best_idle_cpu;
+ else
+ *backup_cpu = prefer_idle
+ ? best_active_cpu
+ : best_idle_cpu;
+
+ trace_sched_find_best_target(p, prefer_idle, min_util, cpu,
+ best_idle_cpu, best_active_cpu,
+ target_cpu);
+
+ schedstat_inc(p->se.statistics.nr_wakeups_fbt_count);
+ schedstat_inc(this_rq()->eas_stats.fbt_count);
+
+ return target_cpu;
+}
+
+/*
+ * Disable WAKE_AFFINE in the case where task @p doesn't fit in the
+ * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
+ *
+ * In that case WAKE_AFFINE doesn't make sense and we'll let
+ * BALANCE_WAKE sort things out.
+ */
+static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
+{
+ long min_cap, max_cap;
+ min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
+ max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
+ /* Minimum capacity is close to max, no need to abort wake_affine */
+ if (max_cap - min_cap < max_cap >> 3)
+ return 0;
+
+ /* Bring task utilization in sync with prev_cpu */
+ sync_entity_load_avg(&p->se);
+
+ return min_cap * 1024 < task_util(p) * capacity_margin;
+}
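
A compact restatement of the two checks in wake_cap(), again assuming capacity_margin = 1280 (its value is defined outside this hunk):

	#define CAPACITY_MARGIN	1280UL

	/* Non-zero means: skip wake_affine, let BALANCE_WAKE place the task. */
	static int wake_cap_check(long min_cap, long max_cap,
				  unsigned long task_util)
	{
		/* capacities are within 12.5% of each other: affine wakeup is fine */
		if (max_cap - min_cap < max_cap >> 3)
			return 0;
		/* misfit: task doesn't fit the smaller CPU with ~25% headroom */
		return min_cap * 1024 < (long)(task_util * CAPACITY_MARGIN);
	}
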
+
static inline int wake_to_idle(struct task_struct *p)
{
return (current->flags & PF_WAKE_UP_IDLE) ||
(p->flags & PF_WAKE_UP_IDLE);
}
-static bool
-is_packing_eligible(struct task_struct *p, unsigned long task_util,
- struct sched_group *sg_target,
- unsigned long target_cpu_new_util_cum,
- int targeted_cpus)
+static inline bool
+bias_to_waker_cpu(struct task_struct *p, int cpu, struct cpumask *rtg_target)
{
- int cpu_cap_idx_pack, cpu_cap_idx_spread, cap_idx0, cap_idx1;
+ int rtg_target_cpu = rtg_target ? cpumask_first(rtg_target) : cpu;
- if (targeted_cpus > 1)
- /*
- * More than one CPUs were evaulated and target_cpu is the
- * least loaded CPU among the CPUs. Thus target_cpu won't
- * raise OPP.
- */
- return true;
-
- /*
- * There is only one CPU out of C-state.
- *
- * cpu_cap_idx_pack contains estimated OPP index of target_cpu when we
- * pack the new task onto the target_cpu.
- * cap_idx0 and cap_idx1 contain OPP indices of two CPUs, one for
- * target_cpu without new task's load, one other for new idle CPU with
- * task's load.
- *
- * Pack : Spread :
- * cap_idx_pack is new OPP. max(cap_idx0, cap_idx1) is new OPP.
- * ________________ ________________
- * | | | | ______________
- * | cap_idx_pack | | cap_idx0 | | cap_idx1 |
- * | (target_cpu) | | (target_cpu) | | (idle cpu) |
- * ---------------- ---------------- --------------
- *
- * The target_cpu's current capacity can be much more than target_cpu's
- * current utilization due to for example hysteresis while task
- * migration. In that the case, packing onto the target_cpu based on
- * current capacity would deprive chance to lower the OPP and will end
- * up making target_cpu to keep the higher OOP longer than spreading.
- *
- * Try task packing only when packing won't make to keep the current
- * OPP longer than wihout packing.
- */
-
- cpu_cap_idx_pack = __find_new_capacity(target_cpu_new_util_cum,
- sg_target->sge);
-
- cap_idx0 = __find_new_capacity(target_cpu_new_util_cum - task_util,
- sg_target->sge);
- cap_idx1 = __find_new_capacity(task_util, sg_target->sge);
-
- cpu_cap_idx_spread = max(cap_idx0, cap_idx1);
-
- trace_sched_energy_diff_packing(p, task_util, targeted_cpus,
- cpu_cap_idx_pack, cpu_cap_idx_spread);
-
- return cpu_cap_idx_pack == cpu_cap_idx_spread;
+ return cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) &&
+ cpu_active(cpu) && !cpu_isolated(cpu) &&
+ capacity_orig_of(cpu) >= capacity_orig_of(rtg_target_cpu) &&
+ task_fits_max(p, cpu);
}
-unsigned int sched_smp_overlap_capacity = SCHED_CAPACITY_SCALE;
-
-static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync)
+static inline struct cpumask *find_rtg_target(struct task_struct *p)
{
- struct sched_domain *sd;
- struct sched_group *sg, *sg_target, *start_sg;
- int target_max_cap = INT_MAX;
- int target_cpu = -1, targeted_cpus = 0;
- unsigned long task_util_boosted = 0, curr_util = 0;
- long new_util, new_util_cum;
- int i;
- int ediff = -1;
- int cpu = smp_processor_id();
- int min_util_cpu = -1;
- int min_util_cpu_idle_idx = INT_MAX;
- long min_util_cpu_util_cum = LONG_MAX;
- unsigned int min_util = UINT_MAX;
- int cpu_idle_idx;
- int min_idle_idx_cpu;
- int min_idle_idx = INT_MAX;
- bool safe_to_pack = false;
- unsigned int target_cpu_util = UINT_MAX;
- long target_cpu_new_util_cum = LONG_MAX;
- struct cpumask *rtg_target = NULL;
- int isolated_candidate = -1;
- bool need_idle;
- enum sched_boost_policy placement_boost = task_sched_boost(p) ?
- sched_boost_policy() : SCHED_BOOST_NONE;
struct related_thread_group *grp;
- cpumask_t search_cpus;
- int prev_cpu = task_cpu(p);
- int start_cpu = walt_start_cpu(prev_cpu);
- bool do_rotate = false;
- bool avoid_prev_cpu = false;
+ struct cpumask *rtg_target;
- sd = rcu_dereference(per_cpu(sd_ea, start_cpu));
+ rcu_read_lock();
- if (!sd)
- return target;
-
- sg = sd->groups;
- sg_target = sg;
-
- sync = sync && sysctl_sched_sync_hint_enable;
-
- curr_util = boosted_task_util(cpu_rq(cpu)->curr);
-
- need_idle = wake_to_idle(p) || schedtune_prefer_idle(p);
- if (need_idle)
- sync = 0;
grp = task_related_thread_group(p);
- if (grp && grp->preferred_cluster)
+ if (grp && grp->preferred_cluster) {
rtg_target = &grp->preferred_cluster->cpus;
-
- if (sync && bias_to_waker_cpu(p, cpu, rtg_target)) {
- trace_sched_task_util_bias_to_waker(p, prev_cpu,
- task_util(p), cpu, cpu, 0, need_idle);
- return cpu;
+ if (!task_fits_max(p, cpumask_first(rtg_target)))
+ rtg_target = NULL;
+ } else {
+ rtg_target = NULL;
}
- task_util_boosted = boosted_task_util(p);
- if (sysctl_sched_is_big_little) {
- /*
- * Find group with sufficient capacity. We only get here if no cpu is
- * overutilized. We may end up overutilizing a cpu by adding the task,
- * but that should not be any worse than select_idle_sibling().
- * load_balance() should sort it out later as we get above the tipping
- * point.
- */
- do {
- int max_cap_cpu;
- cpumask_t avail_cpus;
+ rcu_read_unlock();
- /* Are all CPUs isolated in this group? */
- if (unlikely(!sg->group_weight))
- continue;
+ return rtg_target;
+}
- /* Can this task run on any CPUs of this group? */
- cpumask_and(&avail_cpus, sched_group_cpus(sg),
- tsk_cpus_allowed(p));
- cpumask_andnot(&avail_cpus, &avail_cpus,
- cpu_isolated_mask);
- if (cpumask_empty(&avail_cpus))
- continue;
+static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync)
+{
+ bool boosted, prefer_idle;
+ struct sched_domain *sd;
+ int target_cpu;
+ int backup_cpu = -1;
+ int next_cpu = -1;
+ struct cpumask *rtg_target = find_rtg_target(p);
+ struct find_best_target_env fbt_env;
- /* Assuming all cpus are the same in group */
- max_cap_cpu = group_first_cpu(sg);
+ schedstat_inc(p->se.statistics.nr_wakeups_secb_attempts);
+ schedstat_inc(this_rq()->eas_stats.secb_attempts);
- /*
- * Assume smaller max capacity means more energy-efficient.
- * Ideally we should query the energy model for the right
- * answer but it easily ends up in an exhaustive search.
- */
- if (capacity_orig_of(max_cap_cpu) < target_max_cap &&
- task_fits_max(p, max_cap_cpu)) {
- sg_target = sg;
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+ boosted = schedtune_task_boost(p) > 0;
+ prefer_idle = schedtune_prefer_idle(p) > 0;
+#else
+ boosted = get_sysctl_sched_cfs_boost() > 0;
+ prefer_idle = 0;
+#endif
- if (rtg_target) {
- /*
- * For tasks that belong to a related
- * thread group, select the preferred
- * cluster if the task can fit there,
- * otherwise select the cluster which
- * can fit the task.
- */
- if (cpumask_test_cpu(max_cap_cpu,
- rtg_target))
- break;
- continue;
- }
-
- target_max_cap = capacity_of(max_cap_cpu);
- }
- } while (sg = sg->next, sg != sd->groups);
+ fbt_env.rtg_target = rtg_target;
+ if (sched_feat(EAS_USE_NEED_IDLE) && prefer_idle) {
+ fbt_env.need_idle = true;
+ prefer_idle = false;
+ } else {
+ fbt_env.need_idle = wake_to_idle(p);
}
+ fbt_env.placement_boost = task_sched_boost(p) ?
+ sched_boost_policy() != SCHED_BOOST_NONE :
+ false;
+ fbt_env.avoid_prev_cpu = false;
- start_sg = sg_target;
-next_sg:
- cpumask_copy(&search_cpus, tsk_cpus_allowed(p));
- cpumask_and(&search_cpus, &search_cpus,
- sched_group_cpus(sg_target));
+ if (prefer_idle || fbt_env.need_idle)
+ sync = 0;
- i = find_first_cpu_bit(p, &search_cpus, sg_target,
- &avoid_prev_cpu, &do_rotate,
- &first_cpu_bit_env);
+ if (sysctl_sched_sync_hint_enable && sync) {
+ int cpu = smp_processor_id();
-retry:
- /* Find cpu with sufficient capacity */
- while ((i = cpumask_next(i, &search_cpus)) < nr_cpu_ids) {
- cpumask_clear_cpu(i, &search_cpus);
-
- if (cpu_isolated(i))
- continue;
-
- if (isolated_candidate == -1)
- isolated_candidate = i;
-
- if (avoid_prev_cpu && i == prev_cpu)
- continue;
-
- if (is_reserved(i))
- continue;
-
- if (sched_cpu_high_irqload(i))
- continue;
-
- /*
- * Since this code is inside sched_is_big_little,
- * we are going to assume that boost policy is
- * SCHED_BOOST_ON_BIG.
- */
- if (placement_boost != SCHED_BOOST_NONE) {
- new_util = cpu_util(i);
- if (new_util < min_util) {
- min_util_cpu = i;
- min_util = new_util;
- }
- continue;
- }
-
- /*
- * p's blocked utilization is still accounted for on prev_cpu
- * so prev_cpu will receive a negative bias due to the double
- * accounting. However, the blocked utilization may be zero.
- */
- new_util = cpu_util(i) + task_util_boosted;
-
- if (task_in_cum_window_demand(cpu_rq(i), p))
- new_util_cum = cpu_util_cum(i, 0) +
- task_util_boosted - task_util(p);
- else
- new_util_cum = cpu_util_cum(i, 0) +
- task_util_boosted;
-
- if (sync && i == cpu)
- new_util -= curr_util;
-
- trace_sched_cpu_util(p, i, task_util_boosted, curr_util,
- new_util_cum, sync);
-
- /*
- * Ensure minimum capacity to grant the required boost.
- * The target CPU can be already at a capacity level higher
- * than the one required to boost the task.
- */
- if (new_util > capacity_orig_of(i))
- continue;
-
- cpu_idle_idx = idle_get_state_idx(cpu_rq(i));
-
- if (!need_idle &&
- add_capacity_margin(new_util_cum, i) <
- capacity_curr_of(i)) {
- if (sysctl_sched_cstate_aware) {
- if (cpu_idle_idx < min_idle_idx) {
- min_idle_idx = cpu_idle_idx;
- min_idle_idx_cpu = i;
- target_cpu = i;
- target_cpu_util = new_util;
- target_cpu_new_util_cum =
- new_util_cum;
- targeted_cpus = 1;
- } else if (cpu_idle_idx ==
- min_idle_idx &&
- (target_cpu_util >
- new_util ||
- (target_cpu_util ==
- new_util &&
- (i == prev_cpu ||
- (target_cpu !=
- prev_cpu &&
- target_cpu_new_util_cum >
- new_util_cum))))) {
- min_idle_idx_cpu = i;
- target_cpu = i;
- target_cpu_util = new_util;
- target_cpu_new_util_cum =
- new_util_cum;
- targeted_cpus++;
- }
- } else if (cpu_rq(i)->nr_running) {
- target_cpu = i;
- do_rotate = false;
- break;
- }
- } else if (!need_idle) {
- /*
- * At least one CPU other than target_cpu is
- * going to raise CPU's OPP higher than current
- * because current CPU util is more than current
- * capacity + margin. We can safely do task
- * packing without worrying about doing such
- * itself raises OPP.
- */
- safe_to_pack = true;
- }
-
- /*
- * cpu has capacity at higher OPP, keep it as
- * fallback.
- */
- if (new_util < min_util) {
- min_util_cpu = i;
- min_util = new_util;
- min_util_cpu_idle_idx = cpu_idle_idx;
- min_util_cpu_util_cum = new_util_cum;
- } else if (sysctl_sched_cstate_aware &&
- min_util == new_util) {
- if (min_util_cpu == task_cpu(p))
- continue;
-
- if (i == task_cpu(p) ||
- (cpu_idle_idx < min_util_cpu_idle_idx ||
- (cpu_idle_idx == min_util_cpu_idle_idx &&
- min_util_cpu_util_cum > new_util_cum))) {
- min_util_cpu = i;
- min_util_cpu_idle_idx = cpu_idle_idx;
- min_util_cpu_util_cum = new_util_cum;
- }
+ if (bias_to_waker_cpu(p, cpu, rtg_target)) {
+ schedstat_inc(p->se.statistics.nr_wakeups_secb_sync);
+ schedstat_inc(this_rq()->eas_stats.secb_sync);
+ return cpu;
}
}
- if (do_rotate) {
- /*
- * We started iteration somewhere in the middle of
- * cpumask. Iterate once again from bit 0 to the
- * previous starting point bit.
- */
- do_rotate = false;
- i = -1;
- goto retry;
+ rcu_read_lock();
+
+ sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
+ if (!sd) {
+ target_cpu = prev_cpu;
+ goto unlock;
}
- /*
- * If we don't find a CPU that fits this task without
- * increasing OPP above sched_smp_overlap_capacity or
- * when placement boost is active, expand the search to
- * the other groups on a SMP system.
- */
- if (!sysctl_sched_is_big_little &&
- (placement_boost == SCHED_BOOST_ON_ALL ||
- (target_cpu == -1 && min_util_cpu_util_cum >
- sched_smp_overlap_capacity))) {
- if (sg_target->next != start_sg) {
- sg_target = sg_target->next;
- goto next_sg;
- }
+ sync_entity_load_avg(&p->se);
+
+ /* Find a cpu with sufficient capacity */
+ next_cpu = find_best_target(p, &backup_cpu, boosted, prefer_idle,
+ &fbt_env);
+ if (next_cpu == -1) {
+ target_cpu = prev_cpu;
+ goto unlock;
}
- if (target_cpu == -1 ||
- (target_cpu != min_util_cpu && !safe_to_pack &&
- !is_packing_eligible(p, task_util_boosted, sg_target,
- target_cpu_new_util_cum,
- targeted_cpus))) {
- if (likely(min_util_cpu != -1))
- target_cpu = min_util_cpu;
- else if (cpu_isolated(task_cpu(p)) &&
- isolated_candidate != -1)
- target_cpu = isolated_candidate;
- else
- target_cpu = task_cpu(p);
+ if (fbt_env.placement_boost || fbt_env.need_idle ||
+ fbt_env.avoid_prev_cpu || (rtg_target &&
+ !cpumask_test_cpu(prev_cpu, rtg_target))) {
+ target_cpu = next_cpu;
+ goto unlock;
}
- if (target_cpu != task_cpu(p) && !avoid_prev_cpu &&
- !cpu_isolated(task_cpu(p))) {
+ /* Unconditionally prefer IDLE CPUs for boosted/prefer_idle tasks */
+ if ((boosted || prefer_idle) && idle_cpu(next_cpu)) {
+ schedstat_inc(p->se.statistics.nr_wakeups_secb_idle_bt);
+ schedstat_inc(this_rq()->eas_stats.secb_idle_bt);
+ target_cpu = next_cpu;
+ goto unlock;
+ }
+
+ target_cpu = prev_cpu;
+ if (next_cpu != prev_cpu) {
+ int delta = 0;
struct energy_env eenv = {
- .util_delta = task_util(p),
- .src_cpu = task_cpu(p),
- .dst_cpu = target_cpu,
- .task = p,
- .sync_cpu = sync ? smp_processor_id() : -1,
- .curr_util = curr_util,
+ .p = p,
+ .util_delta = task_util(p),
+ /* Task's previous CPU candidate */
+ .cpu[EAS_CPU_PRV] = {
+ .cpu_id = prev_cpu,
+ },
+ /* Main alternative CPU candidate */
+ .cpu[EAS_CPU_NXT] = {
+ .cpu_id = next_cpu,
+ },
+ /* Backup alternative CPU candidate */
+ .cpu[EAS_CPU_BKP] = {
+ .cpu_id = backup_cpu,
+ },
};
- /*
- * We always want to migrate the task to the preferred cluster.
- */
- if (rtg_target) {
- trace_sched_task_util_colocated(p, task_cpu(p),
- task_util(p),
- cpumask_first(rtg_target),
- target_cpu, 0, need_idle);
- return target_cpu;
- }
-
- if (need_idle) {
- trace_sched_task_util_need_idle(p, task_cpu(p),
- task_util(p),
- target_cpu, target_cpu,
- 0, need_idle);
- return target_cpu;
- }
-
- /*
- * We always want to migrate the task to the best CPU when
- * placement boost is active.
- */
- if (placement_boost) {
- trace_sched_task_util_boosted(p, task_cpu(p),
- task_util(p),
- target_cpu,
- target_cpu, 0, need_idle);
- return target_cpu;
- }
#ifdef CONFIG_SCHED_WALT
- if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
- task_util_boosted = 0;
-#else
- task_util_boosted = 0;
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util &&
+ p->state == TASK_WAKING)
+ delta = task_util(p);
#endif
/* Not enough spare capacity on previous cpu */
- if (__cpu_overutilized(task_cpu(p),
- cpu_util(task_cpu(p)) +
- task_util_boosted)) {
- trace_sched_task_util_overutilzed(p, task_cpu(p),
- task_util(p), target_cpu,
- target_cpu, 0, need_idle);
- return target_cpu;
+ if (__cpu_overutilized(prev_cpu, delta)) {
+ schedstat_inc(p->se.statistics.nr_wakeups_secb_insuff_cap);
+ schedstat_inc(this_rq()->eas_stats.secb_insuff_cap);
+ target_cpu = next_cpu;
+ goto unlock;
}
- ediff = energy_diff(&eenv);
-
- if (!sysctl_sched_cstate_aware) {
- if (ediff >= 0) {
- trace_sched_task_util_energy_diff(p,
- task_cpu(p), task_util(p),
- target_cpu, task_cpu(p), ediff,
- need_idle);
- return task_cpu(p);
- }
- } else {
- if (ediff > 0) {
- trace_sched_task_util_energy_diff(p,
- task_cpu(p), task_util(p),
- target_cpu, task_cpu(p), ediff,
- need_idle);
- return task_cpu(p);
- }
+ /* Check if EAS_CPU_NXT is a more energy efficient CPU */
+ if (select_energy_cpu_idx(&eenv) != EAS_CPU_PRV) {
+ schedstat_inc(p->se.statistics.nr_wakeups_secb_nrg_sav);
+ schedstat_inc(this_rq()->eas_stats.secb_nrg_sav);
+ target_cpu = eenv.cpu[eenv.next_idx].cpu_id;
+ goto unlock;
}
+
+ schedstat_inc(p->se.statistics.nr_wakeups_secb_no_nrg_sav);
+ schedstat_inc(this_rq()->eas_stats.secb_no_nrg_sav);
+ target_cpu = prev_cpu;
+ goto unlock;
}
- trace_sched_task_util_energy_aware(p, task_cpu(p), task_util(p),
- target_cpu, target_cpu, ediff,
- need_idle);
+ schedstat_inc(p->se.statistics.nr_wakeups_secb_count);
+ schedstat_inc(this_rq()->eas_stats.secb_count);
+
+unlock:
+ trace_sched_task_util(p, next_cpu, backup_cpu, target_cpu, sync,
+ fbt_env.need_idle, fbt_env.placement_boost,
+ rtg_target ? cpumask_first(rtg_target) : -1);
+ rcu_read_unlock();
return target_cpu;
}
@@ -7261,20 +7449,15 @@
int want_affine = 0;
int sync = wake_flags & WF_SYNC;
- if (energy_aware()) {
- rcu_read_lock();
- new_cpu = energy_aware_wake_cpu(p, prev_cpu, sync);
- rcu_read_unlock();
- return new_cpu;
- }
-
if (sd_flag & SD_BALANCE_WAKE) {
record_wakee(p);
- want_affine = (!wake_wide(p) && task_fits_max(p, cpu) &&
- cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) ||
- energy_aware();
+ want_affine = (!wake_wide(p) && !wake_cap(p, cpu, prev_cpu) &&
+ cpumask_test_cpu(cpu, tsk_cpus_allowed(p)));
}
+ if (energy_aware())
+ return select_energy_cpu_brute(p, prev_cpu, sync);
+
rcu_read_lock();
for_each_domain(cpu, tmp) {
if (!(tmp->flags & SD_LOAD_BALANCE))
@@ -7302,45 +7485,21 @@
new_cpu = cpu;
}
+ if (sd && !(sd_flag & SD_BALANCE_FORK)) {
+ /*
+ * We're going to need the task's util for capacity_spare_wake
+ * in find_idlest_group. Sync it up to prev_cpu's
+ * last_update_time.
+ */
+ sync_entity_load_avg(&p->se);
+ }
+
if (!sd) {
- if (energy_aware() && !cpu_rq(cpu)->rd->overutilized)
- new_cpu = energy_aware_wake_cpu(p, prev_cpu, sync);
- else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
+ if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
- } else while (sd) {
- struct sched_group *group;
- int weight;
-
- if (!(sd->flags & sd_flag)) {
- sd = sd->child;
- continue;
- }
-
- group = find_idlest_group(sd, p, cpu, sd_flag);
- if (!group) {
- sd = sd->child;
- continue;
- }
-
- new_cpu = find_idlest_cpu(group, p, cpu);
- if (new_cpu == -1 || new_cpu == cpu) {
- /* Now try balancing at a lower domain level of cpu */
- sd = sd->child;
- continue;
- }
-
- /* Now try balancing at a lower domain level of new_cpu */
- cpu = new_cpu;
- weight = sd->span_weight;
- sd = NULL;
- for_each_domain(cpu, tmp) {
- if (weight <= tmp->span_weight)
- break;
- if (tmp->flags & sd_flag)
- sd = tmp;
- }
- /* while loop will break here if sd == NULL */
+ } else {
+ new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
}
rcu_read_unlock();
@@ -8304,10 +8463,6 @@
{
raw_spin_lock(&rq->lock);
attach_task(rq, p);
- /*
- * We want to potentially raise target_cpu's OPP.
- */
- update_capacity_of(cpu_of(rq));
raw_spin_unlock(&rq->lock);
}
@@ -8329,11 +8484,6 @@
attach_task(env->dst_rq, p);
}
- /*
- * We want to potentially raise env.dst_cpu's OPP.
- */
- update_capacity_of(env->dst_cpu);
-
raw_spin_unlock(&env->dst_rq->lock);
}
@@ -8569,6 +8719,9 @@
cpu_rq(cpu)->cpu_capacity_orig = capacity;
+ capacity *= arch_scale_max_freq_capacity(sd, cpu);
+ capacity >>= SCHED_CAPACITY_SHIFT;
+
mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
raw_spin_lock_irqsave(&mcc->lock, flags);
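
The new clamp scales cpu_capacity_orig by the current maximum-frequency cap. A worked sketch (SCHED_CAPACITY_SHIFT is 10, so the scale factor is out of 1024):

	#define SCHED_CAPACITY_SHIFT	10

	/* e.g. cap_orig = 1024, max freq limited to 75% (scale = 768) -> 768 */
	static unsigned long clamp_to_max_freq(unsigned long cap_orig,
					       unsigned long max_freq_scale)
	{
		return cap_orig * max_freq_scale >> SCHED_CAPACITY_SHIFT;
	}
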
@@ -8800,6 +8953,38 @@
return group_other;
}
+#ifdef CONFIG_NO_HZ_COMMON
+/*
+ * Idle load balancing data - used by the nohz balancing code, but
+ * kept visible here so that we can see which CPUs have their tick
+ * stopped.
+ */
+static struct {
+ cpumask_var_t idle_cpus_mask;
+ atomic_t nr_cpus;
+ unsigned long next_balance; /* in jiffy units */
+} nohz ____cacheline_aligned;
+
+static inline void update_cpu_stats_if_tickless(struct rq *rq)
+{
+ /* only called from update_sg_lb_stats when irqs are disabled */
+ if (cpumask_test_cpu(rq->cpu, nohz.idle_cpus_mask)) {
+		/* rate-limit updates to at most once per jiffy */
+ if (READ_ONCE(jiffies) <= rq->last_load_update_tick)
+ return;
+
+ raw_spin_lock(&rq->lock);
+ update_rq_clock(rq);
+ cpu_load_update_idle(rq);
+ update_cfs_rq_load_avg(rq->clock_task, &rq->cfs, false);
+ raw_spin_unlock(&rq->lock);
+ }
+}
+
+#else
+static inline void update_cpu_stats_if_tickless(struct rq *rq) { }
+#endif
+
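update_cpu_stats_if_tickless() lets the balancing path refresh the stats of tick-stopped CPUs at most once per jiffy: jiffies advances in the timer interrupt, while rq->last_load_update_tick is assumed to be pushed forward by the cpu_load_update_idle() call made under the lock. Worked example (illustrative values):

	/* last_load_update_tick == 10042:
	 *   callers during jiffy 10042 hit the early return;
	 *   the first caller in jiffy 10043 takes rq->lock and updates.
	 */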
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
@@ -8830,6 +9015,12 @@
if (cpu_isolated(i))
continue;
+		/*
+		 * If we are entering idle and there are CPUs with their
+		 * tick stopped, update their statistics here.
+		 */
+ if (env->idle == CPU_NEWLY_IDLE)
+ update_cpu_stats_if_tickless(rq);
+
/* Bias balancing toward cpus of our domain */
if (local_group)
load = target_load(i, load_idx);
@@ -9729,11 +9920,6 @@
* ld_moved - cumulative load moved across iterations
*/
cur_ld_moved = detach_tasks(&env);
- /*
- * We want to potentially lower env.src_cpu's OPP.
- */
- if (cur_ld_moved)
- update_capacity_of(env.src_cpu);
/*
* We've detached some tasks from busiest_rq. Every
@@ -9981,7 +10167,6 @@
struct sched_domain *sd;
int pulled_task = 0;
u64 curr_cost = 0;
- long removed_util=0;
if (cpu_isolated(this_cpu))
return 0;
@@ -10006,17 +10191,6 @@
raw_spin_unlock(&this_rq->lock);
- /*
- * If removed_util_avg is !0 we most probably migrated some task away
- * from this_cpu. In this case we might be willing to trigger an OPP
- * update, but we want to do so if we don't find anybody else to pull
- * here (we will trigger an OPP update with the pulled task's enqueue
- * anyway).
- *
- * Record removed_util before calling update_blocked_averages, and use
- * it below (before returning) to see if an OPP update is required.
- */
- removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg);
update_blocked_averages(this_cpu);
rcu_read_lock();
for_each_domain(this_cpu, sd) {
@@ -10083,13 +10257,6 @@
if (pulled_task)
this_rq->idle_stamp = 0;
- else if (removed_util) {
- /*
- * No task pulled and someone has been migrated away.
- * Good case to trigger an OPP update.
- */
- update_capacity_of(this_cpu);
- }
return pulled_task;
}
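With this, every update_capacity_of() call site is gone from the load-balancing paths (attach_one_task(), attach_tasks(), detach_tasks() and active balance above): task migration no longer issues explicit OPP requests. The working assumption is that a schedutil-style governor observes the change through the normal utilization-update hooks instead, roughly:

	/* Sketch (assumption, not part of this patch): cpufreq is driven
	 * from the utilization-update path rather than by explicit kicks.
	 */
	cpufreq_update_util(rq, 0);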
@@ -10171,10 +10338,6 @@
p = detach_one_task(&env);
if (p) {
schedstat_inc(sd->alb_pushed);
- /*
- * We want to potentially lower env.src_cpu's OPP.
- */
- update_capacity_of(env.src_cpu);
/* Active balancing done, reset the failure counter. */
sd->nr_balance_failed = 0;
moved = true;
@@ -10220,12 +10383,6 @@
* needed, they will kick the idle load balancer, which then does idle
* load balancing for all the idle CPUs.
*/
-static struct {
- cpumask_var_t idle_cpus_mask;
- atomic_t nr_cpus;
- unsigned long next_balance; /* in jiffy units */
-} nohz ____cacheline_aligned;
-
static inline int find_new_ilb(int type)
{
int ilb = nr_cpu_ids;
@@ -11628,7 +11785,7 @@
raw_spin_lock(&migration_lock);
rcu_read_lock();
- new_cpu = energy_aware_wake_cpu(p, cpu, 0);
+ new_cpu = select_energy_cpu_brute(p, cpu, 0);
rcu_read_unlock();
if (capacity_orig_of(new_cpu) > capacity_orig_of(cpu)) {
active_balance = kick_active_balance(rq, p, new_cpu);
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index a1afd13..ef52be4 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -83,3 +83,24 @@
#else
SCHED_FEAT(ENERGY_AWARE, false)
#endif
+
+/*
+ * Minimum capacity capping. Keep track of minimum capacity factor when
+ * minimum frequency available to a policy is modified.
+ * If enabled, this can be used to inform the scheduler about capacity
+ * restrictions.
+ */
+SCHED_FEAT(MIN_CAPACITY_CAPPING, false)
+
+/*
+ * Enforce the priority of candidates selected by find_best_target().
+ * ON: If the target CPU saves any energy, use that.
+ * OFF: Use whichever of target or backup saves the most energy.
+ */
+SCHED_FEAT(FBT_STRICT_ORDER, false)
+
+/*
+ * Force schedtune.prefer_idle wakeups down the need_idle path.
+ * ON: schedtune.prefer_idle is treated as need_idle.
+ * OFF: schedtune.prefer_idle is honored as is.
+ */
+SCHED_FEAT(EAS_USE_NEED_IDLE, true)
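Like the other entries in this file, these are read through the sched_feat() predicate, and with CONFIG_SCHED_DEBUG they can be toggled at runtime via /sys/kernel/debug/sched_features. A sketch of how the strict-ordering knob would typically be consumed (the surrounding comparison code and the target_saves_energy flag are illustrative, not in this patch):

	/* Sketch: take find_best_target()'s first choice outright. */
	if (sched_feat(FBT_STRICT_ORDER) && target_saves_energy)
		return target_cpu;	/* skip the target-vs-backup comparison */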
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1317c9f..8329e9c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1471,7 +1471,7 @@
extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
-bool __cpu_overutilized(int cpu, unsigned long util);
+bool __cpu_overutilized(int cpu, int delta);
bool cpu_overutilized(int cpu);
#endif
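The signature change turns __cpu_overutilized() into a what-if helper: callers pass the utilization delta a prospective migration would add. A minimal sketch of the assumed check (capacity_margin is the usual Q10 margin; the actual body is not part of this hunk):

	/* Sketch (assumed form): does util + delta, inflated by the
	 * capacity margin, still fit this CPU?
	 */
	bool __cpu_overutilized(int cpu, int delta)
	{
		return (capacity_of(cpu) * 1024) <
		       ((cpu_util(cpu) + delta) * capacity_margin);
	}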
@@ -1699,6 +1699,26 @@
}
#endif
+#ifndef arch_scale_max_freq_capacity
+static __always_inline
+unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
+{
+ return SCHED_CAPACITY_SCALE;
+}
+#endif
+
+#ifndef arch_scale_min_freq_capacity
+static __always_inline
+unsigned long arch_scale_min_freq_capacity(struct sched_domain *sd, int cpu)
+{
+ /*
+	 * Multiplied by any capacity value, this scale factor yields 0,
+	 * which represents an uncapped state.
+ */
+ return 0;
+}
+#endif
+
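Returning 0 here (instead of SCHED_CAPACITY_SCALE) is deliberate: the min-frequency factor is assumed to act as a capacity floor, so the default factor yields a floor of zero, i.e. no capping. Worked example (illustrative values):

	/* Assumed use as a floor:
	 *   floor = (capacity_orig * min_scale) >> SCHED_CAPACITY_SHIFT;
	 * min_scale == 0   -> floor == 0 (default: uncapped)
	 * min_scale == 512 -> floor == 512 for capacity_orig == 1024
	 *                     (fmin pinned at 50% of fmax)
	 */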
#ifndef arch_scale_cpu_capacity
static __always_inline
unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
@@ -1892,6 +1912,8 @@
#endif /* CONFIG_SCHED_WALT */
+extern unsigned long
+boosted_cpu_util(int cpu, struct sched_walt_cpu_load *walt_load);
#endif
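A sketch of a boosted_cpu_util() call site matching the declaration above (whether the walt_load out-parameter may be NULL is an assumption, so a real struct is passed):

	struct sched_walt_cpu_load wl;
	unsigned long util = boosted_cpu_util(cpu, &wl);	/* sketch */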
extern unsigned int capacity_margin_freq;
@@ -1906,76 +1928,6 @@
return cpu_capacity;
}
-#ifdef CONFIG_CPU_FREQ_GOV_SCHED
-#define capacity_max SCHED_CAPACITY_SCALE
-extern struct static_key __sched_freq;
-
-static inline bool sched_freq(void)
-{
- return static_key_false(&__sched_freq);
-}
-
-DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
-void update_cpu_capacity_request(int cpu, bool request);
-
-static inline void set_cfs_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{
- struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
-
-#ifdef CONFIG_SCHED_WALT
- if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
- int rtdl = scr->rt + scr->dl;
- /*
- * WALT tracks the utilization of a CPU considering the load
- * generated by all the scheduling classes.
- * Since the following call to:
- * update_cpu_capacity
- * is already adding the RT and DL utilizations let's remove
- * these contributions from the WALT signal.
- */
- if (capacity > rtdl)
- capacity -= rtdl;
- else
- capacity = 0;
- }
-#endif
- if (scr->cfs != capacity) {
- scr->cfs = capacity;
- update_cpu_capacity_request(cpu, request);
- }
-}
-
-static inline void set_rt_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{
- if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) {
- per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity;
- update_cpu_capacity_request(cpu, request);
- }
-}
-
-static inline void set_dl_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{
- if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) {
- per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity;
- update_cpu_capacity_request(cpu, request);
- }
-}
-#else
-static inline bool sched_freq(void) { return false; }
-static inline void set_cfs_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{ }
-static inline void set_rt_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{ }
-static inline void set_dl_cpu_capacity(int cpu, bool request,
- unsigned long capacity)
-{ }
-#endif
-
static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
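The Q10 arch_scale_freq_capacity() factor keeps rt_avg frequency-invariant. Worked example (illustrative values):

	/* 1ms of RT runtime at fmax:    1000000 * 1024
	 * 2ms of RT runtime at fmax/2:  2000000 * 512
	 * -> identical rt_avg contributions for the same amount of work.
	 */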
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index 93643ba..bdcd174 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -20,7 +20,7 @@
unsigned int sysctl_sched_cfs_boost __read_mostly;
extern struct reciprocal_value schedtune_spc_rdiv;
-extern struct target_nrg schedtune_target_nrg;
+struct target_nrg schedtune_target_nrg;
/* Performance Boost region (B) threshold params */
static int perf_boost_idx;
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 33daa99..b4d815c 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -2193,6 +2193,7 @@
int __read_mostly min_power_cpu;
+unsigned int sched_smp_overlap_capacity;
void walt_sched_energy_populated_callback(void)
{
struct sched_cluster *cluster;
diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h
index 7edae12..414c4ae 100644
--- a/kernel/sched/walt.h
+++ b/kernel/sched/walt.h
@@ -181,6 +181,7 @@
{
return sched_irqload(cpu) >= sysctl_sched_cpu_high_irqload;
}
+#define walt_cpu_high_irqload(cpu) sched_cpu_high_irqload(cpu)
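The alias keeps walt_-prefixed call sites building against the generic helper; a placement loop might filter on it like this (the call site is illustrative, not from this patch):

	/* Sketch: skip CPUs currently under heavy IRQ load. */
	if (walt_cpu_high_irqload(cpu))
		continue;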
static inline int exiting_task(struct task_struct *p)
{