sched: use cumulative window demand for task placement

Use the cumulative window demand to estimate the CPU frequency at the
upcoming WALT window boundary.  A newly waking task is then placed onto
an already active CPU rather than waking an idle CPU, as long as doing
so will not raise the active CPU's frequency.

Change-Id: I3bef11fdd05c60995aba11d28c3ea8af925a1d44
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
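
The placement test added to the energy-aware wakeup path below boils down
to: add the waking task's demand to the CPU's cumulative window demand and
prefer that CPU over waking an idle one only if the sum, with the
scheduler's capacity margin applied, still fits under the capacity
available at the CPU's current frequency.  A minimal userspace sketch of
that test follows; the margin value and helper names are illustrative
assumptions, the kernel side uses add_capacity_margin() and
capacity_curr_of().

#include <stdbool.h>

#define SCHED_CAPACITY_SCALE	1024
#define CAPACITY_MARGIN		1280	/* assumed ~25% headroom (1280/1024) */

/* Roughly what add_capacity_margin() does: inflate utilization by the margin. */
static unsigned long add_margin(unsigned long util)
{
	return util * CAPACITY_MARGIN / SCHED_CAPACITY_SCALE;
}

/*
 * Would placing a task with demand 'task_util' on a CPU whose cumulative
 * window demand is 'cpu_util_cum' force that CPU to a higher frequency?
 * 'capacity_curr' is the capacity available at the CPU's current frequency.
 */
static bool fits_at_current_freq(unsigned long cpu_util_cum,
				 unsigned long task_util,
				 unsigned long capacity_curr)
{
	return add_margin(cpu_util_cum + task_util) < capacity_curr;
}

If the task's demand is already counted in the CPU's cumulative window
demand (it ran there during the current window), the patch subtracts
task_util() first so the demand is not double counted.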
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index ba2b0ba..6dbc8a1 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -699,9 +699,9 @@
 #ifdef CONFIG_SMP
 TRACE_EVENT(sched_cpu_util,
 
-	TP_PROTO(struct task_struct *p, int cpu, int task_util, unsigned long curr_util, int sync),
+	TP_PROTO(struct task_struct *p, int cpu, int task_util, unsigned long curr_util, unsigned long new_cum_util, int sync),
 
-	TP_ARGS(p, cpu, task_util, curr_util, sync),
+	TP_ARGS(p, cpu, task_util, curr_util, new_cum_util, sync),
 
 	TP_STRUCT__entry(
 		__array(char, comm, TASK_COMM_LEN	)
@@ -710,6 +710,8 @@
 		__field(int, task_util				)
 		__field(unsigned int, nr_running		)
 		__field(long, cpu_util			)
+		__field(long, cpu_util_cum			)
+		__field(long, new_cum_util			)
 		__field(unsigned int, capacity_curr		)
 		__field(unsigned int, capacity			)
 		__field(unsigned long, curr_util		)
@@ -717,6 +719,7 @@
 		__field(int, idle_state				)
 		__field(unsigned int, irqload		)
 		__field(int, high_irqload		)
+		__field(int, task_in_cum_demand		)
 	),
 
 	TP_fast_assign(
@@ -726,6 +729,9 @@
 		__entry->task_util		= task_util;
 		__entry->nr_running		= cpu_rq(cpu)->nr_running;
 		__entry->cpu_util		= cpu_util(cpu);
+		__entry->cpu_util_cum		= cpu_util_cum(cpu, 0);
+		__entry->new_cum_util		= new_cum_util;
+		__entry->task_in_cum_demand	= task_in_cum_window_demand(cpu_rq(cpu), p);
 		__entry->capacity_curr		= capacity_curr_of(cpu);
 		__entry->capacity		= capacity_of(cpu);
 		__entry->curr_util		= curr_util;
@@ -735,8 +741,8 @@
 		__entry->high_irqload		= sched_cpu_high_irqload(cpu);
 	),
 
-	TP_printk("comm=%s pid=%d cpu=%d task_util=%d nr_running=%d cpu_util=%ld capacity_curr=%u capacity=%u curr_util=%ld sync=%d idle_state=%d irqload=%u high_irqload=%u",
-		__entry->comm, __entry->pid, __entry->cpu, __entry->task_util, __entry->nr_running, __entry->cpu_util, __entry->capacity_curr, __entry->capacity, __entry->curr_util, __entry->sync, __entry->idle_state, __entry->irqload, __entry->high_irqload)
+	TP_printk("comm=%s pid=%d cpu=%d task_util=%d nr_running=%d cpu_util=%ld cpu_util_cum=%ld new_cum_util=%ld task_in_cum=%d capacity_curr=%u capacity=%u curr_util=%ld sync=%d idle_state=%d irqload=%u high_irqload=%u",
+		__entry->comm, __entry->pid, __entry->cpu, __entry->task_util, __entry->nr_running, __entry->cpu_util, __entry->cpu_util_cum, __entry->new_cum_util, __entry->task_in_cum_demand, __entry->capacity_curr, __entry->capacity, __entry->curr_util, __entry->sync, __entry->idle_state, __entry->irqload, __entry->high_irqload)
 );
 
 DECLARE_EVENT_CLASS(sched_task_util,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 779c1f0..b480b36 100755
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5379,7 +5379,7 @@
  */
 static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta)
 {
-	int util = __cpu_util(cpu, delta);
+	int util = cpu_util_cum(cpu, delta);
 
 	if (util >= capacity)
 		return SCHED_CAPACITY_SCALE;
@@ -5404,7 +5404,7 @@
 #ifdef CONFIG_SCHED_WALT
 	if (cpu == eenv->src_cpu) {
 		if (!walt_disabled && sysctl_sched_use_walt_task_util &&
-		     eenv->task->state == TASK_WAKING) {
+		    !task_in_cum_window_demand(cpu_rq(cpu), eenv->task)) {
 			if (eenv->util_delta == 0)
 				/*
 				 * energy before - calculate energy cost when
@@ -5454,7 +5454,7 @@
 		/* substract sync_cpu's rq->curr util to discount its cost */
 		if (eenv->sync_cpu == i)
 			delta -= eenv->curr_util;
-		max_util = max(max_util, __cpu_util(i, delta));
+		max_util = max(max_util, cpu_util_cum(i, delta));
 	}
 
 	return max_util;
@@ -6588,7 +6588,7 @@
 	int target_max_cap = INT_MAX;
 	int target_cpu;
 	unsigned long task_util_boosted = 0, curr_util = 0;
-	long new_util;
+	long new_util, new_util_cum;
 	int i;
 	int ediff = 0;
 	int cpu = smp_processor_id();
@@ -6649,11 +6649,19 @@
 			 * accounting. However, the blocked utilization may be zero.
 			 */
 			new_util = cpu_util(i) + task_util_boosted;
+
+			if (task_in_cum_window_demand(cpu_rq(i), p))
+				new_util_cum = cpu_util_cum(i, 0) +
+					       task_util_boosted - task_util(p);
+			else
+				new_util_cum = cpu_util_cum(i, 0) +
+					       task_util_boosted;
+
 			if (sync && i == cpu)
 				new_util -= curr_util;
 
-			trace_sched_cpu_util(p, i, task_util_boosted,
-					     curr_util, sync);
+			trace_sched_cpu_util(p, i, task_util_boosted, curr_util,
+					     new_util_cum, sync);
 
 			if (sched_cpu_high_irqload(cpu))
 				continue;
@@ -6669,7 +6677,7 @@
 			cpu_idle_idx = cpu_rq(i)->nr_running ? -1 :
 				       idle_get_state_idx(cpu_rq(i));
 
-			if (add_capacity_margin(new_util) <
+			if (add_capacity_margin(new_util_cum) <
 			    capacity_curr_of(i)) {
 				if (sysctl_sched_cstate_aware) {
 					if (cpu_idle_idx < min_idle_idx ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 173470c..942173c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1766,6 +1766,25 @@
 	unsigned long pl;
 };
 
+static inline unsigned long cpu_util_cum(int cpu, int delta)
+{
+	u64 util = cpu_rq(cpu)->cfs.avg.util_avg;
+	unsigned long capacity = capacity_orig_of(cpu);
+
+#ifdef CONFIG_SCHED_WALT
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
+		util = cpu_rq(cpu)->cum_window_demand;
+		util = div64_u64(util,
+				 sched_ravg_window >> SCHED_CAPACITY_SHIFT);
+	}
+#endif
+	delta += util;
+	if (delta < 0)
+		return 0;
+
+	return (delta >= capacity) ? capacity : delta;
+}
+
 static inline unsigned long
 cpu_util_freq(int cpu, struct sched_walt_cpu_load *walt_load)
 {