sched: walt: add big task rotation

When there are at least as many big tasks as CPUs, a misfit task can
sit on a low-capacity CPU indefinitely while the high-capacity CPUs
run other tasks. Rotate such tasks periodically: from the migration
check path, swap the misfit task that has gone longest since its last
enqueue on a low-capacity CPU with a fair task that has run for at
least 16 msec on a high-capacity CPU, using migrate_swap() from a
worker on the high-priority workqueue.

To support this, track every task's last enqueue time
(p->last_enqueued_ts, replacing last_cpu_selected_ts) and re-evaluate
the rotation state from core control via walt_rotation_checkpoint():
rotation is active only on asymmetric (hmp_capable) systems, when no
sched boost is in effect and the big task count reaches the number of
CPUs. Early detection of tasks now also runs while rotation is
enabled, and the sched_load_to_gov trace event reports the rotation
state.

The feature is disabled by default and guarded by the new
sched_walt_rotate_big_tasks sysctl.
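
It can be enabled at run time through the usual sysctl interface,
e.g.:

  echo 1 > /proc/sys/kernel/sched_walt_rotate_big_tasks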

Change-Id: Ice980dde340bff8362b4f2adc679423d8f54e8e4
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62c770d..290e2b2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1777,7 +1777,7 @@
 	u32 init_load_pct;
 	u64 last_wake_ts;
 	u64 last_switch_out_ts;
-	u64 last_cpu_selected_ts;
+	u64 last_enqueued_ts;
 	struct related_thread_group *grp;
 	struct list_head grp_list;
 	u64 cpu_cycles;
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 12bd032..3e97574 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -34,6 +34,7 @@
 extern unsigned int sysctl_sched_boost;
 extern unsigned int sysctl_sched_group_upmigrate_pct;
 extern unsigned int sysctl_sched_group_downmigrate_pct;
+extern unsigned int sysctl_sched_walt_rotate_big_tasks;
 
 extern int
 walt_proc_update_handler(struct ctl_table *table, int write,
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 0125cde..63f2baf 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -596,8 +596,8 @@
 
 TRACE_EVENT(sched_load_to_gov,
 
-	TP_PROTO(struct rq *rq, u64 aggr_grp_load, u32 tt_load, u64 freq_aggr_thresh, u64 load, int policy),
-	TP_ARGS(rq, aggr_grp_load, tt_load, freq_aggr_thresh, load, policy),
+	TP_PROTO(struct rq *rq, u64 aggr_grp_load, u32 tt_load, u64 freq_aggr_thresh, u64 load, int policy, int big_task_rotation),
+	TP_ARGS(rq, aggr_grp_load, tt_load, freq_aggr_thresh, load, policy, big_task_rotation),
 
 	TP_STRUCT__entry(
 		__field(	int,	cpu			)
@@ -612,6 +612,7 @@
 		__field(	u64,	grp_nt_ps		)
 		__field(	u64,	pl			)
 		__field(	u64,    load			)
+		__field(	int,    big_task_rotation	)
 	),
 
 	TP_fast_assign(
@@ -627,13 +628,15 @@
 		__entry->grp_nt_ps	= rq->grp_time.nt_prev_runnable_sum;
 		__entry->pl		= rq->walt_stats.pred_demands_sum;
 		__entry->load		= load;
+		__entry->big_task_rotation = big_task_rotation;
 	),
 
-	TP_printk("cpu=%d policy=%d ed_task_pid=%d aggr_grp_load=%llu freq_aggr_thresh=%llu tt_load=%llu rq_ps=%llu grp_rq_ps=%llu nt_ps=%llu grp_nt_ps=%llu pl=%llu load=%llu",
+	TP_printk("cpu=%d policy=%d ed_task_pid=%d aggr_grp_load=%llu freq_aggr_thresh=%llu tt_load=%llu rq_ps=%llu grp_rq_ps=%llu nt_ps=%llu grp_nt_ps=%llu pl=%llu load=%llu big_task_rotation=%d",
 		__entry->cpu, __entry->policy, __entry->ed_task_pid,
 		__entry->aggr_grp_load, __entry->freq_aggr_thresh,
 		__entry->tt_load, __entry->rq_ps, __entry->grp_rq_ps,
-		__entry->nt_ps, __entry->grp_nt_ps, __entry->pl, __entry->load)
+		__entry->nt_ps, __entry->grp_nt_ps, __entry->pl, __entry->load,
+		__entry->big_task_rotation)
 );
 #endif
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7b02ae6..31b45b7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -775,6 +775,7 @@
 	if (!(flags & ENQUEUE_RESTORE))
 		sched_info_queued(rq, p);
 	p->sched_class->enqueue_task(rq, p, flags);
+	walt_update_last_enqueue(p);
 	trace_sched_enq_deq_task(p, 1, cpumask_bits(&p->cpus_allowed)[0]);
 }
 
diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c
index cc5a97c..c0a8a2a 100644
--- a/kernel/sched/core_ctl.c
+++ b/kernel/sched/core_ctl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2014-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2014-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -482,6 +482,7 @@
 
 	sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg,
 				 &max_nr, &big_max_nr);
+	walt_rotation_checkpoint(big_avg);
 
 	spin_lock_irqsave(&state_lock, flags);
 	for_each_cluster(cluster, index) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1ff2e5e..55c3957 100755
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11430,6 +11430,141 @@
 	return rc;
 }
 
+#ifdef CONFIG_SCHED_WALT
+struct walt_rotate_work {
+	struct work_struct w;
+	struct task_struct *src_task;
+	struct task_struct *dst_task;
+	int src_cpu;
+	int dst_cpu;
+};
+
+static DEFINE_PER_CPU(struct walt_rotate_work, walt_rotate_works);
+
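+/*
+ * Deferred work: perform the actual task swap and release the CPU
+ * reservations taken in walt_check_for_rotation().
+ */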
+static void walt_rotate_work_func(struct work_struct *work)
+{
+	struct walt_rotate_work *wr = container_of(work,
+				struct walt_rotate_work, w);
+
+	migrate_swap(wr->src_task, wr->dst_task);
+
+	put_task_struct(wr->src_task);
+	put_task_struct(wr->dst_task);
+
+	clear_reserved(wr->src_cpu);
+	clear_reserved(wr->dst_cpu);
+}
+
+void walt_rotate_work_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct walt_rotate_work *wr = &per_cpu(walt_rotate_works, i);
+
+		INIT_WORK(&wr->w, walt_rotate_work_func);
+	}
+}
+
+#define WALT_ROTATION_THRESHOLD_NS	16000000
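+/*
+ * Check whether src_rq's currently running misfit task is the one that
+ * has gone longest since its last enqueue among all low-capacity CPUs.
+ * If so, find the high-capacity CPU whose current fair task has run
+ * for at least WALT_ROTATION_THRESHOLD_NS and queue a work item to
+ * swap the two tasks.
+ */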
+static void walt_check_for_rotation(struct rq *src_rq)
+{
+	u64 wc, wait, max_wait = 0, run, max_run = 0;
+	int deserved_cpu = nr_cpu_ids, dst_cpu = nr_cpu_ids;
+	int i, src_cpu = cpu_of(src_rq);
+	struct rq *dst_rq;
+	struct walt_rotate_work *wr = NULL;
+
+	if (!walt_rotation_enabled)
+		return;
+
+	if (got_boost_kick())
+		return;
+
+	if (is_max_capacity_cpu(src_cpu))
+		return;
+
+	wc = ktime_get_ns();
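+	/*
+	 * Find the low-capacity CPU whose running misfit fair task was
+	 * enqueued earliest, i.e. has been stuck the longest. CPU ids are
+	 * assumed to be ordered little-to-big, hence the break at the
+	 * first max-capacity CPU.
+	 */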
+	for_each_possible_cpu(i) {
+		struct rq *rq = cpu_rq(i);
+
+		if (is_max_capacity_cpu(i))
+			break;
+
+		if (is_reserved(i))
+			continue;
+
+		if (!rq->misfit_task || rq->curr->sched_class !=
+						&fair_sched_class)
+			continue;
+
+		wait = wc - rq->curr->last_enqueued_ts;
+		if (wait > max_wait) {
+			max_wait = wait;
+			deserved_cpu = i;
+		}
+	}
+
+	if (deserved_cpu != src_cpu)
+		return;
+
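+	/*
+	 * Only the rq owning the most-starved misfit task proceeds. Pick
+	 * the max-capacity CPU whose sole runnable fair task has run the
+	 * longest since enqueue, and no less than
+	 * WALT_ROTATION_THRESHOLD_NS.
+	 */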
+	for_each_possible_cpu(i) {
+		struct rq *rq = cpu_rq(i);
+
+		if (!is_max_capacity_cpu(i))
+			continue;
+
+		if (is_reserved(i))
+			continue;
+
+		if (rq->curr->sched_class != &fair_sched_class)
+			continue;
+
+		if (rq->nr_running > 1)
+			continue;
+
+		run = wc - rq->curr->last_enqueued_ts;
+
+		if (run < WALT_ROTATION_THRESHOLD_NS)
+			continue;
+
+		if (run > max_run) {
+			max_run = run;
+			dst_cpu = i;
+		}
+	}
+
+	if (dst_cpu == nr_cpu_ids)
+		return;
+
+	dst_rq = cpu_rq(dst_cpu);
+
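+	/*
+	 * Recheck under the rq locks and reserve both CPUs. The actual
+	 * migrate_swap() may sleep, so it is deferred to a worker; the
+	 * reservations are dropped there once the swap has completed.
+	 */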
+	double_rq_lock(src_rq, dst_rq);
+	if (dst_rq->curr->sched_class == &fair_sched_class) {
+		get_task_struct(src_rq->curr);
+		get_task_struct(dst_rq->curr);
+
+		mark_reserved(src_cpu);
+		mark_reserved(dst_cpu);
+		wr = &per_cpu(walt_rotate_works, src_cpu);
+
+		wr->src_task = src_rq->curr;
+		wr->dst_task = dst_rq->curr;
+
+		wr->src_cpu = src_cpu;
+		wr->dst_cpu = dst_cpu;
+	}
+	double_rq_unlock(src_rq, dst_rq);
+
+	if (wr)
+		queue_work_on(src_cpu, system_highpri_wq, &wr->w);
+}
+#else
+static inline void walt_check_for_rotation(struct rq *rq)
+{
+}
+#endif
+
 static DEFINE_RAW_SPINLOCK(migration_lock);
 void check_for_migration(struct rq *rq, struct task_struct *p)
 {
@@ -11459,6 +11594,8 @@
 					&rq->active_balance_work);
 				return;
 			}
+		} else {
+			walt_check_for_rotation(rq);
 		}
 		raw_spin_unlock(&migration_lock);
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5508248..e0aa30d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -943,8 +943,8 @@
 };
 extern void sched_setnuma(struct task_struct *p, int node);
 extern int migrate_task_to(struct task_struct *p, int cpu);
-extern int migrate_swap(struct task_struct *, struct task_struct *);
 #endif /* CONFIG_NUMA_BALANCING */
+extern int migrate_swap(struct task_struct *cur, struct task_struct *p);
 
 #ifdef CONFIG_SMP
 
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index b7da03f..23fd885 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -88,6 +88,9 @@
 
 __read_mostly unsigned int sysctl_sched_cpu_high_irqload = (10 * NSEC_PER_MSEC);
 
+unsigned int sysctl_sched_walt_rotate_big_tasks;
+unsigned int walt_rotation_enabled;
+
 /*
  * sched_window_stats_policy and sched_ravg_hist_size have a 'sysctl' copy
  * associated with them. This is required for atomic update of those variables
@@ -316,7 +319,8 @@
 	struct task_struct *p;
 	int loop_max = 10;
 
-	if (sched_boost_policy() == SCHED_BOOST_NONE || !rq->cfs.h_nr_running)
+	if ((!walt_rotation_enabled && sched_boost_policy() ==
+			SCHED_BOOST_NONE) || !rq->cfs.h_nr_running)
 		return 0;
 
 	rq->ed_task = NULL;
@@ -487,7 +491,7 @@
 
 done:
 	trace_sched_load_to_gov(rq, aggr_grp_load, tt_load, freq_aggr_thresh,
-				load, reporting_policy);
+				load, reporting_policy, walt_rotation_enabled);
 	return load;
 }
 
@@ -2019,7 +2023,7 @@
 
 	wallclock = ktime_get_ns();
 	p->ravg.mark_start = p->last_wake_ts = wallclock;
-	p->last_cpu_selected_ts = wallclock;
+	p->last_enqueued_ts = wallclock;
 	p->last_switch_out_ts = 0;
 	update_task_cpu_cycles(p, cpu_of(rq));
 }
@@ -3143,6 +3147,19 @@
 		core_ctl_check(this_rq()->window_start);
 }
 
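+/*
+ * Re-evaluate whether big task rotation should be active. Rotation is
+ * enabled only on asymmetric (hmp_capable) systems, when the
+ * sched_walt_rotate_big_tasks sysctl is set, no sched boost is in
+ * effect, and the number of big tasks is at least the number of CPUs.
+ */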
+void walt_rotation_checkpoint(int nr_big)
+{
+	if (!hmp_capable())
+		return;
+
+	if (!sysctl_sched_walt_rotate_big_tasks || sched_boost() != NO_BOOST) {
+		walt_rotation_enabled = 0;
+		return;
+	}
+
+	walt_rotation_enabled = nr_big >= num_possible_cpus();
+}
+
 int walt_proc_update_handler(struct ctl_table *table, int write,
 			     void __user *buffer, size_t *lenp,
 			     loff_t *ppos)
@@ -3178,6 +3195,8 @@
 	cpumask_set_cpu(cpu_of(rq), &rq->freq_domain_cpumask);
 	init_irq_work(&walt_migration_irq_work, walt_irq_work);
 	init_irq_work(&walt_cpufreq_irq_work, walt_irq_work);
+	walt_rotate_work_init();
+
 	rq->walt_stats.cumulative_runnable_avg = 0;
 	rq->window_start = 0;
 	rq->cum_window_start = 0;
diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h
index c8780cf..da53ea4 100644
--- a/kernel/sched/walt.h
+++ b/kernel/sched/walt.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -291,9 +291,20 @@
 	return sysctl_sched_is_big_little ? prev_cpu : min_power_cpu;
 }
 
+static inline void walt_update_last_enqueue(struct task_struct *p)
+{
+	p->last_enqueued_ts = ktime_get_ns();
+}
+extern void walt_rotate_work_init(void);
+extern void walt_rotation_checkpoint(int nr_big);
+extern unsigned int walt_rotation_enabled;
+
 #else /* CONFIG_SCHED_WALT */
 
 static inline void walt_sched_init(struct rq *rq) { }
+static inline void walt_rotate_work_init(void) { }
+static inline void walt_rotation_checkpoint(int nr_big) { }
+static inline void walt_update_last_enqueue(struct task_struct *p) { }
 
 static inline void update_task_ravg(struct task_struct *p, struct rq *rq,
 				int event, u64 wallclock, u64 irqtime) { }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a01c821..b057784 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -325,6 +325,15 @@
 		.extra1         = &zero,
 		.extra2		= &three,
 	},
+	{
+		.procname	= "sched_walt_rotate_big_tasks",
+		.data		= &sysctl_sched_walt_rotate_big_tasks,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 #endif
 	{
 		.procname	= "sched_upmigrate",