sched_avg: add run queue averaging
Add code to calculate the run queue depth of a cpu and iowait
depth of the cpu.
The scheduler calls in to sched_update_nr_prod whenever there
is a runqueue change. This function maintains the runqueue average
and the iowait of that cpu in that time interval.
Whoever wants to know the runqueue average is expected to call
sched_get_nr_running_avg periodically to get the accumulated
runqueue and iowait averages for all the cpus.
Change-Id: Id8cb2ecf0ed479f090a83ccb72dd59c53fa73e0c
Signed-off-by: Jeff Ohlstein <johlstei@codeaurora.org>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c9509d..1f13da3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -142,6 +142,8 @@
extern unsigned long nr_iowait_cpu(int cpu);
extern unsigned long this_cpu_load(void);
+extern void sched_update_nr_prod(int cpu, unsigned long nr, bool inc);
+extern void sched_get_nr_running_avg(int *avg, int *iowait_avg);
extern void calc_global_load(unsigned long ticks);
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 9a7dd35..3ede7d9 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -11,7 +11,7 @@
CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif
-obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o
+obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o sched_avg.o
obj-$(CONFIG_SMP) += cpupri.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fb3acba..451bd4f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -916,11 +916,13 @@
static inline void inc_nr_running(struct rq *rq)
{
+ sched_update_nr_prod(cpu_of(rq), rq->nr_running, true);
rq->nr_running++;
}
static inline void dec_nr_running(struct rq *rq)
{
+ sched_update_nr_prod(cpu_of(rq), rq->nr_running, false);
rq->nr_running--;
}
diff --git a/kernel/sched/sched_avg.c b/kernel/sched/sched_avg.c
new file mode 100644
index 0000000..8eaf2f7
--- /dev/null
+++ b/kernel/sched/sched_avg.c
@@ -0,0 +1,106 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Scheduler hook for average runqueue determination
+ */
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/hrtimer.h>
+#include <linux/sched.h>
+#include <linux/math64.h>
+
+static DEFINE_PER_CPU(u64, nr_prod_sum);
+static DEFINE_PER_CPU(u64, last_time);
+static DEFINE_PER_CPU(u64, nr);
+static DEFINE_PER_CPU(unsigned long, iowait_prod_sum);
+static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
+static s64 last_get_time;
+
+/**
+ * sched_get_nr_running_avg
+ * @return: Average nr_running and iowait value since last poll.
+ * Returns the avg * 100 to return up to two decimal points
+ * of accuracy.
+ *
+ * Obtains the average nr_running value since the last poll.
+ * This function may not be called concurrently with itself
+ */
+void sched_get_nr_running_avg(int *avg, int *iowait_avg)
+{
+ int cpu;
+ u64 curr_time = sched_clock();
+ u64 diff = curr_time - last_get_time;
+ u64 tmp_avg = 0, tmp_iowait = 0;
+
+ *avg = 0;
+ *iowait_avg = 0;
+
+ if (!diff)
+ return;
+
+ last_get_time = curr_time;
+ /* read and reset nr_running counts */
+ for_each_possible_cpu(cpu) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
+ tmp_avg += per_cpu(nr_prod_sum, cpu);
+ tmp_avg += per_cpu(nr, cpu) *
+ (curr_time - per_cpu(last_time, cpu));
+ tmp_iowait = per_cpu(iowait_prod_sum, cpu);
+ tmp_iowait += nr_iowait_cpu(cpu) *
+ (curr_time - per_cpu(last_time, cpu));
+ per_cpu(last_time, cpu) = curr_time;
+ per_cpu(nr_prod_sum, cpu) = 0;
+ per_cpu(iowait_prod_sum, cpu) = 0;
+ spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
+ }
+
+ *avg = (int)div64_u64(tmp_avg * 100, diff);
+ *iowait_avg = (int)div64_u64(tmp_iowait * 100, diff);
+
+ BUG_ON(*avg < 0);
+ pr_debug("%s - avg:%d\n", __func__, *avg);
+ BUG_ON(*iowait_avg < 0);
+ pr_debug("%s - avg:%d\n", __func__, *iowait_avg);
+}
+EXPORT_SYMBOL(sched_get_nr_running_avg);
+
+/**
+ * sched_update_nr_prod
+ * @cpu: The core id of the nr running driver.
+ * @nr: Updated nr running value for cpu.
+ * @inc: Whether we are increasing or decreasing the count
+ * @return: N/A
+ *
+ * Update average with latest nr_running value for CPU
+ */
+void sched_update_nr_prod(int cpu, unsigned long nr_running, bool inc)
+{
+ int diff;
+ s64 curr_time;
+ unsigned long flags;
+
+ spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
+ curr_time = sched_clock();
+ diff = curr_time - per_cpu(last_time, cpu);
+ per_cpu(last_time, cpu) = curr_time;
+ per_cpu(nr, cpu) = nr_running + (inc ? 1 : -1);
+
+ BUG_ON(per_cpu(nr, cpu) < 0);
+
+ per_cpu(nr_prod_sum, cpu) += nr_running * diff;
+ per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff;
+ spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
+}
+EXPORT_SYMBOL(sched_update_nr_prod);