soc: qcom: rq_stats: add snapshot of run queue stats driver
Add a snapshot of msm_rq_stats driver as of msm-4.4 commit
<c605e110ab186049> (Merge "usb: gadget: composite: Return bcdUSB
0x0310 for Superspeed and higher").
Change-Id: I723310d12616f61598b2815e5ed3024418966ecc
Signed-off-by: Kyle Yan <kyan@codeaurora.org>
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 5d401ba..48d6faa 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,6 +10,14 @@
the entities if corruption is suspected.
If unsure, say N
+config QCOM_RUN_QUEUE_STATS
+ bool "Enable collection and exporting of QTI Run Queue stats to userspace"
+ help
+ This option enables the driver to periodically collecting the statistics
+ of kernel run queue information and calculate the load of the system.
+ This information is exported to usespace via sysfs entries and userspace
+ algorithms uses info and decide when to turn on/off the cpu cores.
+
config QCOM_GSBI
tristate "QCOM General Serial Bus Interface"
depends on ARCH_QCOM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 058b6f7..6ad72fc 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -17,3 +17,4 @@
obj-$(CONFIG_MSM_GLADIATOR_ERP_V2) += gladiator_erp_v2.o
obj-$(CONFIG_QCOM_WATCHDOG_V2) += watchdog_v2.o
obj-$(CONFIG_QCOM_MEMORY_DUMP_V2) += memory_dump_v2.o
+obj-$(CONFIG_QCOM_RUN_QUEUE_STATS) += rq_stats.o
diff --git a/drivers/soc/qcom/rq_stats.c b/drivers/soc/qcom/rq_stats.c
new file mode 100644
index 0000000..5850c46
--- /dev/null
+++ b/drivers/soc/qcom/rq_stats.c
@@ -0,0 +1,396 @@
+/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/hrtimer.h>
+#include <linux/cpu.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/rq_stats.h>
+#include <linux/cpufreq.h>
+#include <linux/kernel_stat.h>
+#include <linux/tick.h>
+#include <asm/smp_plat.h>
+#include <linux/suspend.h>
+
+#define MAX_LONG_SIZE 24
+#define DEFAULT_RQ_POLL_JIFFIES 1
+#define DEFAULT_DEF_TIMER_JIFFIES 5
+
+struct notifier_block freq_transition;
+struct notifier_block cpu_hotplug;
+
+struct cpu_load_data {
+ cputime64_t prev_cpu_idle;
+ cputime64_t prev_cpu_wall;
+ unsigned int avg_load_maxfreq;
+ unsigned int samples;
+ unsigned int window_size;
+ unsigned int cur_freq;
+ unsigned int policy_max;
+ cpumask_var_t related_cpus;
+ struct mutex cpu_load_mutex;
+};
+
+static DEFINE_PER_CPU(struct cpu_load_data, cpuload);
+
+
+static int update_average_load(unsigned int freq, unsigned int cpu)
+{
+
+ struct cpu_load_data *pcpu = &per_cpu(cpuload, cpu);
+ cputime64_t cur_wall_time, cur_idle_time;
+ unsigned int idle_time, wall_time;
+ unsigned int cur_load, load_at_max_freq;
+
+ cur_idle_time = get_cpu_idle_time(cpu, &cur_wall_time, 0);
+
+ wall_time = (unsigned int) (cur_wall_time - pcpu->prev_cpu_wall);
+ pcpu->prev_cpu_wall = cur_wall_time;
+
+ idle_time = (unsigned int) (cur_idle_time - pcpu->prev_cpu_idle);
+ pcpu->prev_cpu_idle = cur_idle_time;
+
+
+ if (unlikely(wall_time <= 0 || wall_time < idle_time))
+ return 0;
+
+ cur_load = 100 * (wall_time - idle_time) / wall_time;
+
+ /* Calculate the scaled load across CPU */
+ load_at_max_freq = (cur_load * freq) / pcpu->policy_max;
+
+ if (!pcpu->avg_load_maxfreq) {
+ /* This is the first sample in this window*/
+ pcpu->avg_load_maxfreq = load_at_max_freq;
+ pcpu->window_size = wall_time;
+ } else {
+ /*
+ * The is already a sample available in this window.
+ * Compute weighted average with prev entry, so that we get
+ * the precise weighted load.
+ */
+ pcpu->avg_load_maxfreq =
+ ((pcpu->avg_load_maxfreq * pcpu->window_size) +
+ (load_at_max_freq * wall_time)) /
+ (wall_time + pcpu->window_size);
+
+ pcpu->window_size += wall_time;
+ }
+
+ return 0;
+}
+
+static unsigned int report_load_at_max_freq(void)
+{
+ int cpu;
+ struct cpu_load_data *pcpu;
+ unsigned int total_load = 0;
+
+ for_each_online_cpu(cpu) {
+ pcpu = &per_cpu(cpuload, cpu);
+ mutex_lock(&pcpu->cpu_load_mutex);
+ update_average_load(pcpu->cur_freq, cpu);
+ total_load += pcpu->avg_load_maxfreq;
+ pcpu->avg_load_maxfreq = 0;
+ mutex_unlock(&pcpu->cpu_load_mutex);
+ }
+ return total_load;
+}
+
+static int cpufreq_transition_handler(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freqs = data;
+ struct cpu_load_data *this_cpu = &per_cpu(cpuload, freqs->cpu);
+ int j;
+
+ switch (val) {
+ case CPUFREQ_POSTCHANGE:
+ for_each_cpu(j, this_cpu->related_cpus) {
+ struct cpu_load_data *pcpu = &per_cpu(cpuload, j);
+
+ mutex_lock(&pcpu->cpu_load_mutex);
+ update_average_load(freqs->old, j);
+ pcpu->cur_freq = freqs->new;
+ mutex_unlock(&pcpu->cpu_load_mutex);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void update_related_cpus(void)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct cpu_load_data *this_cpu = &per_cpu(cpuload, cpu);
+ struct cpufreq_policy cpu_policy;
+
+ cpufreq_get_policy(&cpu_policy, cpu);
+ cpumask_copy(this_cpu->related_cpus, cpu_policy.cpus);
+ }
+}
+static int cpu_hotplug_handler(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ unsigned int cpu = (unsigned long)data;
+ struct cpu_load_data *this_cpu = &per_cpu(cpuload, cpu);
+
+ switch (val) {
+ case CPU_ONLINE:
+ if (!this_cpu->cur_freq)
+ this_cpu->cur_freq = cpufreq_quick_get(cpu);
+ update_related_cpus();
+ /* fall through */
+ case CPU_ONLINE_FROZEN:
+ this_cpu->avg_load_maxfreq = 0;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int system_suspend_handler(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ switch (val) {
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
+ case PM_POST_RESTORE:
+ rq_info.hotplug_disabled = 0;
+ break;
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ rq_info.hotplug_disabled = 1;
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+
+static ssize_t hotplug_disable_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ unsigned int val = rq_info.hotplug_disabled;
+
+ return snprintf(buf, MAX_LONG_SIZE, "%d\n", val);
+}
+
+static struct kobj_attribute hotplug_disabled_attr = __ATTR_RO(hotplug_disable);
+
+static void def_work_fn(struct work_struct *work)
+{
+ /* Notify polling threads on change of value */
+ sysfs_notify(rq_info.kobj, NULL, "def_timer_ms");
+}
+
+static ssize_t run_queue_avg_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ unsigned int val = 0;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&rq_lock, flags);
+ /* rq avg currently available only on one core */
+ val = rq_info.rq_avg;
+ rq_info.rq_avg = 0;
+ spin_unlock_irqrestore(&rq_lock, flags);
+
+ return snprintf(buf, PAGE_SIZE, "%d.%d\n", val/10, val%10);
+}
+
+static struct kobj_attribute run_queue_avg_attr = __ATTR_RO(run_queue_avg);
+
+static ssize_t show_run_queue_poll_ms(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ int ret = 0;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&rq_lock, flags);
+ ret = snprintf(buf, MAX_LONG_SIZE, "%u\n",
+ jiffies_to_msecs(rq_info.rq_poll_jiffies));
+ spin_unlock_irqrestore(&rq_lock, flags);
+
+ return ret;
+}
+
+static ssize_t store_run_queue_poll_ms(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int val = 0;
+ unsigned long flags = 0;
+ static DEFINE_MUTEX(lock_poll_ms);
+
+ mutex_lock(&lock_poll_ms);
+
+ spin_lock_irqsave(&rq_lock, flags);
+ if (kstrtouint(buf, 0, &val))
+ count = -EINVAL;
+ else
+ rq_info.rq_poll_jiffies = msecs_to_jiffies(val);
+ spin_unlock_irqrestore(&rq_lock, flags);
+
+ mutex_unlock(&lock_poll_ms);
+
+ return count;
+}
+
+static struct kobj_attribute run_queue_poll_ms_attr =
+ __ATTR(run_queue_poll_ms, S_IWUSR | S_IRUSR, show_run_queue_poll_ms,
+ store_run_queue_poll_ms);
+
+static ssize_t show_def_timer_ms(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ int64_t diff;
+ unsigned int udiff;
+
+ diff = ktime_to_ns(ktime_get()) - rq_info.def_start_time;
+ do_div(diff, 1000 * 1000);
+ udiff = (unsigned int) diff;
+
+ return snprintf(buf, MAX_LONG_SIZE, "%u\n", udiff);
+}
+
+static ssize_t store_def_timer_ms(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ unsigned int val = 0;
+
+ if (kstrtouint(buf, 0, &val))
+ return -EINVAL;
+
+ rq_info.def_timer_jiffies = msecs_to_jiffies(val);
+
+ rq_info.def_start_time = ktime_to_ns(ktime_get());
+ return count;
+}
+
+static struct kobj_attribute def_timer_ms_attr =
+ __ATTR(def_timer_ms, S_IWUSR | S_IRUSR, show_def_timer_ms,
+ store_def_timer_ms);
+
+static ssize_t show_cpu_normalized_load(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return snprintf(buf, MAX_LONG_SIZE, "%u\n", report_load_at_max_freq());
+}
+
+static struct kobj_attribute cpu_normalized_load_attr =
+ __ATTR(cpu_normalized_load, S_IWUSR | S_IRUSR, show_cpu_normalized_load,
+ NULL);
+
+static struct attribute *rq_attrs[] = {
+ &cpu_normalized_load_attr.attr,
+ &def_timer_ms_attr.attr,
+ &run_queue_avg_attr.attr,
+ &run_queue_poll_ms_attr.attr,
+ &hotplug_disabled_attr.attr,
+ NULL,
+};
+
+static struct attribute_group rq_attr_group = {
+ .attrs = rq_attrs,
+};
+
+static int init_rq_attribs(void)
+{
+ int err;
+
+ rq_info.rq_avg = 0;
+ rq_info.attr_group = &rq_attr_group;
+
+ /* Create /sys/devices/system/cpu/cpu0/rq-stats/... */
+ rq_info.kobj = kobject_create_and_add("rq-stats",
+ &get_cpu_device(0)->kobj);
+ if (!rq_info.kobj)
+ return -ENOMEM;
+
+ err = sysfs_create_group(rq_info.kobj, rq_info.attr_group);
+ if (err)
+ kobject_put(rq_info.kobj);
+ else
+ kobject_uevent(rq_info.kobj, KOBJ_ADD);
+
+ return err;
+}
+
+static int __init msm_rq_stats_init(void)
+{
+ int ret;
+ int i;
+ struct cpufreq_policy cpu_policy;
+
+#ifndef CONFIG_SMP
+ /* Bail out if this is not an SMP Target */
+ rq_info.init = 0;
+ return -EPERM;
+#endif
+
+ rq_wq = create_singlethread_workqueue("rq_stats");
+ BUG_ON(!rq_wq);
+ INIT_WORK(&rq_info.def_timer_work, def_work_fn);
+ spin_lock_init(&rq_lock);
+ rq_info.rq_poll_jiffies = DEFAULT_RQ_POLL_JIFFIES;
+ rq_info.def_timer_jiffies = DEFAULT_DEF_TIMER_JIFFIES;
+ rq_info.rq_poll_last_jiffy = 0;
+ rq_info.def_timer_last_jiffy = 0;
+ rq_info.hotplug_disabled = 0;
+ ret = init_rq_attribs();
+
+ rq_info.init = 1;
+
+ for_each_possible_cpu(i) {
+ struct cpu_load_data *pcpu = &per_cpu(cpuload, i);
+
+ mutex_init(&pcpu->cpu_load_mutex);
+ cpufreq_get_policy(&cpu_policy, i);
+ pcpu->policy_max = cpu_policy.cpuinfo.max_freq;
+ if (cpu_online(i))
+ pcpu->cur_freq = cpufreq_quick_get(i);
+ cpumask_copy(pcpu->related_cpus, cpu_policy.cpus);
+ }
+ freq_transition.notifier_call = cpufreq_transition_handler;
+ cpu_hotplug.notifier_call = cpu_hotplug_handler;
+ cpufreq_register_notifier(&freq_transition,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ register_hotcpu_notifier(&cpu_hotplug);
+
+ return ret;
+}
+late_initcall(msm_rq_stats_init);
+
+static int __init msm_rq_stats_early_init(void)
+{
+#ifndef CONFIG_SMP
+ /* Bail out if this is not an SMP Target */
+ rq_info.init = 0;
+ return -EPERM;
+#endif
+
+ pm_notifier(system_suspend_handler, 0);
+ return 0;
+}
+core_initcall(msm_rq_stats_early_init);
diff --git a/include/linux/rq_stats.h b/include/linux/rq_stats.h
new file mode 100644
index 0000000..44cd842
--- /dev/null
+++ b/include/linux/rq_stats.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2011,2013-2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+struct rq_data {
+ unsigned int rq_avg;
+ unsigned long rq_poll_jiffies;
+ unsigned long def_timer_jiffies;
+ unsigned long rq_poll_last_jiffy;
+ unsigned long rq_poll_total_jiffies;
+ unsigned long def_timer_last_jiffy;
+ unsigned int hotplug_disabled;
+ int64_t def_start_time;
+ struct attribute_group *attr_group;
+ struct kobject *kobj;
+ struct work_struct def_timer_work;
+ int init;
+};
+
+extern spinlock_t rq_lock;
+extern struct rq_data rq_info;
+extern struct workqueue_struct *rq_wq;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 204fdc8..bb14142 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/context_tracking.h>
+#include <linux/rq_stats.h>
#include <asm/irq_regs.h>
@@ -30,6 +31,10 @@
#include <trace/events/timer.h>
+struct rq_data rq_info;
+struct workqueue_struct *rq_wq;
+spinlock_t rq_lock;
+
/*
* Per-CPU nohz control structure
*/
@@ -1138,6 +1143,42 @@
* High resolution timer specific code
*/
#ifdef CONFIG_HIGH_RES_TIMERS
+static void update_rq_stats(void)
+{
+ unsigned long jiffy_gap = 0;
+ unsigned int rq_avg = 0;
+ unsigned long flags = 0;
+
+ jiffy_gap = jiffies - rq_info.rq_poll_last_jiffy;
+ if (jiffy_gap >= rq_info.rq_poll_jiffies) {
+ spin_lock_irqsave(&rq_lock, flags);
+ if (!rq_info.rq_avg)
+ rq_info.rq_poll_total_jiffies = 0;
+ rq_avg = nr_running() * 10;
+ if (rq_info.rq_poll_total_jiffies) {
+ rq_avg = (rq_avg * jiffy_gap) +
+ (rq_info.rq_avg *
+ rq_info.rq_poll_total_jiffies);
+ do_div(rq_avg,
+ rq_info.rq_poll_total_jiffies + jiffy_gap);
+ }
+ rq_info.rq_avg = rq_avg;
+ rq_info.rq_poll_total_jiffies += jiffy_gap;
+ rq_info.rq_poll_last_jiffy = jiffies;
+ spin_unlock_irqrestore(&rq_lock, flags);
+ }
+}
+static void wakeup_user(void)
+{
+ unsigned long jiffy_gap;
+
+ jiffy_gap = jiffies - rq_info.def_timer_last_jiffy;
+ if (jiffy_gap >= rq_info.def_timer_jiffies) {
+ rq_info.def_timer_last_jiffy = jiffies;
+ queue_work(rq_wq, &rq_info.def_timer_work);
+ }
+}
+
/*
* We rearm the timer until we get disabled by the idle code.
* Called with interrupts disabled.
@@ -1155,8 +1196,20 @@
* Do not call, when we are not in irq context and have
* no valid regs pointer
*/
- if (regs)
+ if (regs) {
tick_sched_handle(ts, regs);
+ if (rq_info.init == 1 &&
+ tick_do_timer_cpu == smp_processor_id()) {
+ /*
+ * update run queue statistics
+ */
+ update_rq_stats();
+ /*
+ * wakeup user if needed
+ */
+ wakeup_user();
+ }
+ }
/* No need to reprogram if we are in idle or full dynticks mode */
if (unlikely(ts->tick_stopped))