soc: qcom: rq_stats: add snapshot of run queue stats driver

Add a snapshot of the msm_rq_stats driver as of msm-4.4 commit
c605e110ab186049 (Merge "usb: gadget: composite: Return bcdUSB
0x0310 for Superspeed and higher").
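
The driver creates /sys/devices/system/cpu/cpu0/rq-stats/ with the
nodes run_queue_avg, run_queue_poll_ms, def_timer_ms,
cpu_normalized_load and hotplug_disable. The def_timer_ms node is
signalled via sysfs_notify() from the tick path, so a userspace
governor can block on it with poll(). A minimal sketch of such a
poller (illustrative only, not part of this patch):

  #include <fcntl.h>
  #include <poll.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          struct pollfd pfd;
          char buf[64];
          ssize_t n;

          pfd.fd = open("/sys/devices/system/cpu/cpu0/rq-stats/def_timer_ms",
                        O_RDONLY);
          if (pfd.fd < 0)
                  return 1;
          pfd.events = POLLERR | POLLPRI;

          /* an initial read arms the sysfs poll */
          read(pfd.fd, buf, sizeof(buf) - 1);

          while (poll(&pfd, 1, -1) > 0) {
                  lseek(pfd.fd, 0, SEEK_SET);
                  n = read(pfd.fd, buf, sizeof(buf) - 1);
                  if (n <= 0)
                          break;
                  buf[n] = '\0';
                  /* a real governor would read cpu_normalized_load and
                   * run_queue_avg here and online/offline cores */
                  printf("ms since last wakeup: %s", buf);
          }
          close(pfd.fd);
          return 0;
  }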

Change-Id: I723310d12616f61598b2815e5ed3024418966ecc
Signed-off-by: Kyle Yan <kyan@codeaurora.org>
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 5d401ba..48d6faa 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,6 +10,14 @@
 	  the entities if corruption is suspected.
 	  If unsure, say N
 
+config QCOM_RUN_QUEUE_STATS
+       bool "Enable collection and exporting of QTI Run Queue stats to userspace"
+       help
+        This option enables the driver to periodically collecting the statistics
+        of kernel run queue information and calculate the load of the system.
+        This information is exported to usespace via sysfs entries and userspace
+        algorithms uses info and decide when to turn on/off the cpu cores.
+
 config QCOM_GSBI
         tristate "QCOM General Serial Bus Interface"
         depends on ARCH_QCOM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 058b6f7..6ad72fc 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -17,3 +17,4 @@
 obj-$(CONFIG_MSM_GLADIATOR_ERP_V2) += gladiator_erp_v2.o
 obj-$(CONFIG_QCOM_WATCHDOG_V2) += watchdog_v2.o
 obj-$(CONFIG_QCOM_MEMORY_DUMP_V2) += memory_dump_v2.o
+obj-$(CONFIG_QCOM_RUN_QUEUE_STATS) += rq_stats.o
diff --git a/drivers/soc/qcom/rq_stats.c b/drivers/soc/qcom/rq_stats.c
new file mode 100644
index 0000000..5850c46
--- /dev/null
+++ b/drivers/soc/qcom/rq_stats.c
@@ -0,0 +1,419 @@
+/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/hrtimer.h>
+#include <linux/cpu.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/rq_stats.h>
+#include <linux/cpufreq.h>
+#include <linux/kernel_stat.h>
+#include <linux/tick.h>
+#include <asm/smp_plat.h>
+#include <linux/suspend.h>
+
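+/* scratch buffer size for sysfs prints; the timer defaults are in jiffies */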
+#define MAX_LONG_SIZE 24
+#define DEFAULT_RQ_POLL_JIFFIES 1
+#define DEFAULT_DEF_TIMER_JIFFIES 5
+
+static struct notifier_block freq_transition;
+static struct notifier_block cpu_hotplug;
+
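+/* per-CPU bookkeeping for the frequency-normalized load average */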
+struct cpu_load_data {
+	cputime64_t prev_cpu_idle;
+	cputime64_t prev_cpu_wall;
+	unsigned int avg_load_maxfreq;
+	unsigned int samples;
+	unsigned int window_size;
+	unsigned int cur_freq;
+	unsigned int policy_max;
+	cpumask_var_t related_cpus;
+	struct mutex cpu_load_mutex;
+};
+
+static DEFINE_PER_CPU(struct cpu_load_data, cpuload);
+
+static int update_average_load(unsigned int freq, unsigned int cpu)
+{
+	struct cpu_load_data *pcpu = &per_cpu(cpuload, cpu);
+	cputime64_t cur_wall_time, cur_idle_time;
+	unsigned int idle_time, wall_time;
+	unsigned int cur_load, load_at_max_freq;
+
+	cur_idle_time = get_cpu_idle_time(cpu, &cur_wall_time, 0);
+
+	wall_time = (unsigned int) (cur_wall_time - pcpu->prev_cpu_wall);
+	pcpu->prev_cpu_wall = cur_wall_time;
+
+	idle_time = (unsigned int) (cur_idle_time - pcpu->prev_cpu_idle);
+	pcpu->prev_cpu_idle = cur_idle_time;
+
+	if (unlikely(!wall_time || wall_time < idle_time))
+		return 0;
+
+	cur_load = 100 * (wall_time - idle_time) / wall_time;
+
+	/* Normalize the load to this CPU's maximum frequency */
+	load_at_max_freq = (cur_load * freq) / pcpu->policy_max;
+
+	if (!pcpu->avg_load_maxfreq) {
+		/* This is the first sample in this window */
+		pcpu->avg_load_maxfreq = load_at_max_freq;
+		pcpu->window_size = wall_time;
+	} else {
+		/*
+		 * There is already a sample available in this window.
+		 * Compute a weighted average with the previous entry so
+		 * that we get the precise weighted load.
+		 */
+		pcpu->avg_load_maxfreq =
+			((pcpu->avg_load_maxfreq * pcpu->window_size) +
+			(load_at_max_freq * wall_time)) /
+			(wall_time + pcpu->window_size);
+
+		pcpu->window_size += wall_time;
+	}
+
+	return 0;
+}
+
+static unsigned int report_load_at_max_freq(void)
+{
+	int cpu;
+	struct cpu_load_data *pcpu;
+	unsigned int total_load = 0;
+
+	for_each_online_cpu(cpu) {
+		pcpu = &per_cpu(cpuload, cpu);
+		mutex_lock(&pcpu->cpu_load_mutex);
+		update_average_load(pcpu->cur_freq, cpu);
+		total_load += pcpu->avg_load_maxfreq;
+		pcpu->avg_load_maxfreq = 0;
+		mutex_unlock(&pcpu->cpu_load_mutex);
+	}
+	return total_load;
+}
+
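+/*
+ * On a frequency change, fold the load accumulated at the old frequency
+ * into the current window for every CPU in the policy, then record the
+ * new frequency for subsequent samples.
+ */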
+static int cpufreq_transition_handler(struct notifier_block *nb,
+			unsigned long val, void *data)
+{
+	struct cpufreq_freqs *freqs = data;
+	struct cpu_load_data *this_cpu = &per_cpu(cpuload, freqs->cpu);
+	int j;
+
+	switch (val) {
+	case CPUFREQ_POSTCHANGE:
+		for_each_cpu(j, this_cpu->related_cpus) {
+			struct cpu_load_data *pcpu = &per_cpu(cpuload, j);
+
+			mutex_lock(&pcpu->cpu_load_mutex);
+			update_average_load(freqs->old, j);
+			pcpu->cur_freq = freqs->new;
+			mutex_unlock(&pcpu->cpu_load_mutex);
+		}
+		break;
+	}
+	return 0;
+}
+
+static void update_related_cpus(void)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, cpu_online_mask) {
+		struct cpu_load_data *this_cpu = &per_cpu(cpuload, cpu);
+		struct cpufreq_policy cpu_policy;
+
+		cpufreq_get_policy(&cpu_policy, cpu);
+		cpumask_copy(this_cpu->related_cpus, cpu_policy.cpus);
+	}
+}
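+
+/*
+ * A CPU coming online may carry a stale frequency and load window:
+ * seed cur_freq if it was never set, refresh the policy-sharing
+ * masks and restart the load-average window.
+ */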
+static int cpu_hotplug_handler(struct notifier_block *nb,
+			unsigned long val, void *data)
+{
+	unsigned int cpu = (unsigned long)data;
+	struct cpu_load_data *this_cpu = &per_cpu(cpuload, cpu);
+
+	switch (val) {
+	case CPU_ONLINE:
+		if (!this_cpu->cur_freq)
+			this_cpu->cur_freq = cpufreq_quick_get(cpu);
+		update_related_cpus();
+		/* fall through */
+	case CPU_ONLINE_FROZEN:
+		this_cpu->avg_load_maxfreq = 0;
+	}
+
+	return NOTIFY_OK;
+}
+
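+/*
+ * Flag hotplug as disallowed across suspend/hibernate so that
+ * userspace governors reading "hotplug_disable" back off.
+ */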
+static int system_suspend_handler(struct notifier_block *nb,
+				unsigned long val, void *data)
+{
+	switch (val) {
+	case PM_POST_HIBERNATION:
+	case PM_POST_SUSPEND:
+	case PM_POST_RESTORE:
+		rq_info.hotplug_disabled = 0;
+		break;
+	case PM_HIBERNATION_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		rq_info.hotplug_disabled = 1;
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+	return NOTIFY_OK;
+}
+
+static ssize_t hotplug_disable_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	unsigned int val = rq_info.hotplug_disabled;
+
+	return snprintf(buf, MAX_LONG_SIZE, "%u\n", val);
+}
+
+static struct kobj_attribute hotplug_disabled_attr = __ATTR_RO(hotplug_disable);
+
+static void def_work_fn(struct work_struct *work)
+{
+	/* Notify polling threads on change of value */
+	sysfs_notify(rq_info.kobj, NULL, "def_timer_ms");
+}
+
+static ssize_t run_queue_avg_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	unsigned int val = 0;
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&rq_lock, flags);
+	/* rq avg currently available only on one core */
+	val = rq_info.rq_avg;
+	rq_info.rq_avg = 0;
+	spin_unlock_irqrestore(&rq_lock, flags);
+
+	return snprintf(buf, PAGE_SIZE, "%u.%u\n", val/10, val%10);
+}
+
+static struct kobj_attribute run_queue_avg_attr = __ATTR_RO(run_queue_avg);
+
+static ssize_t show_run_queue_poll_ms(struct kobject *kobj,
+				      struct kobj_attribute *attr, char *buf)
+{
+	int ret = 0;
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&rq_lock, flags);
+	ret = snprintf(buf, MAX_LONG_SIZE, "%u\n",
+		       jiffies_to_msecs(rq_info.rq_poll_jiffies));
+	spin_unlock_irqrestore(&rq_lock, flags);
+
+	return ret;
+}
+
+static ssize_t store_run_queue_poll_ms(struct kobject *kobj,
+				       struct kobj_attribute *attr,
+				       const char *buf, size_t count)
+{
+	unsigned int val = 0;
+	unsigned long flags = 0;
+	static DEFINE_MUTEX(lock_poll_ms);
+
+	mutex_lock(&lock_poll_ms);
+
+	spin_lock_irqsave(&rq_lock, flags);
+	if (kstrtouint(buf, 0, &val))
+		count = -EINVAL;
+	else
+		rq_info.rq_poll_jiffies = msecs_to_jiffies(val);
+	spin_unlock_irqrestore(&rq_lock, flags);
+
+	mutex_unlock(&lock_poll_ms);
+
+	return count;
+}
+
+static struct kobj_attribute run_queue_poll_ms_attr =
+	__ATTR(run_queue_poll_ms, S_IWUSR | S_IRUSR, show_run_queue_poll_ms,
+			store_run_queue_poll_ms);
+
+static ssize_t show_def_timer_ms(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	int64_t diff;
+	unsigned int udiff;
+
+	diff = ktime_to_ns(ktime_get()) - rq_info.def_start_time;
+	do_div(diff, 1000 * 1000);
+	udiff = (unsigned int) diff;
+
+	return snprintf(buf, MAX_LONG_SIZE, "%u\n", udiff);
+}
+
+static ssize_t store_def_timer_ms(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	unsigned int val = 0;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	rq_info.def_timer_jiffies = msecs_to_jiffies(val);
+
+	rq_info.def_start_time = ktime_to_ns(ktime_get());
+	return count;
+}
+
+static struct kobj_attribute def_timer_ms_attr =
+	__ATTR(def_timer_ms, S_IWUSR | S_IRUSR, show_def_timer_ms,
+			store_def_timer_ms);
+
+static ssize_t show_cpu_normalized_load(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, MAX_LONG_SIZE, "%u\n", report_load_at_max_freq());
+}
+
+static struct kobj_attribute cpu_normalized_load_attr =
+	__ATTR(cpu_normalized_load, S_IRUSR, show_cpu_normalized_load,
+			NULL);
+
+static struct attribute *rq_attrs[] = {
+	&cpu_normalized_load_attr.attr,
+	&def_timer_ms_attr.attr,
+	&run_queue_avg_attr.attr,
+	&run_queue_poll_ms_attr.attr,
+	&hotplug_disabled_attr.attr,
+	NULL,
+};
+
+static struct attribute_group rq_attr_group = {
+	.attrs = rq_attrs,
+};
+
+static int init_rq_attribs(void)
+{
+	int err;
+
+	rq_info.rq_avg = 0;
+	rq_info.attr_group = &rq_attr_group;
+
+	/* Create /sys/devices/system/cpu/cpu0/rq-stats/... */
+	rq_info.kobj = kobject_create_and_add("rq-stats",
+			&get_cpu_device(0)->kobj);
+	if (!rq_info.kobj)
+		return -ENOMEM;
+
+	err = sysfs_create_group(rq_info.kobj, rq_info.attr_group);
+	if (err)
+		kobject_put(rq_info.kobj);
+	else
+		kobject_uevent(rq_info.kobj, KOBJ_ADD);
+
+	return err;
+}
+
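+/*
+ * late_initcall: by this point the cpufreq driver should have
+ * probed, so cpufreq_get_policy() and cpufreq_quick_get() below
+ * return real per-CPU values.
+ */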
+static int __init msm_rq_stats_init(void)
+{
+	int ret;
+	int i;
+	struct cpufreq_policy cpu_policy;
+
+#ifndef CONFIG_SMP
+	/* Bail out if this is not an SMP target */
+	rq_info.init = 0;
+	return -EPERM;
+#endif
+
+	rq_wq = create_singlethread_workqueue("rq_stats");
+	if (!rq_wq)
+		return -ENOMEM;
+	INIT_WORK(&rq_info.def_timer_work, def_work_fn);
+	spin_lock_init(&rq_lock);
+	rq_info.rq_poll_jiffies = DEFAULT_RQ_POLL_JIFFIES;
+	rq_info.def_timer_jiffies = DEFAULT_DEF_TIMER_JIFFIES;
+	rq_info.rq_poll_last_jiffy = 0;
+	rq_info.def_timer_last_jiffy = 0;
+	rq_info.hotplug_disabled = 0;
+	ret = init_rq_attribs();
+	if (ret) {
+		destroy_workqueue(rq_wq);
+		return ret;
+	}
+
+	rq_info.init = 1;
+
+	for_each_possible_cpu(i) {
+		struct cpu_load_data *pcpu = &per_cpu(cpuload, i);
+
+		mutex_init(&pcpu->cpu_load_mutex);
+		cpufreq_get_policy(&cpu_policy, i);
+		pcpu->policy_max = cpu_policy.cpuinfo.max_freq;
+		if (cpu_online(i))
+			pcpu->cur_freq = cpufreq_quick_get(i);
+		cpumask_copy(pcpu->related_cpus, cpu_policy.cpus);
+	}
+	freq_transition.notifier_call = cpufreq_transition_handler;
+	cpu_hotplug.notifier_call = cpu_hotplug_handler;
+	cpufreq_register_notifier(&freq_transition,
+					CPUFREQ_TRANSITION_NOTIFIER);
+	register_hotcpu_notifier(&cpu_hotplug);
+
+	return ret;
+}
+late_initcall(msm_rq_stats_init);
+
+static int __init msm_rq_stats_early_init(void)
+{
+#ifndef CONFIG_SMP
+	/* Bail out if this is not an SMP target */
+	rq_info.init = 0;
+	return -EPERM;
+#endif
+
+	pm_notifier(system_suspend_handler, 0);
+	return 0;
+}
+core_initcall(msm_rq_stats_early_init);
diff --git a/include/linux/rq_stats.h b/include/linux/rq_stats.h
new file mode 100644
index 0000000..44cd842
--- /dev/null
+++ b/include/linux/rq_stats.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2011,2013-2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
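+/* state shared between the sysfs driver and the scheduler-tick sampler */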
+struct rq_data {
+	unsigned int rq_avg;
+	unsigned long rq_poll_jiffies;
+	unsigned long def_timer_jiffies;
+	unsigned long rq_poll_last_jiffy;
+	unsigned long rq_poll_total_jiffies;
+	unsigned long def_timer_last_jiffy;
+	unsigned int hotplug_disabled;
+	int64_t def_start_time;
+	struct attribute_group *attr_group;
+	struct kobject *kobj;
+	struct work_struct def_timer_work;
+	int init;
+};
+
+extern spinlock_t rq_lock;
+extern struct rq_data rq_info;
+extern struct workqueue_struct *rq_wq;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 204fdc8..bb14142 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
 #include <linux/context_tracking.h>
+#include <linux/rq_stats.h>
 
 #include <asm/irq_regs.h>
 
@@ -30,6 +31,10 @@
 
 #include <trace/events/timer.h>
 
+struct rq_data rq_info;
+struct workqueue_struct *rq_wq;
+spinlock_t rq_lock;
+
 /*
  * Per-CPU nohz control structure
  */
@@ -1138,6 +1143,50 @@
  * High resolution timer specific code
  */
 #ifdef CONFIG_HIGH_RES_TIMERS
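+/*
+ * Sample the run queue depth from the tick. rq_avg is kept in tenths
+ * (nr_running() * 10) so one decimal place survives integer math, and
+ * samples are weighted by the jiffies they cover. A reader of
+ * "run_queue_avg" zeroes rq_avg, which restarts the window here.
+ */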
+static void update_rq_stats(void)
+{
+	unsigned long jiffy_gap = 0;
+	unsigned int rq_avg = 0;
+	unsigned long flags = 0;
+
+	jiffy_gap = jiffies - rq_info.rq_poll_last_jiffy;
+	if (jiffy_gap >= rq_info.rq_poll_jiffies) {
+		spin_lock_irqsave(&rq_lock, flags);
+		if (!rq_info.rq_avg)
+			rq_info.rq_poll_total_jiffies = 0;
+		rq_avg = nr_running() * 10;
+		if (rq_info.rq_poll_total_jiffies) {
+			rq_avg = (rq_avg * jiffy_gap) +
+				(rq_info.rq_avg *
+				 rq_info.rq_poll_total_jiffies);
+			do_div(rq_avg,
+				rq_info.rq_poll_total_jiffies + jiffy_gap);
+		}
+		rq_info.rq_avg = rq_avg;
+		rq_info.rq_poll_total_jiffies += jiffy_gap;
+		rq_info.rq_poll_last_jiffy = jiffies;
+		spin_unlock_irqrestore(&rq_lock, flags);
+	}
+}
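+
+/* notify "def_timer_ms" pollers at most once per def_timer_jiffies */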
+static void wakeup_user(void)
+{
+	unsigned long jiffy_gap;
+
+	jiffy_gap = jiffies - rq_info.def_timer_last_jiffy;
+	if (jiffy_gap >= rq_info.def_timer_jiffies) {
+		rq_info.def_timer_last_jiffy = jiffies;
+		queue_work(rq_wq, &rq_info.def_timer_work);
+	}
+}
+
 /*
  * We rearm the timer until we get disabled by the idle code.
  * Called with interrupts disabled.
@@ -1155,8 +1204,16 @@
 	 * Do not call, when we are not in irq context and have
 	 * no valid regs pointer
 	 */
-	if (regs)
+	if (regs) {
 		tick_sched_handle(ts, regs);
+		if (rq_info.init == 1 &&
+				tick_do_timer_cpu == smp_processor_id()) {
+			/* update run queue statistics */
+			update_rq_stats();
+			/* wake up userspace pollers if needed */
+			wakeup_user();
+		}
+	}
 
 	/* No need to reprogram if we are in idle or full dynticks mode */
 	if (unlikely(ts->tick_stopped))