| /* Copyright (c) 2014-2017, The Linux Foundation. All rights reserved. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 and |
| * only version 2 as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
| #define pr_fmt(fmt) "core_ctl: " fmt |
| |
| #include <linux/init.h> |
| #include <linux/notifier.h> |
| #include <linux/cpu.h> |
| #include <linux/cpumask.h> |
| #include <linux/cpufreq.h> |
| #include <linux/kthread.h> |
| #include <linux/sched.h> |
| #include <linux/sched/rt.h> |
| |
| #include <trace/events/sched.h> |
| |
| #define MAX_CPUS_PER_CLUSTER 6 |
| #define MAX_CLUSTERS 2 |
| |
/*
 * Per-cluster core control state. One entry per CPU cluster in
 * cluster_state[]; shared fields are updated under the global state_lock.
 */
struct cluster_data {
	bool inited;		/* set at the end of cluster_init() */
	unsigned int min_cpus;	/* sysfs lower bound on needed CPUs */
	unsigned int max_cpus;	/* sysfs upper bound on needed CPUs */
	unsigned int offline_delay_ms;	/* delay before lowering need_cpus */
	/* busy% thresholds, indexed by (active_cpus - 1); see eval_need() */
	unsigned int busy_up_thres[MAX_CPUS_PER_CLUSTER];
	unsigned int busy_down_thres[MAX_CPUS_PER_CLUSTER];
	unsigned int active_cpus;	/* online and not isolated */
	unsigned int num_cpus;		/* total CPUs in this cluster */
	unsigned int nr_isolated_cpus;	/* CPUs isolated by this driver */
	cpumask_t cpu_mask;		/* CPUs belonging to this cluster */
	unsigned int need_cpus;		/* last evaluated demand */
	unsigned int task_thres;	/* nrrun level that unisolates all CPUs */
	s64 need_ts;		/* ms timestamp of last need_cpus change */
	struct list_head lru;	/* cpu_data entries, least-recently-online first */
	bool pending;		/* work queued for core_ctl_thread */
	spinlock_t pending_lock;	/* protects 'pending' */
	bool is_big_cluster;	/* uses big_avg instead of avg; boost target */
	int nrrun;		/* rounded-up average nr of running tasks */
	struct task_struct *core_ctl_thread;	/* per-cluster worker kthread */
	unsigned int first_cpu;	/* lowest CPU id; used as cluster identity */
	unsigned int boost;	/* boost refcount; forces need = max_cpus */
	struct kobject kobj;	/* sysfs directory under the CPU device */
};
| |
/* Per-CPU core control state, linked into its cluster's LRU list. */
struct cpu_data {
	bool is_busy;		/* hysteresis state from busy_up/down_thres */
	unsigned int busy;	/* last utilization reported by the scheduler */
	unsigned int cpu;	/* CPU id this entry describes */
	bool not_preferred;	/* sysfs hint: isolate this CPU first */
	struct cluster_data *cluster;	/* owning cluster (NULL until init) */
	struct list_head sib;	/* node in cluster->lru */
	bool isolated_by_us;	/* isolation done by this driver, not others */
};
| |
static DEFINE_PER_CPU(struct cpu_data, cpu_state);
static struct cluster_data cluster_state[MAX_CLUSTERS];
static unsigned int num_clusters;	/* valid entries in cluster_state[] */

/* Iterate over initialized slots of cluster_state[]; idx must start at 0. */
#define for_each_cluster(cluster, idx) \
	for ((cluster) = &cluster_state[idx]; (idx) < num_clusters;\
		(idx)++, (cluster) = &cluster_state[idx])

/* Protects cluster/cpu state shared between sysfs, hotplug and the check path */
static DEFINE_SPINLOCK(state_lock);
static void apply_need(struct cluster_data *state);
static void wake_up_core_ctl_thread(struct cluster_data *state);
static bool initialized;	/* set once core_ctl_init() completes */

static unsigned int get_active_cpu_count(const struct cluster_data *cluster);
| |
| /* ========================= sysfs interface =========================== */ |
| |
| static ssize_t store_min_cpus(struct cluster_data *state, |
| const char *buf, size_t count) |
| { |
| unsigned int val; |
| |
| if (sscanf(buf, "%u\n", &val) != 1) |
| return -EINVAL; |
| |
| state->min_cpus = min(val, state->max_cpus); |
| wake_up_core_ctl_thread(state); |
| |
| return count; |
| } |
| |
| static ssize_t show_min_cpus(const struct cluster_data *state, char *buf) |
| { |
| return snprintf(buf, PAGE_SIZE, "%u\n", state->min_cpus); |
| } |
| |
| static ssize_t store_max_cpus(struct cluster_data *state, |
| const char *buf, size_t count) |
| { |
| unsigned int val; |
| |
| if (sscanf(buf, "%u\n", &val) != 1) |
| return -EINVAL; |
| |
| val = min(val, state->num_cpus); |
| state->max_cpus = val; |
| state->min_cpus = min(state->min_cpus, state->max_cpus); |
| wake_up_core_ctl_thread(state); |
| |
| return count; |
| } |
| |
| static ssize_t show_max_cpus(const struct cluster_data *state, char *buf) |
| { |
| return snprintf(buf, PAGE_SIZE, "%u\n", state->max_cpus); |
| } |
| |
| static ssize_t store_offline_delay_ms(struct cluster_data *state, |
| const char *buf, size_t count) |
| { |
| unsigned int val; |
| |
| if (sscanf(buf, "%u\n", &val) != 1) |
| return -EINVAL; |
| |
| state->offline_delay_ms = val; |
| apply_need(state); |
| |
| return count; |
| } |
| |
| static ssize_t show_task_thres(const struct cluster_data *state, char *buf) |
| { |
| return snprintf(buf, PAGE_SIZE, "%u\n", state->task_thres); |
| } |
| |
| static ssize_t store_task_thres(struct cluster_data *state, |
| const char *buf, size_t count) |
| { |
| unsigned int val; |
| |
| if (sscanf(buf, "%u\n", &val) != 1) |
| return -EINVAL; |
| |
| if (val < state->num_cpus) |
| return -EINVAL; |
| |
| state->task_thres = val; |
| apply_need(state); |
| |
| return count; |
| } |
| |
| static ssize_t show_offline_delay_ms(const struct cluster_data *state, |
| char *buf) |
| { |
| return snprintf(buf, PAGE_SIZE, "%u\n", state->offline_delay_ms); |
| } |
| |
| static ssize_t store_busy_up_thres(struct cluster_data *state, |
| const char *buf, size_t count) |
| { |
| unsigned int val[MAX_CPUS_PER_CLUSTER]; |
| int ret, i; |
| |
| ret = sscanf(buf, "%u %u %u %u %u %u\n", |
| &val[0], &val[1], &val[2], &val[3], |
| &val[4], &val[5]); |
| if (ret != 1 && ret != state->num_cpus) |
| return -EINVAL; |
| |
| if (ret == 1) { |
| for (i = 0; i < state->num_cpus; i++) |
| state->busy_up_thres[i] = val[0]; |
| } else { |
| for (i = 0; i < state->num_cpus; i++) |
| state->busy_up_thres[i] = val[i]; |
| } |
| apply_need(state); |
| return count; |
| } |
| |
| static ssize_t show_busy_up_thres(const struct cluster_data *state, char *buf) |
| { |
| int i, count = 0; |
| |
| for (i = 0; i < state->num_cpus; i++) |
| count += snprintf(buf + count, PAGE_SIZE - count, "%u ", |
| state->busy_up_thres[i]); |
| |
| count += snprintf(buf + count, PAGE_SIZE - count, "\n"); |
| return count; |
| } |
| |
| static ssize_t store_busy_down_thres(struct cluster_data *state, |
| const char *buf, size_t count) |
| { |
| unsigned int val[MAX_CPUS_PER_CLUSTER]; |
| int ret, i; |
| |
| ret = sscanf(buf, "%u %u %u %u %u %u\n", |
| &val[0], &val[1], &val[2], &val[3], |
| &val[4], &val[5]); |
| if (ret != 1 && ret != state->num_cpus) |
| return -EINVAL; |
| |
| if (ret == 1) { |
| for (i = 0; i < state->num_cpus; i++) |
| state->busy_down_thres[i] = val[0]; |
| } else { |
| for (i = 0; i < state->num_cpus; i++) |
| state->busy_down_thres[i] = val[i]; |
| } |
| apply_need(state); |
| return count; |
| } |
| |
| static ssize_t show_busy_down_thres(const struct cluster_data *state, char *buf) |
| { |
| int i, count = 0; |
| |
| for (i = 0; i < state->num_cpus; i++) |
| count += snprintf(buf + count, PAGE_SIZE - count, "%u ", |
| state->busy_down_thres[i]); |
| |
| count += snprintf(buf + count, PAGE_SIZE - count, "\n"); |
| return count; |
| } |
| |
| static ssize_t store_is_big_cluster(struct cluster_data *state, |
| const char *buf, size_t count) |
| { |
| unsigned int val; |
| |
| if (sscanf(buf, "%u\n", &val) != 1) |
| return -EINVAL; |
| |
| state->is_big_cluster = val ? 1 : 0; |
| return count; |
| } |
| |
| static ssize_t show_is_big_cluster(const struct cluster_data *state, char *buf) |
| { |
| return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster); |
| } |
| |
| static ssize_t show_need_cpus(const struct cluster_data *state, char *buf) |
| { |
| return snprintf(buf, PAGE_SIZE, "%u\n", state->need_cpus); |
| } |
| |
| static ssize_t show_active_cpus(const struct cluster_data *state, char *buf) |
| { |
| return snprintf(buf, PAGE_SIZE, "%u\n", state->active_cpus); |
| } |
| |
| static ssize_t show_global_state(const struct cluster_data *state, char *buf) |
| { |
| struct cpu_data *c; |
| struct cluster_data *cluster; |
| ssize_t count = 0; |
| unsigned int cpu; |
| |
| spin_lock_irq(&state_lock); |
| for_each_possible_cpu(cpu) { |
| c = &per_cpu(cpu_state, cpu); |
| cluster = c->cluster; |
| if (!cluster || !cluster->inited) |
| continue; |
| |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "CPU%u\n", cpu); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tCPU: %u\n", c->cpu); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tOnline: %u\n", |
| cpu_online(c->cpu)); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tIsolated: %u\n", |
| cpu_isolated(c->cpu)); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tFirst CPU: %u\n", |
| cluster->first_cpu); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tBusy%%: %u\n", c->busy); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tIs busy: %u\n", c->is_busy); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tNot preferred: %u\n", |
| c->not_preferred); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tNr running: %u\n", cluster->nrrun); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tActive CPUs: %u\n", get_active_cpu_count(cluster)); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tNeed CPUs: %u\n", cluster->need_cpus); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tNr isolated CPUs: %u\n", |
| cluster->nr_isolated_cpus); |
| count += snprintf(buf + count, PAGE_SIZE - count, |
| "\tBoost: %u\n", (unsigned int) cluster->boost); |
| } |
| spin_unlock_irq(&state_lock); |
| |
| return count; |
| } |
| |
| static ssize_t store_not_preferred(struct cluster_data *state, |
| const char *buf, size_t count) |
| { |
| struct cpu_data *c; |
| unsigned int i; |
| unsigned int val[MAX_CPUS_PER_CLUSTER]; |
| unsigned long flags; |
| int ret; |
| |
| ret = sscanf(buf, "%u %u %u %u %u %u\n", |
| &val[0], &val[1], &val[2], &val[3], |
| &val[4], &val[5]); |
| if (ret != state->num_cpus) |
| return -EINVAL; |
| |
| spin_lock_irqsave(&state_lock, flags); |
| for (i = 0; i < state->num_cpus; i++) { |
| c = &per_cpu(cpu_state, i + state->first_cpu); |
| c->not_preferred = val[i]; |
| } |
| spin_unlock_irqrestore(&state_lock, flags); |
| |
| return count; |
| } |
| |
| static ssize_t show_not_preferred(const struct cluster_data *state, char *buf) |
| { |
| struct cpu_data *c; |
| ssize_t count = 0; |
| unsigned long flags; |
| int i; |
| |
| spin_lock_irqsave(&state_lock, flags); |
| for (i = 0; i < state->num_cpus; i++) { |
| c = &per_cpu(cpu_state, i + state->first_cpu); |
| count += scnprintf(buf + count, PAGE_SIZE - count, |
| "CPU#%d: %u\n", c->cpu, c->not_preferred); |
| } |
| spin_unlock_irqrestore(&state_lock, flags); |
| |
| return count; |
| } |
| |
| |
/* Sysfs attribute wrapper binding show/store callbacks to a cluster. */
struct core_ctl_attr {
	struct attribute attr;
	ssize_t (*show)(const struct cluster_data *, char *);
	ssize_t (*store)(struct cluster_data *, const char *, size_t count);
};

/* Read-only attribute: 0444, no store callback. */
#define core_ctl_attr_ro(_name)		\
static struct core_ctl_attr _name =	\
__ATTR(_name, 0444, show_##_name, NULL)

/* Read-write attribute: 0644, show_##_name / store_##_name. */
#define core_ctl_attr_rw(_name)			\
static struct core_ctl_attr _name =		\
__ATTR(_name, 0644, show_##_name, store_##_name)

core_ctl_attr_rw(min_cpus);
core_ctl_attr_rw(max_cpus);
core_ctl_attr_rw(offline_delay_ms);
core_ctl_attr_rw(busy_up_thres);
core_ctl_attr_rw(busy_down_thres);
core_ctl_attr_rw(task_thres);
core_ctl_attr_rw(is_big_cluster);
core_ctl_attr_ro(need_cpus);
core_ctl_attr_ro(active_cpus);
core_ctl_attr_ro(global_state);
core_ctl_attr_rw(not_preferred);

/* Attribute set installed for each per-cluster "core_ctl" kobject. */
static struct attribute *default_attrs[] = {
	&min_cpus.attr,
	&max_cpus.attr,
	&offline_delay_ms.attr,
	&busy_up_thres.attr,
	&busy_down_thres.attr,
	&task_thres.attr,
	&is_big_cluster.attr,
	&need_cpus.attr,
	&active_cpus.attr,
	&global_state.attr,
	&not_preferred.attr,
	NULL
};
| |
| #define to_cluster_data(k) container_of(k, struct cluster_data, kobj) |
| #define to_attr(a) container_of(a, struct core_ctl_attr, attr) |
| static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) |
| { |
| struct cluster_data *data = to_cluster_data(kobj); |
| struct core_ctl_attr *cattr = to_attr(attr); |
| ssize_t ret = -EIO; |
| |
| if (cattr->show) |
| ret = cattr->show(data, buf); |
| |
| return ret; |
| } |
| |
| static ssize_t store(struct kobject *kobj, struct attribute *attr, |
| const char *buf, size_t count) |
| { |
| struct cluster_data *data = to_cluster_data(kobj); |
| struct core_ctl_attr *cattr = to_attr(attr); |
| ssize_t ret = -EIO; |
| |
| if (cattr->store) |
| ret = cattr->store(data, buf, count); |
| |
| return ret; |
| } |
| |
/* sysfs plumbing: route reads/writes through the dispatchers above. */
static const struct sysfs_ops sysfs_ops = {
	.show	= show,
	.store	= store,
};

/* kobject type for the per-cluster "core_ctl" sysfs directory. */
static struct kobj_type ktype_core_ctl = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
};
| |
| /* ==================== runqueue based core count =================== */ |
| |
| #define NR_RUNNING_TOLERANCE 5 |
| |
| static void update_running_avg(void) |
| { |
| int avg, iowait_avg, big_avg; |
| struct cluster_data *cluster; |
| unsigned int index = 0; |
| |
| sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg); |
| |
| /* |
| * Round up to the next integer if the average nr running tasks |
| * is within NR_RUNNING_TOLERANCE/100 of the next integer. |
| * If normal rounding up is used, it will allow a transient task |
| * to trigger online event. By the time core is onlined, the task |
| * has finished. |
| * Rounding to closest suffers same problem because scheduler |
| * might only provide running stats per jiffy, and a transient |
| * task could skew the number for one jiffy. If core control |
| * samples every 2 jiffies, it will observe 0.5 additional running |
| * average which rounds up to 1 task. |
| */ |
| avg = (avg + NR_RUNNING_TOLERANCE) / 100; |
| big_avg = (big_avg + NR_RUNNING_TOLERANCE) / 100; |
| |
| for_each_cluster(cluster, index) { |
| if (!cluster->inited) |
| continue; |
| /* |
| * Big cluster only need to take care of big tasks, but if |
| * there are not enough big cores, big tasks need to be run |
| * on little as well. Thus for little's runqueue stat, it |
| * has to use overall runqueue average, or derive what big |
| * tasks would have to be run on little. The latter approach |
| * is not easy to get given core control reacts much slower |
| * than scheduler, and can't predict scheduler's behavior. |
| */ |
| cluster->nrrun = cluster->is_big_cluster ? big_avg : avg; |
| } |
| } |
| |
| /* adjust needed CPUs based on current runqueue information */ |
| static unsigned int apply_task_need(const struct cluster_data *cluster, |
| unsigned int new_need) |
| { |
| /* unisolate all cores if there are enough tasks */ |
| if (cluster->nrrun >= cluster->task_thres) |
| return cluster->num_cpus; |
| |
| /* only unisolate more cores if there are tasks to run */ |
| if (cluster->nrrun > new_need) |
| return new_need + 1; |
| |
| return new_need; |
| } |
| |
| /* ======================= load based core count ====================== */ |
| |
| static unsigned int apply_limits(const struct cluster_data *cluster, |
| unsigned int need_cpus) |
| { |
| return min(max(cluster->min_cpus, need_cpus), cluster->max_cpus); |
| } |
| |
| static unsigned int get_active_cpu_count(const struct cluster_data *cluster) |
| { |
| return cluster->num_cpus - |
| sched_isolate_count(&cluster->cpu_mask, true); |
| } |
| |
| static bool is_active(const struct cpu_data *state) |
| { |
| return cpu_online(state->cpu) && !cpu_isolated(state->cpu); |
| } |
| |
| static bool adjustment_possible(const struct cluster_data *cluster, |
| unsigned int need) |
| { |
| return (need < cluster->active_cpus || (need > cluster->active_cpus && |
| cluster->nr_isolated_cpus)); |
| } |
| |
/*
 * Re-evaluate how many CPUs this cluster needs.
 *
 * Returns true when the demand changed AND an isolate/unisolate
 * adjustment is actually possible, i.e. the worker thread should run.
 * Demand increases take effect immediately; decreases are applied only
 * after offline_delay_ms of sustained lower demand.
 */
static bool eval_need(struct cluster_data *cluster)
{
	unsigned long flags;
	struct cpu_data *c;
	unsigned int need_cpus = 0, last_need, thres_idx;
	int ret = 0;
	bool need_flag = false;
	unsigned int new_need;
	s64 now, elapsed;

	if (unlikely(!cluster->inited))
		return 0;

	spin_lock_irqsave(&state_lock, flags);

	if (cluster->boost) {
		/* Boost pins demand at the configured maximum. */
		need_cpus = cluster->max_cpus;
	} else {
		cluster->active_cpus = get_active_cpu_count(cluster);
		/* Threshold slot is chosen by the current active count. */
		thres_idx = cluster->active_cpus ? cluster->active_cpus - 1 : 0;
		list_for_each_entry(c, &cluster->lru, sib) {
			/*
			 * Hysteresis: a CPU turns busy at busy_up_thres
			 * and only turns idle again below busy_down_thres.
			 */
			if (c->busy >= cluster->busy_up_thres[thres_idx])
				c->is_busy = true;
			else if (c->busy < cluster->busy_down_thres[thres_idx])
				c->is_busy = false;
			need_cpus += c->is_busy;
		}
		need_cpus = apply_task_need(cluster, need_cpus);
	}
	new_need = apply_limits(cluster, need_cpus);
	need_flag = adjustment_possible(cluster, new_need);

	last_need = cluster->need_cpus;
	now = ktime_to_ms(ktime_get());

	if (new_need > cluster->active_cpus) {
		/* Growing demand is honored without delay. */
		ret = 1;
	} else {
		if (new_need == last_need) {
			/* Unchanged: just restart the decrease timer. */
			cluster->need_ts = now;
			spin_unlock_irqrestore(&state_lock, flags);
			return 0;
		}

		/* Shrinking demand must persist for offline_delay_ms. */
		elapsed =  now - cluster->need_ts;
		ret = elapsed >= cluster->offline_delay_ms;
	}

	if (ret) {
		cluster->need_ts = now;
		cluster->need_cpus = new_need;
	}
	trace_core_ctl_eval_need(cluster->first_cpu, last_need, new_need,
				 ret && need_flag);
	spin_unlock_irqrestore(&state_lock, flags);

	return ret && need_flag;
}
| |
| static void apply_need(struct cluster_data *cluster) |
| { |
| if (eval_need(cluster)) |
| wake_up_core_ctl_thread(cluster); |
| } |
| |
| static void core_ctl_set_busy(struct cpu_data *c, unsigned int busy) |
| { |
| unsigned int old_is_busy = c->is_busy; |
| |
| if (c->busy == busy) |
| return; |
| |
| c->busy = busy; |
| trace_core_ctl_set_busy(c->cpu, busy, old_is_busy, c->is_busy); |
| } |
| |
| /* ========================= core count enforcement ==================== */ |
| |
| static void wake_up_core_ctl_thread(struct cluster_data *cluster) |
| { |
| unsigned long flags; |
| |
| spin_lock_irqsave(&cluster->pending_lock, flags); |
| cluster->pending = true; |
| spin_unlock_irqrestore(&cluster->pending_lock, flags); |
| |
| wake_up_process_no_notif(cluster->core_ctl_thread); |
| } |
| |
/* Last window_start handled by core_ctl_check(); dedups repeat calls. */
static u64 core_ctl_check_timestamp;
| |
| int core_ctl_set_boost(bool boost) |
| { |
| unsigned int index = 0; |
| struct cluster_data *cluster; |
| unsigned long flags; |
| int ret = 0; |
| bool boost_state_changed = false; |
| |
| if (unlikely(!initialized)) |
| return 0; |
| |
| spin_lock_irqsave(&state_lock, flags); |
| for_each_cluster(cluster, index) { |
| if (cluster->is_big_cluster) { |
| if (boost) { |
| boost_state_changed = !cluster->boost; |
| ++cluster->boost; |
| } else { |
| if (!cluster->boost) { |
| pr_err("Error turning off boost. Boost already turned off\n"); |
| ret = -EINVAL; |
| } else { |
| --cluster->boost; |
| boost_state_changed = !cluster->boost; |
| } |
| } |
| break; |
| } |
| } |
| spin_unlock_irqrestore(&state_lock, flags); |
| |
| if (boost_state_changed) |
| apply_need(cluster); |
| |
| trace_core_ctl_set_boost(cluster->boost, ret); |
| |
| return ret; |
| } |
| EXPORT_SYMBOL(core_ctl_set_boost); |
| |
| void core_ctl_check(u64 window_start) |
| { |
| int cpu; |
| unsigned int busy; |
| struct cpu_data *c; |
| struct cluster_data *cluster; |
| unsigned int index = 0; |
| |
| if (unlikely(!initialized)) |
| return; |
| |
| if (window_start == core_ctl_check_timestamp) |
| return; |
| |
| core_ctl_check_timestamp = window_start; |
| |
| for_each_possible_cpu(cpu) { |
| |
| c = &per_cpu(cpu_state, cpu); |
| cluster = c->cluster; |
| |
| if (!cluster || !cluster->inited) |
| continue; |
| |
| busy = sched_get_cpu_util(cpu); |
| core_ctl_set_busy(c, busy); |
| } |
| |
| update_running_avg(); |
| |
| for_each_cluster(cluster, index) { |
| if (eval_need(cluster)) |
| wake_up_core_ctl_thread(cluster); |
| } |
| } |
| |
| static void move_cpu_lru(struct cpu_data *cpu_data) |
| { |
| unsigned long flags; |
| |
| spin_lock_irqsave(&state_lock, flags); |
| list_del(&cpu_data->sib); |
| list_add_tail(&cpu_data->sib, &cpu_data->cluster->lru); |
| spin_unlock_irqrestore(&state_lock, flags); |
| } |
| |
/*
 * Isolate CPUs until the cluster's active count drops to @need.
 *
 * Pass 1 walks the LRU (least-recently-online first) and isolates only
 * non-busy CPUs. Pass 2 runs only if the active count still exceeds
 * max_cpus and will isolate busy CPUs too. state_lock is dropped around
 * each sched_isolate_cpu() call (it may sleep/take other locks);
 * num_cpus bounds the walk so the list rotation done by move_cpu_lru()
 * cannot make the traversal revisit entries forever.
 */
static void try_to_isolate(struct cluster_data *cluster, unsigned int need)
{
	struct cpu_data *c, *tmp;
	unsigned long flags;
	unsigned int num_cpus = cluster->num_cpus;
	unsigned int nr_isolated = 0;

	/*
	 * Protect against entry being removed (and added at tail) by other
	 * thread (hotplug).
	 */
	spin_lock_irqsave(&state_lock, flags);
	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
		if (!num_cpus--)
			break;

		if (!is_active(c))
			continue;
		if (cluster->active_cpus == need)
			break;
		/* Don't offline busy CPUs. */
		if (c->is_busy)
			continue;

		/* sched_isolate_cpu() cannot be called with state_lock held. */
		spin_unlock_irqrestore(&state_lock, flags);

		pr_debug("Trying to isolate CPU%u\n", c->cpu);
		if (!sched_isolate_cpu(c->cpu)) {
			c->isolated_by_us = true;
			move_cpu_lru(c);
			nr_isolated++;
		} else {
			pr_debug("Unable to isolate CPU%u\n", c->cpu);
		}
		cluster->active_cpus = get_active_cpu_count(cluster);
		spin_lock_irqsave(&state_lock, flags);
	}
	cluster->nr_isolated_cpus += nr_isolated;
	spin_unlock_irqrestore(&state_lock, flags);

	/*
	 * If the number of active CPUs is within the limits, then
	 * don't force isolation of any busy CPUs.
	 */
	if (cluster->active_cpus <= cluster->max_cpus)
		return;

	/* Second pass: enforce max_cpus even if it means isolating busy CPUs. */
	nr_isolated = 0;
	num_cpus = cluster->num_cpus;
	spin_lock_irqsave(&state_lock, flags);
	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
		if (!num_cpus--)
			break;

		if (!is_active(c))
			continue;
		if (cluster->active_cpus <= cluster->max_cpus)
			break;

		spin_unlock_irqrestore(&state_lock, flags);

		pr_debug("Trying to isolate CPU%u\n", c->cpu);
		if (!sched_isolate_cpu(c->cpu)) {
			c->isolated_by_us = true;
			move_cpu_lru(c);
			nr_isolated++;
		} else {
			pr_debug("Unable to isolate CPU%u\n", c->cpu);
		}
		cluster->active_cpus = get_active_cpu_count(cluster);
		spin_lock_irqsave(&state_lock, flags);
	}
	cluster->nr_isolated_cpus += nr_isolated;
	spin_unlock_irqrestore(&state_lock, flags);

}
| |
/*
 * Unisolate CPUs we previously isolated until the active count reaches
 * @need. When @force is false, CPUs flagged not_preferred are skipped;
 * a second forced pass (see try_to_unisolate()) may bring them back.
 * state_lock is dropped around sched_unisolate_cpu(); the num_cpus
 * countdown bounds the walk against concurrent LRU rotation.
 */
static void __try_to_unisolate(struct cluster_data *cluster,
				unsigned int need, bool force)
{
	struct cpu_data *c, *tmp;
	unsigned long flags;
	unsigned int num_cpus = cluster->num_cpus;
	unsigned int nr_unisolated = 0;

	/*
	 * Protect against entry being removed (and added at tail) by other
	 * thread (hotplug).
	 */
	spin_lock_irqsave(&state_lock, flags);
	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
		if (!num_cpus--)
			break;

		/* Only undo isolations that this driver performed. */
		if (!c->isolated_by_us)
			continue;
		/* Skip CPUs already usable, and not_preferred ones unless forced. */
		if ((cpu_online(c->cpu) && !cpu_isolated(c->cpu)) ||
			(!force && c->not_preferred))
			continue;
		if (cluster->active_cpus == need)
			break;

		/* sched_unisolate_cpu() cannot be called with state_lock held. */
		spin_unlock_irqrestore(&state_lock, flags);

		pr_debug("Trying to unisolate CPU%u\n", c->cpu);
		if (!sched_unisolate_cpu(c->cpu)) {
			c->isolated_by_us = false;
			move_cpu_lru(c);
			nr_unisolated++;
		} else {
			pr_debug("Unable to unisolate CPU%u\n", c->cpu);
		}
		cluster->active_cpus = get_active_cpu_count(cluster);
		spin_lock_irqsave(&state_lock, flags);
	}
	cluster->nr_isolated_cpus -= nr_unisolated;
	spin_unlock_irqrestore(&state_lock, flags);
}
| |
| static void try_to_unisolate(struct cluster_data *cluster, unsigned int need) |
| { |
| bool force_use_non_preferred = false; |
| |
| __try_to_unisolate(cluster, need, force_use_non_preferred); |
| |
| if (cluster->active_cpus == need) |
| return; |
| |
| force_use_non_preferred = true; |
| __try_to_unisolate(cluster, need, force_use_non_preferred); |
| } |
| |
| static void __ref do_core_ctl(struct cluster_data *cluster) |
| { |
| unsigned int need; |
| |
| need = apply_limits(cluster, cluster->need_cpus); |
| |
| if (adjustment_possible(cluster, need)) { |
| pr_debug("Trying to adjust group %u from %u to %u\n", |
| cluster->first_cpu, cluster->active_cpus, need); |
| |
| if (cluster->active_cpus > need) |
| try_to_isolate(cluster, need); |
| else if (cluster->active_cpus < need) |
| try_to_unisolate(cluster, need); |
| } |
| } |
| |
/*
 * Per-cluster worker kthread. Sleeps until wake_up_core_ctl_thread()
 * sets cluster->pending, then runs do_core_ctl(). The state is set to
 * TASK_INTERRUPTIBLE *before* checking 'pending' so a wakeup between
 * the check and schedule() is not lost.
 */
static int __ref try_core_ctl(void *data)
{
	struct cluster_data *cluster = data;
	unsigned long flags;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		spin_lock_irqsave(&cluster->pending_lock, flags);
		if (!cluster->pending) {
			/* No work queued: sleep until the next wakeup. */
			spin_unlock_irqrestore(&cluster->pending_lock, flags);
			schedule();
			if (kthread_should_stop())
				break;
			spin_lock_irqsave(&cluster->pending_lock, flags);
		}
		set_current_state(TASK_RUNNING);
		/* Consume the request while still holding pending_lock. */
		cluster->pending = false;
		spin_unlock_irqrestore(&cluster->pending_lock, flags);

		do_core_ctl(cluster);
	}

	return 0;
}
| |
/*
 * CPU hotplug notifier: keep the LRU ordering, isolation accounting and
 * active-CPU count in sync when CPUs come online or die, then wake the
 * worker if an adjustment toward the current need is now possible.
 */
static int __ref cpu_callback(struct notifier_block *nfb,
				unsigned long action, void *hcpu)
{
	uint32_t cpu = (uintptr_t)hcpu;
	struct cpu_data *state = &per_cpu(cpu_state, cpu);
	struct cluster_data *cluster = state->cluster;
	unsigned int need;
	bool do_wakeup, unisolated = false;
	unsigned long flags;

	if (unlikely(!cluster || !cluster->inited))
		return NOTIFY_DONE;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
		cluster->active_cpus = get_active_cpu_count(cluster);

		/*
		 * Moving to the end of the list should only happen in
		 * CPU_ONLINE and not on CPU_UP_PREPARE to prevent an
		 * infinite list traversal when thermal (or other entities)
		 * reject trying to online CPUs.
		 */
		move_cpu_lru(state);
		break;

	case CPU_DEAD:
		/*
		 * We don't want to have a CPU both offline and isolated.
		 * So unisolate a CPU that went down if it was isolated by us.
		 */
		if (state->isolated_by_us) {
			sched_unisolate_cpu_unlocked(cpu);
			state->isolated_by_us = false;
			unisolated = true;
		}

		/* Move a CPU to the end of the LRU when it goes offline. */
		move_cpu_lru(state);

		state->busy = 0;
		cluster->active_cpus = get_active_cpu_count(cluster);
		break;
	default:
		return NOTIFY_DONE;
	}

	need = apply_limits(cluster, cluster->need_cpus);
	/* nr_isolated_cpus bookkeeping must happen under state_lock. */
	spin_lock_irqsave(&state_lock, flags);
	if (unisolated)
		cluster->nr_isolated_cpus--;
	do_wakeup = adjustment_possible(cluster, need);
	spin_unlock_irqrestore(&state_lock, flags);
	if (do_wakeup)
		wake_up_core_ctl_thread(cluster);

	return NOTIFY_OK;
}
| |
/* Hotplug notifier registered from core_ctl_init(). */
static struct notifier_block __refdata cpu_notifier = {
	.notifier_call = cpu_callback,
};
| |
| /* ============================ init code ============================== */ |
| |
/* CPUs for which core_ctl is disabled via the boot parameter below. */
static cpumask_var_t core_ctl_disable_cpumask;
static bool core_ctl_disable_cpumask_present;

/*
 * Parse "core_ctl_disable_cpumask=<cpulist>" from the kernel command
 * line into core_ctl_disable_cpumask; see should_skip() for how it is
 * applied per cluster.
 */
static int __init core_ctl_disable_setup(char *str)
{
	if (!*str)
		return -EINVAL;

	alloc_bootmem_cpumask_var(&core_ctl_disable_cpumask);

	if (cpulist_parse(str, core_ctl_disable_cpumask) < 0) {
		free_bootmem_cpumask_var(core_ctl_disable_cpumask);
		return -EINVAL;
	}

	core_ctl_disable_cpumask_present = true;
	pr_info("disable_cpumask=%*pbl\n",
			cpumask_pr_args(core_ctl_disable_cpumask));

	return 0;
}
early_param("core_ctl_disable_cpumask", core_ctl_disable_setup);
| |
| static bool should_skip(const struct cpumask *mask) |
| { |
| if (!core_ctl_disable_cpumask_present) |
| return false; |
| |
| /* |
| * We operate on a cluster basis. Disable the core_ctl for |
| * a cluster, if all of it's cpus are specified in |
| * core_ctl_disable_cpumask |
| */ |
| return cpumask_subset(mask, core_ctl_disable_cpumask); |
| } |
| |
| static struct cluster_data *find_cluster_by_first_cpu(unsigned int first_cpu) |
| { |
| unsigned int i; |
| |
| for (i = 0; i < num_clusters; ++i) { |
| if (cluster_state[i].first_cpu == first_cpu) |
| return &cluster_state[i]; |
| } |
| |
| return NULL; |
| } |
| |
| static int cluster_init(const struct cpumask *mask) |
| { |
| struct device *dev; |
| unsigned int first_cpu = cpumask_first(mask); |
| struct cluster_data *cluster; |
| struct cpu_data *state; |
| unsigned int cpu; |
| struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
| |
| if (should_skip(mask)) |
| return 0; |
| |
| if (find_cluster_by_first_cpu(first_cpu)) |
| return 0; |
| |
| dev = get_cpu_device(first_cpu); |
| if (!dev) |
| return -ENODEV; |
| |
| pr_info("Creating CPU group %d\n", first_cpu); |
| |
| if (num_clusters == MAX_CLUSTERS) { |
| pr_err("Unsupported number of clusters. Only %u supported\n", |
| MAX_CLUSTERS); |
| return -EINVAL; |
| } |
| cluster = &cluster_state[num_clusters]; |
| ++num_clusters; |
| |
| cpumask_copy(&cluster->cpu_mask, mask); |
| cluster->num_cpus = cpumask_weight(mask); |
| if (cluster->num_cpus > MAX_CPUS_PER_CLUSTER) { |
| pr_err("HW configuration not supported\n"); |
| return -EINVAL; |
| } |
| cluster->first_cpu = first_cpu; |
| cluster->min_cpus = 1; |
| cluster->max_cpus = cluster->num_cpus; |
| cluster->need_cpus = cluster->num_cpus; |
| cluster->offline_delay_ms = 100; |
| cluster->task_thres = UINT_MAX; |
| cluster->nrrun = cluster->num_cpus; |
| INIT_LIST_HEAD(&cluster->lru); |
| spin_lock_init(&cluster->pending_lock); |
| |
| for_each_cpu(cpu, mask) { |
| pr_info("Init CPU%u state\n", cpu); |
| |
| state = &per_cpu(cpu_state, cpu); |
| state->cluster = cluster; |
| state->cpu = cpu; |
| list_add_tail(&state->sib, &cluster->lru); |
| } |
| cluster->active_cpus = get_active_cpu_count(cluster); |
| |
| cluster->core_ctl_thread = kthread_run(try_core_ctl, (void *) cluster, |
| "core_ctl/%d", first_cpu); |
| if (IS_ERR(cluster->core_ctl_thread)) |
| return PTR_ERR(cluster->core_ctl_thread); |
| |
| sched_setscheduler_nocheck(cluster->core_ctl_thread, SCHED_FIFO, |
| ¶m); |
| |
| cluster->inited = true; |
| |
| kobject_init(&cluster->kobj, &ktype_core_ctl); |
| return kobject_add(&cluster->kobj, &dev->kobj, "core_ctl"); |
| } |
| |
| static int __init core_ctl_init(void) |
| { |
| unsigned int cpu; |
| struct cpumask cpus = *cpu_possible_mask; |
| |
| if (should_skip(cpu_possible_mask)) |
| return 0; |
| |
| register_cpu_notifier(&cpu_notifier); |
| |
| for_each_cpu(cpu, &cpus) { |
| int ret; |
| const struct cpumask *cluster_cpus = cpu_coregroup_mask(cpu); |
| |
| ret = cluster_init(cluster_cpus); |
| if (ret) |
| pr_warn("unable to create core ctl group: %d\n", ret); |
| cpumask_andnot(&cpus, &cpus, cluster_cpus); |
| } |
| initialized = true; |
| return 0; |
| } |
| |
| late_initcall(core_ctl_init); |