blob: 36516eb80f6cebd7357d0009676d20964d4edd4e [file] [log] [blame]
/*
 * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/sort.h>
#include <linux/oom.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/notifier.h>
#include <linux/vmpressure.h>
#include <linux/math64.h>

#define CREATE_TRACE_POINTS
#include <trace/events/process_reclaim.h>
26
/* Upper bound on the number of tasks selected for reclaim in one pass */
#define MAX_SWAP_TASKS SWAP_CLUSTER_MAX

/* Handler for swap_work; defined further down in this file */
static void swap_fn(struct work_struct *work);

/* Work item that runs swap_fn(); queued from the vmpressure notifier */
DECLARE_WORK(swap_work, swap_fn);
31
32/* User knob to enable/disable process reclaim feature */
33static int enable_process_reclaim;
34module_param_named(enable_process_reclaim, enable_process_reclaim, int, 0644);
35
36/* The max number of pages tried to be reclaimed in a single run */
37int per_swap_size = SWAP_CLUSTER_MAX * 32;
38module_param_named(per_swap_size, per_swap_size, int, 0644);
39
40int reclaim_avg_efficiency;
41module_param_named(reclaim_avg_efficiency, reclaim_avg_efficiency, int, 0444);
42
43/* The vmpressure region where process reclaim operates */
44static unsigned long pressure_min = 50;
45static unsigned long pressure_max = 90;
46module_param_named(pressure_min, pressure_min, ulong, 0644);
47module_param_named(pressure_max, pressure_max, ulong, 0644);
48
Suyog Sardaee3a85ff2016-06-07 21:15:42 +053049static short min_score_adj = 360;
50module_param_named(min_score_adj, min_score_adj, short, 0644);
51
Vinayak Menon1bddbcf2015-03-20 14:15:39 +053052/*
53 * Scheduling process reclaim workqueue unecessarily
54 * when the reclaim efficiency is low does not make
55 * sense. We try to detect a drop in efficiency and
56 * disable reclaim for a time period. This period and the
57 * period for which we monitor a drop in efficiency is
58 * defined by swap_eff_win. swap_opt_eff is the optimal
59 * efficincy used as theshold for this.
60 */
61static int swap_eff_win = 2;
62module_param_named(swap_eff_win, swap_eff_win, int, 0644);
63
64static int swap_opt_eff = 50;
65module_param_named(swap_opt_eff, swap_opt_eff, int, 0644);
66
67static atomic_t skip_reclaim = ATOMIC_INIT(0);
68/* Not atomic since only a single instance of swap_fn run at a time */
69static int monitor_eff;
70
/*
 * One reclaim candidate picked by swap_fn().
 * @p:             the task whose anon pages will be reclaimed
 * @tasksize:      the task's MM_ANONPAGES count at selection time
 * @oom_score_adj: the task's oom_score_adj at selection time (traced only)
 */
struct selected_task {
	struct task_struct *p;
	int tasksize;
	short oom_score_adj;
};

/*
 * selected_cmp() - order two selected_task entries by ascending tasksize.
 * @a: pointer to the first struct selected_task
 * @b: pointer to the second struct selected_task
 *
 * Returns a negative value, zero or a positive value when @a is smaller
 * than, equal to or larger than @b.  Equal sizes must yield 0: a
 * comparator that reports "greater" in both directions for equal keys is
 * inconsistent and violates the contract sort() relies on.
 */
int selected_cmp(const void *a, const void *b)
{
	const struct selected_task *x = a;
	const struct selected_task *y = b;

	if (x->tasksize < y->tasksize)
		return -1;
	if (x->tasksize > y->tasksize)
		return 1;
	return 0;
}
87
88static int test_task_flag(struct task_struct *p, int flag)
89{
90 struct task_struct *t = p;
91
Vinayak Menon204b27e2015-05-21 17:12:37 +053092 rcu_read_lock();
93 for_each_thread(p, t) {
Vinayak Menon1bddbcf2015-03-20 14:15:39 +053094 task_lock(t);
95 if (test_tsk_thread_flag(t, flag)) {
96 task_unlock(t);
Vinayak Menon204b27e2015-05-21 17:12:37 +053097 rcu_read_unlock();
Vinayak Menon1bddbcf2015-03-20 14:15:39 +053098 return 1;
99 }
100 task_unlock(t);
Vinayak Menon204b27e2015-05-21 17:12:37 +0530101 }
102 rcu_read_unlock();
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530103
104 return 0;
105}
106
107static void swap_fn(struct work_struct *work)
108{
109 struct task_struct *tsk;
110 struct reclaim_param rp;
111
112 /* Pick the best MAX_SWAP_TASKS tasks in terms of anon size */
113 struct selected_task selected[MAX_SWAP_TASKS] = {{0, 0, 0},};
114 int si = 0;
115 int i;
116 int tasksize;
117 int total_sz = 0;
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530118 int total_scan = 0;
119 int total_reclaimed = 0;
120 int nr_to_reclaim;
121 int efficiency;
122
123 rcu_read_lock();
124 for_each_process(tsk) {
125 struct task_struct *p;
126 short oom_score_adj;
127
128 if (tsk->flags & PF_KTHREAD)
129 continue;
130
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530131 if (test_task_flag(tsk, TIF_MEMDIE))
132 continue;
133
134 p = find_lock_task_mm(tsk);
135 if (!p)
136 continue;
137
138 oom_score_adj = p->signal->oom_score_adj;
139 if (oom_score_adj < min_score_adj) {
140 task_unlock(p);
141 continue;
142 }
143
144 tasksize = get_mm_counter(p->mm, MM_ANONPAGES);
145 task_unlock(p);
146
147 if (tasksize <= 0)
148 continue;
149
150 if (si == MAX_SWAP_TASKS) {
151 sort(&selected[0], MAX_SWAP_TASKS,
152 sizeof(struct selected_task),
153 &selected_cmp, NULL);
154 if (tasksize < selected[0].tasksize)
155 continue;
156 selected[0].p = p;
157 selected[0].oom_score_adj = oom_score_adj;
158 selected[0].tasksize = tasksize;
159 } else {
160 selected[si].p = p;
161 selected[si].oom_score_adj = oom_score_adj;
162 selected[si].tasksize = tasksize;
163 si++;
164 }
165 }
166
Vinayak Menon204b27e2015-05-21 17:12:37 +0530167 for (i = 0; i < si; i++)
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530168 total_sz += selected[i].tasksize;
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530169
170 /* Skip reclaim if total size is too less */
171 if (total_sz < SWAP_CLUSTER_MAX) {
Vinayak Menon204b27e2015-05-21 17:12:37 +0530172 rcu_read_unlock();
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530173 return;
174 }
175
Vinayak Menon204b27e2015-05-21 17:12:37 +0530176 for (i = 0; i < si; i++)
177 get_task_struct(selected[i].p);
178
179 rcu_read_unlock();
180
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530181 while (si--) {
182 nr_to_reclaim =
183 (selected[si].tasksize * per_swap_size) / total_sz;
184 /* scan atleast a page */
185 if (!nr_to_reclaim)
186 nr_to_reclaim = 1;
187
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530188 rp = reclaim_task_anon(selected[si].p, nr_to_reclaim);
189
190 trace_process_reclaim(selected[si].tasksize,
191 selected[si].oom_score_adj, rp.nr_scanned,
192 rp.nr_reclaimed, per_swap_size, total_sz,
193 nr_to_reclaim);
194 total_scan += rp.nr_scanned;
195 total_reclaimed += rp.nr_reclaimed;
196 put_task_struct(selected[si].p);
197 }
198
199 if (total_scan) {
200 efficiency = (total_reclaimed * 100) / total_scan;
201
202 if (efficiency < swap_opt_eff) {
203 if (++monitor_eff == swap_eff_win) {
Vinayak Menon31a32712015-05-08 18:36:52 +0530204 atomic_set(&skip_reclaim, swap_eff_win);
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530205 monitor_eff = 0;
206 }
207 } else {
208 monitor_eff = 0;
209 }
210
211 reclaim_avg_efficiency =
212 (efficiency + reclaim_avg_efficiency) / 2;
213 trace_process_reclaim_eff(efficiency, reclaim_avg_efficiency);
214 }
215}
216
217static int vmpressure_notifier(struct notifier_block *nb,
218 unsigned long action, void *data)
219{
220 unsigned long pressure = action;
221
222 if (!enable_process_reclaim)
223 return 0;
224
225 if (!current_is_kswapd())
226 return 0;
227
Vinayak Menon31a32712015-05-08 18:36:52 +0530228 if (atomic_dec_if_positive(&skip_reclaim) >= 0)
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530229 return 0;
230
231 if ((pressure >= pressure_min) && (pressure < pressure_max))
232 if (!work_pending(&swap_work))
Shiraz Hashim99fddf12015-06-11 10:41:54 +0530233 queue_work(system_unbound_wq, &swap_work);
Vinayak Menon1bddbcf2015-03-20 14:15:39 +0530234 return 0;
235}
236
237static struct notifier_block vmpr_nb = {
238 .notifier_call = vmpressure_notifier,
239};
240
241static int __init process_reclaim_init(void)
242{
243 vmpressure_notifier_register(&vmpr_nb);
244 return 0;
245}
246
247static void __exit process_reclaim_exit(void)
248{
249 vmpressure_notifier_unregister(&vmpr_nb);
250}
251
/* Entry and exit points of the process reclaim module */
module_init(process_reclaim_init);
module_exit(process_reclaim_exit);