blob: 4ca5783af41355c8a3f8308163a00f9525f5753d [file] [log] [blame]
/*
 * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
13
14#include <linux/mm_types.h>
15#include <linux/swapfile.h>
16#include <linux/swap.h>
17
/*
 * Priorities in [SWAP_RATIO_GROUP_START, SWAP_RATIO_GROUP_END] -- the ten
 * highest values representable under SWAP_FLAG_PRIO_MASK -- mark a swap
 * device as a member of the swap ratio group.
 */
#define SWAP_RATIO_GROUP_END	(SWAP_FLAG_PRIO_MASK)		/* 32767 */
#define SWAP_RATIO_GROUP_START	(SWAP_RATIO_GROUP_END - 9)	/* 32758 */

/* Write budget handed to a fast device for one round. */
#define SWAP_FAST_WRITES	(SWAPFILE_CLUSTER * (SWAP_CLUSTER_MAX / 8))
/* Write budget handed to a slow device at setup and when the ratio is 0. */
#define SWAP_SLOW_WRITES	SWAPFILE_CLUSTER

/*
 * Percentage of swap writes that should land on the fast device.
 * 100 (the default) sends every write to the fast swap device;
 * values outside 0..100 are rejected by calculate_write_pending().
 */
int sysctl_swap_ratio = 100;

/* Non-zero turns the swap ratio feature on; it is off by default. */
int sysctl_swap_ratio_enable;
32
33static bool is_same_group(struct swap_info_struct *a,
34 struct swap_info_struct *b)
35{
36 if (!sysctl_swap_ratio_enable)
37 return false;
38
39 if (!is_swap_ratio_group(a->prio))
40 return false;
41
42 if (a->prio == b->prio)
43 return true;
44
45 return false;
46}
47
48/* Caller must hold swap_avail_lock */
49static int calculate_write_pending(struct swap_info_struct *si,
50 struct swap_info_struct *n)
51{
52 int ratio = sysctl_swap_ratio;
53
54 if ((ratio < 0) || (ratio > 100))
55 return -EINVAL;
56
57 if (WARN_ON(!(si->flags & SWP_FAST)))
58 return -ENODEV;
59
60 if ((n->flags & SWP_FAST) || !is_same_group(si, n))
61 return -ENODEV;
62
63 si->max_writes = ratio ? SWAP_FAST_WRITES : 0;
64 n->max_writes = ratio ? (SWAP_FAST_WRITES * 100) /
65 ratio - SWAP_FAST_WRITES : SWAP_SLOW_WRITES;
66
67 si->write_pending = si->max_writes;
68 n->write_pending = n->max_writes;
69
70 return 0;
71}
72
73static int swap_ratio_slow(struct swap_info_struct **si)
74{
75 struct swap_info_struct *n = NULL;
76 int ret = 0;
77
78 spin_lock(&(*si)->lock);
79 spin_lock(&swap_avail_lock);
80 if (&(*si)->avail_list == plist_last(&swap_avail_head)) {
81 /* just to make skip work */
82 n = *si;
83 ret = -ENODEV;
84 goto skip;
85 }
86 n = plist_next_entry(&(*si)->avail_list,
87 struct swap_info_struct,
88 avail_list);
Vinayak Menon92dffa62016-03-22 14:41:57 +053089 if (n == *si) {
90 /* No other swap device */
91 ret = -ENODEV;
92 goto skip;
93 }
94
Vinayak Menonc1c6a992015-12-21 13:00:58 +053095 spin_unlock(&swap_avail_lock);
96 spin_lock(&n->lock);
97 spin_lock(&swap_avail_lock);
98
99 if ((*si)->flags & SWP_FAST) {
100 if ((*si)->write_pending) {
101 (*si)->write_pending--;
102 goto exit;
103 } else {
104 if ((n->flags & SWP_FAST) || !is_same_group(*si, n)) {
105 /* Should never happen */
106 ret = -ENODEV;
107 } else if (n->write_pending) {
108 /*
109 * Requeue fast device, since there are pending
110 * writes for slow device.
111 */
112 plist_requeue(&(*si)->avail_list,
113 &swap_avail_head);
114 n->write_pending--;
115 spin_unlock(&(*si)->lock);
116 *si = n;
117 goto skip;
118 } else {
119 if (calculate_write_pending(*si, n) < 0) {
120 ret = -ENODEV;
121 goto exit;
122 }
123 /* Restart from fast device */
124 (*si)->write_pending--;
125 }
126 }
127 } else {
128 if (!(n->flags & SWP_FAST) || !is_same_group(*si, n)) {
129 /* Should never happen */
130 ret = -ENODEV;
131 } else if (n->write_pending) {
132 /*
133 * Pending writes for fast device.
134 * We reach here when slow device is swapped on first,
135 * before fast device.
136 */
137 /* requeue slow device to the end */
138 plist_requeue(&(*si)->avail_list, &swap_avail_head);
139 n->write_pending--;
140 spin_unlock(&(*si)->lock);
141 *si = n;
142 goto skip;
143 } else {
144 if ((*si)->write_pending) {
145 (*si)->write_pending--;
146 } else {
147 if (calculate_write_pending(n, *si) < 0) {
148 ret = -ENODEV;
149 goto exit;
150 }
151 n->write_pending--;
152 plist_requeue(&(*si)->avail_list,
153 &swap_avail_head);
154 spin_unlock(&(*si)->lock);
155 *si = n;
156 goto skip;
157 }
158 }
159 }
160exit:
161 spin_unlock(&(*si)->lock);
162skip:
163 spin_unlock(&swap_avail_lock);
164 /* n and si would have got interchanged */
165 spin_unlock(&n->lock);
166 return ret;
167}
168
169bool is_swap_ratio_group(int prio)
170{
171 return ((prio >= SWAP_RATIO_GROUP_START) &&
172 (prio <= SWAP_RATIO_GROUP_END)) ? true : false;
173}
174
175void setup_swap_ratio(struct swap_info_struct *p, int prio)
176{
177 /* Used only if sysctl_swap_ratio_enable is set */
178 if (is_swap_ratio_group(prio)) {
179 if (p->flags & SWP_FAST)
180 p->write_pending = SWAP_FAST_WRITES;
181 else
182 p->write_pending = SWAP_SLOW_WRITES;
183 p->max_writes = p->write_pending;
184 }
185}
186
187int swap_ratio(struct swap_info_struct **si)
188{
Vinayak Menon92dffa62016-03-22 14:41:57 +0530189 if (!sysctl_swap_ratio_enable)
190 return -ENODEV;
191
Vinayak Menonc1c6a992015-12-21 13:00:58 +0530192 if (is_swap_ratio_group((*si)->prio))
193 return swap_ratio_slow(si);
194 else
195 return -ENODEV;
196}