blob: 242cdf68ff89b694db23e28812b88b514e3156ee [file] [log] [blame]
Vinayak Menonc1c6a992015-12-21 13:00:58 +05301/*
 * Copyright (c) 2015, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
13
14#include <linux/mm_types.h>
15#include <linux/swapfile.h>
16#include <linux/swap.h>
17
/*
 * Swap priorities inside [SWAP_RATIO_GROUP_START, SWAP_RATIO_GROUP_END]
 * (the ten highest values allowed by SWAP_FLAG_PRIO_MASK) are treated as
 * swap ratio groups by is_swap_ratio_group().
 */
#define SWAP_RATIO_GROUP_START	(SWAP_FLAG_PRIO_MASK - 9)	/* 32758 */
#define SWAP_RATIO_GROUP_END	SWAP_FLAG_PRIO_MASK		/* 32767 */

/*
 * Write budgets, counted in swap writes:
 *  - SWAP_FAST_WRITES is the budget given to the fast device per round.
 *  - SWAP_SLOW_WRITES is the initial budget of a slow device, and its
 *    per-round budget when the ratio is 0.
 */
#define SWAP_FAST_WRITES	(SWAPFILE_CLUSTER * (SWAP_CLUSTER_MAX / 8))
#define SWAP_SLOW_WRITES	SWAPFILE_CLUSTER

/*
 * Percentage of swap writes that should be directed to the fast swap
 * device; the remainder goes to the slow one.  A value of 100 sends
 * every write to the fast device.
 */
int sysctl_swap_ratio = 100;

/* Non-zero turns the swap ratio feature on. */
int sysctl_swap_ratio_enable;
32
33static bool is_same_group(struct swap_info_struct *a,
34 struct swap_info_struct *b)
35{
36 if (!sysctl_swap_ratio_enable)
37 return false;
38
39 if (!is_swap_ratio_group(a->prio))
40 return false;
41
42 if (a->prio == b->prio)
43 return true;
44
45 return false;
46}
47
48/* Caller must hold swap_avail_lock */
49static int calculate_write_pending(struct swap_info_struct *si,
50 struct swap_info_struct *n)
51{
52 int ratio = sysctl_swap_ratio;
53
54 if ((ratio < 0) || (ratio > 100))
55 return -EINVAL;
56
57 if (WARN_ON(!(si->flags & SWP_FAST)))
58 return -ENODEV;
59
60 if ((n->flags & SWP_FAST) || !is_same_group(si, n))
61 return -ENODEV;
62
63 si->max_writes = ratio ? SWAP_FAST_WRITES : 0;
64 n->max_writes = ratio ? (SWAP_FAST_WRITES * 100) /
65 ratio - SWAP_FAST_WRITES : SWAP_SLOW_WRITES;
66
67 si->write_pending = si->max_writes;
68 n->write_pending = n->max_writes;
69
70 return 0;
71}
72
73static int swap_ratio_slow(struct swap_info_struct **si)
74{
75 struct swap_info_struct *n = NULL;
76 int ret = 0;
77
78 spin_lock(&(*si)->lock);
79 spin_lock(&swap_avail_lock);
80 if (&(*si)->avail_list == plist_last(&swap_avail_head)) {
81 /* just to make skip work */
82 n = *si;
83 ret = -ENODEV;
84 goto skip;
85 }
86 n = plist_next_entry(&(*si)->avail_list,
87 struct swap_info_struct,
88 avail_list);
89 spin_unlock(&swap_avail_lock);
90 spin_lock(&n->lock);
91 spin_lock(&swap_avail_lock);
92
93 if ((*si)->flags & SWP_FAST) {
94 if ((*si)->write_pending) {
95 (*si)->write_pending--;
96 goto exit;
97 } else {
98 if ((n->flags & SWP_FAST) || !is_same_group(*si, n)) {
99 /* Should never happen */
100 ret = -ENODEV;
101 } else if (n->write_pending) {
102 /*
103 * Requeue fast device, since there are pending
104 * writes for slow device.
105 */
106 plist_requeue(&(*si)->avail_list,
107 &swap_avail_head);
108 n->write_pending--;
109 spin_unlock(&(*si)->lock);
110 *si = n;
111 goto skip;
112 } else {
113 if (calculate_write_pending(*si, n) < 0) {
114 ret = -ENODEV;
115 goto exit;
116 }
117 /* Restart from fast device */
118 (*si)->write_pending--;
119 }
120 }
121 } else {
122 if (!(n->flags & SWP_FAST) || !is_same_group(*si, n)) {
123 /* Should never happen */
124 ret = -ENODEV;
125 } else if (n->write_pending) {
126 /*
127 * Pending writes for fast device.
128 * We reach here when slow device is swapped on first,
129 * before fast device.
130 */
131 /* requeue slow device to the end */
132 plist_requeue(&(*si)->avail_list, &swap_avail_head);
133 n->write_pending--;
134 spin_unlock(&(*si)->lock);
135 *si = n;
136 goto skip;
137 } else {
138 if ((*si)->write_pending) {
139 (*si)->write_pending--;
140 } else {
141 if (calculate_write_pending(n, *si) < 0) {
142 ret = -ENODEV;
143 goto exit;
144 }
145 n->write_pending--;
146 plist_requeue(&(*si)->avail_list,
147 &swap_avail_head);
148 spin_unlock(&(*si)->lock);
149 *si = n;
150 goto skip;
151 }
152 }
153 }
154exit:
155 spin_unlock(&(*si)->lock);
156skip:
157 spin_unlock(&swap_avail_lock);
158 /* n and si would have got interchanged */
159 spin_unlock(&n->lock);
160 return ret;
161}
162
163bool is_swap_ratio_group(int prio)
164{
165 return ((prio >= SWAP_RATIO_GROUP_START) &&
166 (prio <= SWAP_RATIO_GROUP_END)) ? true : false;
167}
168
169void setup_swap_ratio(struct swap_info_struct *p, int prio)
170{
171 /* Used only if sysctl_swap_ratio_enable is set */
172 if (is_swap_ratio_group(prio)) {
173 if (p->flags & SWP_FAST)
174 p->write_pending = SWAP_FAST_WRITES;
175 else
176 p->write_pending = SWAP_SLOW_WRITES;
177 p->max_writes = p->write_pending;
178 }
179}
180
181int swap_ratio(struct swap_info_struct **si)
182{
183 if (is_swap_ratio_group((*si)->prio))
184 return swap_ratio_slow(si);
185 else
186 return -ENODEV;
187}