blob: 1c36a6bc31d53d603d33b00157cf03ce67581b91 [file] [log] [blame]
Evgeniy Polyakova0fde6e2009-01-14 02:05:31 +03001/*
2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/bio.h>
17#include <linux/dst.h>
18#include <linux/slab.h>
19#include <linux/mempool.h>
20
21/*
22 * Transaction memory pool size.
23 */
24static int dst_mempool_num = 32;
25module_param(dst_mempool_num, int, 0644);
26
27/*
28 * Transaction tree management.
29 */
30static inline int dst_trans_cmp(dst_gen_t gen, dst_gen_t new)
31{
32 if (gen < new)
33 return 1;
34 if (gen > new)
35 return -1;
36 return 0;
37}
38
39struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen)
40{
41 struct rb_root *root = &node->trans_root;
42 struct rb_node *n = root->rb_node;
43 struct dst_trans *t, *ret = NULL;
44 int cmp;
45
46 while (n) {
47 t = rb_entry(n, struct dst_trans, trans_entry);
48
49 cmp = dst_trans_cmp(t->gen, gen);
50 if (cmp < 0)
51 n = n->rb_left;
52 else if (cmp > 0)
53 n = n->rb_right;
54 else {
55 ret = t;
56 break;
57 }
58 }
59
60 dprintk("%s: %s transaction: id: %llu.\n", __func__,
Mariusz Ziulekd52ac3f2009-11-12 15:01:16 +010061 (ret) ? "found" : "not found", gen);
Evgeniy Polyakova0fde6e2009-01-14 02:05:31 +030062
63 return ret;
64}
65
66static int dst_trans_insert(struct dst_trans *new)
67{
68 struct rb_root *root = &new->n->trans_root;
69 struct rb_node **n = &root->rb_node, *parent = NULL;
70 struct dst_trans *ret = NULL, *t;
71 int cmp;
72
73 while (*n) {
74 parent = *n;
75
76 t = rb_entry(parent, struct dst_trans, trans_entry);
77
78 cmp = dst_trans_cmp(t->gen, new->gen);
79 if (cmp < 0)
80 n = &parent->rb_left;
81 else if (cmp > 0)
82 n = &parent->rb_right;
83 else {
84 ret = t;
85 break;
86 }
87 }
88
89 new->send_time = jiffies;
90 if (ret) {
Mariusz Ziulekd52ac3f2009-11-12 15:01:16 +010091 printk(KERN_DEBUG "%s: exist: old: gen: %llu, bio: %llu/%u, "
92 "send_time: %lu, new: gen: %llu, bio: %llu/%u, "
93 "send_time: %lu.\n", __func__,
Evgeniy Polyakova0fde6e2009-01-14 02:05:31 +030094 ret->gen, (u64)ret->bio->bi_sector,
95 ret->bio->bi_size, ret->send_time,
96 new->gen, (u64)new->bio->bi_sector,
97 new->bio->bi_size, new->send_time);
98 return -EEXIST;
99 }
100
101 rb_link_node(&new->trans_entry, parent, n);
102 rb_insert_color(&new->trans_entry, root);
103
104 dprintk("%s: inserted: gen: %llu, bio: %llu/%u, send_time: %lu.\n",
105 __func__, new->gen, (u64)new->bio->bi_sector,
106 new->bio->bi_size, new->send_time);
107
108 return 0;
109}
110
111int dst_trans_remove_nolock(struct dst_trans *t)
112{
113 struct dst_node *n = t->n;
114
115 if (t->trans_entry.rb_parent_color) {
116 rb_erase(&t->trans_entry, &n->trans_root);
117 t->trans_entry.rb_parent_color = 0;
118 }
119 return 0;
120}
121
122int dst_trans_remove(struct dst_trans *t)
123{
124 int ret;
125 struct dst_node *n = t->n;
126
127 mutex_lock(&n->trans_lock);
128 ret = dst_trans_remove_nolock(t);
129 mutex_unlock(&n->trans_lock);
130
131 return ret;
132}
133
134/*
135 * When transaction is completed and there are no more users,
136 * we complete appriate block IO request with given error status.
137 */
138void dst_trans_put(struct dst_trans *t)
139{
140 if (atomic_dec_and_test(&t->refcnt)) {
141 struct bio *bio = t->bio;
142
143 dprintk("%s: completed t: %p, gen: %llu, bio: %p.\n",
144 __func__, t, t->gen, bio);
145
146 bio_endio(bio, t->error);
147 bio_put(bio);
148
149 dst_node_put(t->n);
150 mempool_free(t, t->n->trans_pool);
151 }
152}
153
154/*
155 * Process given block IO request: allocate transaction, insert it into the tree
156 * and send/schedule crypto processing.
157 */
158int dst_process_bio(struct dst_node *n, struct bio *bio)
159{
160 struct dst_trans *t;
161 int err = -ENOMEM;
162
163 t = mempool_alloc(n->trans_pool, GFP_NOFS);
164 if (!t)
165 goto err_out_exit;
166
167 t->n = dst_node_get(n);
168 t->bio = bio;
169 t->error = 0;
170 t->retries = 0;
171 atomic_set(&t->refcnt, 1);
172 t->gen = atomic_long_inc_return(&n->gen);
173
174 t->enc = bio_data_dir(bio);
175 dst_bio_to_cmd(bio, &t->cmd, DST_IO, t->gen);
176
177 mutex_lock(&n->trans_lock);
178 err = dst_trans_insert(t);
179 mutex_unlock(&n->trans_lock);
180 if (err)
181 goto err_out_free;
182
183 dprintk("%s: gen: %llu, bio: %llu/%u, dir/enc: %d, need_crypto: %d.\n",
184 __func__, t->gen, (u64)bio->bi_sector,
185 bio->bi_size, t->enc, dst_need_crypto(n));
186
187 if (dst_need_crypto(n) && t->enc)
188 dst_trans_crypto(t);
189 else
190 dst_trans_send(t);
191
192 return 0;
193
194err_out_free:
195 dst_node_put(n);
196 mempool_free(t, n->trans_pool);
197err_out_exit:
198 bio_endio(bio, err);
199 bio_put(bio);
200 return err;
201}
202
203/*
204 * Scan for timeout/stale transactions.
205 * Each transaction is being resent multiple times before error completion.
206 */
207static void dst_trans_scan(struct work_struct *work)
208{
Mariusz Ziulekd52ac3f2009-11-12 15:01:16 +0100209 struct dst_node *n = container_of(work, struct dst_node,
210 trans_work.work);
Evgeniy Polyakova0fde6e2009-01-14 02:05:31 +0300211 struct rb_node *rb_node;
212 struct dst_trans *t;
213 unsigned long timeout = n->trans_scan_timeout;
214 int num = 10 * n->trans_max_retries;
215
216 mutex_lock(&n->trans_lock);
217
218 for (rb_node = rb_first(&n->trans_root); rb_node; ) {
219 t = rb_entry(rb_node, struct dst_trans, trans_entry);
220
221 if (timeout && time_after(t->send_time + timeout, jiffies)
222 && t->retries == 0)
223 break;
224#if 0
225 dprintk("%s: t: %p, gen: %llu, n: %s, retries: %u, max: %u.\n",
226 __func__, t, t->gen, n->name,
227 t->retries, n->trans_max_retries);
228#endif
229 if (--num == 0)
230 break;
231
232 dst_trans_get(t);
233
234 rb_node = rb_next(rb_node);
235
236 if (timeout && (++t->retries < n->trans_max_retries)) {
237 dst_trans_send(t);
238 } else {
239 t->error = -ETIMEDOUT;
240 dst_trans_remove_nolock(t);
241 dst_trans_put(t);
242 }
243
244 dst_trans_put(t);
245 }
246
247 mutex_unlock(&n->trans_lock);
248
249 /*
Mariusz Ziulekd52ac3f2009-11-12 15:01:16 +0100250 * If no timeout specified then system is in the middle of exiting
251 * process, so no need to reschedule scanning process again.
Evgeniy Polyakova0fde6e2009-01-14 02:05:31 +0300252 */
253 if (timeout) {
254 if (!num)
255 timeout = HZ;
256 schedule_delayed_work(&n->trans_work, timeout);
257 }
258}
259
260/*
261 * Flush all transactions and mark them as timed out.
262 * Destroy transaction pools.
263 */
264void dst_node_trans_exit(struct dst_node *n)
265{
266 struct dst_trans *t;
267 struct rb_node *rb_node;
268
269 if (!n->trans_cache)
270 return;
271
272 dprintk("%s: n: %p, cancelling the work.\n", __func__, n);
273 cancel_delayed_work_sync(&n->trans_work);
274 flush_scheduled_work();
275 dprintk("%s: n: %p, work has been cancelled.\n", __func__, n);
276
277 for (rb_node = rb_first(&n->trans_root); rb_node; ) {
278 t = rb_entry(rb_node, struct dst_trans, trans_entry);
279
280 dprintk("%s: t: %p, gen: %llu, n: %s.\n",
281 __func__, t, t->gen, n->name);
282
283 rb_node = rb_next(rb_node);
284
285 t->error = -ETIMEDOUT;
286 dst_trans_remove_nolock(t);
287 dst_trans_put(t);
288 }
289
290 mempool_destroy(n->trans_pool);
291 kmem_cache_destroy(n->trans_cache);
292}
293
294/*
295 * Initialize transaction storage for given node.
296 * Transaction stores not only control information,
297 * but also network command and crypto data (if needed)
298 * to reduce number of allocations. Thus transaction size
299 * differs from node to node.
300 */
301int dst_node_trans_init(struct dst_node *n, unsigned int size)
302{
303 /*
304 * We need this, since node with given name can be dropped from the
305 * hash table, but be still alive, so subsequent creation of the node
306 * with the same name may collide with existing cache name.
307 */
308
309 snprintf(n->cache_name, sizeof(n->cache_name), "%s-%p", n->name, n);
310
311 n->trans_cache = kmem_cache_create(n->cache_name,
312 size + n->crypto.crypto_attached_size,
313 0, 0, NULL);
314 if (!n->trans_cache)
315 goto err_out_exit;
316
Mariusz Ziulekd52ac3f2009-11-12 15:01:16 +0100317 n->trans_pool = mempool_create_slab_pool(dst_mempool_num,
318 n->trans_cache);
Evgeniy Polyakova0fde6e2009-01-14 02:05:31 +0300319 if (!n->trans_pool)
320 goto err_out_cache_destroy;
321
322 mutex_init(&n->trans_lock);
323 n->trans_root = RB_ROOT;
324
325 INIT_DELAYED_WORK(&n->trans_work, dst_trans_scan);
326 schedule_delayed_work(&n->trans_work, n->trans_scan_timeout);
327
328 dprintk("%s: n: %p, size: %u, crypto: %u.\n",
329 __func__, n, size, n->crypto.crypto_attached_size);
330
331 return 0;
332
333err_out_cache_destroy:
334 kmem_cache_destroy(n->trans_cache);
335err_out_exit:
336 return -ENOMEM;
337}