Evgeniy Polyakov | a0fde6e | 2009-01-14 02:05:31 +0300 | [diff] [blame] | 1 | /* |
| 2 | * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * This program is free software; you can redistribute it and/or modify |
| 6 | * it under the terms of the GNU General Public License as published by |
| 7 | * the Free Software Foundation; either version 2 of the License, or |
| 8 | * (at your option) any later version. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * GNU General Public License for more details. |
| 14 | */ |
| 15 | |
| 16 | #include <linux/bio.h> |
| 17 | #include <linux/dst.h> |
| 18 | #include <linux/slab.h> |
| 19 | #include <linux/mempool.h> |
| 20 | |
| 21 | /* |
| 22 | * Transaction memory pool size. |
| 23 | */ |
| 24 | static int dst_mempool_num = 32; |
| 25 | module_param(dst_mempool_num, int, 0644); |
| 26 | |
| 27 | /* |
| 28 | * Transaction tree management. |
| 29 | */ |
| 30 | static inline int dst_trans_cmp(dst_gen_t gen, dst_gen_t new) |
| 31 | { |
| 32 | if (gen < new) |
| 33 | return 1; |
| 34 | if (gen > new) |
| 35 | return -1; |
| 36 | return 0; |
| 37 | } |
| 38 | |
| 39 | struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen) |
| 40 | { |
| 41 | struct rb_root *root = &node->trans_root; |
| 42 | struct rb_node *n = root->rb_node; |
| 43 | struct dst_trans *t, *ret = NULL; |
| 44 | int cmp; |
| 45 | |
| 46 | while (n) { |
| 47 | t = rb_entry(n, struct dst_trans, trans_entry); |
| 48 | |
| 49 | cmp = dst_trans_cmp(t->gen, gen); |
| 50 | if (cmp < 0) |
| 51 | n = n->rb_left; |
| 52 | else if (cmp > 0) |
| 53 | n = n->rb_right; |
| 54 | else { |
| 55 | ret = t; |
| 56 | break; |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | dprintk("%s: %s transaction: id: %llu.\n", __func__, |
Mariusz Ziulek | d52ac3f | 2009-11-12 15:01:16 +0100 | [diff] [blame] | 61 | (ret) ? "found" : "not found", gen); |
Evgeniy Polyakov | a0fde6e | 2009-01-14 02:05:31 +0300 | [diff] [blame] | 62 | |
| 63 | return ret; |
| 64 | } |
| 65 | |
| 66 | static int dst_trans_insert(struct dst_trans *new) |
| 67 | { |
| 68 | struct rb_root *root = &new->n->trans_root; |
| 69 | struct rb_node **n = &root->rb_node, *parent = NULL; |
| 70 | struct dst_trans *ret = NULL, *t; |
| 71 | int cmp; |
| 72 | |
| 73 | while (*n) { |
| 74 | parent = *n; |
| 75 | |
| 76 | t = rb_entry(parent, struct dst_trans, trans_entry); |
| 77 | |
| 78 | cmp = dst_trans_cmp(t->gen, new->gen); |
| 79 | if (cmp < 0) |
| 80 | n = &parent->rb_left; |
| 81 | else if (cmp > 0) |
| 82 | n = &parent->rb_right; |
| 83 | else { |
| 84 | ret = t; |
| 85 | break; |
| 86 | } |
| 87 | } |
| 88 | |
| 89 | new->send_time = jiffies; |
| 90 | if (ret) { |
Mariusz Ziulek | d52ac3f | 2009-11-12 15:01:16 +0100 | [diff] [blame] | 91 | printk(KERN_DEBUG "%s: exist: old: gen: %llu, bio: %llu/%u, " |
| 92 | "send_time: %lu, new: gen: %llu, bio: %llu/%u, " |
| 93 | "send_time: %lu.\n", __func__, |
Evgeniy Polyakov | a0fde6e | 2009-01-14 02:05:31 +0300 | [diff] [blame] | 94 | ret->gen, (u64)ret->bio->bi_sector, |
| 95 | ret->bio->bi_size, ret->send_time, |
| 96 | new->gen, (u64)new->bio->bi_sector, |
| 97 | new->bio->bi_size, new->send_time); |
| 98 | return -EEXIST; |
| 99 | } |
| 100 | |
| 101 | rb_link_node(&new->trans_entry, parent, n); |
| 102 | rb_insert_color(&new->trans_entry, root); |
| 103 | |
| 104 | dprintk("%s: inserted: gen: %llu, bio: %llu/%u, send_time: %lu.\n", |
| 105 | __func__, new->gen, (u64)new->bio->bi_sector, |
| 106 | new->bio->bi_size, new->send_time); |
| 107 | |
| 108 | return 0; |
| 109 | } |
| 110 | |
| 111 | int dst_trans_remove_nolock(struct dst_trans *t) |
| 112 | { |
| 113 | struct dst_node *n = t->n; |
| 114 | |
| 115 | if (t->trans_entry.rb_parent_color) { |
| 116 | rb_erase(&t->trans_entry, &n->trans_root); |
| 117 | t->trans_entry.rb_parent_color = 0; |
| 118 | } |
| 119 | return 0; |
| 120 | } |
| 121 | |
| 122 | int dst_trans_remove(struct dst_trans *t) |
| 123 | { |
| 124 | int ret; |
| 125 | struct dst_node *n = t->n; |
| 126 | |
| 127 | mutex_lock(&n->trans_lock); |
| 128 | ret = dst_trans_remove_nolock(t); |
| 129 | mutex_unlock(&n->trans_lock); |
| 130 | |
| 131 | return ret; |
| 132 | } |
| 133 | |
| 134 | /* |
| 135 | * When transaction is completed and there are no more users, |
| 136 | * we complete appriate block IO request with given error status. |
| 137 | */ |
| 138 | void dst_trans_put(struct dst_trans *t) |
| 139 | { |
| 140 | if (atomic_dec_and_test(&t->refcnt)) { |
| 141 | struct bio *bio = t->bio; |
| 142 | |
| 143 | dprintk("%s: completed t: %p, gen: %llu, bio: %p.\n", |
| 144 | __func__, t, t->gen, bio); |
| 145 | |
| 146 | bio_endio(bio, t->error); |
| 147 | bio_put(bio); |
| 148 | |
| 149 | dst_node_put(t->n); |
| 150 | mempool_free(t, t->n->trans_pool); |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | /* |
| 155 | * Process given block IO request: allocate transaction, insert it into the tree |
| 156 | * and send/schedule crypto processing. |
| 157 | */ |
| 158 | int dst_process_bio(struct dst_node *n, struct bio *bio) |
| 159 | { |
| 160 | struct dst_trans *t; |
| 161 | int err = -ENOMEM; |
| 162 | |
| 163 | t = mempool_alloc(n->trans_pool, GFP_NOFS); |
| 164 | if (!t) |
| 165 | goto err_out_exit; |
| 166 | |
| 167 | t->n = dst_node_get(n); |
| 168 | t->bio = bio; |
| 169 | t->error = 0; |
| 170 | t->retries = 0; |
| 171 | atomic_set(&t->refcnt, 1); |
| 172 | t->gen = atomic_long_inc_return(&n->gen); |
| 173 | |
| 174 | t->enc = bio_data_dir(bio); |
| 175 | dst_bio_to_cmd(bio, &t->cmd, DST_IO, t->gen); |
| 176 | |
| 177 | mutex_lock(&n->trans_lock); |
| 178 | err = dst_trans_insert(t); |
| 179 | mutex_unlock(&n->trans_lock); |
| 180 | if (err) |
| 181 | goto err_out_free; |
| 182 | |
| 183 | dprintk("%s: gen: %llu, bio: %llu/%u, dir/enc: %d, need_crypto: %d.\n", |
| 184 | __func__, t->gen, (u64)bio->bi_sector, |
| 185 | bio->bi_size, t->enc, dst_need_crypto(n)); |
| 186 | |
| 187 | if (dst_need_crypto(n) && t->enc) |
| 188 | dst_trans_crypto(t); |
| 189 | else |
| 190 | dst_trans_send(t); |
| 191 | |
| 192 | return 0; |
| 193 | |
| 194 | err_out_free: |
| 195 | dst_node_put(n); |
| 196 | mempool_free(t, n->trans_pool); |
| 197 | err_out_exit: |
| 198 | bio_endio(bio, err); |
| 199 | bio_put(bio); |
| 200 | return err; |
| 201 | } |
| 202 | |
| 203 | /* |
| 204 | * Scan for timeout/stale transactions. |
| 205 | * Each transaction is being resent multiple times before error completion. |
| 206 | */ |
| 207 | static void dst_trans_scan(struct work_struct *work) |
| 208 | { |
Mariusz Ziulek | d52ac3f | 2009-11-12 15:01:16 +0100 | [diff] [blame] | 209 | struct dst_node *n = container_of(work, struct dst_node, |
| 210 | trans_work.work); |
Evgeniy Polyakov | a0fde6e | 2009-01-14 02:05:31 +0300 | [diff] [blame] | 211 | struct rb_node *rb_node; |
| 212 | struct dst_trans *t; |
| 213 | unsigned long timeout = n->trans_scan_timeout; |
| 214 | int num = 10 * n->trans_max_retries; |
| 215 | |
| 216 | mutex_lock(&n->trans_lock); |
| 217 | |
| 218 | for (rb_node = rb_first(&n->trans_root); rb_node; ) { |
| 219 | t = rb_entry(rb_node, struct dst_trans, trans_entry); |
| 220 | |
| 221 | if (timeout && time_after(t->send_time + timeout, jiffies) |
| 222 | && t->retries == 0) |
| 223 | break; |
| 224 | #if 0 |
| 225 | dprintk("%s: t: %p, gen: %llu, n: %s, retries: %u, max: %u.\n", |
| 226 | __func__, t, t->gen, n->name, |
| 227 | t->retries, n->trans_max_retries); |
| 228 | #endif |
| 229 | if (--num == 0) |
| 230 | break; |
| 231 | |
| 232 | dst_trans_get(t); |
| 233 | |
| 234 | rb_node = rb_next(rb_node); |
| 235 | |
| 236 | if (timeout && (++t->retries < n->trans_max_retries)) { |
| 237 | dst_trans_send(t); |
| 238 | } else { |
| 239 | t->error = -ETIMEDOUT; |
| 240 | dst_trans_remove_nolock(t); |
| 241 | dst_trans_put(t); |
| 242 | } |
| 243 | |
| 244 | dst_trans_put(t); |
| 245 | } |
| 246 | |
| 247 | mutex_unlock(&n->trans_lock); |
| 248 | |
| 249 | /* |
Mariusz Ziulek | d52ac3f | 2009-11-12 15:01:16 +0100 | [diff] [blame] | 250 | * If no timeout specified then system is in the middle of exiting |
| 251 | * process, so no need to reschedule scanning process again. |
Evgeniy Polyakov | a0fde6e | 2009-01-14 02:05:31 +0300 | [diff] [blame] | 252 | */ |
| 253 | if (timeout) { |
| 254 | if (!num) |
| 255 | timeout = HZ; |
| 256 | schedule_delayed_work(&n->trans_work, timeout); |
| 257 | } |
| 258 | } |
| 259 | |
| 260 | /* |
| 261 | * Flush all transactions and mark them as timed out. |
| 262 | * Destroy transaction pools. |
| 263 | */ |
| 264 | void dst_node_trans_exit(struct dst_node *n) |
| 265 | { |
| 266 | struct dst_trans *t; |
| 267 | struct rb_node *rb_node; |
| 268 | |
| 269 | if (!n->trans_cache) |
| 270 | return; |
| 271 | |
| 272 | dprintk("%s: n: %p, cancelling the work.\n", __func__, n); |
| 273 | cancel_delayed_work_sync(&n->trans_work); |
| 274 | flush_scheduled_work(); |
| 275 | dprintk("%s: n: %p, work has been cancelled.\n", __func__, n); |
| 276 | |
| 277 | for (rb_node = rb_first(&n->trans_root); rb_node; ) { |
| 278 | t = rb_entry(rb_node, struct dst_trans, trans_entry); |
| 279 | |
| 280 | dprintk("%s: t: %p, gen: %llu, n: %s.\n", |
| 281 | __func__, t, t->gen, n->name); |
| 282 | |
| 283 | rb_node = rb_next(rb_node); |
| 284 | |
| 285 | t->error = -ETIMEDOUT; |
| 286 | dst_trans_remove_nolock(t); |
| 287 | dst_trans_put(t); |
| 288 | } |
| 289 | |
| 290 | mempool_destroy(n->trans_pool); |
| 291 | kmem_cache_destroy(n->trans_cache); |
| 292 | } |
| 293 | |
| 294 | /* |
| 295 | * Initialize transaction storage for given node. |
| 296 | * Transaction stores not only control information, |
| 297 | * but also network command and crypto data (if needed) |
| 298 | * to reduce number of allocations. Thus transaction size |
| 299 | * differs from node to node. |
| 300 | */ |
| 301 | int dst_node_trans_init(struct dst_node *n, unsigned int size) |
| 302 | { |
| 303 | /* |
| 304 | * We need this, since node with given name can be dropped from the |
| 305 | * hash table, but be still alive, so subsequent creation of the node |
| 306 | * with the same name may collide with existing cache name. |
| 307 | */ |
| 308 | |
| 309 | snprintf(n->cache_name, sizeof(n->cache_name), "%s-%p", n->name, n); |
| 310 | |
| 311 | n->trans_cache = kmem_cache_create(n->cache_name, |
| 312 | size + n->crypto.crypto_attached_size, |
| 313 | 0, 0, NULL); |
| 314 | if (!n->trans_cache) |
| 315 | goto err_out_exit; |
| 316 | |
Mariusz Ziulek | d52ac3f | 2009-11-12 15:01:16 +0100 | [diff] [blame] | 317 | n->trans_pool = mempool_create_slab_pool(dst_mempool_num, |
| 318 | n->trans_cache); |
Evgeniy Polyakov | a0fde6e | 2009-01-14 02:05:31 +0300 | [diff] [blame] | 319 | if (!n->trans_pool) |
| 320 | goto err_out_cache_destroy; |
| 321 | |
| 322 | mutex_init(&n->trans_lock); |
| 323 | n->trans_root = RB_ROOT; |
| 324 | |
| 325 | INIT_DELAYED_WORK(&n->trans_work, dst_trans_scan); |
| 326 | schedule_delayed_work(&n->trans_work, n->trans_scan_timeout); |
| 327 | |
| 328 | dprintk("%s: n: %p, size: %u, crypto: %u.\n", |
| 329 | __func__, n, size, n->crypto.crypto_attached_size); |
| 330 | |
| 331 | return 0; |
| 332 | |
| 333 | err_out_cache_destroy: |
| 334 | kmem_cache_destroy(n->trans_cache); |
| 335 | err_out_exit: |
| 336 | return -ENOMEM; |
| 337 | } |