blob: 80b3d3ea10c3bc08a331381da70b855132015779 [file] [log] [blame]
Ed Cashinfea05a22012-10-04 17:16:38 -07001/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/*
3 * aoedev.c
4 * AoE device utility functions; maintains device list.
5 */
6
7#include <linux/hdreg.h>
8#include <linux/blkdev.h>
9#include <linux/netdevice.h>
Ed L. Cashin9bb237b2008-02-08 04:20:05 -080010#include <linux/delay.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090011#include <linux/slab.h>
Ed Cashin0c966212012-10-04 17:16:40 -070012#include <linux/bitmap.h>
13#include <linux/kdev_t.h>
Ed Cashin4bcce1a2012-10-04 17:16:42 -070014#include <linux/moduleparam.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include "aoe.h"
16
Ed L. Cashin262bf542008-02-08 04:20:03 -080017static void dummy_timer(ulong);
18static void aoedev_freedev(struct aoedev *);
Ed L. Cashin9bb237b2008-02-08 04:20:05 -080019static void freetgt(struct aoedev *d, struct aoetgt *t);
20static void skbpoolfree(struct aoedev *d);
Ed L. Cashin262bf542008-02-08 04:20:03 -080021
Ed Cashin08b60622012-10-04 17:16:47 -070022static int aoe_dyndevs = 1;
Ed Cashin4bcce1a2012-10-04 17:16:42 -070023module_param(aoe_dyndevs, int, 0644);
24MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
25
Linus Torvalds1da177e2005-04-16 15:20:36 -070026static struct aoedev *devlist;
Andrew Morton476aed32008-02-08 04:20:10 -080027static DEFINE_SPINLOCK(devlist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070028
Ed Cashin0c966212012-10-04 17:16:40 -070029/* Because some systems will have one, many, or no
30 * - partitions,
31 * - slots per shelf,
32 * - or shelves,
33 * we need some flexibility in the way the minor numbers
34 * are allocated. So they are dynamic.
35 */
36#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
37
38static DEFINE_SPINLOCK(used_minors_lock);
39static DECLARE_BITMAP(used_minors, N_DEVS);
40
41static int
Ed Cashin4bcce1a2012-10-04 17:16:42 -070042minor_get_dyn(ulong *sysminor)
Ed Cashin0c966212012-10-04 17:16:40 -070043{
44 ulong flags;
45 ulong n;
46 int error = 0;
47
48 spin_lock_irqsave(&used_minors_lock, flags);
49 n = find_first_zero_bit(used_minors, N_DEVS);
50 if (n < N_DEVS)
51 set_bit(n, used_minors);
52 else
53 error = -1;
54 spin_unlock_irqrestore(&used_minors_lock, flags);
55
Ed Cashin4bcce1a2012-10-04 17:16:42 -070056 *sysminor = n * AOE_PARTITIONS;
Ed Cashin0c966212012-10-04 17:16:40 -070057 return error;
58}
59
Ed Cashin4bcce1a2012-10-04 17:16:42 -070060static int
61minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
62{
63 ulong flags;
64 ulong n;
65 int error = 0;
66 enum {
67 /* for backwards compatibility when !aoe_dyndevs,
68 * a static number of supported slots per shelf */
69 NPERSHELF = 16,
70 };
71
Ed Cashine0b2bba2012-12-17 16:04:03 -080072 if (aoemin >= NPERSHELF) {
73 pr_err("aoe: %s %d slots per shelf\n",
74 "static minor device numbers support only",
75 NPERSHELF);
76 error = -1;
77 goto out;
78 }
79
Ed Cashin4bcce1a2012-10-04 17:16:42 -070080 n = aoemaj * NPERSHELF + aoemin;
Ed Cashine0b2bba2012-12-17 16:04:03 -080081 if (n >= N_DEVS) {
Ed Cashin4bcce1a2012-10-04 17:16:42 -070082 pr_err("aoe: %s with e%ld.%d\n",
83 "cannot use static minor device numbers",
84 aoemaj, aoemin);
85 error = -1;
Ed Cashine0b2bba2012-12-17 16:04:03 -080086 goto out;
Ed Cashin4bcce1a2012-10-04 17:16:42 -070087 }
88
Ed Cashine0b2bba2012-12-17 16:04:03 -080089 spin_lock_irqsave(&used_minors_lock, flags);
90 if (test_bit(n, used_minors)) {
91 pr_err("aoe: %s %lu\n",
92 "existing device already has static minor number",
93 n);
94 error = -1;
95 } else
96 set_bit(n, used_minors);
97 spin_unlock_irqrestore(&used_minors_lock, flags);
Ed Cashin4bcce1a2012-10-04 17:16:42 -070098 *sysminor = n;
Ed Cashine0b2bba2012-12-17 16:04:03 -080099out:
Ed Cashin4bcce1a2012-10-04 17:16:42 -0700100 return error;
101}
102
103static int
104minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
105{
106 if (aoe_dyndevs)
107 return minor_get_dyn(sysminor);
108 else
109 return minor_get_static(sysminor, aoemaj, aoemin);
110}
111
Ed Cashin0c966212012-10-04 17:16:40 -0700112static void
113minor_free(ulong minor)
114{
115 ulong flags;
116
117 minor /= AOE_PARTITIONS;
118 BUG_ON(minor >= N_DEVS);
119
120 spin_lock_irqsave(&used_minors_lock, flags);
121 BUG_ON(!test_bit(minor, used_minors));
122 clear_bit(minor, used_minors);
123 spin_unlock_irqrestore(&used_minors_lock, flags);
124}
125
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137
Ed Cashin69cf2d852012-10-04 17:16:23 -0700138void
139aoedev_put(struct aoedev *d)
140{
141 ulong flags;
142
143 spin_lock_irqsave(&devlist_lock, flags);
144 d->ref--;
145 spin_unlock_irqrestore(&devlist_lock, flags);
146}
147
Ed L. Cashin3ae1c242006-01-19 13:46:19 -0500148static void
149dummy_timer(ulong vp)
150{
151 struct aoedev *d;
152
153 d = (struct aoedev *)vp;
154 if (d->flags & DEVFL_TKILL)
155 return;
156 d->timer.expires = jiffies + HZ;
157 add_timer(&d->timer);
158}
159
Ed Cashin69cf2d852012-10-04 17:16:23 -0700160static void
161aoe_failip(struct aoedev *d)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162{
Ed Cashin69cf2d852012-10-04 17:16:23 -0700163 struct request *rq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 struct bio *bio;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700165 unsigned long n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166
Ed Cashin69cf2d852012-10-04 17:16:23 -0700167 aoe_failbuf(d, d->ip.buf);
168
169 rq = d->ip.rq;
170 if (rq == NULL)
Ed Cashin896831f2012-10-04 17:16:21 -0700171 return;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700172 while ((bio = d->ip.nxbio)) {
173 clear_bit(BIO_UPTODATE, &bio->bi_flags);
174 d->ip.nxbio = bio->bi_next;
175 n = (unsigned long) rq->special;
176 rq->special = (void *) --n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 }
Ed Cashin69cf2d852012-10-04 17:16:23 -0700178 if ((unsigned long) rq->special == 0)
179 aoe_end_request(d, rq, 0);
Ed Cashin896831f2012-10-04 17:16:21 -0700180}
181
Ed Cashin3fc9b032012-12-17 16:03:51 -0800182static void
183downdev_frame(struct list_head *pos)
184{
185 struct frame *f;
186
187 f = list_entry(pos, struct frame, head);
188 list_del(pos);
189 if (f->buf) {
190 f->buf->nframesout--;
191 aoe_failbuf(f->t->d, f->buf);
192 }
193 aoe_freetframe(f);
194}
195
Ed Cashin896831f2012-10-04 17:16:21 -0700196void
197aoedev_downdev(struct aoedev *d)
198{
199 struct aoetgt *t, **tt, **te;
Ed Cashin896831f2012-10-04 17:16:21 -0700200 struct list_head *head, *pos, *nx;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700201 struct request *rq;
Ed Cashin896831f2012-10-04 17:16:21 -0700202 int i;
203
Ed Cashin69cf2d852012-10-04 17:16:23 -0700204 d->flags &= ~DEVFL_UP;
205
Ed Cashin3fc9b032012-12-17 16:03:51 -0800206 /* clean out active and to-be-retransmitted buffers */
Ed Cashin64a80f52012-10-04 17:16:33 -0700207 for (i = 0; i < NFACTIVE; i++) {
208 head = &d->factive[i];
Ed Cashin3fc9b032012-12-17 16:03:51 -0800209 list_for_each_safe(pos, nx, head)
210 downdev_frame(pos);
Ed Cashin64a80f52012-10-04 17:16:33 -0700211 }
Ed Cashin3fc9b032012-12-17 16:03:51 -0800212 head = &d->rexmitq;
213 list_for_each_safe(pos, nx, head)
214 downdev_frame(pos);
215
Ed Cashin64a80f52012-10-04 17:16:33 -0700216 /* reset window dressings */
Ed Cashin896831f2012-10-04 17:16:21 -0700217 tt = d->targets;
218 te = tt + NTARGETS;
219 for (; tt < te && (t = *tt); tt++) {
Ed Cashin3a0c40d2012-12-17 16:03:43 -0800220 aoecmd_wreset(t);
Ed Cashin896831f2012-10-04 17:16:21 -0700221 t->nout = 0;
222 }
223
Ed Cashin69cf2d852012-10-04 17:16:23 -0700224 /* clean out the in-process request (if any) */
225 aoe_failip(d);
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800226 d->htgt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227
Ed Cashin69cf2d852012-10-04 17:16:23 -0700228 /* fast fail all pending I/O */
229 if (d->blkq) {
230 while ((rq = blk_peek_request(d->blkq))) {
231 blk_start_request(rq);
232 aoe_end_request(d, rq, 1);
233 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 }
235
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 if (d->gd)
Tejun Heo80795ae2008-08-25 19:56:07 +0900237 set_capacity(d->gd, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238}
239
Ed L. Cashin262bf542008-02-08 04:20:03 -0800240static void
241aoedev_freedev(struct aoedev *d)
242{
243 struct aoetgt **t, **e;
244
Tejun Heo5ad21a32010-10-28 06:15:26 -0600245 cancel_work_sync(&d->work);
Ed L. Cashin262bf542008-02-08 04:20:03 -0800246 if (d->gd) {
247 aoedisk_rm_sysfs(d);
248 del_gendisk(d->gd);
249 put_disk(d->gd);
Ed Cashin69cf2d852012-10-04 17:16:23 -0700250 blk_cleanup_queue(d->blkq);
Ed L. Cashin262bf542008-02-08 04:20:03 -0800251 }
252 t = d->targets;
253 e = t + NTARGETS;
254 for (; t < e && *t; t++)
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800255 freetgt(d, *t);
Ed L. Cashin262bf542008-02-08 04:20:03 -0800256 if (d->bufpool)
257 mempool_destroy(d->bufpool);
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800258 skbpoolfree(d);
Ed Cashin0c966212012-10-04 17:16:40 -0700259 minor_free(d->sysminor);
Ed L. Cashin262bf542008-02-08 04:20:03 -0800260 kfree(d);
261}
262
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800263/* return whether the user asked for this particular
264 * device to be flushed
265 */
266static int
267user_req(char *s, size_t slen, struct aoedev *d)
268{
269 char *p;
270 size_t lim;
271
272 if (!d->gd)
273 return 0;
274 p = strrchr(d->gd->disk_name, '/');
275 if (!p)
276 p = d->gd->disk_name;
277 else
278 p += 1;
279 lim = sizeof(d->gd->disk_name);
280 lim -= p - d->gd->disk_name;
281 if (slen < lim)
282 lim = slen;
283
284 return !strncmp(s, p, lim);
285}
286
Ed L. Cashin262bf542008-02-08 04:20:03 -0800287int
288aoedev_flush(const char __user *str, size_t cnt)
289{
290 ulong flags;
291 struct aoedev *d, **dd;
292 struct aoedev *rmd = NULL;
293 char buf[16];
294 int all = 0;
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800295 int specified = 0; /* flush a specific device */
Ed L. Cashin262bf542008-02-08 04:20:03 -0800296
297 if (cnt >= 3) {
298 if (cnt > sizeof buf)
299 cnt = sizeof buf;
300 if (copy_from_user(buf, str, cnt))
301 return -EFAULT;
302 all = !strncmp(buf, "all", 3);
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800303 if (!all)
304 specified = 1;
Ed L. Cashin262bf542008-02-08 04:20:03 -0800305 }
306
Ed L. Cashin262bf542008-02-08 04:20:03 -0800307 spin_lock_irqsave(&devlist_lock, flags);
308 dd = &devlist;
309 while ((d = *dd)) {
310 spin_lock(&d->lock);
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800311 if (specified) {
312 if (!user_req(buf, cnt, d))
313 goto skip;
314 } else if ((!all && (d->flags & DEVFL_UP))
Ed L. Cashin262bf542008-02-08 04:20:03 -0800315 || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
Ed Cashin69cf2d852012-10-04 17:16:23 -0700316 || d->nopen
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800317 || d->ref)
318 goto skip;
319
Ed L. Cashin262bf542008-02-08 04:20:03 -0800320 *dd = d->next;
321 aoedev_downdev(d);
322 d->flags |= DEVFL_TKILL;
323 spin_unlock(&d->lock);
324 d->next = rmd;
325 rmd = d;
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800326 continue;
327skip:
328 spin_unlock(&d->lock);
329 dd = &d->next;
Ed L. Cashin262bf542008-02-08 04:20:03 -0800330 }
331 spin_unlock_irqrestore(&devlist_lock, flags);
332 while ((d = rmd)) {
333 rmd = d->next;
334 del_timer_sync(&d->timer);
335 aoedev_freedev(d); /* must be able to sleep */
336 }
337 return 0;
338}
339
Ed Cashin69cf2d852012-10-04 17:16:23 -0700340/* This has been confirmed to occur once with Tms=3*1000 due to the
341 * driver changing link and not processing its transmit ring. The
342 * problem is hard enough to solve by returning an error that I'm
343 * still punting on "solving" this.
344 */
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800345static void
346skbfree(struct sk_buff *skb)
347{
Ed Cashin69cf2d852012-10-04 17:16:23 -0700348 enum { Sms = 250, Tms = 30 * 1000};
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800349 int i = Tms / Sms;
350
351 if (skb == NULL)
352 return;
353 while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
354 msleep(Sms);
Roel Kluin94873112009-03-04 00:07:57 -0800355 if (i < 0) {
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800356 printk(KERN_ERR
357 "aoe: %s holds ref: %s\n",
358 skb->dev ? skb->dev->name : "netif",
359 "cannot free skb -- memory leaked.");
360 return;
361 }
Ed Cashin3d5b0602012-10-04 17:16:20 -0700362 skb->truesize -= skb->data_len;
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800363 skb_shinfo(skb)->nr_frags = skb->data_len = 0;
364 skb_trim(skb, 0);
365 dev_kfree_skb(skb);
366}
367
368static void
369skbpoolfree(struct aoedev *d)
370{
David S. Millere9bb8fb2008-09-21 22:36:49 -0700371 struct sk_buff *skb, *tmp;
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800372
David S. Millere9bb8fb2008-09-21 22:36:49 -0700373 skb_queue_walk_safe(&d->skbpool, skb, tmp)
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800374 skbfree(skb);
David S. Millere9bb8fb2008-09-21 22:36:49 -0700375
376 __skb_queue_head_init(&d->skbpool);
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800377}
378
Ed Cashin0c966212012-10-04 17:16:40 -0700379/* find it or allocate it */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380struct aoedev *
Ed Cashin0c966212012-10-04 17:16:40 -0700381aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382{
383 struct aoedev *d;
Ed Cashin64a80f52012-10-04 17:16:33 -0700384 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 ulong flags;
Ed Cashin10935d02012-12-17 16:04:04 -0800386 ulong sysminor = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387
388 spin_lock_irqsave(&devlist_lock, flags);
389
390 for (d=devlist; d; d=d->next)
Ed Cashin0c966212012-10-04 17:16:40 -0700391 if (d->aoemajor == maj && d->aoeminor == min) {
Ed Cashin69cf2d852012-10-04 17:16:23 -0700392 d->ref++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 break;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700394 }
Ed Cashin4bcce1a2012-10-04 17:16:42 -0700395 if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800396 goto out;
397 d = kcalloc(1, sizeof *d, GFP_ATOMIC);
398 if (!d)
399 goto out;
400 INIT_WORK(&d->work, aoecmd_sleepwork);
401 spin_lock_init(&d->lock);
David S. Millere9bb8fb2008-09-21 22:36:49 -0700402 skb_queue_head_init(&d->skbpool);
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800403 init_timer(&d->timer);
404 d->timer.data = (ulong) d;
405 d->timer.function = dummy_timer;
406 d->timer.expires = jiffies + HZ;
407 add_timer(&d->timer);
408 d->bufpool = NULL; /* defer to aoeblk_gdalloc */
409 d->tgt = d->targets;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700410 d->ref = 1;
Ed Cashin64a80f52012-10-04 17:16:33 -0700411 for (i = 0; i < NFACTIVE; i++)
412 INIT_LIST_HEAD(&d->factive[i]);
Ed Cashin3a0c40d2012-12-17 16:03:43 -0800413 INIT_LIST_HEAD(&d->rexmitq);
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800414 d->sysminor = sysminor;
Ed Cashin0c966212012-10-04 17:16:40 -0700415 d->aoemajor = maj;
416 d->aoeminor = min;
Ed Cashin3a0c40d2012-12-17 16:03:43 -0800417 d->rttavg = RTTAVG_INIT;
418 d->rttdev = RTTDEV_INIT;
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800419 d->next = devlist;
420 devlist = d;
421 out:
Ed L. Cashin3ae1c242006-01-19 13:46:19 -0500422 spin_unlock_irqrestore(&devlist_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 return d;
424}
425
426static void
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800427freetgt(struct aoedev *d, struct aoetgt *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428{
Ed Cashin896831f2012-10-04 17:16:21 -0700429 struct frame *f;
430 struct list_head *pos, *nx, *head;
Ed Cashin1b86fda2012-10-04 17:16:34 -0700431 struct aoeif *ifp;
432
433 for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
434 if (!ifp->nd)
435 break;
436 dev_put(ifp->nd);
437 }
Ed L. Cashine407a7f2006-09-20 14:36:49 -0400438
Ed Cashin896831f2012-10-04 17:16:21 -0700439 head = &t->ffree;
440 list_for_each_safe(pos, nx, head) {
441 list_del(pos);
442 f = list_entry(pos, struct frame, head);
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800443 skbfree(f->skb);
Ed Cashin896831f2012-10-04 17:16:21 -0700444 kfree(f);
445 }
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800446 kfree(t);
447}
448
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449void
450aoedev_exit(void)
451{
452 struct aoedev *d;
453 ulong flags;
454
Ed Cashin69cf2d852012-10-04 17:16:23 -0700455 aoe_flush_iocq();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 while ((d = devlist)) {
457 devlist = d->next;
458
459 spin_lock_irqsave(&d->lock, flags);
460 aoedev_downdev(d);
Ed L. Cashin3ae1c242006-01-19 13:46:19 -0500461 d->flags |= DEVFL_TKILL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 spin_unlock_irqrestore(&d->lock, flags);
463
464 del_timer_sync(&d->timer);
465 aoedev_freedev(d);
466 }
467}
468
469int __init
470aoedev_init(void)
471{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472 return 0;
473}