blob: 91f7c99eb2a922b8dae749667aaf82960664b843 [file] [log] [blame]
/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */
6
7#include <linux/hdreg.h>
8#include <linux/blkdev.h>
9#include <linux/netdevice.h>
Ed L. Cashin9bb237b2008-02-08 04:20:05 -080010#include <linux/delay.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090011#include <linux/slab.h>
Ed Cashin0c966212012-10-04 17:16:40 -070012#include <linux/bitmap.h>
13#include <linux/kdev_t.h>
Ed Cashin4bcce1a2012-10-04 17:16:42 -070014#include <linux/moduleparam.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include "aoe.h"
16
Ed L. Cashin262bf542008-02-08 04:20:03 -080017static void dummy_timer(ulong);
18static void aoedev_freedev(struct aoedev *);
Ed L. Cashin9bb237b2008-02-08 04:20:05 -080019static void freetgt(struct aoedev *d, struct aoetgt *t);
20static void skbpoolfree(struct aoedev *d);
Ed L. Cashin262bf542008-02-08 04:20:03 -080021
Ed Cashin08b60622012-10-04 17:16:47 -070022static int aoe_dyndevs = 1;
Ed Cashin4bcce1a2012-10-04 17:16:42 -070023module_param(aoe_dyndevs, int, 0644);
24MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
25
Linus Torvalds1da177e2005-04-16 15:20:36 -070026static struct aoedev *devlist;
Andrew Morton476aed32008-02-08 04:20:10 -080027static DEFINE_SPINLOCK(devlist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070028
Ed Cashin0c966212012-10-04 17:16:40 -070029/* Because some systems will have one, many, or no
30 * - partitions,
31 * - slots per shelf,
32 * - or shelves,
33 * we need some flexibility in the way the minor numbers
34 * are allocated. So they are dynamic.
35 */
36#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
37
38static DEFINE_SPINLOCK(used_minors_lock);
39static DECLARE_BITMAP(used_minors, N_DEVS);
40
41static int
Ed Cashin4bcce1a2012-10-04 17:16:42 -070042minor_get_dyn(ulong *sysminor)
Ed Cashin0c966212012-10-04 17:16:40 -070043{
44 ulong flags;
45 ulong n;
46 int error = 0;
47
48 spin_lock_irqsave(&used_minors_lock, flags);
49 n = find_first_zero_bit(used_minors, N_DEVS);
50 if (n < N_DEVS)
51 set_bit(n, used_minors);
52 else
53 error = -1;
54 spin_unlock_irqrestore(&used_minors_lock, flags);
55
Ed Cashin4bcce1a2012-10-04 17:16:42 -070056 *sysminor = n * AOE_PARTITIONS;
Ed Cashin0c966212012-10-04 17:16:40 -070057 return error;
58}
59
Ed Cashin4bcce1a2012-10-04 17:16:42 -070060static int
61minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
62{
63 ulong flags;
64 ulong n;
65 int error = 0;
66 enum {
67 /* for backwards compatibility when !aoe_dyndevs,
68 * a static number of supported slots per shelf */
69 NPERSHELF = 16,
70 };
71
72 n = aoemaj * NPERSHELF + aoemin;
73 if (aoemin >= NPERSHELF || n >= N_DEVS) {
74 pr_err("aoe: %s with e%ld.%d\n",
75 "cannot use static minor device numbers",
76 aoemaj, aoemin);
77 error = -1;
78 } else {
79 spin_lock_irqsave(&used_minors_lock, flags);
80 if (test_bit(n, used_minors)) {
81 pr_err("aoe: %s %lu\n",
82 "existing device already has static minor number",
83 n);
84 error = -1;
85 } else
86 set_bit(n, used_minors);
87 spin_unlock_irqrestore(&used_minors_lock, flags);
88 }
89
90 *sysminor = n;
91 return error;
92}
93
94static int
95minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
96{
97 if (aoe_dyndevs)
98 return minor_get_dyn(sysminor);
99 else
100 return minor_get_static(sysminor, aoemaj, aoemin);
101}
102
Ed Cashin0c966212012-10-04 17:16:40 -0700103static void
104minor_free(ulong minor)
105{
106 ulong flags;
107
108 minor /= AOE_PARTITIONS;
109 BUG_ON(minor >= N_DEVS);
110
111 spin_lock_irqsave(&used_minors_lock, flags);
112 BUG_ON(!test_bit(minor, used_minors));
113 clear_bit(minor, used_minors);
114 spin_unlock_irqrestore(&used_minors_lock, flags);
115}
116
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128
Ed Cashin69cf2d852012-10-04 17:16:23 -0700129void
130aoedev_put(struct aoedev *d)
131{
132 ulong flags;
133
134 spin_lock_irqsave(&devlist_lock, flags);
135 d->ref--;
136 spin_unlock_irqrestore(&devlist_lock, flags);
137}
138
Ed L. Cashin3ae1c242006-01-19 13:46:19 -0500139static void
140dummy_timer(ulong vp)
141{
142 struct aoedev *d;
143
144 d = (struct aoedev *)vp;
145 if (d->flags & DEVFL_TKILL)
146 return;
147 d->timer.expires = jiffies + HZ;
148 add_timer(&d->timer);
149}
150
Ed Cashin69cf2d852012-10-04 17:16:23 -0700151static void
152aoe_failip(struct aoedev *d)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153{
Ed Cashin69cf2d852012-10-04 17:16:23 -0700154 struct request *rq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 struct bio *bio;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700156 unsigned long n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157
Ed Cashin69cf2d852012-10-04 17:16:23 -0700158 aoe_failbuf(d, d->ip.buf);
159
160 rq = d->ip.rq;
161 if (rq == NULL)
Ed Cashin896831f2012-10-04 17:16:21 -0700162 return;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700163 while ((bio = d->ip.nxbio)) {
164 clear_bit(BIO_UPTODATE, &bio->bi_flags);
165 d->ip.nxbio = bio->bi_next;
166 n = (unsigned long) rq->special;
167 rq->special = (void *) --n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 }
Ed Cashin69cf2d852012-10-04 17:16:23 -0700169 if ((unsigned long) rq->special == 0)
170 aoe_end_request(d, rq, 0);
Ed Cashin896831f2012-10-04 17:16:21 -0700171}
172
Ed Cashin3fc9b032012-12-17 16:03:51 -0800173static void
174downdev_frame(struct list_head *pos)
175{
176 struct frame *f;
177
178 f = list_entry(pos, struct frame, head);
179 list_del(pos);
180 if (f->buf) {
181 f->buf->nframesout--;
182 aoe_failbuf(f->t->d, f->buf);
183 }
184 aoe_freetframe(f);
185}
186
Ed Cashin896831f2012-10-04 17:16:21 -0700187void
188aoedev_downdev(struct aoedev *d)
189{
190 struct aoetgt *t, **tt, **te;
Ed Cashin896831f2012-10-04 17:16:21 -0700191 struct list_head *head, *pos, *nx;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700192 struct request *rq;
Ed Cashin896831f2012-10-04 17:16:21 -0700193 int i;
194
Ed Cashin69cf2d852012-10-04 17:16:23 -0700195 d->flags &= ~DEVFL_UP;
196
Ed Cashin3fc9b032012-12-17 16:03:51 -0800197 /* clean out active and to-be-retransmitted buffers */
Ed Cashin64a80f52012-10-04 17:16:33 -0700198 for (i = 0; i < NFACTIVE; i++) {
199 head = &d->factive[i];
Ed Cashin3fc9b032012-12-17 16:03:51 -0800200 list_for_each_safe(pos, nx, head)
201 downdev_frame(pos);
Ed Cashin64a80f52012-10-04 17:16:33 -0700202 }
Ed Cashin3fc9b032012-12-17 16:03:51 -0800203 head = &d->rexmitq;
204 list_for_each_safe(pos, nx, head)
205 downdev_frame(pos);
206
Ed Cashin64a80f52012-10-04 17:16:33 -0700207 /* reset window dressings */
Ed Cashin896831f2012-10-04 17:16:21 -0700208 tt = d->targets;
209 te = tt + NTARGETS;
210 for (; tt < te && (t = *tt); tt++) {
Ed Cashin3a0c40d2012-12-17 16:03:43 -0800211 aoecmd_wreset(t);
Ed Cashin896831f2012-10-04 17:16:21 -0700212 t->nout = 0;
213 }
214
Ed Cashin69cf2d852012-10-04 17:16:23 -0700215 /* clean out the in-process request (if any) */
216 aoe_failip(d);
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800217 d->htgt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218
Ed Cashin69cf2d852012-10-04 17:16:23 -0700219 /* fast fail all pending I/O */
220 if (d->blkq) {
221 while ((rq = blk_peek_request(d->blkq))) {
222 blk_start_request(rq);
223 aoe_end_request(d, rq, 1);
224 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 }
226
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 if (d->gd)
Tejun Heo80795ae2008-08-25 19:56:07 +0900228 set_capacity(d->gd, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229}
230
Ed L. Cashin262bf542008-02-08 04:20:03 -0800231static void
232aoedev_freedev(struct aoedev *d)
233{
234 struct aoetgt **t, **e;
235
Tejun Heo5ad21a32010-10-28 06:15:26 -0600236 cancel_work_sync(&d->work);
Ed L. Cashin262bf542008-02-08 04:20:03 -0800237 if (d->gd) {
238 aoedisk_rm_sysfs(d);
239 del_gendisk(d->gd);
240 put_disk(d->gd);
Ed Cashin69cf2d852012-10-04 17:16:23 -0700241 blk_cleanup_queue(d->blkq);
Ed L. Cashin262bf542008-02-08 04:20:03 -0800242 }
243 t = d->targets;
244 e = t + NTARGETS;
245 for (; t < e && *t; t++)
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800246 freetgt(d, *t);
Ed L. Cashin262bf542008-02-08 04:20:03 -0800247 if (d->bufpool)
248 mempool_destroy(d->bufpool);
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800249 skbpoolfree(d);
Ed Cashin0c966212012-10-04 17:16:40 -0700250 minor_free(d->sysminor);
Ed L. Cashin262bf542008-02-08 04:20:03 -0800251 kfree(d);
252}
253
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800254/* return whether the user asked for this particular
255 * device to be flushed
256 */
257static int
258user_req(char *s, size_t slen, struct aoedev *d)
259{
260 char *p;
261 size_t lim;
262
263 if (!d->gd)
264 return 0;
265 p = strrchr(d->gd->disk_name, '/');
266 if (!p)
267 p = d->gd->disk_name;
268 else
269 p += 1;
270 lim = sizeof(d->gd->disk_name);
271 lim -= p - d->gd->disk_name;
272 if (slen < lim)
273 lim = slen;
274
275 return !strncmp(s, p, lim);
276}
277
Ed L. Cashin262bf542008-02-08 04:20:03 -0800278int
279aoedev_flush(const char __user *str, size_t cnt)
280{
281 ulong flags;
282 struct aoedev *d, **dd;
283 struct aoedev *rmd = NULL;
284 char buf[16];
285 int all = 0;
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800286 int specified = 0; /* flush a specific device */
Ed L. Cashin262bf542008-02-08 04:20:03 -0800287
288 if (cnt >= 3) {
289 if (cnt > sizeof buf)
290 cnt = sizeof buf;
291 if (copy_from_user(buf, str, cnt))
292 return -EFAULT;
293 all = !strncmp(buf, "all", 3);
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800294 if (!all)
295 specified = 1;
Ed L. Cashin262bf542008-02-08 04:20:03 -0800296 }
297
Ed L. Cashin262bf542008-02-08 04:20:03 -0800298 spin_lock_irqsave(&devlist_lock, flags);
299 dd = &devlist;
300 while ((d = *dd)) {
301 spin_lock(&d->lock);
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800302 if (specified) {
303 if (!user_req(buf, cnt, d))
304 goto skip;
305 } else if ((!all && (d->flags & DEVFL_UP))
Ed L. Cashin262bf542008-02-08 04:20:03 -0800306 || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
Ed Cashin69cf2d852012-10-04 17:16:23 -0700307 || d->nopen
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800308 || d->ref)
309 goto skip;
310
Ed L. Cashin262bf542008-02-08 04:20:03 -0800311 *dd = d->next;
312 aoedev_downdev(d);
313 d->flags |= DEVFL_TKILL;
314 spin_unlock(&d->lock);
315 d->next = rmd;
316 rmd = d;
Ed Cashin4ba9aa72012-12-17 16:03:30 -0800317 continue;
318skip:
319 spin_unlock(&d->lock);
320 dd = &d->next;
Ed L. Cashin262bf542008-02-08 04:20:03 -0800321 }
322 spin_unlock_irqrestore(&devlist_lock, flags);
323 while ((d = rmd)) {
324 rmd = d->next;
325 del_timer_sync(&d->timer);
326 aoedev_freedev(d); /* must be able to sleep */
327 }
328 return 0;
329}
330
Ed Cashin69cf2d852012-10-04 17:16:23 -0700331/* This has been confirmed to occur once with Tms=3*1000 due to the
332 * driver changing link and not processing its transmit ring. The
333 * problem is hard enough to solve by returning an error that I'm
334 * still punting on "solving" this.
335 */
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800336static void
337skbfree(struct sk_buff *skb)
338{
Ed Cashin69cf2d852012-10-04 17:16:23 -0700339 enum { Sms = 250, Tms = 30 * 1000};
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800340 int i = Tms / Sms;
341
342 if (skb == NULL)
343 return;
344 while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
345 msleep(Sms);
Roel Kluin94873112009-03-04 00:07:57 -0800346 if (i < 0) {
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800347 printk(KERN_ERR
348 "aoe: %s holds ref: %s\n",
349 skb->dev ? skb->dev->name : "netif",
350 "cannot free skb -- memory leaked.");
351 return;
352 }
Ed Cashin3d5b0602012-10-04 17:16:20 -0700353 skb->truesize -= skb->data_len;
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800354 skb_shinfo(skb)->nr_frags = skb->data_len = 0;
355 skb_trim(skb, 0);
356 dev_kfree_skb(skb);
357}
358
359static void
360skbpoolfree(struct aoedev *d)
361{
David S. Millere9bb8fb2008-09-21 22:36:49 -0700362 struct sk_buff *skb, *tmp;
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800363
David S. Millere9bb8fb2008-09-21 22:36:49 -0700364 skb_queue_walk_safe(&d->skbpool, skb, tmp)
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800365 skbfree(skb);
David S. Millere9bb8fb2008-09-21 22:36:49 -0700366
367 __skb_queue_head_init(&d->skbpool);
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800368}
369
Ed Cashin0c966212012-10-04 17:16:40 -0700370/* find it or allocate it */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371struct aoedev *
Ed Cashin0c966212012-10-04 17:16:40 -0700372aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373{
374 struct aoedev *d;
Ed Cashin64a80f52012-10-04 17:16:33 -0700375 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 ulong flags;
Ed Cashin0c966212012-10-04 17:16:40 -0700377 ulong sysminor;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
379 spin_lock_irqsave(&devlist_lock, flags);
380
381 for (d=devlist; d; d=d->next)
Ed Cashin0c966212012-10-04 17:16:40 -0700382 if (d->aoemajor == maj && d->aoeminor == min) {
Ed Cashin69cf2d852012-10-04 17:16:23 -0700383 d->ref++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 break;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700385 }
Ed Cashin4bcce1a2012-10-04 17:16:42 -0700386 if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800387 goto out;
388 d = kcalloc(1, sizeof *d, GFP_ATOMIC);
389 if (!d)
390 goto out;
391 INIT_WORK(&d->work, aoecmd_sleepwork);
392 spin_lock_init(&d->lock);
David S. Millere9bb8fb2008-09-21 22:36:49 -0700393 skb_queue_head_init(&d->skbpool);
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800394 init_timer(&d->timer);
395 d->timer.data = (ulong) d;
396 d->timer.function = dummy_timer;
397 d->timer.expires = jiffies + HZ;
398 add_timer(&d->timer);
399 d->bufpool = NULL; /* defer to aoeblk_gdalloc */
400 d->tgt = d->targets;
Ed Cashin69cf2d852012-10-04 17:16:23 -0700401 d->ref = 1;
Ed Cashin64a80f52012-10-04 17:16:33 -0700402 for (i = 0; i < NFACTIVE; i++)
403 INIT_LIST_HEAD(&d->factive[i]);
Ed Cashin3a0c40d2012-12-17 16:03:43 -0800404 INIT_LIST_HEAD(&d->rexmitq);
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800405 d->sysminor = sysminor;
Ed Cashin0c966212012-10-04 17:16:40 -0700406 d->aoemajor = maj;
407 d->aoeminor = min;
Ed Cashin3a0c40d2012-12-17 16:03:43 -0800408 d->rttavg = RTTAVG_INIT;
409 d->rttdev = RTTDEV_INIT;
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800410 d->next = devlist;
411 devlist = d;
412 out:
Ed L. Cashin3ae1c242006-01-19 13:46:19 -0500413 spin_unlock_irqrestore(&devlist_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 return d;
415}
416
417static void
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800418freetgt(struct aoedev *d, struct aoetgt *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419{
Ed Cashin896831f2012-10-04 17:16:21 -0700420 struct frame *f;
421 struct list_head *pos, *nx, *head;
Ed Cashin1b86fda2012-10-04 17:16:34 -0700422 struct aoeif *ifp;
423
424 for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
425 if (!ifp->nd)
426 break;
427 dev_put(ifp->nd);
428 }
Ed L. Cashine407a7f2006-09-20 14:36:49 -0400429
Ed Cashin896831f2012-10-04 17:16:21 -0700430 head = &t->ffree;
431 list_for_each_safe(pos, nx, head) {
432 list_del(pos);
433 f = list_entry(pos, struct frame, head);
Ed L. Cashin9bb237b2008-02-08 04:20:05 -0800434 skbfree(f->skb);
Ed Cashin896831f2012-10-04 17:16:21 -0700435 kfree(f);
436 }
Ed L. Cashin68e0d422008-02-08 04:20:00 -0800437 kfree(t);
438}
439
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440void
441aoedev_exit(void)
442{
443 struct aoedev *d;
444 ulong flags;
445
Ed Cashin69cf2d852012-10-04 17:16:23 -0700446 aoe_flush_iocq();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 while ((d = devlist)) {
448 devlist = d->next;
449
450 spin_lock_irqsave(&d->lock, flags);
451 aoedev_downdev(d);
Ed L. Cashin3ae1c242006-01-19 13:46:19 -0500452 d->flags |= DEVFL_TKILL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 spin_unlock_irqrestore(&d->lock, flags);
454
455 del_timer_sync(&d->timer);
456 aoedev_freedev(d);
457 }
458}
459
460int __init
461aoedev_init(void)
462{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 return 0;
464}