/*
 * drivers/block/vs_block_client.c
 *
 * Copyright (c) 2012-2018 General Dynamics
 * Copyright (c) 2014 Open Kernel Labs, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * block vservice client driver
 *
 * Function vs_block_client_alloc() is partially derived from
 * drivers/block/brd.c (brd_alloc())
 *
 */

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/genhd.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/version.h>
#include <linux/idr.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

#include <vservices/buffer.h>
#include <vservices/protocol/block/types.h>
#include <vservices/protocol/block/common.h>
#include <vservices/protocol/block/client.h>
#include <vservices/service.h>
#include <vservices/session.h>
#include <vservices/wait.h>

/*
 * BLK_DEF_MAX_SECTORS was replaced with the hard-coded number 1024 in 3.19,
 * and restored in 4.3
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && \
	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0))
#define BLK_DEF_MAX_SECTORS 1024
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
#define bio_sector(bio) (bio)->bi_iter.bi_sector
#define bio_size(bio) (bio)->bi_iter.bi_size
#else
#define bio_sector(bio) (bio)->bi_sector
#define bio_size(bio) (bio)->bi_size
#endif

#define CLIENT_BLKDEV_NAME "vblock"

#define PERDEV_MINORS 256

struct block_client;

struct vs_block_device {
	/*
	 * The client that created this block device. A reference is held
	 * to the client until the block device is released, so this pointer
	 * should always be valid. However, the client may since have reset;
	 * so it should only be used if, after locking it, its blkdev pointer
	 * points back to this block device.
	 */
	struct block_client *client;

	int id;
	struct gendisk *disk;
	struct request_queue *queue;

	struct kref kref;
};

struct block_client {
	struct vs_client_block_state client;
	struct vs_service_device *service;

	/* Tasklet & queue for bouncing buffers out of read acks */
	struct tasklet_struct rx_tasklet;
	struct list_head rx_queue;
	struct spinlock rx_queue_lock;

	/*
	 * The current virtual block device. This gets replaced when we do
	 * a reset since other parts of the kernel (e.g. vfs) may still
	 * be accessing the disk.
	 */
	struct vs_block_device *blkdev;

	/* Shared work item for disk creation */
	struct work_struct disk_creation_work;

	struct kref kref;
};

#define state_to_block_client(state) \
	container_of(state, struct block_client, client)

static int block_client_major;

/* Unique identifier allocation for virtual block devices */
static DEFINE_IDA(vs_block_ida);
static DEFINE_MUTEX(vs_block_ida_lock);

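/* Translate a vservices block I/O error code into a Linux errno value. */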
static int
block_client_vs_to_linux_error(vservice_block_block_io_error_t vs_err)
{
	switch (vs_err) {
	case VSERVICE_BLOCK_INVALID_INDEX:
		return -EILSEQ;
	case VSERVICE_BLOCK_MEDIA_FAILURE:
		return -EIO;
	case VSERVICE_BLOCK_MEDIA_TIMEOUT:
		return -ETIMEDOUT;
	case VSERVICE_BLOCK_UNSUPPORTED_COMMAND:
		return -ENOTSUPP;
	case VSERVICE_BLOCK_SERVICE_RESET:
		return -ENXIO;
	default:
		WARN_ON(vs_err);
		return 0;
	}

	return 0;
}

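/*
 * kref release function for a block_client: drops the service reference
 * taken in vs_block_client_alloc() and frees the client structure.
 */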
static void vs_block_client_kfree(struct kref *kref)
{
	struct block_client *client =
		container_of(kref, struct block_client, kref);

	vs_put_service(client->service);
	kfree(client);
}

static void vs_block_client_put(struct block_client *client)
{
	kref_put(&client->kref, vs_block_client_kfree);
}

static void vs_block_device_kfree(struct kref *kref)
{
	struct vs_block_device *blkdev =
		container_of(kref, struct vs_block_device, kref);

	/* Delete the disk and clean up its queue */
	del_gendisk(blkdev->disk);
	blk_cleanup_queue(blkdev->queue);
	put_disk(blkdev->disk);

	mutex_lock(&vs_block_ida_lock);
	ida_remove(&vs_block_ida, blkdev->id);
	mutex_unlock(&vs_block_ida_lock);

	if (blkdev->client)
		vs_block_client_put(blkdev->client);

	kfree(blkdev);
}

static void vs_block_device_put(struct vs_block_device *blkdev)
{
	kref_put(&blkdev->kref, vs_block_device_kfree);
}

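/*
 * Block device release: drops the reference on the vs_block_device that was
 * taken in vs_block_client_blkdev_open(). The release callback's return type
 * changed to void in Linux 3.10, hence the conditional prototype below.
 */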
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
static void
#else
static int
#endif
vs_block_client_blkdev_release(struct gendisk *disk, fmode_t mode)
{
	struct vs_block_device *blkdev = disk->private_data;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
	if (WARN_ON(!blkdev))
		return;
#else
	if (WARN_ON(!blkdev))
		return -ENXIO;
#endif

	vs_block_device_put(blkdev);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
	return 0;
#endif
}

static int vs_block_client_blkdev_open(struct block_device *bdev, fmode_t mode)
{
	struct vs_block_device *blkdev = bdev->bd_disk->private_data;
	struct block_client *client;
	int err = -ENXIO;

	if (!blkdev || !kref_get_unless_zero(&blkdev->kref))
		goto fail_get_blkdev;

	client = blkdev->client;
	if (WARN_ON(!client))
		goto fail_lock_client;

	if (!vs_state_lock_safe(&client->client)) {
		err = -ENODEV;
		goto fail_lock_client;
	}

	if (blkdev != client->blkdev) {
		/* The client has reset, this blkdev is no longer usable */
		err = -ENXIO;
		goto fail_check_client;
	}

	if ((mode & FMODE_WRITE) > 0 && client->client.readonly) {
		dev_dbg(&client->service->dev,
			"opening a readonly disk as writable\n");
		err = -EROFS;
		goto fail_check_client;
	}

	vs_state_unlock(&client->client);

	return 0;

fail_check_client:
	vs_state_unlock(&client->client);
fail_lock_client:
	vs_block_device_put(blkdev);
fail_get_blkdev:
	return err;
}

static int vs_block_client_blkdev_getgeo(struct block_device *bdev,
		struct hd_geometry *geo)
{
	/* These numbers are some default sane values for disk geometry. */
	geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16);
	geo->heads = 4;
	geo->sectors = 16;

	return 0;
}

/*
 * Indirectly determine linux block layer sector size and ensure that our
 * sector size matches.
 */
static int vs_block_client_check_sector_size(struct block_client *client,
		struct bio *bio)
{
	if (unlikely(!bio_sectors(bio))) {
		dev_err(&client->service->dev, "zero-length bio");
		return -EIO;
	}

	if (unlikely(bio_size(bio) % client->client.sector_size)) {
		dev_err(&client->service->dev,
			"bio has %zd bytes, unexpected for sector_size of %zd bytes",
			(size_t)bio_size(bio),
			(size_t)client->client.sector_size);
		return -EIO;
	}

	return 0;
}

static const struct block_device_operations block_client_ops = {
	.getgeo = vs_block_client_blkdev_getgeo,
	.open = vs_block_client_blkdev_open,
	.release = vs_block_client_blkdev_release,
	.owner = THIS_MODULE,
};

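/*
 * Copy the bio's data into a pbuf and send a write request to the server.
 * Waits (non-interruptibly) until the request can be sent and quota is
 * available; returns -ENXIO if the service is reset while waiting.
 */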
static int block_client_send_write_req(struct block_client *client,
		struct bio *bio)
{
	struct vs_client_block_state *state = &client->client;
	struct vs_mbuf *mbuf;
	struct vs_pbuf pbuf;
	struct bio_vec *bvec;
	int err;
	bool flush, nodelay, commit;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
	struct bvec_iter iter;
	struct bio_vec bvec_local;
#else
	int i;
#endif

	err = vs_block_client_check_sector_size(client, bio);
	if (err < 0)
		goto fail;

	do {
		/* Wait until it's possible to send a write request */
		err = vs_wait_state_nointr(state,
				vs_client_block_io_req_write_can_send(state));
		if (err == -ECANCELED)
			err = -ENXIO;
		if (err < 0)
			goto fail;

		/* Wait for quota, while sending a write remains possible */
		mbuf = vs_wait_alloc_nointr(state,
				vs_client_block_io_req_write_can_send(state),
				vs_client_block_io_alloc_req_write(
					state, &pbuf, GFP_KERNEL));
		err = IS_ERR(mbuf) ? PTR_ERR(mbuf) : 0;

		/* Retry if sending is no longer possible */
	} while (err == -ECANCELED);

	if (err < 0)
		goto fail;

	vs_pbuf_resize(&pbuf, 0);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
	bvec = &bvec_local;
	bio_for_each_segment(bvec_local, bio, iter)
#else
	bio_for_each_segment(bvec, bio, i)
#endif
	{
		unsigned long flags;
		void *buf = bvec_kmap_irq(bvec, &flags);
		flush_kernel_dcache_page(bvec->bv_page);
		err = vs_pbuf_append(&pbuf, buf, bvec->bv_len);
		bvec_kunmap_irq(buf, &flags);
		if (err < 0) {
			dev_err(&client->service->dev,
				"pbuf copy failed with err %d\n", err);
			err = -EIO;
			goto fail_free_write;
		}
	}

	if (unlikely(vs_pbuf_size(&pbuf) != bio_size(bio))) {
		dev_err(&client->service->dev,
			"pbuf size is wrong: %zd, should be %zd\n",
			vs_pbuf_size(&pbuf), (size_t)bio_size(bio));
		err = -EIO;
		goto fail_free_write;
	}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
	flush = (bio_flags(bio) & REQ_PREFLUSH);
	commit = (bio_flags(bio) & REQ_FUA);
	nodelay = (bio_flags(bio) & REQ_SYNC);
#else
	flush = (bio->bi_rw & REQ_FLUSH);
	commit = (bio->bi_rw & REQ_FUA);
	nodelay = (bio->bi_rw & REQ_SYNC);
#endif
	err = vs_client_block_io_req_write(state, bio, bio_sector(bio),
			bio_sectors(bio), nodelay, flush, commit, pbuf, mbuf);

	if (err) {
		dev_err(&client->service->dev,
			"write req failed with err %d\n", err);
		goto fail_free_write;
	}

	return 0;

fail_free_write:
	vs_client_block_io_free_req_write(state, &pbuf, mbuf);
fail:
	return err;
}

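/*
 * Send a read request for the given bio. The payload arrives later in an
 * ack_read message and is copied back into the bio by the RX tasklet.
 */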
static int block_client_send_read_req(struct block_client *client,
		struct bio *bio)
{
	struct vs_client_block_state *state = &client->client;
	int err;
	bool flush, nodelay;

	err = vs_block_client_check_sector_size(client, bio);
	if (err < 0)
		return err;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
	flush = (bio_flags(bio) & REQ_PREFLUSH);
	nodelay = (bio_flags(bio) & REQ_SYNC);
#else
	flush = (bio->bi_rw & REQ_FLUSH);
	nodelay = (bio->bi_rw & REQ_SYNC);
#endif
	do {
		/* Wait until it's possible to send a read request */
		err = vs_wait_state_nointr(state,
				vs_client_block_io_req_read_can_send(state));
		if (err == -ECANCELED)
			err = -ENXIO;
		if (err < 0)
			break;

		/* Wait for quota, while sending a read remains possible */
		err = vs_wait_send_nointr(state,
				vs_client_block_io_req_read_can_send(state),
				vs_client_block_io_req_read(state, bio,
					bio_sector(bio), bio_sectors(bio),
					nodelay, flush, GFP_KERNEL));
	} while (err == -ECANCELED);

	return err;
}

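/*
 * make_request function for the virtual block queue: translates each bio
 * into a vservices read or write request. On failure the bio is completed
 * with an error. The return type is blk_qc_t from Linux 4.4 onwards.
 */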
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
static blk_qc_t
#else
static void
#endif
vs_block_client_make_request(struct request_queue *q, struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;
	struct vs_block_device *blkdev = bdev->bd_disk->private_data;
	struct block_client *client;
	int err = 0;

	client = blkdev->client;
	if (!client || !kref_get_unless_zero(&client->kref)) {
		err = -ENODEV;
		goto fail_get_client;
	}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
	blk_queue_split(q, &bio, q->bio_split);
#endif

	if (!vs_state_lock_safe(&client->client)) {
		err = -ENODEV;
		goto fail_lock_client;
	}

	if (client->blkdev != blkdev) {
		/* Client has reset, this block device is no longer usable */
		err = -EIO;
		goto fail_check_client;
	}

	if (bio_data_dir(bio) == WRITE)
		err = block_client_send_write_req(client, bio);
	else
		err = block_client_send_read_req(client, bio);

fail_check_client:
	if (err == -ENOLINK)
		err = -EIO;
	else
		vs_state_unlock(&client->client);
fail_lock_client:
	vs_block_client_put(client);
fail_get_client:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
	if (err < 0) {
		bio->bi_error = err;
		bio_endio(bio);
	}
#else
	if (err < 0)
		bio_endio(bio, err);
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
	return BLK_QC_T_NONE;
#endif
}

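/* Allocate a unique device index from the global IDA for a new disk. */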
static int vs_block_client_get_blkdev_id(struct block_client *client)
{
	int id;
	int ret;

retry:
	ret = ida_pre_get(&vs_block_ida, GFP_KERNEL);
	if (ret == 0)
		return -ENOMEM;

	mutex_lock(&vs_block_ida_lock);
	ret = ida_get_new(&vs_block_ida, &id);
	mutex_unlock(&vs_block_ida_lock);

	if (ret == -EAGAIN)
		goto retry;

	return id;
}

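/*
 * Allocate and register the gendisk and request queue for a newly opened
 * service. Runs from the disk creation work item, since it may sleep.
 */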
static int vs_block_client_disk_add(struct block_client *client)
{
	struct vs_block_device *blkdev;
	unsigned int max_hw_sectors;
	int err;

	dev_dbg(&client->service->dev, "device add\n");

	blkdev = kzalloc(sizeof(*blkdev), GFP_KERNEL);
	if (!blkdev) {
		err = -ENOMEM;
		goto fail;
	}

	kref_init(&blkdev->kref);
	blkdev->id = vs_block_client_get_blkdev_id(client);
	if (blkdev->id < 0) {
		err = blkdev->id;
		goto fail_free_blkdev;
	}

	if ((blkdev->id * PERDEV_MINORS) >> MINORBITS) {
		err = -ENODEV;
		goto fail_remove_ida;
	}

	blkdev->queue = blk_alloc_queue(GFP_KERNEL);
	if (!blkdev->queue) {
		dev_err(&client->service->dev,
			"Error initializing blk queue\n");
		err = -ENOMEM;
		goto fail_remove_ida;
	}

	blk_queue_make_request(blkdev->queue, vs_block_client_make_request);
	blk_queue_bounce_limit(blkdev->queue, BLK_BOUNCE_ANY);
	blk_queue_dma_alignment(blkdev->queue, 0);

	/*
	 * Mark this as a paravirtualised device. This is just an alias
	 * of QUEUE_FLAG_NONROT, which prevents the I/O schedulers trying
	 * to wait for the disk to spin.
	 */
	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, blkdev->queue);

	blkdev->queue->queuedata = blkdev;

	blkdev->client = client;
	kref_get(&client->kref);

	max_hw_sectors = min_t(sector_t, BLK_DEF_MAX_SECTORS,
			client->client.segment_size /
			client->client.sector_size);
	blk_queue_max_hw_sectors(blkdev->queue, max_hw_sectors);
	blk_queue_logical_block_size(blkdev->queue,
			client->client.sector_size);
	blk_queue_physical_block_size(blkdev->queue,
			client->client.sector_size);

	blkdev->disk = alloc_disk(PERDEV_MINORS);
	if (!blkdev->disk) {
		dev_err(&client->service->dev, "Error allocating disk\n");
		err = -ENOMEM;
		goto fail_free_blk_queue;
	}

	if (client->client.readonly) {
		dev_dbg(&client->service->dev, "set device as readonly\n");
		set_disk_ro(blkdev->disk, true);
	}

	blkdev->disk->major = block_client_major;
	blkdev->disk->first_minor = blkdev->id * PERDEV_MINORS;
	blkdev->disk->fops = &block_client_ops;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
	blkdev->disk->driverfs_dev = &client->service->dev;
#endif
	blkdev->disk->private_data = blkdev;
	blkdev->disk->queue = blkdev->queue;
	blkdev->disk->flags |= GENHD_FL_EXT_DEVT;

	/*
	 * The block device name is vblock<x>, where x is a unique
	 * identifier. Userspace should rename or symlink the device
	 * appropriately, typically by processing the add uevent.
	 *
	 * If a virtual block device is reset then it may re-open with a
	 * different identifier if something still holds a reference to
	 * the old device (such as a userspace application having an open
	 * file handle).
	 */
	snprintf(blkdev->disk->disk_name, sizeof(blkdev->disk->disk_name),
			"%s%d", CLIENT_BLKDEV_NAME, blkdev->id);
	set_capacity(blkdev->disk, client->client.device_sectors *
			(client->client.sector_size >> 9));

	/*
	 * We need to hold a reference on blkdev across add_disk(), to make
	 * sure a concurrent reset does not immediately release the blkdev
	 * and call del_gendisk().
	 */
	kref_get(&blkdev->kref);

	vs_service_state_lock(client->service);
	if (!VSERVICE_BASE_STATE_IS_RUNNING(client->client.state.base)) {
		vs_service_state_unlock(client->service);
		err = -ENXIO;
		goto fail_free_blk_queue;
	}
	client->blkdev = blkdev;
	vs_service_state_unlock(client->service);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,7,0)
	device_add_disk(&client->service->dev, blkdev->disk);
#else
	add_disk(blkdev->disk);
#endif
	dev_dbg(&client->service->dev, "added block disk '%s'\n",
			blkdev->disk->disk_name);

	/* Release the reference taken above. */
	vs_block_device_put(blkdev);

	return 0;

fail_free_blk_queue:
	blk_cleanup_queue(blkdev->queue);
fail_remove_ida:
	mutex_lock(&vs_block_ida_lock);
	ida_remove(&vs_block_ida, blkdev->id);
	mutex_unlock(&vs_block_ida_lock);
fail_free_blkdev:
	kfree(blkdev);
fail:
	return err;
}

static void vs_block_client_disk_creation_work(struct work_struct *work)
{
	struct block_client *client = container_of(work,
			struct block_client, disk_creation_work);
	struct vs_block_device *blkdev;
	bool running;

	vs_service_state_lock(client->service);
	blkdev = client->blkdev;
	running = VSERVICE_BASE_STATE_IS_RUNNING(client->client.state.base);

	dev_dbg(&client->service->dev,
		"disk changed: blkdev = %pK, running = %d\n",
		client->blkdev, running);
	if (!blkdev && running) {
		dev_dbg(&client->service->dev, "adding block disk\n");
		vs_service_state_unlock(client->service);
		vs_block_client_disk_add(client);
	} else {
		vs_service_state_unlock(client->service);
	}
}

static void vs_block_client_rx_tasklet(unsigned long data);

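/*
 * Allocate the per-service client state. The RX tasklet is created disabled
 * here and is only enabled once the service is opened.
 */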
static struct vs_client_block_state *
vs_block_client_alloc(struct vs_service_device *service)
{
	struct block_client *client;

	client = kzalloc(sizeof(*client), GFP_KERNEL);
	if (!client) {
		dev_err(&service->dev, "Error allocating client struct\n");
		return NULL;
	}

	vs_get_service(service);
	client->service = service;

	INIT_LIST_HEAD(&client->rx_queue);
	spin_lock_init(&client->rx_queue_lock);
	tasklet_init(&client->rx_tasklet, vs_block_client_rx_tasklet,
			(unsigned long)client);
	tasklet_disable(&client->rx_tasklet);

	INIT_WORK(&client->disk_creation_work,
			vs_block_client_disk_creation_work);
	kref_init(&client->kref);

	dev_dbg(&service->dev, "New block client %pK\n", client);

	return &client->client;
}

static void vs_block_client_release(struct vs_client_block_state *state)
{
	struct block_client *client = state_to_block_client(state);

	flush_work(&client->disk_creation_work);

	vs_block_client_put(client);
}

/* FIXME: Jira ticket SDK-2459 - anjaniv */
static void vs_block_client_closed(struct vs_client_block_state *state)
{
	struct block_client *client = state_to_block_client(state);

	/*
	 * Stop the RX bounce tasklet and clean up its queue. We can wait for
	 * it to stop safely because it doesn't need to acquire the state
	 * lock, only the RX lock which we acquire after it is disabled.
	 */
	tasklet_disable(&client->rx_tasklet);
	spin_lock(&client->rx_queue_lock);
	while (!list_empty(&client->rx_queue)) {
		struct vs_mbuf *mbuf = list_first_entry(&client->rx_queue,
				struct vs_mbuf, queue);
		struct vs_pbuf pbuf;
		list_del(&mbuf->queue);
		vs_client_block_io_getbufs_ack_read(state, &pbuf, mbuf);
		vs_client_block_io_free_ack_read(state, &pbuf, mbuf);
	}
	spin_unlock(&client->rx_queue_lock);

	if (client->blkdev) {
		struct vs_block_device *blkdev = client->blkdev;
		char service_remove[] = "REMOVING_SERVICE=1";
		/* + 9 because "DEVNAME=" is 8 chars plus 1 for '\0' */
		char devname[sizeof(blkdev->disk->disk_name) + 9];
		char *envp[] = { service_remove, devname, NULL };

		dev_dbg(&client->service->dev, "removing block disk\n");

		/*
		 * Send a change event with DEVNAME to allow the block helper
		 * script to remove any server sessions which use either
		 * v${SERVICE_NAME} or ${DEVNAME}. The remove event generated
		 * by the session driver doesn't include DEVNAME so the only
		 * way for userspace to map SERVICE_NAME to DEVNAME is by the
		 * symlink added when the client service was created. If that
		 * symlink has been deleted, there's no other way to connect
		 * the two names.
		 */
		snprintf(devname, sizeof(devname), "DEVNAME=%s",
				blkdev->disk->disk_name);
		kobject_uevent_env(&client->service->dev.kobj, KOBJ_CHANGE,
				envp);

		/*
		 * We are done with the device now. The block device will only
		 * get removed once there are no more users (e.g. userspace
		 * applications).
		 */
		client->blkdev = NULL;
		vs_block_device_put(blkdev);
	}
}

static void vs_block_client_opened(struct vs_client_block_state *state)
{
	struct block_client *client = state_to_block_client(state);

#if !defined(CONFIG_LBDAF) && !defined(CONFIG_64BIT)
	if ((state->device_sectors * (state->sector_size >> 9))
			>> (sizeof(sector_t) * 8)) {
		dev_err(&client->service->dev,
			"Client doesn't support full capacity large block devices\n");
		vs_client_block_close(state);
		return;
	}
#endif

	/* Unblock the RX bounce tasklet. */
	tasklet_enable(&client->rx_tasklet);

	/*
	 * The block device allocation needs to sleep, so we defer it to a
	 * work queue.
	 */
	queue_work(client->service->work_queue, &client->disk_creation_work);
}

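/*
 * Copy the payload of a read ack into the bio's segments and complete the
 * bio. Called from the RX tasklet rather than directly from the message
 * handler.
 */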
static int vs_block_client_ack_read(struct vs_client_block_state *state,
		void *tag, struct vs_pbuf pbuf, struct vs_mbuf *mbuf)
{
	struct block_client *client = state_to_block_client(state);
	struct bio *bio = tag;
	struct bio_vec *bvec;
	int err = 0;
	size_t bytes_read = 0;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
	struct bio_vec bvec_local;
	struct bvec_iter iter;
#else
	int i;
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
	bvec = &bvec_local;
	bio_for_each_segment(bvec_local, bio, iter)
#else
	bio_for_each_segment(bvec, bio, i)
#endif
	{
		unsigned long flags;
		void *buf;
		if (vs_pbuf_size(&pbuf) < bytes_read + bvec->bv_len) {
			dev_err(&client->service->dev,
				"bio read overrun: %zu into %zu byte response, but need %zd bytes\n",
				bytes_read, vs_pbuf_size(&pbuf),
				(size_t)bvec->bv_len);
			err = -EIO;
			break;
		}
		buf = bvec_kmap_irq(bvec, &flags);
		memcpy(buf, vs_pbuf_data(&pbuf) + bytes_read, bvec->bv_len);
		flush_kernel_dcache_page(bvec->bv_page);
		bvec_kunmap_irq(buf, &flags);
		bytes_read += bvec->bv_len;
	}

	vs_client_block_io_free_ack_read(state, &pbuf, mbuf);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
	if (err < 0)
		bio->bi_error = err;
	bio_endio(bio);
#else
	bio_endio(bio, err);
#endif

	return 0;
}

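/*
 * RX tasklet: takes the next queued read ack off rx_queue and processes it,
 * rescheduling itself while more acks remain.
 */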
static void vs_block_client_rx_tasklet(unsigned long data)
{
	struct block_client *client = (struct block_client *)data;
	struct vs_mbuf *mbuf;
	struct vs_pbuf pbuf;

	spin_lock(&client->rx_queue_lock);

	/* The list shouldn't be empty. */
	if (WARN_ON(list_empty(&client->rx_queue))) {
		spin_unlock(&client->rx_queue_lock);
		return;
	}

	/* Get the next mbuf, and reschedule ourselves if there are more. */
	mbuf = list_first_entry(&client->rx_queue, struct vs_mbuf, queue);
	list_del(&mbuf->queue);
	if (!list_empty(&client->rx_queue))
		tasklet_schedule(&client->rx_tasklet);

	spin_unlock(&client->rx_queue_lock);

	/* Process the ack. */
	vs_client_block_io_getbufs_ack_read(&client->client, &pbuf, mbuf);
	vs_block_client_ack_read(&client->client, mbuf->priv, pbuf, mbuf);
}

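/*
 * ack_read message handler. This driver sets rx_atomic, so this may run in
 * atomic context; the buffer copy is deferred to the RX tasklet.
 */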
static int vs_block_client_queue_ack_read(struct vs_client_block_state *state,
		void *tag, struct vs_pbuf pbuf, struct vs_mbuf *mbuf)
{
	struct block_client *client = state_to_block_client(state);

	spin_lock(&client->rx_queue_lock);
	list_add_tail(&mbuf->queue, &client->rx_queue);
	mbuf->priv = tag;
	spin_unlock(&client->rx_queue_lock);

	tasklet_schedule(&client->rx_tasklet);

	wake_up(&state->service->quota_wq);

	return 0;
}

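/*
 * Write ack handler: completes the bio used as the request tag and wakes
 * any sender waiting on quota.
 */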
static int vs_block_client_ack_write(struct vs_client_block_state *state,
		void *tag)
{
	struct bio *bio = tag;

	if (WARN_ON(!bio))
		return -EPROTO;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
	bio_endio(bio);
#else
	bio_endio(bio, 0);
#endif

	wake_up(&state->service->quota_wq);

	return 0;
}

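/*
 * Shared read/write nack handler: fails the tagged bio with the mapped
 * Linux error code.
 */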
static int vs_block_client_nack_io(struct vs_client_block_state *state,
		void *tag, vservice_block_block_io_error_t err)
{
	struct bio *bio = tag;

	if (WARN_ON(!bio))
		return -EPROTO;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
	bio->bi_error = block_client_vs_to_linux_error(err);
	bio_endio(bio);
#else
	bio_endio(bio, block_client_vs_to_linux_error(err));
#endif

	wake_up(&state->service->quota_wq);

	return 0;
}

static struct vs_client_block block_client_driver = {
	.rx_atomic = true,
	.alloc = vs_block_client_alloc,
	.release = vs_block_client_release,
	.opened = vs_block_client_opened,
	.closed = vs_block_client_closed,
	.io = {
		.ack_read = vs_block_client_queue_ack_read,
		.nack_read = vs_block_client_nack_io,
		.ack_write = vs_block_client_ack_write,
		.nack_write = vs_block_client_nack_io,
	}
};

static int __init vs_block_client_init(void)
{
	int err;

	block_client_major = register_blkdev(0, CLIENT_BLKDEV_NAME);
	if (block_client_major < 0) {
		pr_err("Err registering blkdev\n");
		err = -ENOMEM;
		goto fail;
	}

	err = vservice_block_client_register(&block_client_driver,
			"block_client_driver");
	if (err)
		goto fail_unregister_blkdev;

	return 0;

fail_unregister_blkdev:
	unregister_blkdev(block_client_major, CLIENT_BLKDEV_NAME);
fail:
	return err;
}

static void __exit vs_block_client_exit(void)
{
	vservice_block_client_unregister(&block_client_driver);
	unregister_blkdev(block_client_major, CLIENT_BLKDEV_NAME);
}

module_init(vs_block_client_init);
module_exit(vs_block_client_exit);

MODULE_DESCRIPTION("OKL4 Virtual Services Block Client Driver");
MODULE_AUTHOR("Open Kernel Labs, Inc");