| /* |
| * drivers/block/vs_block_client.c |
| * |
| * Copyright (c) 2012-2018 General Dynamics |
| * Copyright (c) 2014 Open Kernel Labs, Inc. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 as |
| * published by the Free Software Foundation. |
| * |
| * block vservice client driver |
| * |
| * Function vs_block_client_alloc() is partially derived from |
| * drivers/block/brd.c (brd_alloc()) |
| * |
| */ |
| |
| #include <linux/device.h> |
| #include <linux/slab.h> |
| #include <linux/init.h> |
| #include <linux/blkdev.h> |
| #include <linux/hdreg.h> |
| #include <linux/genhd.h> |
| #include <linux/fs.h> |
| #include <linux/bio.h> |
| #include <linux/kref.h> |
| #include <linux/mutex.h> |
| #include <linux/list.h> |
| #include <linux/version.h> |
| #include <linux/idr.h> |
| #include <linux/module.h> |
| #include <linux/moduleparam.h> |
| |
| #include <vservices/buffer.h> |
| #include <vservices/protocol/block/types.h> |
| #include <vservices/protocol/block/common.h> |
| #include <vservices/protocol/block/client.h> |
| #include <vservices/service.h> |
| #include <vservices/session.h> |
| #include <vservices/wait.h> |
| |
| /* |
| * BLK_DEF_MAX_SECTORS was replaced with the hard-coded number 1024 in 3.19, |
| * and restored in 4.3 |
| */ |
| #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && \ |
| (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)) |
| #define BLK_DEF_MAX_SECTORS 1024 |
| #endif |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) |
| #define bio_sector(bio) (bio)->bi_iter.bi_sector |
| #define bio_size(bio) (bio)->bi_iter.bi_size |
| #else |
| #define bio_sector(bio) (bio)->bi_sector |
| #define bio_size(bio) (bio)->bi_size |
| #endif |
| |
| #define CLIENT_BLKDEV_NAME "vblock" |
| |
| #define PERDEV_MINORS 256 |
| |
| struct block_client; |
| |
| struct vs_block_device { |
| /* |
| * The client that created this block device. A reference is held |
| * to the client until the block device is released, so this pointer |
| * is always valid. However, the client may have reset since this |
| * device was created, so it should only be used if, after locking |
| * it, its blkdev pointer still points back to this block device. |
| */ |
| struct block_client *client; |
| |
| int id; |
| struct gendisk *disk; |
| struct request_queue *queue; |
| |
| struct kref kref; |
| }; |
| |
| struct block_client { |
| struct vs_client_block_state client; |
| struct vs_service_device *service; |
| |
| /* Tasklet & queue for bouncing buffers out of read acks */ |
| struct tasklet_struct rx_tasklet; |
| struct list_head rx_queue; |
| spinlock_t rx_queue_lock; |
| |
| /* |
| * The current virtual block device. This is replaced, rather than |
| * reused, when the service resets, because other parts of the kernel |
| * (e.g. the VFS) may still be accessing the old disk. |
| */ |
| struct vs_block_device *blkdev; |
| |
| /* Shared work item for disk creation */ |
| struct work_struct disk_creation_work; |
| |
| struct kref kref; |
| }; |
| |
| #define state_to_block_client(state) \ |
| container_of(state, struct block_client, client) |
| |
| static int block_client_major; |
| |
| /* Unique identifier allocation for virtual block devices */ |
| static DEFINE_IDA(vs_block_ida); |
| static DEFINE_MUTEX(vs_block_ida_lock); |
| |
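| /* |
| * Map a vservices block I/O error code onto a Linux errno value. |
| * Unrecognised non-zero codes trigger a warning and are mapped to 0. |
| */ |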
| static int |
| block_client_vs_to_linux_error(vservice_block_block_io_error_t vs_err) |
| { |
| switch (vs_err) { |
| case VSERVICE_BLOCK_INVALID_INDEX: |
| return -EILSEQ; |
| case VSERVICE_BLOCK_MEDIA_FAILURE: |
| return -EIO; |
| case VSERVICE_BLOCK_MEDIA_TIMEOUT: |
| return -ETIMEDOUT; |
| case VSERVICE_BLOCK_UNSUPPORTED_COMMAND: |
| return -ENOTSUPP; |
| case VSERVICE_BLOCK_SERVICE_RESET: |
| return -ENXIO; |
| default: |
| WARN_ON(vs_err); |
| return 0; |
| } |
| } |
| |
| static void vs_block_client_kfree(struct kref *kref) |
| { |
| struct block_client *client = |
| container_of(kref, struct block_client, kref); |
| |
| vs_put_service(client->service); |
| kfree(client); |
| } |
| |
| static void vs_block_client_put(struct block_client *client) |
| { |
| kref_put(&client->kref, vs_block_client_kfree); |
| } |
| |
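| /* |
| * Final kref release for a virtual block device: tear down the gendisk |
| * and its request queue, return the device identifier to the IDA, and |
| * drop the reference held on the owning client. |
| */ |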
| static void vs_block_device_kfree(struct kref *kref) |
| { |
| struct vs_block_device *blkdev = |
| container_of(kref, struct vs_block_device, kref); |
| |
| /* Delete the disk and clean up its queue */ |
| del_gendisk(blkdev->disk); |
| blk_cleanup_queue(blkdev->queue); |
| put_disk(blkdev->disk); |
| |
| mutex_lock(&vs_block_ida_lock); |
| ida_remove(&vs_block_ida, blkdev->id); |
| mutex_unlock(&vs_block_ida_lock); |
| |
| if (blkdev->client) |
| vs_block_client_put(blkdev->client); |
| |
| kfree(blkdev); |
| } |
| |
| static void vs_block_device_put(struct vs_block_device *blkdev) |
| { |
| kref_put(&blkdev->kref, vs_block_device_kfree); |
| } |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) |
| static void |
| #else |
| static int |
| #endif |
| vs_block_client_blkdev_release(struct gendisk *disk, fmode_t mode) |
| { |
| struct vs_block_device *blkdev = disk->private_data; |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) |
| if (WARN_ON(!blkdev)) |
| return; |
| #else |
| if (WARN_ON(!blkdev)) |
| return -ENXIO; |
| #endif |
| |
| vs_block_device_put(blkdev); |
| #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) |
| return 0; |
| #endif |
| } |
| |
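| /* |
| * Opening the block device takes a reference on the vs_block_device, |
| * which is dropped again in release. The open fails if the client has |
| * since reset (its blkdev pointer no longer refers to this device) or |
| * if a writable open is attempted on a read-only service. |
| */ |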
| static int vs_block_client_blkdev_open(struct block_device *bdev, fmode_t mode) |
| { |
| struct vs_block_device *blkdev = bdev->bd_disk->private_data; |
| struct block_client *client; |
| int err = -ENXIO; |
| |
| if (!blkdev || !kref_get_unless_zero(&blkdev->kref)) |
| goto fail_get_blkdev; |
| |
| client = blkdev->client; |
| if (WARN_ON(!client)) |
| goto fail_lock_client; |
| |
| if (!vs_state_lock_safe(&client->client)) { |
| err = -ENODEV; |
| goto fail_lock_client; |
| } |
| |
| if (blkdev != client->blkdev) { |
| /* The client has reset; this blkdev is no longer usable */ |
| err = -ENXIO; |
| goto fail_check_client; |
| } |
| |
| if ((mode & FMODE_WRITE) && client->client.readonly) { |
| dev_dbg(&client->service->dev, |
| "rejecting writable open of a read-only disk\n"); |
| err = -EROFS; |
| goto fail_check_client; |
| } |
| |
| vs_state_unlock(&client->client); |
| |
| return 0; |
| |
| fail_check_client: |
| vs_state_unlock(&client->client); |
| fail_lock_client: |
| vs_block_device_put(blkdev); |
| fail_get_blkdev: |
| return err; |
| } |
| |
| static int vs_block_client_blkdev_getgeo(struct block_device *bdev, |
| struct hd_geometry *geo) |
| { |
| /* These are sane default values for the virtual disk's geometry. */ |
| geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16); |
| geo->heads = 4; |
| geo->sectors = 16; |
| |
| return 0; |
| } |
| |
| /* |
| * Check that a bio from the Linux block layer is compatible with the |
| * service's sector size: it must be non-empty, and its size must be a |
| * whole number of sectors. |
| */ |
| static int vs_block_client_check_sector_size(struct block_client *client, |
| struct bio *bio) |
| { |
| if (unlikely(!bio_sectors(bio))) { |
| dev_err(&client->service->dev, "zero-length bio"); |
| return -EIO; |
| } |
| |
| if (unlikely(bio_size(bio) % client->client.sector_size)) { |
| dev_err(&client->service->dev, |
| "bio has %zd bytes, unexpected for sector_size of %zd bytes", |
| (size_t)bio_size(bio), |
| (size_t)client->client.sector_size); |
| return -EIO; |
| } |
| |
| return 0; |
| } |
| |
| static const struct block_device_operations block_client_ops = { |
| .getgeo = vs_block_client_blkdev_getgeo, |
| .open = vs_block_client_blkdev_open, |
| .release = vs_block_client_blkdev_release, |
| .owner = THIS_MODULE, |
| }; |
| |
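| /* |
| * Send a write request for the given bio. This sleeps (using the |
| * non-interruptible wait helpers) until the protocol state and message |
| * quota allow the request to be sent, copies each bio segment into the |
| * request's pbuf payload, and submits the request with the bio itself |
| * as the tag later returned in the server's ack or nack. |
| */ |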
| static int block_client_send_write_req(struct block_client *client, |
| struct bio *bio) |
| { |
| struct vs_client_block_state *state = &client->client; |
| struct vs_mbuf *mbuf; |
| struct vs_pbuf pbuf; |
| struct bio_vec *bvec; |
| int err; |
| bool flush, nodelay, commit; |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) |
| struct bvec_iter iter; |
| struct bio_vec bvec_local; |
| #else |
| int i; |
| #endif |
| |
| err = vs_block_client_check_sector_size(client, bio); |
| if (err < 0) |
| goto fail; |
| |
| do { |
| /* Wait until it's possible to send a write request */ |
| err = vs_wait_state_nointr(state, |
| vs_client_block_io_req_write_can_send(state)); |
| if (err == -ECANCELED) |
| err = -ENXIO; |
| if (err < 0) |
| goto fail; |
| |
| /* Wait for quota, while sending a write remains possible */ |
| mbuf = vs_wait_alloc_nointr(state, |
| vs_client_block_io_req_write_can_send(state), |
| vs_client_block_io_alloc_req_write( |
| state, &pbuf, GFP_KERNEL)); |
| err = IS_ERR(mbuf) ? PTR_ERR(mbuf) : 0; |
| |
| /* Retry if sending is no longer possible */ |
| } while (err == -ECANCELED); |
| |
| if (err < 0) |
| goto fail; |
| |
| vs_pbuf_resize(&pbuf, 0); |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) |
| bvec = &bvec_local; |
| bio_for_each_segment(bvec_local, bio, iter) |
| #else |
| bio_for_each_segment(bvec, bio, i) |
| #endif |
| { |
| unsigned long flags; |
| void *buf = bvec_kmap_irq(bvec, &flags); |
| flush_kernel_dcache_page(bvec->bv_page); |
| err = vs_pbuf_append(&pbuf, buf, bvec->bv_len); |
| bvec_kunmap_irq(buf, &flags); |
| if (err < 0) { |
| dev_err(&client->service->dev, |
| "pbuf copy failed with err %d\n", err); |
| err = -EIO; |
| goto fail_free_write; |
| } |
| } |
| |
| if (unlikely(vs_pbuf_size(&pbuf) != bio_size(bio))) { |
| dev_err(&client->service->dev, |
| "pbuf size is wrong: %zd, should be %zd\n", |
| vs_pbuf_size(&pbuf), (size_t)bio_size(bio)); |
| err = -EIO; |
| goto fail_free_write; |
| } |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) |
| flush = (bio_flags(bio) & REQ_PREFLUSH); |
| commit = (bio_flags(bio) & REQ_FUA); |
| nodelay = (bio_flags(bio) & REQ_SYNC); |
| #else |
| flush = (bio->bi_rw & REQ_FLUSH); |
| commit = (bio->bi_rw & REQ_FUA); |
| nodelay = (bio->bi_rw & REQ_SYNC); |
| #endif |
| err = vs_client_block_io_req_write(state, bio, bio_sector(bio), |
| bio_sectors(bio), nodelay, flush, commit, pbuf, mbuf); |
| |
| if (err) { |
| dev_err(&client->service->dev, |
| "write req failed with err %d\n", err); |
| goto fail_free_write; |
| } |
| |
| return 0; |
| |
| fail_free_write: |
| vs_client_block_io_free_req_write(state, &pbuf, mbuf); |
| fail: |
| return err; |
| } |
| |
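| /* |
| * Send a read request for the given bio. As with writes, this sleeps |
| * until the request can be sent within quota; the read data arrives |
| * later via the ack_read handler, which copies it back into the bio. |
| */ |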
| static int block_client_send_read_req(struct block_client *client, |
| struct bio *bio) |
| { |
| struct vs_client_block_state *state = &client->client; |
| int err; |
| bool flush, nodelay; |
| |
| err = vs_block_client_check_sector_size(client, bio); |
| if (err < 0) |
| return err; |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) |
| flush = (bio_flags(bio) & REQ_PREFLUSH); |
| nodelay = (bio_flags(bio) & REQ_SYNC); |
| #else |
| flush = (bio->bi_rw & REQ_FLUSH); |
| nodelay = (bio->bi_rw & REQ_SYNC); |
| #endif |
| do { |
| /* Wait until it's possible to send a read request */ |
| err = vs_wait_state_nointr(state, |
| vs_client_block_io_req_read_can_send(state)); |
| if (err == -ECANCELED) |
| err = -ENXIO; |
| if (err < 0) |
| break; |
| |
| /* Wait for quota, while sending a read remains possible */ |
| err = vs_wait_send_nointr(state, |
| vs_client_block_io_req_read_can_send(state), |
| vs_client_block_io_req_read(state, bio, |
| bio_sector(bio), bio_sectors(bio), |
| nodelay, flush, GFP_KERNEL)); |
| } while (err == -ECANCELED); |
| |
| return err; |
| } |
| |
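| /* |
| * make_request function for the virtual block device. bios are |
| * forwarded directly to the vservice as read or write requests rather |
| * than being queued and merged, so no elevator is involved. Failures |
| * are completed immediately with an error. On -ENOLINK (the service |
| * reset during the send) the state is not unlocked here, presumably |
| * because the reset path has already released or invalidated the lock. |
| */ |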
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) |
| static blk_qc_t |
| #else |
| static void |
| #endif |
| vs_block_client_make_request(struct request_queue *q, struct bio *bio) |
| { |
| struct block_device *bdev = bio->bi_bdev; |
| struct vs_block_device *blkdev = bdev->bd_disk->private_data; |
| struct block_client *client; |
| int err = 0; |
| |
| client = blkdev->client; |
| if (!client || !kref_get_unless_zero(&client->kref)) { |
| err = -ENODEV; |
| goto fail_get_client; |
| } |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) |
| blk_queue_split(q, &bio, q->bio_split); |
| #endif |
| |
| if (!vs_state_lock_safe(&client->client)) { |
| err = -ENODEV; |
| goto fail_lock_client; |
| } |
| |
| if (client->blkdev != blkdev) { |
| /* Client has reset; this block device is no longer usable */ |
| err = -EIO; |
| goto fail_check_client; |
| } |
| |
| if (bio_data_dir(bio) == WRITE) |
| err = block_client_send_write_req(client, bio); |
| else |
| err = block_client_send_read_req(client, bio); |
| |
| fail_check_client: |
| if (err == -ENOLINK) |
| err = -EIO; |
| else |
| vs_state_unlock(&client->client); |
| fail_lock_client: |
| vs_block_client_put(client); |
| fail_get_client: |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) |
| if (err < 0) { |
| bio->bi_error = err; |
| bio_endio(bio); |
| } |
| #else |
| if (err < 0) |
| bio_endio(bio, err); |
| #endif |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) |
| return BLK_QC_T_NONE; |
| #endif |
| } |
| |
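| /* |
| * Allocate a unique identifier for a new virtual block device using the |
| * old-style two-step IDA API (ida_pre_get + ida_get_new), retrying |
| * while the IDA needs more memory. |
| */ |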
| static int vs_block_client_get_blkdev_id(struct block_client *client) |
| { |
| int id; |
| int ret; |
| |
| retry: |
| ret = ida_pre_get(&vs_block_ida, GFP_KERNEL); |
| if (ret == 0) |
| return -ENOMEM; |
| |
| mutex_lock(&vs_block_ida_lock); |
| ret = ida_get_new(&vs_block_ida, &id); |
| mutex_unlock(&vs_block_ida_lock); |
| |
| if (ret == -EAGAIN) |
| goto retry; |
| if (ret < 0) |
| return ret; |
| |
| return id; |
| } |
| |
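| /* |
| * Create and register the Linux block device for a newly opened |
| * service: allocate an identifier, request queue and gendisk, size the |
| * queue limits from the service's sector and segment sizes, then |
| * publish the disk. Called from the disk creation work item, since |
| * these allocations need to sleep. |
| */ |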
| static int vs_block_client_disk_add(struct block_client *client) |
| { |
| struct vs_block_device *blkdev; |
| unsigned int max_hw_sectors; |
| int err; |
| |
| dev_dbg(&client->service->dev, "device add\n"); |
| |
| blkdev = kzalloc(sizeof(*blkdev), GFP_KERNEL); |
| if (!blkdev) { |
| err = -ENOMEM; |
| goto fail; |
| } |
| |
| kref_init(&blkdev->kref); |
| blkdev->id = vs_block_client_get_blkdev_id(client); |
| if (blkdev->id < 0) { |
| err = blkdev->id; |
| goto fail_free_blkdev; |
| } |
| |
| if ((blkdev->id * PERDEV_MINORS) >> MINORBITS) { |
| err = -ENODEV; |
| goto fail_remove_ida; |
| } |
| |
| blkdev->queue = blk_alloc_queue(GFP_KERNEL); |
| if (!blkdev->queue) { |
| dev_err(&client->service->dev, |
| "Error initializing blk queue\n"); |
| err = -ENOMEM; |
| goto fail_remove_ida; |
| } |
| |
| blk_queue_make_request(blkdev->queue, vs_block_client_make_request); |
| blk_queue_bounce_limit(blkdev->queue, BLK_BOUNCE_ANY); |
| blk_queue_dma_alignment(blkdev->queue, 0); |
| |
| /* |
| * Mark this as a paravirtualised device. QUEUE_FLAG_VIRT is just an |
| * alias of QUEUE_FLAG_NONROT, which stops the I/O schedulers from |
| * trying to optimise for rotational (seek) latency. |
| */ |
| queue_flag_set_unlocked(QUEUE_FLAG_VIRT, blkdev->queue); |
| |
| blkdev->queue->queuedata = blkdev; |
| |
| blkdev->client = client; |
| kref_get(&client->kref); |
| |
| max_hw_sectors = min_t(sector_t, BLK_DEF_MAX_SECTORS, |
| client->client.segment_size / |
| client->client.sector_size); |
| blk_queue_max_hw_sectors(blkdev->queue, max_hw_sectors); |
| blk_queue_logical_block_size(blkdev->queue, |
| client->client.sector_size); |
| blk_queue_physical_block_size(blkdev->queue, |
| client->client.sector_size); |
| |
| blkdev->disk = alloc_disk(PERDEV_MINORS); |
| if (!blkdev->disk) { |
| dev_err(&client->service->dev, "Error allocating disk\n"); |
| err = -ENOMEM; |
| goto fail_free_blk_queue; |
| } |
| |
| if (client->client.readonly) { |
| dev_dbg(&client->service->dev, "set device as readonly\n"); |
| set_disk_ro(blkdev->disk, true); |
| } |
| |
| blkdev->disk->major = block_client_major; |
| blkdev->disk->first_minor = blkdev->id * PERDEV_MINORS; |
| blkdev->disk->fops = &block_client_ops; |
| #if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0) |
| blkdev->disk->driverfs_dev = &client->service->dev; |
| #endif |
| blkdev->disk->private_data = blkdev; |
| blkdev->disk->queue = blkdev->queue; |
| blkdev->disk->flags |= GENHD_FL_EXT_DEVT; |
| |
| /* |
| * The block device name is vblock<x>, where x is a unique |
| * identifier. Userspace should rename or symlink the device |
| * appropriately, typically by processing the add uevent. |
| * |
| * If a virtual block device is reset then it may re-open with a |
| * different identifier if something still holds a reference to |
| * the old device (such as a userspace application having an open |
| * file handle). |
| */ |
| snprintf(blkdev->disk->disk_name, sizeof(blkdev->disk->disk_name), |
| "%s%d", CLIENT_BLKDEV_NAME, blkdev->id); |
| set_capacity(blkdev->disk, client->client.device_sectors * |
| (client->client.sector_size >> 9)); |
| |
| /* |
| * We need to hold a reference on blkdev across add_disk(), to make |
| * sure a concurrent reset does not immediately release the blkdev |
| * and call del_gendisk(). |
| */ |
| kref_get(&blkdev->kref); |
| |
| vs_service_state_lock(client->service); |
| if (!VSERVICE_BASE_STATE_IS_RUNNING(client->client.state.base)) { |
| vs_service_state_unlock(client->service); |
| err = -ENXIO; |
| goto fail_free_blk_queue; |
| } |
| client->blkdev = blkdev; |
| vs_service_state_unlock(client->service); |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,7,0) |
| device_add_disk(&client->service->dev, blkdev->disk); |
| #else |
| add_disk(blkdev->disk); |
| #endif |
| dev_dbg(&client->service->dev, "added block disk '%s'\n", |
| blkdev->disk->disk_name); |
| |
| /* Release the reference taken above. */ |
| vs_block_device_put(blkdev); |
| |
| return 0; |
| |
| fail_free_blk_queue: |
| blk_cleanup_queue(blkdev->queue); |
| fail_remove_ida: |
| mutex_lock(&vs_block_ida_lock); |
| ida_remove(&vs_block_ida, blkdev->id); |
| mutex_unlock(&vs_block_ida_lock); |
| fail_free_blkdev: |
| kfree(blkdev); |
| fail: |
| return err; |
| } |
| |
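| /* |
| * Deferred disk creation. A disk is only added if the service is still |
| * running and no block device currently exists for this client. |
| */ |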
| static void vs_block_client_disk_creation_work(struct work_struct *work) |
| { |
| struct block_client *client = container_of(work, |
| struct block_client, disk_creation_work); |
| struct vs_block_device *blkdev; |
| bool running; |
| |
| vs_service_state_lock(client->service); |
| blkdev = client->blkdev; |
| running = VSERVICE_BASE_STATE_IS_RUNNING(client->client.state.base); |
| |
| dev_dbg(&client->service->dev, |
| "disk changed: blkdev = %pK, running = %d\n", |
| client->blkdev, running); |
| if (!blkdev && running) { |
| dev_dbg(&client->service->dev, "adding block disk\n"); |
| vs_service_state_unlock(client->service); |
| vs_block_client_disk_add(client); |
| } else { |
| vs_service_state_unlock(client->service); |
| } |
| } |
| |
| static void vs_block_client_rx_tasklet(unsigned long data); |
| |
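| /* |
| * Allocate and initialise the per-service client state. The RX bounce |
| * tasklet starts disabled and is only enabled once the service is |
| * opened. |
| */ |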
| static struct vs_client_block_state * |
| vs_block_client_alloc(struct vs_service_device *service) |
| { |
| struct block_client *client; |
| |
| client = kzalloc(sizeof(*client), GFP_KERNEL); |
| if (!client) { |
| dev_err(&service->dev, "Error allocating client struct\n"); |
| return NULL; |
| } |
| |
| vs_get_service(service); |
| client->service = service; |
| |
| INIT_LIST_HEAD(&client->rx_queue); |
| spin_lock_init(&client->rx_queue_lock); |
| tasklet_init(&client->rx_tasklet, vs_block_client_rx_tasklet, |
| (unsigned long)client); |
| tasklet_disable(&client->rx_tasklet); |
| |
| INIT_WORK(&client->disk_creation_work, |
| vs_block_client_disk_creation_work); |
| kref_init(&client->kref); |
| |
| dev_dbg(&service->dev, "New block client %pK\n", client); |
| |
| return &client->client; |
| } |
| |
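| /* |
| * Release callback: make sure any pending disk creation work has |
| * finished before dropping the reference taken when the client was |
| * allocated. |
| */ |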
| static void vs_block_client_release(struct vs_client_block_state *state) |
| { |
| struct block_client *client = state_to_block_client(state); |
| |
| flush_work(&client->disk_creation_work); |
| |
| vs_block_client_put(client); |
| } |
| |
| /* FIXME: Jira ticket SDK-2459 - anjaniv */ |
| static void vs_block_client_closed(struct vs_client_block_state *state) |
| { |
| struct block_client *client = state_to_block_client(state); |
| |
| /* |
| * Stop the RX bounce tasklet and clean up its queue. It is safe to |
| * wait for it to stop because it never takes the state lock; it only |
| * takes the RX queue lock, which we acquire after the tasklet has |
| * been disabled. |
| */ |
| tasklet_disable(&client->rx_tasklet); |
| spin_lock(&client->rx_queue_lock); |
| while (!list_empty(&client->rx_queue)) { |
| struct vs_mbuf *mbuf = list_first_entry(&client->rx_queue, |
| struct vs_mbuf, queue); |
| struct vs_pbuf pbuf; |
| list_del(&mbuf->queue); |
| vs_client_block_io_getbufs_ack_read(state, &pbuf, mbuf); |
| vs_client_block_io_free_ack_read(state, &pbuf, mbuf); |
| } |
| spin_unlock(&client->rx_queue_lock); |
| |
| if (client->blkdev) { |
| struct vs_block_device *blkdev = client->blkdev; |
| char service_remove[] = "REMOVING_SERVICE=1"; |
| /* + 9 because "DEVNAME=" is 8 chars plus 1 for '\0' */ |
| char devname[sizeof(blkdev->disk->disk_name) + 9]; |
| char *envp[] = { service_remove, devname, NULL }; |
| |
| dev_dbg(&client->service->dev, "removing block disk\n"); |
| |
| /* |
| * Send a change event with DEVNAME to allow the block helper |
| * script to remove any server sessions which use either |
| * v${SERVICE_NAME} or ${DEVNAME}. The remove event generated |
| * by the session driver doesn't include DEVNAME so the only |
| * way for userspace to map SERVICE_NAME to DEVNAME is by the |
| * symlink added when the client service was created. If that |
| * symlink has been deleted, there's no other way to connect |
| * the two names. |
| */ |
| snprintf(devname, sizeof(devname), "DEVNAME=%s", |
| blkdev->disk->disk_name); |
| kobject_uevent_env(&client->service->dev.kobj, KOBJ_CHANGE, |
| envp); |
| |
| /* |
| * We are done with the device now. The block device will only |
| * get removed once there are no more users (e.g. userspace |
| * applications). |
| */ |
| client->blkdev = NULL; |
| vs_block_device_put(blkdev); |
| } |
| } |
| |
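| /* |
| * The service has been opened: re-enable the RX bounce tasklet and |
| * defer block device creation to the service work queue, since it |
| * needs to sleep. On 32-bit kernels without large block device |
| * support, refuse devices whose capacity cannot fit in a sector_t. |
| */ |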
| static void vs_block_client_opened(struct vs_client_block_state *state) |
| { |
| struct block_client *client = state_to_block_client(state); |
| |
| #if !defined(CONFIG_LBDAF) && !defined(CONFIG_64BIT) |
| if ((state->device_sectors * (state->sector_size >> 9)) |
| >> (sizeof(sector_t) * 8)) { |
| dev_err(&client->service->dev, |
| "Client doesn't support full capacity large block devices\n"); |
| vs_client_block_close(state); |
| return; |
| } |
| #endif |
| |
| /* Unblock the RX bounce tasklet. */ |
| tasklet_enable(&client->rx_tasklet); |
| |
| /* |
| * The block device allocation needs to sleep, so we defer it to a |
| * work queue. |
| */ |
| queue_work(client->service->work_queue, &client->disk_creation_work); |
| } |
| |
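| /* |
| * Complete a read: copy the response payload out of the pbuf into the |
| * bio's segments, free the ack message, and end the bio. Called from |
| * the RX bounce tasklet rather than directly from the protocol handler. |
| */ |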
| static int vs_block_client_ack_read(struct vs_client_block_state *state, |
| void *tag, struct vs_pbuf pbuf, struct vs_mbuf *mbuf) |
| { |
| struct block_client *client = state_to_block_client(state); |
| struct bio *bio = tag; |
| struct bio_vec *bvec; |
| int err = 0; |
| size_t bytes_read = 0; |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) |
| struct bio_vec bvec_local; |
| struct bvec_iter iter; |
| #else |
| int i; |
| #endif |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) |
| bvec = &bvec_local; |
| bio_for_each_segment(bvec_local, bio, iter) |
| #else |
| bio_for_each_segment(bvec, bio, i) |
| #endif |
| { |
| unsigned long flags; |
| void *buf; |
| if (vs_pbuf_size(&pbuf) < bytes_read + bvec->bv_len) { |
| dev_err(&client->service->dev, |
| "bio read overrun: %zu into %zu byte response, but need %zd bytes\n", |
| bytes_read, vs_pbuf_size(&pbuf), |
| (size_t)bvec->bv_len); |
| err = -EIO; |
| break; |
| } |
| buf = bvec_kmap_irq(bvec, &flags); |
| memcpy(buf, vs_pbuf_data(&pbuf) + bytes_read, bvec->bv_len); |
| flush_kernel_dcache_page(bvec->bv_page); |
| bvec_kunmap_irq(buf, &flags); |
| bytes_read += bvec->bv_len; |
| } |
| |
| vs_client_block_io_free_ack_read(state, &pbuf, mbuf); |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) |
| if (err < 0) |
| bio->bi_error = err; |
| bio_endio(bio); |
| #else |
| bio_endio(bio, err); |
| #endif |
| |
| return 0; |
| } |
| |
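| /* |
| * RX bounce tasklet: dequeue one read ack per run, rescheduling itself |
| * if more are pending, and process it outside the protocol RX handler. |
| */ |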
| static void vs_block_client_rx_tasklet(unsigned long data) |
| { |
| struct block_client *client = (struct block_client *)data; |
| struct vs_mbuf *mbuf; |
| struct vs_pbuf pbuf; |
| |
| spin_lock(&client->rx_queue_lock); |
| |
| /* The list shouldn't be empty. */ |
| if (WARN_ON(list_empty(&client->rx_queue))) { |
| spin_unlock(&client->rx_queue_lock); |
| return; |
| } |
| |
| /* Get the next mbuf, and reschedule ourselves if there are more. */ |
| mbuf = list_first_entry(&client->rx_queue, struct vs_mbuf, queue); |
| list_del(&mbuf->queue); |
| if (!list_empty(&client->rx_queue)) |
| tasklet_schedule(&client->rx_tasklet); |
| |
| spin_unlock(&client->rx_queue_lock); |
| |
| /* Process the ack. */ |
| vs_client_block_io_getbufs_ack_read(&client->client, &pbuf, mbuf); |
| vs_block_client_ack_read(&client->client, mbuf->priv, pbuf, mbuf); |
| } |
| |
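| /* |
| * ack_read protocol handler. With rx_atomic set, this presumably runs |
| * in atomic context, so the ack is queued for the bounce tasklet |
| * instead of copying the payload here. The bio tag is stashed in the |
| * mbuf's priv field for the tasklet to retrieve. |
| */ |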
| static int vs_block_client_queue_ack_read(struct vs_client_block_state *state, |
| void *tag, struct vs_pbuf pbuf, struct vs_mbuf *mbuf) |
| { |
| struct block_client *client = state_to_block_client(state); |
| |
| spin_lock(&client->rx_queue_lock); |
| list_add_tail(&mbuf->queue, &client->rx_queue); |
| mbuf->priv = tag; |
| spin_unlock(&client->rx_queue_lock); |
| |
| tasklet_schedule(&client->rx_tasklet); |
| |
| wake_up(&state->service->quota_wq); |
| |
| return 0; |
| } |
| |
| static int vs_block_client_ack_write(struct vs_client_block_state *state, |
| void *tag) |
| { |
| struct bio *bio = tag; |
| |
| if (WARN_ON(!bio)) |
| return -EPROTO; |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) |
| bio_endio(bio); |
| #else |
| bio_endio(bio, 0); |
| #endif |
| |
| wake_up(&state->service->quota_wq); |
| |
| return 0; |
| } |
| |
| static int vs_block_client_nack_io(struct vs_client_block_state *state, |
| void *tag, vservice_block_block_io_error_t err) |
| { |
| struct bio *bio = tag; |
| |
| if (WARN_ON(!bio)) |
| return -EPROTO; |
| |
| #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) |
| bio->bi_error = block_client_vs_to_linux_error(err); |
| bio_endio(bio); |
| #else |
| bio_endio(bio, block_client_vs_to_linux_error(err)); |
| #endif |
| |
| wake_up(&state->service->quota_wq); |
| |
| return 0; |
| } |
| |
| static struct vs_client_block block_client_driver = { |
| .rx_atomic = true, |
| .alloc = vs_block_client_alloc, |
| .release = vs_block_client_release, |
| .opened = vs_block_client_opened, |
| .closed = vs_block_client_closed, |
| .io = { |
| .ack_read = vs_block_client_queue_ack_read, |
| .nack_read = vs_block_client_nack_io, |
| .ack_write = vs_block_client_ack_write, |
| .nack_write = vs_block_client_nack_io, |
| } |
| }; |
| |
| static int __init vs_block_client_init(void) |
| { |
| int err; |
| |
| block_client_major = register_blkdev(0, CLIENT_BLKDEV_NAME); |
| if (block_client_major < 0) { |
| pr_err("Err registering blkdev\n"); |
| err = -ENOMEM; |
| goto fail; |
| } |
| |
| err = vservice_block_client_register(&block_client_driver, |
| "block_client_driver"); |
| if (err) |
| goto fail_unregister_blkdev; |
| |
| return 0; |
| |
| fail_unregister_blkdev: |
| unregister_blkdev(block_client_major, CLIENT_BLKDEV_NAME); |
| fail: |
| return err; |
| } |
| |
| static void __exit vs_block_client_exit(void) |
| { |
| vservice_block_client_unregister(&block_client_driver); |
| unregister_blkdev(block_client_major, CLIENT_BLKDEV_NAME); |
| } |
| |
| module_init(vs_block_client_init); |
| module_exit(vs_block_client_exit); |
| |
| MODULE_DESCRIPTION("OKL4 Virtual Services Block Client Driver"); |
| MODULE_AUTHOR("Open Kernel Labs, Inc"); |
| MODULE_LICENSE("GPL v2"); |