NVMe: IOCTL path RCU protect queue access
This adds rcu protected access to a queue in the nvme IOCTL path
to fix potential races between a surprise removal and queue usage in
nvme_submit_sync_cmd. The fix holds the rcu_read_lock() here to prevent
the nvme_queue from freeing while this path is executing so it can't
sleep, and so this path will no longer wait for a available command
id should they all be in use at the time a passthrough IOCTL request
is received.
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 4a0ceb6..e157e85 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -2033,7 +2033,6 @@
int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- struct nvme_queue *nvmeq;
u32 num_cmds;
struct nvme_iod *iod;
u64 unit_len;
@@ -2106,18 +2105,7 @@
nvme_offset += unit_num_blocks;
- nvmeq = get_nvmeq(dev);
- /*
- * Since nvme_submit_sync_cmd sleeps, we can't keep
- * preemption disabled. We may be preempted at any
- * point, and be rescheduled to a different CPU. That
- * will cause cacheline bouncing, but no additional
- * races since q_lock already protects against other
- * CPUs.
- */
- put_nvmeq(nvmeq);
- nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL,
- NVME_IO_TIMEOUT);
+ nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
if (nvme_sc != NVME_SC_SUCCESS) {
nvme_unmap_user_pages(dev,
(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
@@ -2644,7 +2632,6 @@
{
int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc;
- struct nvme_queue *nvmeq;
struct nvme_command c;
u8 immed, pcmod, pc, no_flush, start;
@@ -2671,10 +2658,7 @@
c.common.opcode = nvme_cmd_flush;
c.common.nsid = cpu_to_le32(ns->ns_id);
- nvmeq = get_nvmeq(ns->dev);
- put_nvmeq(nvmeq);
- nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
-
+ nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
goto out;
@@ -2697,15 +2681,12 @@
int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc;
struct nvme_command c;
- struct nvme_queue *nvmeq;
memset(&c, 0, sizeof(c));
c.common.opcode = nvme_cmd_flush;
c.common.nsid = cpu_to_le32(ns->ns_id);
- nvmeq = get_nvmeq(ns->dev);
- put_nvmeq(nvmeq);
- nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
+ nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
@@ -2872,7 +2853,6 @@
struct nvme_dev *dev = ns->dev;
struct scsi_unmap_parm_list *plist;
struct nvme_dsm_range *range;
- struct nvme_queue *nvmeq;
struct nvme_command c;
int i, nvme_sc, res = -ENOMEM;
u16 ndesc, list_len;
@@ -2914,10 +2894,7 @@
c.dsm.nr = cpu_to_le32(ndesc - 1);
c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
- nvmeq = get_nvmeq(dev);
- put_nvmeq(nvmeq);
-
- nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
+ nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
res = nvme_trans_status_code(hdr, nvme_sc);
dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),