nvme-pci: fix NULL pointer reference in nvme_alloc_ns
When the io queues setup or tagset allocation failed, ctrl.tagset is
NULL. But the scan work will still be queued and executed, then panic
comes up due to NULL pointer reference of ctrl.tagset.
To fix this, add a new ctrl state NVME_CTRL_ADMIN_ONLY to inidcate only
admin queue is live. When non io queues or tagset allocation failed, ctrl
enters into this state, scan work will not be started. But async event
work and nvme dev ioctl will be still available. This will be helpful to
do further investigation and recovery.
Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2a69d73..609307c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -232,6 +232,15 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
old_state = ctrl->state;
switch (new_state) {
+ case NVME_CTRL_ADMIN_ONLY:
+ switch (old_state) {
+ case NVME_CTRL_RESETTING:
+ changed = true;
+ /* FALLTHRU */
+ default:
+ break;
+ }
+ break;
case NVME_CTRL_LIVE:
switch (old_state) {
case NVME_CTRL_NEW:
@@ -247,6 +256,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
+ case NVME_CTRL_ADMIN_ONLY:
changed = true;
/* FALLTHRU */
default:
@@ -266,6 +276,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_DELETING:
switch (old_state) {
case NVME_CTRL_LIVE:
+ case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
case NVME_CTRL_RECONNECTING:
changed = true;
@@ -2336,8 +2347,14 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
struct nvme_ctrl *ctrl =
container_of(inode->i_cdev, struct nvme_ctrl, cdev);
- if (ctrl->state != NVME_CTRL_LIVE)
+ switch (ctrl->state) {
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_ADMIN_ONLY:
+ break;
+ default:
return -EWOULDBLOCK;
+ }
+
file->private_data = ctrl;
return 0;
}
@@ -2601,6 +2618,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
+ [NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_RECONNECTING]= "reconnecting",
[NVME_CTRL_DELETING] = "deleting",
@@ -3073,6 +3091,8 @@ static void nvme_scan_work(struct work_struct *work)
if (ctrl->state != NVME_CTRL_LIVE)
return;
+ WARN_ON_ONCE(!ctrl->tagset);
+
if (nvme_identify_ctrl(ctrl, &id))
return;
@@ -3093,8 +3113,7 @@ static void nvme_scan_work(struct work_struct *work)
void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
/*
- * Do not queue new scan work when a controller is reset during
- * removal.
+ * Only new queue scan work when admin and IO queues are both alive
*/
if (ctrl->state == NVME_CTRL_LIVE)
queue_work(nvme_wq, &ctrl->scan_work);