NVMe: Fix blk-mq hot cpu notification

The driver may issue commands that the device may never complete, so its
request_queue could always have active requests while the controller is
running. Waiting for the queue to freeze could block forever, which is
what blk-mq's hot cpu notification handler was doing when nvme drives
were in use.

This has the nvme driver make the asynchronous event command's tag
reserved and does not keep the request active. We can't have more than
one since the request is released back to the request_queue before the
command is completed. Having only one avoids potential tag collisions,
and reserving the tag for this purpose prevents other admin tasks from
reusing the tag.

I also couldn't think of a scenario where issuing AEN requests at a
depth of one is worse than issuing them in batches, so I don't think we
lose anything with this change.

As an added bonus, doing it this way removes "Cancelling I/O" warnings
observed when unbinding the nvme driver from a device.

Reported-by: Yigal Korman <yigal@plexistor.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>

commit: 1efccc9ddb98fd533169669160201b027562af7e [log] [tgz]
author: Keith Busch <keith.busch@intel.com> Tue Mar 31 10:37:17 2015 -0600
committer: Jens Axboe <axboe@fb.com> Tue Mar 31 10:39:56 2015 -0600
tree: 6d7680fe16e50bb0fdd72e41d312accd80634fc7
parent: fda631ffe5422424579e1649e04cc468d0215b85 [diff] [blame]
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 7ed61812..c12c95c 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c

@@ -302,8 +302,6 @@
 static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
-	struct request *req = ctx;
-
 	u32 result = le32_to_cpup(&cqe->result);
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
 
@@ -312,8 +310,6 @@
 	if (status == NVME_SC_SUCCESS)
 		dev_warn(nvmeq->q_dmadev,
 			"async event result %08x\n", result);
-
-	blk_mq_free_hctx_request(nvmeq->hctx, req);
 }
 
 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -1027,18 +1023,19 @@
 	struct nvme_cmd_info *cmd_info;
 	struct request *req;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, false);
+	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	req->cmd_flags |= REQ_NO_TIMEOUT;
 	cmd_info = blk_mq_rq_to_pdu(req);
-	nvme_set_info(cmd_info, req, async_req_completion);
+	nvme_set_info(cmd_info, NULL, async_req_completion);
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_async_event;
 	c.common.command_id = req->tag;
 
+	blk_mq_free_hctx_request(nvmeq->hctx, req);
 	return __nvme_submit_cmd(nvmeq, &c);
 }
 
@@ -1583,6 +1580,7 @@
 		dev->admin_tagset.ops = &nvme_mq_admin_ops;
 		dev->admin_tagset.nr_hw_queues = 1;
 		dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
+		dev->admin_tagset.reserved_tags = 1;
 		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
 		dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
 		dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
@@ -2334,7 +2332,6 @@
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
 	dev->vwc = ctrl->vwc;
-	dev->event_limit = min(ctrl->aerl + 1, 8);
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@ -2881,6 +2878,7 @@
 
 	nvme_set_irq_hints(dev);
 
+	dev->event_limit = 1;
 	return result;
 
  free_tags:
commit	1efccc9ddb98fd533169669160201b027562af7e	[log] [tgz]
author	Keith Busch <keith.busch@intel.com>	Tue Mar 31 10:37:17 2015 -0600
committer	Jens Axboe <axboe@fb.com>	Tue Mar 31 10:39:56 2015 -0600
tree	6d7680fe16e50bb0fdd72e41d312accd80634fc7
parent	fda631ffe5422424579e1649e04cc468d0215b85 [diff] [blame]