nvme: split a new struct nvme_ctrl out of struct nvme_dev

The new struct nvme_ctrl will be used by the common NVMe code that sits
on top of struct request_queue and the new nvme_ctrl_ops abstraction.
It only contains the bare minimum required, which at the moment
consists of the values sampled during controller probe, the admin queue
pointer and a second struct device pointer, but more will follow later.
Only values that are not used in the I/O fast path should be moved to
struct nvme_ctrl so that drivers can easily optimize their cache line
usage.  That's also the reason why we have two device pointers: the
struct device is used for DMA mapping purposes.
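
To illustrate the intended use, here is a minimal sketch (not part of
the patch; the foo_* names are hypothetical) of how a transport driver
embeds struct nvme_ctrl in its own per-controller structure, recovers
that structure via container_of(), and provides an nvme_ctrl_ops table
for register access, mirroring what the PCIe driver does below:

struct foo_ctrl {
	void __iomem *bar;		/* transport-private fast-path state */
	struct nvme_ctrl ctrl;		/* state shared with the common code */
};

static inline struct foo_ctrl *to_foo_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct foo_ctrl, ctrl);
}

/* assumes <linux/io.h> for readl() and the new nvme.h declarations */
static int foo_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
	*val = readl(to_foo_ctrl(ctrl)->bar + off);
	return 0;
}

static const struct nvme_ctrl_ops foo_ctrl_ops = {
	.reg_read32	= foo_reg_read32,
};

The common code only ever sees the embedded struct nvme_ctrl and calls
back through the ops table, e.g. nvme_ctrl_ready() reads CSTS that way.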

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ce938a4..ca54a34 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -79,7 +79,7 @@
 	return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
 }
 
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
 	struct nvme_command c = { };
 	int error;
@@ -99,7 +99,7 @@
 	return error;
 }
 
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 		struct nvme_id_ns **id)
 {
 	struct nvme_command c = { };
@@ -120,7 +120,7 @@
 	return error;
 }
 
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 					dma_addr_t dma_addr, u32 *result)
 {
 	struct nvme_command c;
@@ -135,7 +135,7 @@
 			result, 0);
 }
 
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 					dma_addr_t dma_addr, u32 *result)
 {
 	struct nvme_command c;
@@ -150,7 +150,7 @@
 			result, 0);
 }
 
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
 {
 	struct nvme_command c = { };
 	int error;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 66550b7..19583e1 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -30,46 +30,16 @@
 	NVME_NS_LIGHTNVM	= 1,
 };
 
-/*
- * Represents an NVM Express device.  Each nvme_dev is a PCI function.
- */
-struct nvme_dev {
-	struct list_head node;
-	struct nvme_queue **queues;
+struct nvme_ctrl {
+	const struct nvme_ctrl_ops *ops;
 	struct request_queue *admin_q;
-	struct blk_mq_tag_set tagset;
-	struct blk_mq_tag_set admin_tagset;
-	u32 __iomem *dbs;
 	struct device *dev;
-	struct dma_pool *prp_page_pool;
-	struct dma_pool *prp_small_pool;
 	int instance;
-	unsigned queue_count;
-	unsigned online_queues;
-	unsigned max_qid;
-	int q_depth;
-	u32 db_stride;
-	u32 ctrl_config;
-	struct msix_entry *entry;
-	void __iomem *bar;
-	struct list_head namespaces;
-	struct kref kref;
-	struct device *device;
-	struct work_struct reset_work;
-	struct work_struct probe_work;
-	struct work_struct scan_work;
+
 	char name[12];
 	char serial[20];
 	char model[40];
 	char firmware_rev[8];
-	bool subsystem;
-	u32 max_hw_sectors;
-	u32 stripe_size;
-	u32 page_size;
-	void __iomem *cmb;
-	dma_addr_t cmb_dma_addr;
-	u64 cmb_size;
-	u32 cmbsz;
 	u16 oncs;
 	u16 abort_limit;
 	u8 event_limit;
@@ -82,7 +52,7 @@
 struct nvme_ns {
 	struct list_head list;
 
-	struct nvme_dev *dev;
+	struct nvme_ctrl *ctrl;
 	struct request_queue *queue;
 	struct gendisk *disk;
 	struct kref kref;
@@ -97,6 +67,19 @@
 	u32 mode_select_block_len;
 };
 
+struct nvme_ctrl_ops {
+	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
+};
+
+static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
+{
+	u32 val = 0;
+
+	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
+		return false;
+	return val & NVME_CSTS_RDY;
+}
+
 static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 {
 	return (sector >> (ns->lba_shift - 9));
@@ -107,13 +90,13 @@
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buffer, void __user *ubuffer, unsigned bufflen,
 		u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 		struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 			dma_addr_t dma_addr, u32 *result);
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 			dma_addr_t dma_addr, u32 *result);
 
 struct sg_io_hdr;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bfea7ec..8a564f4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -87,6 +87,9 @@
 
 static struct class *nvme_class;
 
+struct nvme_dev;
+struct nvme_queue;
+
 static int __nvme_reset(struct nvme_dev *dev);
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
@@ -102,6 +105,49 @@
 };
 
 /*
+ * Represents an NVM Express device.  Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+	struct list_head node;
+	struct nvme_queue **queues;
+	struct blk_mq_tag_set tagset;
+	struct blk_mq_tag_set admin_tagset;
+	u32 __iomem *dbs;
+	struct device *dev;
+	struct dma_pool *prp_page_pool;
+	struct dma_pool *prp_small_pool;
+	unsigned queue_count;
+	unsigned online_queues;
+	unsigned max_qid;
+	int q_depth;
+	u32 db_stride;
+	u32 ctrl_config;
+	struct msix_entry *entry;
+	void __iomem *bar;
+	struct list_head namespaces;
+	struct kref kref;
+	struct device *device;
+	struct work_struct reset_work;
+	struct work_struct probe_work;
+	struct work_struct scan_work;
+	bool subsystem;
+	u32 max_hw_sectors;
+	u32 stripe_size;
+	u32 page_size;
+	void __iomem *cmb;
+	dma_addr_t cmb_dma_addr;
+	u64 cmb_size;
+	u32 cmbsz;
+
+	struct nvme_ctrl ctrl;
+};
+
+static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
+{
+	return container_of(ctrl, struct nvme_dev, ctrl);
+}
+
+/*
  * An NVM Express queue.  Each device has at least two (one for admin
  * commands and one for I/O commands).
  */
@@ -333,7 +379,7 @@
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
 
 	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
-		++nvmeq->dev->event_limit;
+		++nvmeq->dev->ctrl.event_limit;
 	if (status != NVME_SC_SUCCESS)
 		return;
 
@@ -357,7 +403,7 @@
 	blk_mq_free_request(req);
 
 	dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
-	++nvmeq->dev->abort_limit;
+	++nvmeq->dev->ctrl.abort_limit;
 }
 
 static void async_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -1051,7 +1097,7 @@
 	struct nvme_cmd_info *cmd_info;
 	struct request *req;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE,
+	req = blk_mq_alloc_request(dev->ctrl.admin_q, WRITE,
 			BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -1077,7 +1123,7 @@
 	struct request *req;
 	struct nvme_cmd_info *cmd_rq;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, 0);
+	req = blk_mq_alloc_request(dev->ctrl.admin_q, WRITE, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1101,7 +1147,7 @@
 	c.delete_queue.opcode = opcode;
 	c.delete_queue.qid = cpu_to_le16(id);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1122,7 +1168,7 @@
 	c.create_cq.cq_flags = cpu_to_le16(flags);
 	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1143,7 +1189,7 @@
 	c.create_sq.sq_flags = cpu_to_le16(flags);
 	c.create_sq.cqid = cpu_to_le16(qid);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
 
 static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1182,10 +1228,10 @@
 		return;
 	}
 
-	if (!dev->abort_limit)
+	if (!dev->ctrl.abort_limit)
 		return;
 
-	abort_req = blk_mq_alloc_request(dev->admin_q, WRITE,
+	abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, WRITE,
 			BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(abort_req))
 		return;
@@ -1199,7 +1245,7 @@
 	cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
 	cmd.abort.command_id = abort_req->tag;
 
-	--dev->abort_limit;
+	--dev->ctrl.abort_limit;
 	cmd_rq->aborted = 1;
 
 	dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag,
@@ -1294,8 +1340,8 @@
 	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);
 
-	if (!nvmeq->qid && nvmeq->dev->admin_q)
-		blk_mq_freeze_queue_start(nvmeq->dev->admin_q);
+	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
+		blk_mq_freeze_queue_start(nvmeq->dev->ctrl.admin_q);
 
 	irq_set_affinity_hint(vector, NULL);
 	free_irq(vector, nvmeq);
@@ -1391,7 +1437,7 @@
 	nvmeq->q_dmadev = dev->dev;
 	nvmeq->dev = dev;
 	snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
-			dev->instance, qid);
+			dev->ctrl.instance, qid);
 	spin_lock_init(&nvmeq->q_lock);
 	nvmeq->cq_head = 0;
 	nvmeq->cq_phase = 1;
@@ -1559,15 +1605,15 @@
 
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
 {
-	if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
-		blk_cleanup_queue(dev->admin_q);
+	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
+		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
 }
 
 static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 {
-	if (!dev->admin_q) {
+	if (!dev->ctrl.admin_q) {
 		dev->admin_tagset.ops = &nvme_mq_admin_ops;
 		dev->admin_tagset.nr_hw_queues = 1;
 		dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
@@ -1580,18 +1626,18 @@
 		if (blk_mq_alloc_tag_set(&dev->admin_tagset))
 			return -ENOMEM;
 
-		dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
-		if (IS_ERR(dev->admin_q)) {
+		dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
+		if (IS_ERR(dev->ctrl.admin_q)) {
 			blk_mq_free_tag_set(&dev->admin_tagset);
 			return -ENOMEM;
 		}
-		if (!blk_get_queue(dev->admin_q)) {
+		if (!blk_get_queue(dev->ctrl.admin_q)) {
 			nvme_dev_remove_admin(dev);
-			dev->admin_q = NULL;
+			dev->ctrl.admin_q = NULL;
 			return -ENODEV;
 		}
 	} else
-		blk_mq_unfreeze_queue(dev->admin_q);
+		blk_mq_unfreeze_queue(dev->ctrl.admin_q);
 
 	return 0;
 }
@@ -1670,7 +1716,7 @@
 
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
 	struct nvme_user_io io;
 	struct nvme_command c;
 	unsigned length, meta_len;
@@ -1745,7 +1791,7 @@
 	return status;
 }
 
-static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
+static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 			struct nvme_passthru_cmd __user *ucmd)
 {
 	struct nvme_passthru_cmd cmd;
@@ -1774,7 +1820,7 @@
 	if (cmd.timeout_ms)
 		timeout = msecs_to_jiffies(cmd.timeout_ms);
 
-	status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
+	status = __nvme_submit_sync_cmd(ns ? ns->queue : ctrl->admin_q, &c,
 			NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
 			&cmd.result, timeout);
 	if (status >= 0) {
@@ -1804,9 +1850,9 @@
 		force_successful_syscall_return();
 		return ns->ns_id;
 	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(ns->dev, NULL, (void __user *)arg);
+		return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
 	case NVME_IOCTL_IO_CMD:
-		return nvme_user_cmd(ns->dev, ns, (void __user *)arg);
+		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
 	case SG_GET_VERSION_NUM:
@@ -1836,6 +1882,7 @@
 static void nvme_free_ns(struct kref *kref)
 {
 	struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
 
 	if (ns->type == NVME_NS_LIGHTNVM)
 		nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
@@ -1844,7 +1891,7 @@
 	ns->disk->private_data = NULL;
 	spin_unlock(&dev_list_lock);
 
-	kref_put(&ns->dev->kref, nvme_free_dev);
+	kref_put(&dev->kref, nvme_free_dev);
 	put_disk(ns->disk);
 	kfree(ns);
 }
@@ -1893,15 +1940,15 @@
 static int nvme_revalidate_disk(struct gendisk *disk)
 {
 	struct nvme_ns *ns = disk->private_data;
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_dev *dev = to_nvme_dev(ns->ctrl);
 	struct nvme_id_ns *id;
 	u8 lbaf, pi_type;
 	u16 old_ms;
 	unsigned short bs;
 
-	if (nvme_identify_ns(dev, ns->ns_id, &id)) {
+	if (nvme_identify_ns(&dev->ctrl, ns->ns_id, &id)) {
 		dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
-						dev->instance, ns->ns_id);
+						dev->ctrl.instance, ns->ns_id);
 		return -ENODEV;
 	}
 	if (id->ncap == 0) {
@@ -1957,7 +2004,7 @@
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
 
-	if (dev->oncs & NVME_CTRL_ONCS_DSM)
+	if (dev->ctrl.oncs & NVME_CTRL_ONCS_DSM)
 		nvme_config_discard(ns);
 	blk_mq_unfreeze_queue(disk->queue);
 
@@ -2095,10 +2142,10 @@
 				spin_lock_irq(&nvmeq->q_lock);
 				nvme_process_cq(nvmeq);
 
-				while ((i == 0) && (dev->event_limit > 0)) {
+				while (i == 0 && dev->ctrl.event_limit > 0) {
 					if (nvme_submit_async_admin_req(dev))
 						break;
-					dev->event_limit--;
+					dev->ctrl.event_limit--;
 				}
 				spin_unlock_irq(&nvmeq->q_lock);
 			}
@@ -2124,7 +2171,7 @@
 		goto out_free_ns;
 	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
-	ns->dev = dev;
+	ns->ctrl = &dev->ctrl;
 	ns->queue->queuedata = ns;
 
 	disk = alloc_disk_node(0, node);
@@ -2145,7 +2192,7 @@
 	}
 	if (dev->stripe_size)
 		blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
-	if (dev->vwc & NVME_CTRL_VWC_PRESENT)
+	if (dev->ctrl.vwc & NVME_CTRL_VWC_PRESENT)
 		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
 	blk_queue_virt_boundary(ns->queue, dev->page_size - 1);
 
@@ -2156,7 +2203,7 @@
 	disk->queue = ns->queue;
 	disk->driverfs_dev = dev->device;
 	disk->flags = GENHD_FL_EXT_DEVT;
-	sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
+	sprintf(disk->disk_name, "nvme%dn%d", dev->ctrl.instance, nsid);
 
 	/*
 	 * Initialize capacity to 0 until we establish the namespace format and
@@ -2221,7 +2268,7 @@
 	u32 result;
 	u32 q_count = (count - 1) | ((count - 1) << 16);
 
-	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
+	status = nvme_set_features(&dev->ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
 								&result);
 	if (status < 0)
 		return status;
@@ -2405,7 +2452,8 @@
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
-	bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+	bool kill = nvme_io_incapable(to_nvme_dev(ns->ctrl)) &&
+			!blk_queue_dying(ns->queue);
 
 	if (kill)
 		blk_set_queue_dying(ns->queue);
@@ -2462,7 +2510,7 @@
 
 	if (!dev->tagset.tags)
 		return;
-	if (nvme_identify_ctrl(dev, &ctrl))
+	if (nvme_identify_ctrl(&dev->ctrl, &ctrl))
 		return;
 	nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
 	kfree(ctrl);
@@ -2482,18 +2530,18 @@
 	struct nvme_id_ctrl *ctrl;
 	int shift = NVME_CAP_MPSMIN(lo_hi_readq(dev->bar + NVME_REG_CAP)) + 12;
 
-	res = nvme_identify_ctrl(dev, &ctrl);
+	res = nvme_identify_ctrl(&dev->ctrl, &ctrl);
 	if (res) {
 		dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
 		return -EIO;
 	}
 
-	dev->oncs = le16_to_cpup(&ctrl->oncs);
-	dev->abort_limit = ctrl->acl + 1;
-	dev->vwc = ctrl->vwc;
-	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
-	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
-	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
+	dev->ctrl.oncs = le16_to_cpup(&ctrl->oncs);
+	dev->ctrl.abort_limit = ctrl->acl + 1;
+	dev->ctrl.vwc = ctrl->vwc;
+	memcpy(dev->ctrl.serial, ctrl->sn, sizeof(ctrl->sn));
+	memcpy(dev->ctrl.model, ctrl->mn, sizeof(ctrl->mn));
+	memcpy(dev->ctrl.firmware_rev, ctrl->fr, sizeof(ctrl->fr));
 	if (ctrl->mdts)
 		dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
 	else
@@ -2728,7 +2776,7 @@
 	DEFINE_KTHREAD_WORKER_ONSTACK(worker);
 	struct nvme_delq_ctx dq;
 	struct task_struct *kworker_task = kthread_run(kthread_worker_fn,
-					&worker, "nvme%d", dev->instance);
+					&worker, "nvme%d", dev->ctrl.instance);
 
 	if (IS_ERR(kworker_task)) {
 		dev_err(dev->dev,
@@ -2879,14 +2927,14 @@
 	if (error)
 		return -ENODEV;
 
-	dev->instance = instance;
+	dev->ctrl.instance = instance;
 	return 0;
 }
 
 static void nvme_release_instance(struct nvme_dev *dev)
 {
 	spin_lock(&dev_list_lock);
-	ida_remove(&nvme_instance_ida, dev->instance);
+	ida_remove(&nvme_instance_ida, dev->ctrl.instance);
 	spin_unlock(&dev_list_lock);
 }
 
@@ -2899,8 +2947,8 @@
 	nvme_release_instance(dev);
 	if (dev->tagset.tags)
 		blk_mq_free_tag_set(&dev->tagset);
-	if (dev->admin_q)
-		blk_put_queue(dev->admin_q);
+	if (dev->ctrl.admin_q)
+		blk_put_queue(dev->ctrl.admin_q);
 	kfree(dev->queues);
 	kfree(dev->entry);
 	kfree(dev);
@@ -2914,8 +2962,8 @@
 
 	spin_lock(&dev_list_lock);
 	list_for_each_entry(dev, &dev_list, node) {
-		if (dev->instance == instance) {
-			if (!dev->admin_q) {
+		if (dev->ctrl.instance == instance) {
+			if (!dev->ctrl.admin_q) {
 				ret = -EWOULDBLOCK;
 				break;
 			}
@@ -2945,12 +2993,12 @@
 
 	switch (cmd) {
 	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(dev, NULL, (void __user *)arg);
+		return nvme_user_cmd(&dev->ctrl, NULL, (void __user *)arg);
 	case NVME_IOCTL_IO_CMD:
 		if (list_empty(&dev->namespaces))
 			return -ENOTTY;
 		ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
-		return nvme_user_cmd(dev, ns, (void __user *)arg);
+		return nvme_user_cmd(&dev->ctrl, ns, (void __user *)arg);
 	case NVME_IOCTL_RESET:
 		dev_warn(dev->dev, "resetting controller\n");
 		return nvme_reset(dev);
@@ -3011,7 +3059,7 @@
 	if (result)
 		goto free_tags;
 
-	dev->event_limit = 1;
+	dev->ctrl.event_limit = 1;
 
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
@@ -3029,8 +3077,8 @@
 
  free_tags:
 	nvme_dev_remove_admin(dev);
-	blk_put_queue(dev->admin_q);
-	dev->admin_q = NULL;
+	blk_put_queue(dev->ctrl.admin_q);
+	dev->ctrl.admin_q = NULL;
 	dev->queues[0]->tags = NULL;
  disable:
 	nvme_disable_queue(dev, 0);
@@ -3058,7 +3106,7 @@
 	dev_warn(dev->dev, "Device failed to resume\n");
 	kref_get(&dev->kref);
 	if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
-						dev->instance))) {
+						dev->ctrl.instance))) {
 		dev_err(dev->dev,
 			"Failed to start controller remove task\n");
 		kref_put(&dev->kref, nvme_free_dev);
@@ -3100,7 +3148,7 @@
 {
 	int ret;
 
-	if (!dev->admin_q || blk_queue_dying(dev->admin_q))
+	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
 		return -ENODEV;
 
 	spin_lock(&dev_list_lock);
@@ -3131,6 +3179,16 @@
 }
 static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
 
+static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
+{
+	*val = readl(to_nvme_dev(ctrl)->bar + off);
+	return 0;
+}
+
+static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+	.reg_read32		= nvme_pci_reg_read32,
+};
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int node, result = -ENOMEM;
@@ -3156,6 +3214,10 @@
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
 	dev->dev = get_device(&pdev->dev);
 	pci_set_drvdata(pdev, dev);
+
+	dev->ctrl.ops = &nvme_pci_ctrl_ops;
+	dev->ctrl.dev = dev->dev;
+
 	result = nvme_set_instance(dev);
 	if (result)
 		goto put_pci;
@@ -3166,8 +3228,8 @@
 
 	kref_init(&dev->kref);
 	dev->device = device_create(nvme_class, &pdev->dev,
-				MKDEV(nvme_char_major, dev->instance),
-				dev, "nvme%d", dev->instance);
+				MKDEV(nvme_char_major, dev->ctrl.instance),
+				dev, "nvme%d", dev->ctrl.instance);
 	if (IS_ERR(dev->device)) {
 		result = PTR_ERR(dev->device);
 		goto release_pools;
@@ -3186,7 +3248,7 @@
 	return 0;
 
  put_dev:
-	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
+	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->ctrl.instance));
 	put_device(dev->device);
  release_pools:
 	nvme_release_prp_pools(dev);
@@ -3233,7 +3295,7 @@
 	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
 	nvme_dev_remove_admin(dev);
-	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
+	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->ctrl.instance));
 	nvme_free_queues(dev, 0);
 	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index 0bf90b6..bba2955 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -524,7 +524,7 @@
 					struct sg_io_hdr *hdr, u8 *inq_response,
 					int alloc_len)
 {
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_id_ns *id_ns;
 	int res;
 	int nvme_sc;
@@ -532,10 +532,10 @@
 	u8 resp_data_format = 0x02;
 	u8 protect;
 	u8 cmdque = 0x01 << 1;
-	u8 fw_offset = sizeof(dev->firmware_rev);
+	u8 fw_offset = sizeof(ctrl->firmware_rev);
 
 	/* nvme ns identify - use DPS value for PROTECT field */
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -553,12 +553,12 @@
 	inq_response[5] = protect;	/* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
 	inq_response[7] = cmdque;	/* wbus16=0 | sync=0 | vs=0 */
 	strncpy(&inq_response[8], "NVMe    ", 8);
-	strncpy(&inq_response[16], dev->model, 16);
+	strncpy(&inq_response[16], ctrl->model, 16);
 
-	while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
+	while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
 		fw_offset--;
 	fw_offset -= 4;
-	strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
+	strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
 	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
@@ -588,27 +588,26 @@
 					struct sg_io_hdr *hdr, u8 *inq_response,
 					int alloc_len)
 {
-	struct nvme_dev *dev = ns->dev;
 	int xfer_len;
 
 	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
 	inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
 	inq_response[3] = INQ_SERIAL_NUMBER_LENGTH;    /* Page Length */
-	strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
+	strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
 	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-		u8 *inq_response, int alloc_len)
+		u8 *inq_response, int alloc_len, u32 vs)
 {
 	struct nvme_id_ns *id_ns;
 	int nvme_sc, res;
 	size_t len;
 	void *eui;
 
-	nvme_sc = nvme_identify_ns(ns->dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -616,7 +615,7 @@
 	eui = id_ns->eui64;
 	len = sizeof(id_ns->eui64);
 
-	if (readl(ns->dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) {
+	if (vs >= NVME_VS(1, 2)) {
 		if (bitmap_empty(eui, len * 8)) {
 			eui = id_ns->nguid;
 			len = sizeof(id_ns->nguid);
@@ -648,7 +647,7 @@
 static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
 		struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
 {
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_id_ctrl *id_ctrl;
 	int nvme_sc, res;
 
@@ -659,7 +658,7 @@
 				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 	}
 
-	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+	nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -675,9 +674,9 @@
 	inq_response[7] = 0x44;	/* Designator Length */
 
 	sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
-	memcpy(&inq_response[12], dev->model, sizeof(dev->model));
+	memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
 	sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
-	memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
+	memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
 
 	res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
 	kfree(id_ctrl);
@@ -688,9 +687,14 @@
 					u8 *resp, int alloc_len)
 {
 	int res;
+	u32 vs;
 
-	if (readl(ns->dev->bar + NVME_REG_VS) >= NVME_VS(1, 1)) {
-		res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
+	res = ns->ctrl->ops->reg_read32(ns->ctrl, NVME_REG_VS, &vs);
+	if (res)
+		return res;
+
+	if (vs >= NVME_VS(1, 1)) {
+		res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len, vs);
 		if (res != -EOPNOTSUPP)
 			return res;
 	}
@@ -704,7 +708,7 @@
 	u8 *inq_response;
 	int res;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_id_ctrl *id_ctrl;
 	struct nvme_id_ns *id_ns;
 	int xfer_len;
@@ -720,7 +724,7 @@
 	if (inq_response == NULL)
 		return -ENOMEM;
 
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_free_inq;
@@ -736,7 +740,7 @@
 	app_chk = protect << 1;
 	ref_chk = protect;
 
-	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+	nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_free_inq;
@@ -847,7 +851,6 @@
 	int res;
 	int xfer_len;
 	u8 *log_response;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_smart_log *smart_log;
 	u8 temp_c;
 	u16 temp_k;
@@ -856,7 +859,7 @@
 	if (log_response == NULL)
 		return -ENOMEM;
 
-	res = nvme_get_log_page(dev, &smart_log);
+	res = nvme_get_log_page(ns->ctrl, &smart_log);
 	if (res < 0)
 		goto out_free_response;
 
@@ -894,7 +897,6 @@
 	int res;
 	int xfer_len;
 	u8 *log_response;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_smart_log *smart_log;
 	u32 feature_resp;
 	u8 temp_c_cur, temp_c_thresh;
@@ -904,7 +906,7 @@
 	if (log_response == NULL)
 		return -ENOMEM;
 
-	res = nvme_get_log_page(dev, &smart_log);
+	res = nvme_get_log_page(ns->ctrl, &smart_log);
 	if (res < 0)
 		goto out_free_response;
 
@@ -918,7 +920,7 @@
 	kfree(smart_log);
 
 	/* Get Features for Temp Threshold */
-	res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0,
+	res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, 0,
 								&feature_resp);
 	if (res != NVME_SC_SUCCESS)
 		temp_c_thresh = LOG_TEMP_UNKNOWN;
@@ -980,7 +982,6 @@
 {
 	int res;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id_ns;
 	u8 flbas;
 	u32 lba_length;
@@ -990,7 +991,7 @@
 	else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
 		return -EINVAL;
 
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -1046,14 +1047,13 @@
 {
 	int res = 0;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	u32 feature_resp;
 	u8 vwc;
 
 	if (len < MODE_PAGE_CACHING_LEN)
 		return -EINVAL;
 
-	nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0,
+	nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, 0,
 								&feature_resp);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
@@ -1239,12 +1239,11 @@
 {
 	int res;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ctrl *id_ctrl;
 	int lowest_pow_st;	/* max npss = lowest power consumption */
 	unsigned ps_desired = 0;
 
-	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+	nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -1288,7 +1287,7 @@
 				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		break;
 	}
-	nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
+	nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_POWER_MGMT, ps_desired, 0,
 				    NULL);
 	return nvme_trans_status_code(hdr, nvme_sc);
 }
@@ -1312,7 +1311,6 @@
 					u8 buffer_id)
 {
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_command c;
 
 	if (hdr->iovec_count > 0) {
@@ -1329,7 +1327,7 @@
 	c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
 	c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
 
-	nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL,
+	nvme_sc = __nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL,
 			hdr->dxferp, tot_len, NULL, 0);
 	return nvme_trans_status_code(hdr, nvme_sc);
 }
@@ -1396,14 +1394,13 @@
 {
 	int res = 0;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	unsigned dword11;
 
 	switch (page_code) {
 	case MODE_PAGE_CACHING:
 		dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
-		nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11,
-					    0, NULL);
+		nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
+					    dword11, 0, NULL);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		break;
 	case MODE_PAGE_CONTROL:
@@ -1505,7 +1502,6 @@
 {
 	int res = 0;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	u8 flbas;
 
 	/*
@@ -1518,7 +1514,7 @@
 	if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
 		struct nvme_id_ns *id_ns;
 
-		nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+		nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
 			return res;
@@ -1602,7 +1598,6 @@
 {
 	int res;
 	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id_ns;
 	u8 i;
 	u8 flbas, nlbaf;
@@ -1611,7 +1606,7 @@
 	struct nvme_command c;
 
 	/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;
@@ -1643,7 +1638,7 @@
 	c.format.nsid = cpu_to_le32(ns->ns_id);
 	c.format.cdw10 = cpu_to_le32(cdw10);
 
-	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+	nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 
 	kfree(id_ns);
@@ -2072,7 +2067,6 @@
 	u32 alloc_len;
 	u32 resp_size;
 	u32 xfer_len;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id_ns;
 	u8 *response;
 
@@ -2084,7 +2078,7 @@
 		resp_size = READ_CAP_10_RESP_SIZE;
 	}
 
-	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		return res;	
@@ -2112,7 +2106,6 @@
 	int nvme_sc;
 	u32 alloc_len, xfer_len, resp_size;
 	u8 *response;
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ctrl *id_ctrl;
 	u32 ll_length, lun_id;
 	u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
@@ -2126,7 +2119,7 @@
 	case ALL_LUNS_RETURNED:
 	case ALL_WELL_KNOWN_LUNS_RETURNED:
 	case RESTRICTED_LUNS_RETURNED:
-		nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+		nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
 			return res;
@@ -2327,9 +2320,7 @@
 					struct sg_io_hdr *hdr,
 					u8 *cmd)
 {
-	struct nvme_dev *dev = ns->dev;
-
-	if (!(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_RDY))
+	if (!nvme_ctrl_ready(ns->ctrl))
 		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					    NOT_READY, SCSI_ASC_LUN_NOT_READY,
 					    SCSI_ASCQ_CAUSE_NOT_REPORTABLE);