Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fix from Paolo Bonzini:
 "One fix for an x86 regression in VM migration, mostly visible with
  Windows because it uses RTC periodic interrupts"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: x86: correctly reset dest_map->vector when restoring LAPIC state
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index e3af86f..2131c4c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -433,7 +433,11 @@
 #define __get_user_asm_ex(x, addr, itype, rtype, ltype)			\
 	asm volatile("1:	mov"itype" %1,%"rtype"0\n"		\
 		     "2:\n"						\
-		     _ASM_EXTABLE_EX(1b, 2b)				\
+		     ".section .fixup,\"ax\"\n"				\
+		     "3:xor"itype" %"rtype"0,%"rtype"0\n"		\
+		     "  jmp 2b\n"					\
+		     ".previous\n"					\
+		     _ASM_EXTABLE_EX(1b, 3b)				\
 		     : ltype(x) : "m" (__m(addr)))
 
 #define __put_user_nocheck(x, ptr, size)			\
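
The hunk above restores the classic .fixup pattern for __get_user_asm_ex(): previously the extable entry simply resumed at 2: after a fault, leaving the output register uninitialized; now the fault is routed to 3:, which zeroes the register first. A minimal sketch of the emitted pattern for a 4-byte load, assuming kernel context (<asm/asm.h> for _ASM_EXTABLE_EX; val and uaddr are placeholder names):

	int val;
	const int *uaddr;			/* stands in for the checked user address */

	asm volatile("1:	movl %1,%0\n"	/* the load that may fault     */
		     "2:\n"
		     ".section .fixup,\"ax\"\n"
		     "3:	xorl %0,%0\n"	/* faulted: force val to 0     */
		     "	jmp 2b\n"		/* resume after the load       */
		     ".previous\n"
		     _ASM_EXTABLE_EX(1b, 3b)	/* fault at 1b jumps to 3b     */
		     : "=r" (val) : "m" (*uaddr));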
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 837ea36..6d52b94 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -553,15 +553,21 @@
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
 
 /*
- * Broadwell EP Home Agent BARs erroneously return non-zero values when read.
+ * Device [8086:2fc0]
+ * Erratum HSE43
+ * CONFIG_TDP_NOMINAL CSR Implemented at Incorrect Offset
+ * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html
  *
- * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
- * entry BDF2.
+ * Devices [8086:6f60,6fa0,6fc0]
+ * Erratum BDF2
+ * PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration
+ * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
  */
-static void pci_bdwep_bar(struct pci_dev *dev)
+static void pci_invalid_bar(struct pci_dev *dev)
 {
 	dev->non_compliant_bars = 1;
 }
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar);
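
Setting dev->non_compliant_bars makes the core skip BAR sizing for the device entirely, so the bogus values these errata describe are never consumed. A hedged sketch of the consumer side, assuming the check sits at the top of the BAR-probing path (pci_read_bases() in drivers/pci/probe.c; the body here is a stub):

	static void pci_read_bases(struct pci_dev *dev, unsigned int howmany,
				   int rom)
	{
		/* Quirked devices return garbage BARs; leave them untouched. */
		if (dev->non_compliant_bars)
			return;

		/* ... size and record each BAR as usual ... */
	}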
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index b6a953a..80f9889 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -333,6 +333,8 @@
 
 	spin_lock_irqsave(&ep->com.dev->lock, flags);
 	_remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
+	if (idr_is_empty(&ep->com.dev->hwtid_idr))
+		wake_up(&ep->com.dev->wait);
 	spin_unlock_irqrestore(&ep->com.dev->lock, flags);
 }
 
@@ -2117,8 +2119,10 @@
 		}
 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
 					n, pdev, rt_tos2priority(tos));
-		if (!ep->l2t)
+		if (!ep->l2t) {
+			dev_put(pdev);
 			goto out;
+		}
 		ep->mtu = pdev->mtu;
 		ep->tx_chan = cxgb4_port_chan(pdev);
 		ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 071d733..3c4b212 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -872,9 +872,13 @@
 static void c4iw_dealloc(struct uld_ctx *ctx)
 {
 	c4iw_rdev_close(&ctx->dev->rdev);
+	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
 	idr_destroy(&ctx->dev->cqidr);
+	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
 	idr_destroy(&ctx->dev->qpidr);
+	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
 	idr_destroy(&ctx->dev->mmidr);
+	wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
 	idr_destroy(&ctx->dev->hwtid_idr);
 	idr_destroy(&ctx->dev->stid_idr);
 	idr_destroy(&ctx->dev->atid_idr);
@@ -992,6 +996,7 @@
 	mutex_init(&devp->rdev.stats.lock);
 	mutex_init(&devp->db_mutex);
 	INIT_LIST_HEAD(&devp->db_fc_list);
+	init_waitqueue_head(&devp->wait);
 	devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
 
 	if (c4iw_debugfs_root) {
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index aa47e0a..4b83b84f 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -263,6 +263,7 @@
 	struct idr stid_idr;
 	struct list_head db_fc_list;
 	u32 avail_ird;
+	wait_queue_head_t wait;
 };
 
 static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
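
Taken together, the three cxgb4 hunks close a teardown race: c4iw_dealloc() could previously destroy hwtid_idr while endpoints still held entries in it. The pattern in isolation, as a sketch (dev stands for the c4iw_dev from the diff):

	/* endpoint side: removing the last hwtid wakes the waiter */
	spin_lock_irqsave(&dev->lock, flags);
	_remove_handle(dev, &dev->hwtid_idr, ep->hwtid, 0);
	if (idr_is_empty(&dev->hwtid_idr))
		wake_up(&dev->wait);
	spin_unlock_irqrestore(&dev->lock, flags);

	/* teardown side: block until every endpoint is gone, then destroy */
	wait_event(dev->wait, idr_is_empty(&dev->hwtid_idr));
	idr_destroy(&dev->hwtid_idr);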
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8dcf5a9..60f7eab 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1693,7 +1693,12 @@
 		nvme_suspend_queue(dev->queues[i]);
 
 	if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
-		nvme_suspend_queue(dev->queues[0]);
+		/* A device might become I/O incapable very early in
+		 * probe, before the admin queue is configured.  Thus,
+		 * queue_count can be 0 here.
+		 */
+		if (dev->queue_count)
+			nvme_suspend_queue(dev->queues[0]);
 	} else {
 		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
@@ -2112,6 +2117,8 @@
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+	{ PCI_DEVICE(0x1c5f, 0x0540),	/* Memblaze Pblaze4 adapter */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
 	{ 0, }
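
The new Memblaze entry reuses an existing quirk rather than adding code. A sketch of how the quirk is assumed to be honored in the shared controller-disable path (the constant and its ~2.3 s value are recalled from the core driver, not shown in this diff):

	if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
		msleep(NVME_QUIRK_DELAY_AMOUNT);	/* wait before polling CSTS.RDY */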
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index ab545fb..c2c2c28 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -82,6 +82,8 @@
 
 enum nvme_rdma_queue_flags {
 	NVME_RDMA_Q_CONNECTED = (1 << 0),
+	NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
+	NVME_RDMA_Q_DELETING = (1 << 2),
 };
 
 struct nvme_rdma_queue {
@@ -291,6 +293,7 @@
 	if (IS_ERR(req->mr)) {
 		ret = PTR_ERR(req->mr);
 		req->mr = NULL;
+		goto out;
 	}
 
 	req->mr->need_inval = false;
@@ -480,9 +483,14 @@
 
 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 {
-	struct nvme_rdma_device *dev = queue->device;
-	struct ib_device *ibdev = dev->dev;
+	struct nvme_rdma_device *dev;
+	struct ib_device *ibdev;
 
+	if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
+		return;
+
+	dev = queue->device;
+	ibdev = dev->dev;
 	rdma_destroy_qp(queue->cm_id);
 	ib_free_cq(queue->ib_cq);
 
@@ -533,6 +541,7 @@
 		ret = -ENOMEM;
 		goto out_destroy_qp;
 	}
+	set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
 
 	return 0;
 
@@ -552,6 +561,7 @@
 
 	queue = &ctrl->queues[idx];
 	queue->ctrl = ctrl;
+	queue->flags = 0;
 	init_completion(&queue->cm_done);
 
 	if (idx > 0)
@@ -590,6 +600,7 @@
 	return 0;
 
 out_destroy_cm_id:
+	nvme_rdma_destroy_queue_ib(queue);
 	rdma_destroy_id(queue->cm_id);
 	return ret;
 }
@@ -608,7 +619,7 @@
 
 static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue)
 {
-	if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
+	if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
 		return;
 	nvme_rdma_stop_queue(queue);
 	nvme_rdma_free_queue(queue);
@@ -652,7 +663,7 @@
 	return 0;
 
 out_free_queues:
-	for (; i >= 1; i--)
+	for (i--; i >= 1; i--)
 		nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
 
 	return ret;
@@ -761,8 +772,13 @@
 {
 	struct nvme_rdma_ctrl *ctrl = container_of(work,
 			struct nvme_rdma_ctrl, err_work);
+	int i;
 
 	nvme_stop_keep_alive(&ctrl->ctrl);
+
+	for (i = 0; i < ctrl->queue_count; i++)
+		clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+
 	if (ctrl->queue_count > 1)
 		nvme_stop_queues(&ctrl->ctrl);
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1305,58 +1321,6 @@
 	return ret;
 }
 
-/**
- * nvme_rdma_device_unplug() - Handle RDMA device unplug
- * @queue:      Queue that owns the cm_id that caught the event
- *
- * DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
- *
- * In our case, the RDMA resources are managed per controller and not
- * only per queue. So the way we handle this is we trigger an implicit
- * controller deletion upon the first DEVICE_REMOVAL event we see, and
- * hold the event inflight until the controller deletion is completed.
- *
- * One exception that we need to handle is the destruction of the cm_id
- * that caught the event. Since we hold the callout until the controller
- * deletion is completed, we'll deadlock if the controller deletion will
- * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
- * of destroying this queue before-hand, destroy the queue resources,
- * then queue the controller deletion which won't destroy this queue and
- * we destroy the cm_id implicitely by returning a non-zero rc to the callout.
- */
-static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
-{
-	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-	int ret = 0;
-
-	/* Own the controller deletion */
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
-		return 0;
-
-	dev_warn(ctrl->ctrl.device,
-		"Got rdma device removal event, deleting ctrl\n");
-
-	/* Get rid of reconnect work if its running */
-	cancel_delayed_work_sync(&ctrl->reconnect_work);
-
-	/* Disable the queue so ctrl delete won't free it */
-	if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
-		/* Free this queue ourselves */
-		nvme_rdma_stop_queue(queue);
-		nvme_rdma_destroy_queue_ib(queue);
-
-		/* Return non-zero so the cm_id will destroy implicitly */
-		ret = 1;
-	}
-
-	/* Queue controller deletion */
-	queue_work(nvme_rdma_wq, &ctrl->delete_work);
-	flush_work(&ctrl->delete_work);
-	return ret;
-}
-
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *ev)
 {
@@ -1398,8 +1362,8 @@
 		nvme_rdma_error_recovery(queue->ctrl);
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		/* return 1 means impliciy CM ID destroy */
-		return nvme_rdma_device_unplug(queue);
+		/* device removal is handled via the ib_client API */
+		break;
 	default:
 		dev_err(queue->ctrl->ctrl.device,
 			"Unexpected RDMA CM event (%d)\n", ev->event);
@@ -1700,15 +1664,19 @@
 static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-	int ret;
+	int ret = 0;
 
+	/*
+	 * Keep a reference until all work is flushed, since
+	 * __nvme_rdma_del_ctrl can free the controller memory.
+	 */
+	if (!kref_get_unless_zero(&ctrl->ctrl.kref))
+		return -EBUSY;
 	ret = __nvme_rdma_del_ctrl(ctrl);
-	if (ret)
-		return ret;
-
-	flush_work(&ctrl->delete_work);
-
-	return 0;
+	if (!ret)
+		flush_work(&ctrl->delete_work);
+	nvme_put_ctrl(&ctrl->ctrl);
+	return ret;
 }
 
 static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
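
The comment in the hunk above is the crux: the delete work ultimately drops the controller's final reference, so flush_work() must not be the only thing keeping the ctrl alive while it runs. The same pattern reduced to its essentials (obj, start_delete and obj_release are generic placeholder names):

	if (!kref_get_unless_zero(&obj->kref))
		return -EBUSY;			/* already being torn down */

	ret = start_delete(obj);		/* queues obj->delete_work */
	if (!ret)
		flush_work(&obj->delete_work);	/* safe: we still hold a ref */

	kref_put(&obj->kref, obj_release);	/* may free obj right here */
	return ret;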
@@ -2005,27 +1973,57 @@
 	.create_ctrl	= nvme_rdma_create_ctrl,
 };
 
+static void nvme_rdma_add_one(struct ib_device *ib_device)
+{
+}
+
+static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+	struct nvme_rdma_ctrl *ctrl;
+
+	/* Delete all controllers using this device */
+	mutex_lock(&nvme_rdma_ctrl_mutex);
+	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
+		if (ctrl->device->dev != ib_device)
+			continue;
+		dev_info(ctrl->ctrl.device,
+			"Removing ctrl: NQN \"%s\", addr %pISp\n",
+			ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
+		__nvme_rdma_del_ctrl(ctrl);
+	}
+	mutex_unlock(&nvme_rdma_ctrl_mutex);
+
+	flush_workqueue(nvme_rdma_wq);
+}
+
+static struct ib_client nvme_rdma_ib_client = {
+	.name   = "nvme_rdma",
+	.add = nvme_rdma_add_one,
+	.remove = nvme_rdma_remove_one
+};
+
 static int __init nvme_rdma_init_module(void)
 {
+	int ret;
+
 	nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
 	if (!nvme_rdma_wq)
 		return -ENOMEM;
 
+	ret = ib_register_client(&nvme_rdma_ib_client);
+	if (ret) {
+		destroy_workqueue(nvme_rdma_wq);
+		return ret;
+	}
+
 	nvmf_register_transport(&nvme_rdma_transport);
 	return 0;
 }
 
 static void __exit nvme_rdma_cleanup_module(void)
 {
-	struct nvme_rdma_ctrl *ctrl;
-
 	nvmf_unregister_transport(&nvme_rdma_transport);
-
-	mutex_lock(&nvme_rdma_ctrl_mutex);
-	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
-		__nvme_rdma_del_ctrl(ctrl);
-	mutex_unlock(&nvme_rdma_ctrl_mutex);
-
+	ib_unregister_client(&nvme_rdma_ib_client);
 	destroy_workqueue(nvme_rdma_wq);
 }
 
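
With nvme_rdma_device_unplug() gone, hot-unplug arrives through the ib_client remove callback registered above: it deletes every controller bound to the dying device and flushes the workqueue, so the per-cm_id deadlock dance described in the removed kerneldoc is no longer needed. Note the cleanup ordering: the client is unregistered before the workqueue is destroyed, since remove callbacks may still queue and flush work. A minimal skeleton of the same registration pattern for any ULP (hypothetical names):

	static void my_ulp_add_one(struct ib_device *dev)
	{
		/* per-device setup, or a deliberate no-op as in nvme_rdma */
	}

	static void my_ulp_remove_one(struct ib_device *dev, void *client_data)
	{
		/* tear down everything bound to @dev before returning */
	}

	static struct ib_client my_ulp_client = {
		.name	= "my_ulp",
		.add	= my_ulp_add_one,
		.remove	= my_ulp_remove_one,
	};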
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index d1ef7ac..f9357e0 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -40,6 +40,7 @@
 	list_del(&dev->bus_list);
 	up_write(&pci_bus_sem);
 
+	pci_bridge_d3_device_removed(dev);
 	pci_free_resources(dev);
 	put_device(&dev->dev);
 }
@@ -96,8 +97,6 @@
 		dev->subordinate = NULL;
 	}
 
-	pci_bridge_d3_device_removed(dev);
-
 	pci_destroy_dev(dev);
 }
 
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 0f56deb..c415668 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -568,7 +568,7 @@
 	return thaw_super(sb);
 }
 
-static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
+static int ioctl_file_dedupe_range(struct file *file, void __user *arg)
 {
 	struct file_dedupe_range __user *argp = arg;
 	struct file_dedupe_range *same = NULL;
@@ -582,6 +582,10 @@
 	}
 
 	size = offsetof(struct file_dedupe_range __user, info[count]);
+	if (size > PAGE_SIZE) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	same = memdup_user(argp, size);
 	if (IS_ERR(same)) {
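
The new PAGE_SIZE cap matters because the memdup_user() size is driven entirely by a user-supplied u16. A sketch of the arithmetic, assuming the uapi layout in include/uapi/linux/fs.h (24-byte header, 32 bytes per info[] slot):

	u16 count;
	size_t size;

	if (get_user(count, &argp->dest_count))	/* user-controlled, up to 65535 */
		return -EFAULT;
	size = offsetof(struct file_dedupe_range, info[count]);
	if (size > PAGE_SIZE)			/* i.e. count >= 128 with 4K pages */
		return -ENOMEM;			/* avoids a kmalloc of up to ~2 MB */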