cxl: Support the cxl kernel API from a guest

Like on bare-metal, the cxl driver creates a virtual PHB and a pci
device for the AFU. The configuration space of the device is mapped to
the configuration record of the AFU.

Reuse the code defined in afu_cr_read8|16|32() when reading the
configuration space of the AFU device.

Even though the (virtual) AFU device is a pci device, the adapter is
not. So a driver using the cxl kernel API cannot read the VPD of the
adapter through the usual PCI interface. Therefore, we add a call to
the cxl kernel API:
ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count);

Co-authored-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Reviewed-by: Manoj Kumar <manoj@linux.vnet.ibm.com>
Acked-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 325f957..75ec2f9 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -89,28 +89,11 @@
 }
 EXPORT_SYMBOL_GPL(cxl_release_context);
 
-int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
-{
-	if (num == 0)
-		num = ctx->afu->pp_irqs;
-	return afu_allocate_irqs(ctx, num);
-}
-EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
-
-void cxl_free_afu_irqs(struct cxl_context *ctx)
-{
-	afu_irq_name_free(ctx);
-	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
-}
-EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
-
 static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
 {
 	__u16 range;
 	int r;
 
-	WARN_ON(num == 0);
-
 	for (r = 0; r < CXL_IRQ_RANGES; r++) {
 		range = ctx->irqs.range[r];
 		if (num < range) {
@@ -121,6 +104,44 @@
 	return 0;
 }
 
+int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
+{
+	int res;
+	irq_hw_number_t hwirq;
+
+	if (num == 0)
+		num = ctx->afu->pp_irqs;
+	res = afu_allocate_irqs(ctx, num);
+	if (!res && !cpu_has_feature(CPU_FTR_HVMODE)) {
+		/* In a guest, the PSL interrupt is not multiplexed. It was
+		 * allocated above, and we need to set its handler
+		 */
+		hwirq = cxl_find_afu_irq(ctx, 0);
+		if (hwirq)
+			cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
+
+void cxl_free_afu_irqs(struct cxl_context *ctx)
+{
+	irq_hw_number_t hwirq;
+	unsigned int virq;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
+		hwirq = cxl_find_afu_irq(ctx, 0);
+		if (hwirq) {
+			virq = irq_find_mapping(NULL, hwirq);
+			if (virq)
+				cxl_unmap_irq(virq, ctx);
+		}
+	}
+	afu_irq_name_free(ctx);
+	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+}
+EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
+
 int cxl_map_afu_irq(struct cxl_context *ctx, int num,
 		    irq_handler_t handler, void *cookie, char *name)
 {
@@ -356,3 +377,11 @@
 	afu->adapter->perst_same_image = perst_reloads_same_image;
 }
 EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
+
+ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
+{
+	struct cxl_afu *afu = cxl_pci_to_afu(dev);
+
+	return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
+}
+EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 24bd4ca..b388c97 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -587,6 +587,7 @@
 int cxl_update_image_control(struct cxl *adapter);
 int cxl_pci_reset(struct cxl *adapter);
 void cxl_pci_release_afu(struct device *dev);
+ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len);
 
 /* common == phyp + powernv */
 struct cxl_process_element_common {
@@ -808,7 +809,6 @@
 
 void cxl_stop_trace(struct cxl *cxl);
 int cxl_pci_vphb_add(struct cxl_afu *afu);
-void cxl_pci_vphb_reconfigure(struct cxl_afu *afu);
 void cxl_pci_vphb_remove(struct cxl_afu *afu);
 
 extern struct pci_driver cxl_pci_driver;
@@ -869,6 +869,10 @@
 	int (*afu_cr_read16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 *val);
 	int (*afu_cr_read32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 *val);
 	int (*afu_cr_read64)(struct cxl_afu *afu, int cr_idx, u64 offset, u64 *val);
+	int (*afu_cr_write8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 val);
+	int (*afu_cr_write16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 val);
+	int (*afu_cr_write32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 val);
+	ssize_t (*read_adapter_vpd)(struct cxl *adapter, void *buf, size_t count);
 };
 extern const struct cxl_backend_ops cxl_native_ops;
 extern const struct cxl_backend_ops cxl_guest_ops;
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 816113d..2b07ebd 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -418,6 +418,24 @@
 	return _guest_afu_cr_readXX(8, afu, cr_idx, offset, out);
 }
 
+static int guest_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in)
+{
+	/* config record is not writable from guest */
+	return -EPERM;
+}
+
+static int guest_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in)
+{
+	/* config record is not writable from guest */
+	return -EPERM;
+}
+
+static int guest_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in)
+{
+	/* config record is not writable from guest */
+	return -EPERM;
+}
+
 static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
 {
 	struct cxl_process_element_hcall *elem;
@@ -807,6 +825,9 @@
 
 	afu->enabled = true;
 
+	if ((rc = cxl_pci_vphb_add(afu)))
+		dev_info(&afu->dev, "Can't register vPHB\n");
+
 	return 0;
 
 err_put2:
@@ -832,6 +853,7 @@
 	if (!afu)
 		return;
 
+	cxl_pci_vphb_remove(afu);
 	cxl_sysfs_afu_remove(afu);
 
 	spin_lock(&afu->adapter->afu_list_lock);
@@ -987,4 +1009,8 @@
 	.afu_cr_read16 = guest_afu_cr_read16,
 	.afu_cr_read32 = guest_afu_cr_read32,
 	.afu_cr_read64 = guest_afu_cr_read64,
+	.afu_cr_write8 = guest_afu_cr_write8,
+	.afu_cr_write16 = guest_afu_cr_write16,
+	.afu_cr_write32 = guest_afu_cr_write32,
+	.read_adapter_vpd = cxl_guest_read_adapter_vpd,
 };
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 0e289c2..e564ae6 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -1019,6 +1019,52 @@
 	return rc;
 }
 
+static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in)
+{
+	if (unlikely(!cxl_ops->link_ok(afu->adapter)))
+		return -EIO;
+	if (unlikely(off >= afu->crs_len))
+		return -ERANGE;
+	out_le32(afu->native->afu_desc_mmio + afu->crs_offset +
+		(cr * afu->crs_len) + off, in);
+	return 0;
+}
+
+static int native_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in)
+{
+	u64 aligned_off = off & ~0x3L;
+	u32 val32, mask, shift;
+	int rc;
+
+	rc = native_afu_cr_read32(afu, cr, aligned_off, &val32);
+	if (rc)
+		return rc;
+	shift = (off & 0x3) * 8;
+	WARN_ON(shift == 24);
+	mask = 0xffff << shift;
+	val32 = (val32 & ~mask) | (in << shift);
+
+	rc = native_afu_cr_write32(afu, cr, aligned_off, val32);
+	return rc;
+}
+
+static int native_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in)
+{
+	u64 aligned_off = off & ~0x3L;
+	u32 val32, mask, shift;
+	int rc;
+
+	rc = native_afu_cr_read32(afu, cr, aligned_off, &val32);
+	if (rc)
+		return rc;
+	shift = (off & 0x3) * 8;
+	mask = 0xff << shift;
+	val32 = (val32 & ~mask) | (in << shift);
+
+	rc = native_afu_cr_write32(afu, cr, aligned_off, val32);
+	return rc;
+}
+
 const struct cxl_backend_ops cxl_native_ops = {
 	.module = THIS_MODULE,
 	.adapter_reset = cxl_pci_reset,
@@ -1044,4 +1090,8 @@
 	.afu_cr_read16 = native_afu_cr_read16,
 	.afu_cr_read32 = native_afu_cr_read32,
 	.afu_cr_read64 = native_afu_cr_read64,
+	.afu_cr_write8 = native_afu_cr_write8,
+	.afu_cr_write16 = native_afu_cr_write16,
+	.afu_cr_write32 = native_afu_cr_write32,
+	.read_adapter_vpd = cxl_pci_read_adapter_vpd,
 };
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index fb4fd45..6cae044 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -881,6 +881,7 @@
 	if (!afu)
 		return;
 
+	cxl_pci_vphb_remove(afu);
 	cxl_sysfs_afu_remove(afu);
 	cxl_debugfs_afu_remove(afu);
 
@@ -1067,6 +1068,11 @@
 	return 0;
 }
 
+ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len)
+{
+	return pci_read_vpd(to_pci_dev(adapter->dev.parent), 0, len, buf);
+}
+
 static void cxl_release_adapter(struct device *dev)
 {
 	struct cxl *adapter = to_cxl_adapter(dev);
@@ -1272,7 +1278,6 @@
 	 */
 	for (i = 0; i < adapter->slices; i++) {
 		afu = adapter->afu[i];
-		cxl_pci_vphb_remove(afu);
 		cxl_pci_remove_afu(afu);
 	}
 	cxl_pci_remove_adapter(adapter);
@@ -1451,8 +1456,6 @@
 		if (cxl_afu_select_best_mode(afu))
 			goto err;
 
-		cxl_pci_vphb_reconfigure(afu);
-
 		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
 			/* Reset the device context.
 			 * TODO: make this less disruptive
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index baa4087..c960a09 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -99,113 +99,90 @@
 	return (bus << 8) + devfn;
 }
 
-static unsigned long cxl_pcie_cfg_addr(struct pci_controller* phb,
-				       u8 bus, u8 devfn, int offset)
-{
-	int record = cxl_pcie_cfg_record(bus, devfn);
-
-	return (unsigned long)phb->cfg_addr + ((unsigned long)phb->cfg_data * record) + offset;
-}
-
-
 static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
-				int offset, int len,
-				volatile void __iomem **ioaddr,
-				u32 *mask, int *shift)
+				struct cxl_afu **_afu, int *_record)
 {
 	struct pci_controller *phb;
 	struct cxl_afu *afu;
-	unsigned long addr;
+	int record;
 
 	phb = pci_bus_to_host(bus);
 	if (phb == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
+
 	afu = (struct cxl_afu *)phb->private_data;
-
-	if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num)
+	record = cxl_pcie_cfg_record(bus->number, devfn);
+	if (record > afu->crs_num)
 		return PCIBIOS_DEVICE_NOT_FOUND;
-	if (offset >= (unsigned long)phb->cfg_data)
-		return PCIBIOS_BAD_REGISTER_NUMBER;
-	addr = cxl_pcie_cfg_addr(phb, bus->number, devfn, offset);
 
-	*ioaddr = (void *)(addr & ~0x3ULL);
-	*shift = ((addr & 0x3) * 8);
-	switch (len) {
-	case 1:
-		*mask = 0xff;
-		break;
-	case 2:
-		*mask = 0xffff;
-		break;
-	default:
-		*mask = 0xffffffff;
-		break;
-	}
+	*_afu = afu;
+	*_record = record;
 	return 0;
 }
 
-
-static inline bool cxl_config_link_ok(struct pci_bus *bus)
-{
-	struct pci_controller *phb;
-	struct cxl_afu *afu;
-
-	/* Config space IO is based on phb->cfg_addr, which is based on
-	 * afu_desc_mmio. This isn't safe to read/write when the link
-	 * goes down, as EEH tears down MMIO space.
-	 *
-	 * Check if the link is OK before proceeding.
-	 */
-
-	phb = pci_bus_to_host(bus);
-	if (phb == NULL)
-		return false;
-	afu = (struct cxl_afu *)phb->private_data;
-	return cxl_ops->link_ok(afu->adapter);
-}
-
 static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
 				int offset, int len, u32 *val)
 {
-	volatile void __iomem *ioaddr;
-	int shift, rc;
-	u32 mask;
+	int rc, record;
+	struct cxl_afu *afu;
+	u8 val8;
+	u16 val16;
+	u32 val32;
 
-	rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
-				  &mask, &shift);
+	rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
 	if (rc)
 		return rc;
 
-	if (!cxl_config_link_ok(bus))
+	switch (len) {
+	case 1:
+		rc = cxl_ops->afu_cr_read8(afu, record, offset,	&val8);
+		*val = val8;
+		break;
+	case 2:
+		rc = cxl_ops->afu_cr_read16(afu, record, offset, &val16);
+		*val = val16;
+		break;
+	case 4:
+		rc = cxl_ops->afu_cr_read32(afu, record, offset, &val32);
+		*val = val32;
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	if (rc)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	/* Can only read 32 bits */
-	*val = (in_le32(ioaddr) >> shift) & mask;
 	return PCIBIOS_SUCCESSFUL;
 }
 
 static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
 				 int offset, int len, u32 val)
 {
-	volatile void __iomem *ioaddr;
-	u32 v, mask;
-	int shift, rc;
+	int rc, record;
+	struct cxl_afu *afu;
 
-	rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
-				  &mask, &shift);
+	rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
 	if (rc)
 		return rc;
 
-	if (!cxl_config_link_ok(bus))
-		return PCIBIOS_DEVICE_NOT_FOUND;
+	switch (len) {
+	case 1:
+		rc = cxl_ops->afu_cr_write8(afu, record, offset, val & 0xff);
+		break;
+	case 2:
+		rc = cxl_ops->afu_cr_write16(afu, record, offset, val & 0xffff);
+		break;
+	case 4:
+		rc = cxl_ops->afu_cr_write32(afu, record, offset, val);
+		break;
+	default:
+		WARN_ON(1);
+	}
 
-	/* Can only write 32 bits so do read-modify-write */
-	mask <<= shift;
-	val <<= shift;
+	if (rc)
+		return PCIBIOS_SET_FAILED;
 
-	v = (in_le32(ioaddr) & ~mask) | (val & mask);
-
-	out_le32(ioaddr, v);
 	return PCIBIOS_SUCCESSFUL;
 }
 
@@ -233,23 +210,31 @@
 {
 	struct pci_dev *phys_dev;
 	struct pci_controller *phb, *phys_phb;
+	struct device_node *vphb_dn;
+	struct device *parent;
 
-	phys_dev = to_pci_dev(afu->adapter->dev.parent);
-	phys_phb = pci_bus_to_host(phys_dev->bus);
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		phys_dev = to_pci_dev(afu->adapter->dev.parent);
+		phys_phb = pci_bus_to_host(phys_dev->bus);
+		vphb_dn = phys_phb->dn;
+		parent = &phys_dev->dev;
+	} else {
+		vphb_dn = afu->adapter->dev.parent->of_node;
+		parent = afu->adapter->dev.parent;
+	}
 
 	/* Alloc and setup PHB data structure */
-	phb = pcibios_alloc_controller(phys_phb->dn);
-
+	phb = pcibios_alloc_controller(vphb_dn);
 	if (!phb)
 		return -ENODEV;
 
 	/* Setup parent in sysfs */
-	phb->parent = &phys_dev->dev;
+	phb->parent = parent;
 
 	/* Setup the PHB using arch provided callback */
 	phb->ops = &cxl_pcie_pci_ops;
-	phb->cfg_addr = afu->native->afu_desc_mmio + afu->crs_offset;
-	phb->cfg_data = (void *)(u64)afu->crs_len;
+	phb->cfg_addr = NULL;
+	phb->cfg_data = 0;
 	phb->private_data = afu;
 	phb->controller_ops = cxl_pci_controller_ops;
 
@@ -272,15 +257,6 @@
 	return 0;
 }
 
-void cxl_pci_vphb_reconfigure(struct cxl_afu *afu)
-{
-	/* When we are reconfigured, the AFU's MMIO space is unmapped
-	 * and remapped. We need to reflect this in the PHB's view of
-	 * the world.
-	 */
-	afu->phb->cfg_addr = afu->native->afu_desc_mmio + afu->crs_offset;
-}
-
 void cxl_pci_vphb_remove(struct cxl_afu *afu)
 {
 	struct pci_controller *phb;
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index f2ffe5b..5bcf11a 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -210,4 +210,9 @@
 void cxl_perst_reloads_same_image(struct cxl_afu *afu,
 				  bool perst_reloads_same_image);
 
+/*
+ * Read the VPD for the card where the AFU resides
+ */
+ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count);
+
 #endif /* _MISC_CXL_H */