powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group

So far one TCE table could only be used by one IOMMU group. However
IODA2 hardware allows programming the same TCE table address to
multiple PE allowing sharing tables.

This replaces a single pointer to a group in a iommu_table struct
with a linked list of groups which provides the way of invalidating
TCE cache for every PE when an actual TCE table is updated. This adds
pnv_pci_link_table_and_group() and pnv_pci_unlink_table_and_group()
helpers to manage the list. However without VFIO, it is still going
to be a single IOMMU group per iommu_table.

This changes iommu_add_device() to add a device to a first group
from the group list of a table as it is only called from the platform
init code or PCI bus notifier and at these moments there is only
one group per table.

This does not change TCE invalidation code to loop through all
attached groups in order to simplify this patch and because
it is not really needed in most cases. IODA2 is fixed in a later
patch.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[aw: for the vfio related changes]
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index c4bc345..ffc634a 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -88,7 +88,7 @@
  */
 struct tce_container {
 	struct mutex lock;
-	struct iommu_table *tbl;
+	struct iommu_group *grp;
 	bool enabled;
 	unsigned long locked_pages;
 };
@@ -103,13 +103,42 @@
 	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
 }
 
+static long tce_iommu_find_table(struct tce_container *container,
+		phys_addr_t ioba, struct iommu_table **ptbl)
+{
+	long i;
+	struct iommu_table_group *table_group;
+
+	table_group = iommu_group_get_iommudata(container->grp);
+	if (!table_group)
+		return -1;
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbl = table_group->tables[i];
+
+		if (tbl) {
+			unsigned long entry = ioba >> tbl->it_page_shift;
+			unsigned long start = tbl->it_offset;
+			unsigned long end = start + tbl->it_size;
+
+			if ((start <= entry) && (entry < end)) {
+				*ptbl = tbl;
+				return i;
+			}
+		}
+	}
+
+	return -1;
+}
+
 static int tce_iommu_enable(struct tce_container *container)
 {
 	int ret = 0;
 	unsigned long locked;
-	struct iommu_table *tbl = container->tbl;
+	struct iommu_table *tbl;
+	struct iommu_table_group *table_group;
 
-	if (!container->tbl)
+	if (!container->grp)
 		return -ENXIO;
 
 	if (!current->mm)
@@ -143,6 +172,11 @@
 	 * as this information is only available from KVM and VFIO is
 	 * KVM agnostic.
 	 */
+	table_group = iommu_group_get_iommudata(container->grp);
+	if (!table_group)
+		return -ENODEV;
+
+	tbl = table_group->tables[0];
 	locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
 	ret = try_increment_locked_vm(locked);
 	if (ret)
@@ -190,11 +224,10 @@
 {
 	struct tce_container *container = iommu_data;
 
-	WARN_ON(container->tbl && !container->tbl->it_table_group->group);
+	WARN_ON(container->grp);
 
-	if (container->tbl && container->tbl->it_table_group->group)
-		tce_iommu_detach_group(iommu_data,
-				container->tbl->it_table_group->group);
+	if (container->grp)
+		tce_iommu_detach_group(iommu_data, container->grp);
 
 	tce_iommu_disable(container);
 	mutex_destroy(&container->lock);
@@ -312,9 +345,16 @@
 
 	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
 		struct vfio_iommu_spapr_tce_info info;
-		struct iommu_table *tbl = container->tbl;
+		struct iommu_table *tbl;
+		struct iommu_table_group *table_group;
 
-		if (WARN_ON(!tbl))
+		if (WARN_ON(!container->grp))
+			return -ENXIO;
+
+		table_group = iommu_group_get_iommudata(container->grp);
+
+		tbl = table_group->tables[0];
+		if (WARN_ON_ONCE(!tbl))
 			return -ENXIO;
 
 		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
@@ -337,17 +377,13 @@
 	}
 	case VFIO_IOMMU_MAP_DMA: {
 		struct vfio_iommu_type1_dma_map param;
-		struct iommu_table *tbl = container->tbl;
+		struct iommu_table *tbl = NULL;
 		unsigned long tce;
+		long num;
 
 		if (!container->enabled)
 			return -EPERM;
 
-		if (!tbl)
-			return -ENXIO;
-
-		BUG_ON(!tbl->it_table_group->group);
-
 		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
 
 		if (copy_from_user(&param, (void __user *)arg, minsz))
@@ -360,6 +396,10 @@
 				VFIO_DMA_MAP_FLAG_WRITE))
 			return -EINVAL;
 
+		num = tce_iommu_find_table(container, param.iova, &tbl);
+		if (num < 0)
+			return -ENXIO;
+
 		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
 				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
 			return -EINVAL;
@@ -385,14 +425,12 @@
 	}
 	case VFIO_IOMMU_UNMAP_DMA: {
 		struct vfio_iommu_type1_dma_unmap param;
-		struct iommu_table *tbl = container->tbl;
+		struct iommu_table *tbl = NULL;
+		long num;
 
 		if (!container->enabled)
 			return -EPERM;
 
-		if (WARN_ON(!tbl))
-			return -ENXIO;
-
 		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
 				size);
 
@@ -406,6 +444,10 @@
 		if (param.flags)
 			return -EINVAL;
 
+		num = tce_iommu_find_table(container, param.iova, &tbl);
+		if (num < 0)
+			return -ENXIO;
+
 		if (param.size & ~IOMMU_PAGE_MASK(tbl))
 			return -EINVAL;
 
@@ -434,12 +476,11 @@
 		mutex_unlock(&container->lock);
 		return 0;
 	case VFIO_EEH_PE_OP:
-		if (!container->tbl || !container->tbl->it_table_group->group)
+		if (!container->grp)
 			return -ENODEV;
 
-		return vfio_spapr_iommu_eeh_ioctl(
-				container->tbl->it_table_group->group,
-				cmd, arg);
+		return vfio_spapr_iommu_eeh_ioctl(container->grp,
+						  cmd, arg);
 	}
 
 	return -ENOTTY;
@@ -450,17 +491,15 @@
 {
 	int ret;
 	struct tce_container *container = iommu_data;
-	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
+	struct iommu_table_group *table_group;
 
-	BUG_ON(!tbl);
 	mutex_lock(&container->lock);
 
 	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
 			iommu_group_id(iommu_group), iommu_group); */
-	if (container->tbl) {
+	if (container->grp) {
 		pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
-				iommu_group_id(container->tbl->
-						it_table_group->group),
+				iommu_group_id(container->grp),
 				iommu_group_id(iommu_group));
 		ret = -EBUSY;
 		goto unlock_exit;
@@ -473,9 +512,15 @@
 		goto unlock_exit;
 	}
 
-	ret = iommu_take_ownership(tbl);
+	table_group = iommu_group_get_iommudata(iommu_group);
+	if (!table_group) {
+		ret = -ENXIO;
+		goto unlock_exit;
+	}
+
+	ret = iommu_take_ownership(table_group->tables[0]);
 	if (!ret)
-		container->tbl = tbl;
+		container->grp = iommu_group;
 
 unlock_exit:
 	mutex_unlock(&container->lock);
@@ -487,26 +532,31 @@
 		struct iommu_group *iommu_group)
 {
 	struct tce_container *container = iommu_data;
-	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
+	struct iommu_table_group *table_group;
+	struct iommu_table *tbl;
 
-	BUG_ON(!tbl);
 	mutex_lock(&container->lock);
-	if (tbl != container->tbl) {
+	if (iommu_group != container->grp) {
 		pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
 				iommu_group_id(iommu_group),
-				iommu_group_id(tbl->it_table_group->group));
+				iommu_group_id(container->grp));
 		goto unlock_exit;
 	}
 
 	if (container->enabled) {
 		pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
-				iommu_group_id(tbl->it_table_group->group));
+				iommu_group_id(container->grp));
 		tce_iommu_disable(container);
 	}
 
 	/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
 	   iommu_group_id(iommu_group), iommu_group); */
-	container->tbl = NULL;
+	container->grp = NULL;
+
+	table_group = iommu_group_get_iommudata(iommu_group);
+	BUG_ON(!table_group);
+
+	tbl = table_group->tables[0];
 	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
 	iommu_release_ownership(tbl);