vfio/iommu_type1: add new VFIO_TYPE1_NESTING_IOMMU IOMMU type

VFIO allows devices to be safely handed off to userspace by putting
them behind an IOMMU configured to ensure DMA and interrupt isolation.
This enables userspace KVM clients, such as kvmtool and QEMU, to further
map the device into a virtual machine.

With IOMMUs such as the ARM SMMU, it is then possible to provide SMMU
translation services to the guest operating system, which are nested
with the existing translation installed by VFIO. However, enabling this
feature means that the IOMMU driver must be informed that the VFIO domain
is being created for the purposes of nested translation.

Add a new IOMMU type (VFIO_TYPE1_NESTING_IOMMU) to the VFIO type-1
driver. The new IOMMU type acts identically to the VFIO_TYPE1v2_IOMMU
type, but additionally sets the DOMAIN_ATTR_NESTING attribute on its
IOMMU domains before the group is attached.

Cc: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 0734fbe..583ccdb 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -57,7 +57,8 @@
 	struct list_head	domain_list;
 	struct mutex		lock;
 	struct rb_root		dma_list;
-	bool v2;
+	bool			v2;
+	bool			nesting;
 };
 
 struct vfio_domain {
@@ -705,6 +706,15 @@
 		goto out_free;
 	}
 
+	if (iommu->nesting) {
+		int attr = 1;
+
+		ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
+					    &attr);
+		if (ret)
+			goto out_domain;
+	}
+
 	ret = iommu_attach_group(domain->domain, iommu_group);
 	if (ret)
 		goto out_domain;
@@ -819,17 +829,26 @@
 {
 	struct vfio_iommu *iommu;
 
-	if (arg != VFIO_TYPE1_IOMMU && arg != VFIO_TYPE1v2_IOMMU)
-		return ERR_PTR(-EINVAL);
-
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
 		return ERR_PTR(-ENOMEM);
 
+	switch (arg) {
+	case VFIO_TYPE1_IOMMU:
+		break;
+	case VFIO_TYPE1_NESTING_IOMMU:
+		iommu->nesting = true;
+	case VFIO_TYPE1v2_IOMMU:
+		iommu->v2 = true;
+		break;
+	default:
+		kfree(iommu);
+		return ERR_PTR(-EINVAL);
+	}
+
 	INIT_LIST_HEAD(&iommu->domain_list);
 	iommu->dma_list = RB_ROOT;
 	mutex_init(&iommu->lock);
-	iommu->v2 = (arg == VFIO_TYPE1v2_IOMMU);
 
 	return iommu;
 }
@@ -885,6 +904,7 @@
 		switch (arg) {
 		case VFIO_TYPE1_IOMMU:
 		case VFIO_TYPE1v2_IOMMU:
+		case VFIO_TYPE1_NESTING_IOMMU:
 			return 1;
 		case VFIO_DMA_CC_IOMMU:
 			if (!iommu)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 6612974..29715d2 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -33,6 +33,9 @@
 /* Check if EEH is supported */
 #define VFIO_EEH			5
 
+/* Two-stage IOMMU */
+#define VFIO_TYPE1_NESTING_IOMMU	6	/* Implies v2 */
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between