vfio/pci: Include sparse mmap capability for MSI-X table regions

vfio-pci has never allowed the user to directly mmap the MSI-X vector
table, but we've always relied on implicit knowledge of the user that
they cannot do this.  Now that we have capability chains that we can
expose in the region info ioctl and a sparse mmap capability that
represents the sub-areas within the region that can be mmap'd, we can
make the mmap constraints more explicit.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 2760a7b..4682207 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -421,6 +421,48 @@
 	return walk.ret;
 }
 
+static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
+				struct vfio_info_cap *caps)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_sparse_mmap *sparse;
+	size_t end, size;
+	int nr_areas = 2, i = 0;
+
+	end = pci_resource_len(vdev->pdev, vdev->msix_bar);
+
+	/* If MSI-X table is aligned to the start or end, only one area */
+	if (((vdev->msix_offset & PAGE_MASK) == 0) ||
+	    (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
+		nr_areas = 1;
+
+	size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas));
+
+	header = vfio_info_cap_add(caps, size,
+				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	sparse = container_of(header,
+			      struct vfio_region_info_cap_sparse_mmap, header);
+	sparse->nr_areas = nr_areas;
+
+	if (vdev->msix_offset & PAGE_MASK) {
+		sparse->areas[i].offset = 0;
+		sparse->areas[i].size = vdev->msix_offset & PAGE_MASK;
+		i++;
+	}
+
+	if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) {
+		sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset +
+						     vdev->msix_size);
+		sparse->areas[i].size = end - sparse->areas[i].offset;
+		i++;
+	}
+
+	return 0;
+}
+
 static long vfio_pci_ioctl(void *device_data,
 			   unsigned int cmd, unsigned long arg)
 {
@@ -451,6 +493,8 @@
 	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
 		struct pci_dev *pdev = vdev->pdev;
 		struct vfio_region_info info;
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		int ret;
 
 		minsz = offsetofend(struct vfio_region_info, offset);
 
@@ -479,8 +523,15 @@
 				     VFIO_REGION_INFO_FLAG_WRITE;
 			if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
 			    pci_resource_flags(pdev, info.index) &
-			    IORESOURCE_MEM && info.size >= PAGE_SIZE)
+			    IORESOURCE_MEM && info.size >= PAGE_SIZE) {
 				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
+				if (info.index == vdev->msix_bar) {
+					ret = msix_sparse_mmap_cap(vdev, &caps);
+					if (ret)
+						return ret;
+				}
+			}
+
 			break;
 		case VFIO_PCI_ROM_REGION_INDEX:
 		{
@@ -520,6 +571,26 @@
 			return -EINVAL;
 		}
 
+		if (caps.size) {
+			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
+			if (info.argsz < sizeof(info) + caps.size) {
+				info.argsz = sizeof(info) + caps.size;
+				info.cap_offset = 0;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				ret = copy_to_user((void __user *)arg +
+						   sizeof(info), caps.buf,
+						   caps.size);
+				if (ret) {
+					kfree(caps.buf);
+					return ret;
+				}
+				info.cap_offset = sizeof(info);
+			}
+
+			kfree(caps.buf);
+		}
+
 		return copy_to_user((void __user *)arg, &info, minsz);
 
 	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {