Blame - drivers/vfio/vfio_iommu_type1.c - kernel/msm-4.9

blob: 0d5b667c0e652de526fdcdef866a1cb878ee3fe1 [file] [log] [blame]

Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	1	/*
				2	* VFIO: IOMMU DMA mapping support for Type1 IOMMU
				3	*
				4	* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
				5	* Author: Alex Williamson <alex.williamson@redhat.com>
				6	*
				7	* This program is free software; you can redistribute it and/or modify
				8	* it under the terms of the GNU General Public License version 2 as
				9	* published by the Free Software Foundation.
				10	*
				11	* Derived from original vfio:
				12	* Copyright 2010 Cisco Systems, Inc. All rights reserved.
				13	* Author: Tom Lyon, pugs@cisco.com
				14	*
				15	* We arbitrarily define a Type1 IOMMU as one matching the below code.
				16	* It could be called the x86 IOMMU as it's designed for AMD-Vi & Intel
				17	* VT-d, but that makes it harder to re-use as theoretically anyone
				18	* implementing a similar IOMMU could make use of this. We expect the
				19	* IOMMU to support the IOMMU API and have few to no restrictions around
				20	* the IOVA range that can be mapped. The Type1 IOMMU is currently
				21	* optimized for relatively static mappings of a userspace process with
				22	* userspace pages pinned into memory. We also assume devices and IOMMU
				23	* domains are PCI based as the IOMMU API is still centered around a
				24	* device/bus interface rather than a group interface.
				25	*/
				26
				27	#include <linux/compat.h>
				28	#include <linux/device.h>
				29	#include <linux/fs.h>
				30	#include <linux/iommu.h>
				31	#include <linux/module.h>
				32	#include <linux/mm.h>
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	33	#include <linux/rbtree.h>
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	34	#include <linux/sched.h>
				35	#include <linux/slab.h>
				36	#include <linux/uaccess.h>
				37	#include <linux/vfio.h>
				38	#include <linux/workqueue.h>
				39
				40	#define DRIVER_VERSION "0.2"
				41	#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
				42	#define DRIVER_DESC "Type1 IOMMU driver for VFIO"
				43
				44	static bool allow_unsafe_interrupts;
				45	module_param_named(allow_unsafe_interrupts,
				46	allow_unsafe_interrupts, bool, S_IRUGO \| S_IWUSR);
				47	MODULE_PARM_DESC(allow_unsafe_interrupts,
				48	"Enable VFIO IOMMU support for on platforms without interrupt remapping support.");
				49
Alex Williamson	5c6c2b2	2013-06-21 09:38:11 -0600	[diff] [blame]	50	static bool disable_hugepages;
				51	module_param_named(disable_hugepages,
				52	disable_hugepages, bool, S_IRUGO \| S_IWUSR);
				53	MODULE_PARM_DESC(disable_hugepages,
				54	"Disable VFIO IOMMU support for IOMMU hugepages.");
				55
Alex Williamson	4f97abd	2019-04-03 12:36:21 -0600	[diff] [blame]	56	static unsigned int dma_entry_limit __read_mostly = U16_MAX;
				57	module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644);
				58	MODULE_PARM_DESC(dma_entry_limit,
				59	"Maximum number of user DMA mappings per container (65535).");
				60
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	61	struct vfio_iommu {
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	62	struct list_head domain_list;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	63	struct mutex lock;
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	64	struct rb_root dma_list;
Alex Williamson	4f97abd	2019-04-03 12:36:21 -0600	[diff] [blame]	65	unsigned int dma_avail;
Will Deacon	f5c9ece	2014-09-29 10:06:19 -0600	[diff] [blame]	66	bool v2;
				67	bool nesting;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	68	};
				69
				70	struct vfio_domain {
				71	struct iommu_domain *domain;
				72	struct list_head next;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	73	struct list_head group_list;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	74	int prot; /* IOMMU_CACHE */
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame]	75	bool fgsp; /* Fine-grained super pages */
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	76	};
				77
				78	struct vfio_dma {
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	79	struct rb_node node;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	80	dma_addr_t iova; /* Device address */
				81	unsigned long vaddr; /* Process virtual addr */
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	82	size_t size; /* Map size (bytes) */
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	83	int prot; /* IOMMU_READ/WRITE */
				84	};
				85
				86	struct vfio_group {
				87	struct iommu_group *iommu_group;
				88	struct list_head next;
				89	};
				90
				91	/*
				92	* This code handles mapping and unmapping of user data buffers
				93	* into DMA'ble space using the IOMMU
				94	*/
				95
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	96	static struct vfio_dma vfio_find_dma(struct vfio_iommu iommu,
				97	dma_addr_t start, size_t size)
				98	{
				99	struct rb_node *node = iommu->dma_list.rb_node;
				100
				101	while (node) {
				102	struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
				103
				104	if (start + size <= dma->iova)
				105	node = node->rb_left;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	106	else if (start >= dma->iova + dma->size)
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	107	node = node->rb_right;
				108	else
				109	return dma;
				110	}
				111
				112	return NULL;
				113	}
				114
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	115	static void vfio_link_dma(struct vfio_iommu iommu, struct vfio_dma new)
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	116	{
				117	struct rb_node *link = &iommu->dma_list.rb_node, parent = NULL;
				118	struct vfio_dma *dma;
				119
				120	while (*link) {
				121	parent = *link;
				122	dma = rb_entry(parent, struct vfio_dma, node);
				123
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	124	if (new->iova + new->size <= dma->iova)
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	125	link = &(*link)->rb_left;
				126	else
				127	link = &(*link)->rb_right;
				128	}
				129
				130	rb_link_node(&new->node, parent, link);
				131	rb_insert_color(&new->node, &iommu->dma_list);
				132	}
				133
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	134	static void vfio_unlink_dma(struct vfio_iommu iommu, struct vfio_dma old)
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	135	{
				136	rb_erase(&old->node, &iommu->dma_list);
				137	}
				138
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	139	static int vfio_lock_acct(long npage, bool *lock_cap)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	140	{
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	141	int ret;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	142
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	143	if (!npage)
				144	return 0;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	145
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	146	if (!current->mm)
				147	return -ESRCH; /* process exited */
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	148
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	149	ret = down_write_killable(&current->mm->mmap_sem);
				150	if (!ret) {
				151	if (npage > 0) {
				152	if (lock_cap ? !*lock_cap : !capable(CAP_IPC_LOCK)) {
				153	unsigned long limit;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	154
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	155	limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
				156
				157	if (current->mm->locked_vm + npage > limit)
				158	ret = -ENOMEM;
				159	}
				160	}
				161
				162	if (!ret)
				163	current->mm->locked_vm += npage;
				164
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	165	up_write(&current->mm->mmap_sem);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	166	}
				167
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	168	return ret;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	169	}
				170
				171	/*
				172	* Some mappings aren't backed by a struct page, for example an mmap'd
				173	* MMIO range for our own or another device. These use a different
				174	* pfn conversion and shouldn't be tracked as locked pages.
				175	*/
				176	static bool is_invalid_reserved_pfn(unsigned long pfn)
				177	{
				178	if (pfn_valid(pfn)) {
				179	bool reserved;
				180	struct page *tail = pfn_to_page(pfn);
David Rientjes	668f9abb	2014-03-03 15:38:18 -0800	[diff] [blame]	181	struct page *head = compound_head(tail);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	182	reserved = !!(PageReserved(head));
				183	if (head != tail) {
				184	/*
				185	* "head" is not a dangling pointer
David Rientjes	668f9abb	2014-03-03 15:38:18 -0800	[diff] [blame]	186	* (compound_head takes care of that)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	187	* but the hugepage may have been split
				188	* from under us (and we may not hold a
				189	* reference count on the head page so it can
				190	* be reused before we run PageReferenced), so
				191	* we've to check PageTail before returning
				192	* what we just read.
				193	*/
				194	smp_rmb();
				195	if (PageTail(tail))
				196	return reserved;
				197	}
				198	return PageReserved(tail);
				199	}
				200
				201	return true;
				202	}
				203
				204	static int put_pfn(unsigned long pfn, int prot)
				205	{
				206	if (!is_invalid_reserved_pfn(pfn)) {
				207	struct page *page = pfn_to_page(pfn);
				208	if (prot & IOMMU_WRITE)
				209	SetPageDirty(page);
				210	put_page(page);
				211	return 1;
				212	}
				213	return 0;
				214	}
				215
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	216	static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
				217	{
				218	struct page *page[1];
				219	struct vm_area_struct *vma;
				220	int ret = -EFAULT;
				221
				222	if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
				223	*pfn = page_to_pfn(page[0]);
				224	return 0;
				225	}
				226
				227	down_read(&current->mm->mmap_sem);
				228
				229	vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
				230
				231	if (vma && vma->vm_flags & VM_PFNMAP) {
				232	*pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
				233	if (is_invalid_reserved_pfn(*pfn))
				234	ret = 0;
				235	}
				236
				237	up_read(&current->mm->mmap_sem);
				238
				239	return ret;
				240	}
				241
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	242	/*
				243	* Attempt to pin pages. We really don't want to track all the pfns and
				244	* the iommu can only map chunks of consecutive pfns anyway, so get the
				245	* first page and all consecutive pages with the same locking.
				246	*/
				247	static long vfio_pin_pages(unsigned long vaddr, long npage,
				248	int prot, unsigned long *pfn_base)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	249	{
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	250	unsigned long pfn = 0, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	251	bool lock_cap = capable(CAP_IPC_LOCK);
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	252	long ret, i = 1;
Alex Williamson	babbf176	2015-02-06 10:59:16 -0700	[diff] [blame]	253	bool rsvd;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	254
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	255	if (!current->mm)
				256	return -ENODEV;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	257
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	258	ret = vaddr_get_pfn(vaddr, prot, pfn_base);
				259	if (ret)
				260	return ret;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	261
Alex Williamson	babbf176	2015-02-06 10:59:16 -0700	[diff] [blame]	262	rsvd = is_invalid_reserved_pfn(*pfn_base);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	263
Alex Williamson	babbf176	2015-02-06 10:59:16 -0700	[diff] [blame]	264	if (!rsvd && !lock_cap && current->mm->locked_vm + 1 > limit) {
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	265	put_pfn(*pfn_base, prot);
				266	pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
				267	limit << PAGE_SHIFT);
				268	return -ENOMEM;
				269	}
				270
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	271	if (unlikely(disable_hugepages))
				272	goto out;
Alex Williamson	5c6c2b2	2013-06-21 09:38:11 -0600	[diff] [blame]	273
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	274	/* Lock all the consecutive pages from pfn_base */
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	275	for (vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	276	ret = vaddr_get_pfn(vaddr, prot, &pfn);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	277	if (ret)
				278	break;
				279
Alex Williamson	babbf176	2015-02-06 10:59:16 -0700	[diff] [blame]	280	if (pfn != *pfn_base + i \|\|
				281	rsvd != is_invalid_reserved_pfn(pfn)) {
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	282	put_pfn(pfn, prot);
				283	break;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	284	}
				285
Alex Williamson	babbf176	2015-02-06 10:59:16 -0700	[diff] [blame]	286	if (!rsvd && !lock_cap &&
				287	current->mm->locked_vm + i + 1 > limit) {
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	288	put_pfn(pfn, prot);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	289	pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
				290	__func__, limit << PAGE_SHIFT);
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	291	ret = -ENOMEM;
				292	goto unpin_out;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	293	}
				294	}
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	295
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	296	out:
Alex Williamson	babbf176	2015-02-06 10:59:16 -0700	[diff] [blame]	297	if (!rsvd)
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	298	ret = vfio_lock_acct(i, &lock_cap);
				299
				300	unpin_out:
				301	if (ret) {
				302	if (!rsvd) {
				303	for (pfn = *pfn_base ; i ; pfn++, i--)
				304	put_pfn(pfn, prot);
				305	}
				306
				307	return ret;
				308	}
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	309
				310	return i;
				311	}
				312
				313	static long vfio_unpin_pages(unsigned long pfn, long npage,
				314	int prot, bool do_accounting)
				315	{
				316	unsigned long unlocked = 0;
				317	long i;
				318
				319	for (i = 0; i < npage; i++)
				320	unlocked += put_pfn(pfn++, prot);
				321
				322	if (do_accounting)
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	323	vfio_lock_acct(-unlocked, NULL);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	324
				325	return unlocked;
				326	}
				327
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	328	static void vfio_unmap_unpin(struct vfio_iommu iommu, struct vfio_dma dma)
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	329	{
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	330	dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
				331	struct vfio_domain domain, d;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	332	long unlocked = 0;
				333
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	334	if (!dma->size)
				335	return;
				336	/*
				337	* We use the IOMMU to track the physical addresses, otherwise we'd
				338	* need a much more complicated tracking system. Unfortunately that
				339	* means we need to use one of the iommu domains to figure out the
				340	* pfns to unpin. The rest need to be unmapped in advance so we have
				341	* no iommu translations remaining when the pages are unpinned.
				342	*/
				343	domain = d = list_first_entry(&iommu->domain_list,
				344	struct vfio_domain, next);
				345
Alex Williamson	c5e6688	2015-02-06 14:19:12 -0700	[diff] [blame]	346	list_for_each_entry_continue(d, &iommu->domain_list, next) {
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	347	iommu_unmap(d->domain, dma->iova, dma->size);
Alex Williamson	c5e6688	2015-02-06 14:19:12 -0700	[diff] [blame]	348	cond_resched();
				349	}
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	350
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	351	while (iova < end) {
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame]	352	size_t unmapped, len;
				353	phys_addr_t phys, next;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	354
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	355	phys = iommu_iova_to_phys(domain->domain, iova);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	356	if (WARN_ON(!phys)) {
				357	iova += PAGE_SIZE;
				358	continue;
				359	}
				360
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame]	361	/*
				362	* To optimize for fewer iommu_unmap() calls, each of which
				363	* may require hardware cache flushing, try to find the
				364	* largest contiguous physical memory chunk to unmap.
				365	*/
				366	for (len = PAGE_SIZE;
				367	!domain->fgsp && iova + len < end; len += PAGE_SIZE) {
				368	next = iommu_iova_to_phys(domain->domain, iova + len);
				369	if (next != phys + len)
				370	break;
				371	}
				372
				373	unmapped = iommu_unmap(domain->domain, iova, len);
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	374	if (WARN_ON(!unmapped))
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	375	break;
				376
				377	unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
				378	unmapped >> PAGE_SHIFT,
				379	dma->prot, false);
				380	iova += unmapped;
Alex Williamson	c5e6688	2015-02-06 14:19:12 -0700	[diff] [blame]	381
				382	cond_resched();
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	383	}
				384
Alex Williamson	9f43f70	2017-04-13 14:10:15 -0600	[diff] [blame]	385	vfio_lock_acct(-unlocked, NULL);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	386	}
				387
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	388	static void vfio_remove_dma(struct vfio_iommu iommu, struct vfio_dma dma)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	389	{
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	390	vfio_unmap_unpin(iommu, dma);
				391	vfio_unlink_dma(iommu, dma);
				392	kfree(dma);
Alex Williamson	4f97abd	2019-04-03 12:36:21 -0600	[diff] [blame]	393	iommu->dma_avail++;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	394	}
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	395
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	396	static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
				397	{
				398	struct vfio_domain *domain;
Eric Auger	4644321	2015-10-29 17:49:42 +0000	[diff] [blame]	399	unsigned long bitmap = ULONG_MAX;
Alex Williamson	f5bfdbf	2013-06-25 16:01:44 -0600	[diff] [blame]	400
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	401	mutex_lock(&iommu->lock);
				402	list_for_each_entry(domain, &iommu->domain_list, next)
Robin Murphy	d16e0fa	2016-04-07 18:42:06 +0100	[diff] [blame]	403	bitmap &= domain->domain->pgsize_bitmap;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	404	mutex_unlock(&iommu->lock);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	405
Eric Auger	4644321	2015-10-29 17:49:42 +0000	[diff] [blame]	406	/*
				407	* In case the IOMMU supports page sizes smaller than PAGE_SIZE
				408	* we pretend PAGE_SIZE is supported and hide sub-PAGE_SIZE sizes.
				409	* That way the user will be able to map/unmap buffers whose size/
				410	* start address is aligned with PAGE_SIZE. Pinning code uses that
				411	* granularity while iommu driver can use the sub-PAGE_SIZE size
				412	* to map the buffer.
				413	*/
				414	if (bitmap & ~PAGE_MASK) {
				415	bitmap &= PAGE_MASK;
				416	bitmap \|= PAGE_SIZE;
				417	}
				418
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	419	return bitmap;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	420	}
				421
				422	static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
				423	struct vfio_iommu_type1_dma_unmap *unmap)
				424	{
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	425	uint64_t mask;
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	426	struct vfio_dma *dma;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	427	size_t unmapped = 0;
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	428	int ret = 0;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	429
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	430	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	431
				432	if (unmap->iova & mask)
				433	return -EINVAL;
Alex Williamson	f5bfdbf	2013-06-25 16:01:44 -0600	[diff] [blame]	434	if (!unmap->size \|\| unmap->size & mask)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	435	return -EINVAL;
				436
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	437	WARN_ON(mask & PAGE_MASK);
				438
				439	mutex_lock(&iommu->lock);
				440
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	441	/*
				442	* vfio-iommu-type1 (v1) - User mappings were coalesced together to
				443	* avoid tracking individual mappings. This means that the granularity
				444	* of the original mapping was lost and the user was allowed to attempt
				445	* to unmap any range. Depending on the contiguousness of physical
				446	* memory and page sizes supported by the IOMMU, arbitrary unmaps may
				447	* or may not have worked. We only guaranteed unmap granularity
				448	* matching the original mapping; even though it was untracked here,
				449	* the original mappings are reflected in IOMMU mappings. This
				450	* resulted in a couple unusual behaviors. First, if a range is not
				451	* able to be unmapped, ex. a set of 4k pages that was mapped as a
				452	* 2M hugepage into the IOMMU, the unmap ioctl returns success but with
				453	* a zero sized unmap. Also, if an unmap request overlaps the first
				454	* address of a hugepage, the IOMMU will unmap the entire hugepage.
				455	* This also returns success and the returned unmap size reflects the
				456	* actual size unmapped.
				457	*
				458	* We attempt to maintain compatibility with this "v1" interface, but
				459	* we take control out of the hands of the IOMMU. Therefore, an unmap
				460	* request offset from the beginning of the original mapping will
				461	* return success with zero sized unmap. And an unmap request covering
				462	* the first iova of mapping will unmap the entire range.
				463	*
				464	* The v2 version of this interface intends to be more deterministic.
				465	* Unmap requests must fully cover previous mappings. Multiple
				466	* mappings may still be unmaped by specifying large ranges, but there
				467	* must not be any previous mappings bisected by the range. An error
				468	* will be returned if these conditions are not met. The v2 interface
				469	* will only return success and a size of zero if there were no
				470	* mappings within the range.
				471	*/
				472	if (iommu->v2) {
				473	dma = vfio_find_dma(iommu, unmap->iova, 0);
				474	if (dma && dma->iova != unmap->iova) {
				475	ret = -EINVAL;
				476	goto unlock;
				477	}
				478	dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
				479	if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
				480	ret = -EINVAL;
				481	goto unlock;
				482	}
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	483	}
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	484
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	485	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
				486	if (!iommu->v2 && unmap->iova > dma->iova)
				487	break;
				488	unmapped += dma->size;
				489	vfio_remove_dma(iommu, dma);
				490	}
				491
				492	unlock:
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	493	mutex_unlock(&iommu->lock);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	494
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	495	/* Report how much was unmapped */
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	496	unmap->size = unmapped;
				497
				498	return ret;
				499	}
				500
				501	/*
				502	* Turns out AMD IOMMU has a page table bug where it won't map large pages
				503	* to a region that previously mapped smaller pages. This should be fixed
				504	* soon, so this is just a temporary workaround to break mappings down into
				505	* PAGE_SIZE. Better to map smaller pages than nothing.
				506	*/
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	507	static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova,
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	508	unsigned long pfn, long npage, int prot)
				509	{
				510	long i;
Alex Williamson	089f1c6	2016-05-30 07:58:10 -0600	[diff] [blame]	511	int ret = 0;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	512
				513	for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	514	ret = iommu_map(domain->domain, iova,
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	515	(phys_addr_t)pfn << PAGE_SHIFT,
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	516	PAGE_SIZE, prot \| domain->prot);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	517	if (ret)
				518	break;
				519	}
				520
				521	for (; i < npage && i > 0; i--, iova -= PAGE_SIZE)
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	522	iommu_unmap(domain->domain, iova, PAGE_SIZE);
				523
				524	return ret;
				525	}
				526
				527	static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
				528	unsigned long pfn, long npage, int prot)
				529	{
				530	struct vfio_domain *d;
				531	int ret;
				532
				533	list_for_each_entry(d, &iommu->domain_list, next) {
				534	ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
				535	npage << PAGE_SHIFT, prot \| d->prot);
				536	if (ret) {
				537	if (ret != -EBUSY \|\|
				538	map_try_harder(d, iova, pfn, npage, prot))
				539	goto unwind;
				540	}
Alex Williamson	c5e6688	2015-02-06 14:19:12 -0700	[diff] [blame]	541
				542	cond_resched();
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	543	}
				544
				545	return 0;
				546
				547	unwind:
				548	list_for_each_entry_continue_reverse(d, &iommu->domain_list, next)
				549	iommu_unmap(d->domain, iova, npage << PAGE_SHIFT);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	550
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	551	return ret;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	552	}
				553
				554	static int vfio_dma_do_map(struct vfio_iommu *iommu,
				555	struct vfio_iommu_type1_dma_map *map)
				556	{
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	557	dma_addr_t iova = map->iova;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	558	unsigned long vaddr = map->vaddr;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	559	size_t size = map->size;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	560	long npage;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	561	int ret = 0, prot = 0;
				562	uint64_t mask;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	563	struct vfio_dma *dma;
Antonios Motakis	d93b3ac	2013-10-11 10:40:46 -0600	[diff] [blame]	564	unsigned long pfn;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	565
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	566	/* Verify that none of our __u64 fields overflow */
				567	if (map->size != size \|\| map->vaddr != vaddr \|\| map->iova != iova)
				568	return -EINVAL;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	569
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	570	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	571
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	572	WARN_ON(mask & PAGE_MASK);
				573
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	574	/* READ/WRITE from device perspective */
				575	if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
				576	prot \|= IOMMU_WRITE;
				577	if (map->flags & VFIO_DMA_MAP_FLAG_READ)
				578	prot \|= IOMMU_READ;
				579
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	580	if (!prot \|\| !size \|\| (size \| iova \| vaddr) & mask)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	581	return -EINVAL;
				582
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	583	/* Don't allow IOVA or virtual address wrap */
				584	if (iova + size - 1 < iova \|\| vaddr + size - 1 < vaddr)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	585	return -EINVAL;
				586
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	587	mutex_lock(&iommu->lock);
				588
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	589	if (vfio_find_dma(iommu, iova, size)) {
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	590	mutex_unlock(&iommu->lock);
				591	return -EEXIST;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	592	}
				593
Alex Williamson	4f97abd	2019-04-03 12:36:21 -0600	[diff] [blame]	594	if (!iommu->dma_avail) {
				595	mutex_unlock(&iommu->lock);
				596	return -ENOSPC;
				597	}
				598
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	599	dma = kzalloc(sizeof(*dma), GFP_KERNEL);
				600	if (!dma) {
				601	mutex_unlock(&iommu->lock);
				602	return -ENOMEM;
				603	}
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	604
Alex Williamson	4f97abd	2019-04-03 12:36:21 -0600	[diff] [blame]	605	iommu->dma_avail--;
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	606	dma->iova = iova;
				607	dma->vaddr = vaddr;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	608	dma->prot = prot;
				609
				610	/* Insert zero-sized and grow as we map chunks of it */
				611	vfio_link_dma(iommu, dma);
				612
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	613	while (size) {
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	614	/* Pin a contiguous chunk of memory */
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	615	npage = vfio_pin_pages(vaddr + dma->size,
				616	size >> PAGE_SHIFT, prot, &pfn);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	617	if (npage <= 0) {
				618	WARN_ON(!npage);
				619	ret = (int)npage;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	620	break;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	621	}
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	622
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	623	/* Map it! */
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	624	ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	625	if (ret) {
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	626	vfio_unpin_pages(pfn, npage, prot, true);
				627	break;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	628	}
				629
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	630	size -= npage << PAGE_SHIFT;
				631	dma->size += npage << PAGE_SHIFT;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	632	}
				633
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	634	if (ret)
				635	vfio_remove_dma(iommu, dma);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	636
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	637	mutex_unlock(&iommu->lock);
				638	return ret;
				639	}
				640
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	641	static int vfio_bus_type(struct device dev, void data)
				642	{
				643	struct bus_type **bus = data;
				644
				645	if (bus && bus != dev->bus)
				646	return -EINVAL;
				647
				648	*bus = dev->bus;
				649
				650	return 0;
				651	}
				652
				653	static int vfio_iommu_replay(struct vfio_iommu *iommu,
				654	struct vfio_domain *domain)
				655	{
				656	struct vfio_domain *d;
				657	struct rb_node *n;
				658	int ret;
				659
				660	/* Arbitrarily pick the first domain in the list for lookups */
				661	d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
				662	n = rb_first(&iommu->dma_list);
				663
				664	/* If there's not a domain, there better not be any mappings */
				665	if (WARN_ON(n && !d))
				666	return -EINVAL;
				667
				668	for (; n; n = rb_next(n)) {
				669	struct vfio_dma *dma;
				670	dma_addr_t iova;
				671
				672	dma = rb_entry(n, struct vfio_dma, node);
				673	iova = dma->iova;
				674
				675	while (iova < dma->iova + dma->size) {
				676	phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
				677	size_t size;
				678
				679	if (WARN_ON(!phys)) {
				680	iova += PAGE_SIZE;
				681	continue;
				682	}
				683
				684	size = PAGE_SIZE;
				685
				686	while (iova + size < dma->iova + dma->size &&
				687	phys + size == iommu_iova_to_phys(d->domain,
				688	iova + size))
				689	size += PAGE_SIZE;
				690
				691	ret = iommu_map(domain->domain, iova, phys,
				692	size, dma->prot \| domain->prot);
				693	if (ret)
				694	return ret;
				695
				696	iova += size;
				697	}
				698	}
				699
				700	return 0;
				701	}
				702
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame]	703	/*
				704	* We change our unmap behavior slightly depending on whether the IOMMU
				705	* supports fine-grained superpages. IOMMUs like AMD-Vi will use a superpage
				706	* for practically any contiguous power-of-two mapping we give it. This means
				707	* we don't need to look for contiguous chunks ourselves to make unmapping
				708	* more efficient. On IOMMUs with coarse-grained super pages, like Intel VT-d
				709	* with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
				710	* significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
				711	* hugetlbfs is in use.
				712	*/
				713	static void vfio_test_domain_fgsp(struct vfio_domain *domain)
				714	{
				715	struct page *pages;
				716	int ret, order = get_order(PAGE_SIZE * 2);
				717
				718	pages = alloc_pages(GFP_KERNEL \| __GFP_ZERO, order);
				719	if (!pages)
				720	return;
				721
				722	ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
				723	IOMMU_READ \| IOMMU_WRITE \| domain->prot);
				724	if (!ret) {
				725	size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
				726
				727	if (unmapped == PAGE_SIZE)
				728	iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
				729	else
				730	domain->fgsp = true;
				731	}
				732
				733	__free_pages(pages, order);
				734	}
				735
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	736	static int vfio_iommu_type1_attach_group(void *iommu_data,
				737	struct iommu_group *iommu_group)
				738	{
				739	struct vfio_iommu *iommu = iommu_data;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	740	struct vfio_group group, g;
				741	struct vfio_domain domain, d;
				742	struct bus_type *bus = NULL;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	743	int ret;
				744
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	745	mutex_lock(&iommu->lock);
				746
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	747	list_for_each_entry(d, &iommu->domain_list, next) {
				748	list_for_each_entry(g, &d->group_list, next) {
				749	if (g->iommu_group != iommu_group)
				750	continue;
				751
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	752	mutex_unlock(&iommu->lock);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	753	return -EINVAL;
				754	}
				755	}
				756
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	757	group = kzalloc(sizeof(*group), GFP_KERNEL);
				758	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
				759	if (!group \|\| !domain) {
				760	ret = -ENOMEM;
				761	goto out_free;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	762	}
				763
				764	group->iommu_group = iommu_group;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	765
				766	/* Determine bus_type in order to allocate a domain */
				767	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
				768	if (ret)
				769	goto out_free;
				770
				771	domain->domain = iommu_domain_alloc(bus);
				772	if (!domain->domain) {
				773	ret = -EIO;
				774	goto out_free;
				775	}
				776
Will Deacon	f5c9ece	2014-09-29 10:06:19 -0600	[diff] [blame]	777	if (iommu->nesting) {
				778	int attr = 1;
				779
				780	ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
				781	&attr);
				782	if (ret)
				783	goto out_domain;
				784	}
				785
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	786	ret = iommu_attach_group(domain->domain, iommu_group);
				787	if (ret)
				788	goto out_domain;
				789
				790	INIT_LIST_HEAD(&domain->group_list);
				791	list_add(&group->next, &domain->group_list);
				792
				793	if (!allow_unsafe_interrupts &&
Joerg Roedel	eb165f0	2014-09-05 10:56:05 +0200	[diff] [blame]	794	!iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	795	pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
				796	__func__);
				797	ret = -EPERM;
				798	goto out_detach;
				799	}
				800
Joerg Roedel	eb165f0	2014-09-05 10:56:05 +0200	[diff] [blame]	801	if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	802	domain->prot \|= IOMMU_CACHE;
				803
				804	/*
				805	* Try to match an existing compatible domain. We don't want to
				806	* preclude an IOMMU driver supporting multiple bus_types and being
				807	* able to include different bus_types in the same IOMMU domain, so
				808	* we test whether the domains use the same iommu_ops rather than
				809	* testing if they're on the same bus_type.
				810	*/
				811	list_for_each_entry(d, &iommu->domain_list, next) {
				812	if (d->domain->ops == domain->domain->ops &&
				813	d->prot == domain->prot) {
				814	iommu_detach_group(domain->domain, iommu_group);
				815	if (!iommu_attach_group(d->domain, iommu_group)) {
				816	list_add(&group->next, &d->group_list);
				817	iommu_domain_free(domain->domain);
				818	kfree(domain);
				819	mutex_unlock(&iommu->lock);
				820	return 0;
				821	}
				822
				823	ret = iommu_attach_group(domain->domain, iommu_group);
				824	if (ret)
				825	goto out_domain;
				826	}
				827	}
				828
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame]	829	vfio_test_domain_fgsp(domain);
				830
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	831	/* replay mappings on new domains */
				832	ret = vfio_iommu_replay(iommu, domain);
				833	if (ret)
				834	goto out_detach;
				835
				836	list_add(&domain->next, &iommu->domain_list);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	837
				838	mutex_unlock(&iommu->lock);
				839
				840	return 0;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	841
				842	out_detach:
				843	iommu_detach_group(domain->domain, iommu_group);
				844	out_domain:
				845	iommu_domain_free(domain->domain);
				846	out_free:
				847	kfree(domain);
				848	kfree(group);
				849	mutex_unlock(&iommu->lock);
				850	return ret;
				851	}
				852
				853	static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
				854	{
				855	struct rb_node *node;
				856
				857	while ((node = rb_first(&iommu->dma_list)))
				858	vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	859	}
				860
				861	static void vfio_iommu_type1_detach_group(void *iommu_data,
				862	struct iommu_group *iommu_group)
				863	{
				864	struct vfio_iommu *iommu = iommu_data;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	865	struct vfio_domain *domain;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	866	struct vfio_group *group;
				867
				868	mutex_lock(&iommu->lock);
				869
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	870	list_for_each_entry(domain, &iommu->domain_list, next) {
				871	list_for_each_entry(group, &domain->group_list, next) {
				872	if (group->iommu_group != iommu_group)
				873	continue;
				874
				875	iommu_detach_group(domain->domain, iommu_group);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	876	list_del(&group->next);
				877	kfree(group);
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	878	/*
				879	* Group ownership provides privilege, if the group
				880	* list is empty, the domain goes away. If it's the
				881	* last domain, then all the mappings go away too.
				882	*/
				883	if (list_empty(&domain->group_list)) {
				884	if (list_is_singular(&iommu->domain_list))
				885	vfio_iommu_unmap_unpin_all(iommu);
				886	iommu_domain_free(domain->domain);
				887	list_del(&domain->next);
				888	kfree(domain);
				889	}
				890	goto done;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	891	}
				892	}
				893
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	894	done:
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	895	mutex_unlock(&iommu->lock);
				896	}
				897
				898	static void *vfio_iommu_type1_open(unsigned long arg)
				899	{
				900	struct vfio_iommu *iommu;
				901
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	902	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
				903	if (!iommu)
				904	return ERR_PTR(-ENOMEM);
				905
Will Deacon	f5c9ece	2014-09-29 10:06:19 -0600	[diff] [blame]	906	switch (arg) {
				907	case VFIO_TYPE1_IOMMU:
				908	break;
				909	case VFIO_TYPE1_NESTING_IOMMU:
				910	iommu->nesting = true;
				911	case VFIO_TYPE1v2_IOMMU:
				912	iommu->v2 = true;
				913	break;
				914	default:
				915	kfree(iommu);
				916	return ERR_PTR(-EINVAL);
				917	}
				918
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	919	INIT_LIST_HEAD(&iommu->domain_list);
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	920	iommu->dma_list = RB_ROOT;
Alex Williamson	4f97abd	2019-04-03 12:36:21 -0600	[diff] [blame]	921	iommu->dma_avail = dma_entry_limit;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	922	mutex_init(&iommu->lock);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	923
				924	return iommu;
				925	}
				926
				927	static void vfio_iommu_type1_release(void *iommu_data)
				928	{
				929	struct vfio_iommu *iommu = iommu_data;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	930	struct vfio_domain domain, domain_tmp;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	931	struct vfio_group group, group_tmp;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	932
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	933	vfio_iommu_unmap_unpin_all(iommu);
				934
				935	list_for_each_entry_safe(domain, domain_tmp,
				936	&iommu->domain_list, next) {
				937	list_for_each_entry_safe(group, group_tmp,
				938	&domain->group_list, next) {
				939	iommu_detach_group(domain->domain, group->iommu_group);
				940	list_del(&group->next);
				941	kfree(group);
				942	}
				943	iommu_domain_free(domain->domain);
				944	list_del(&domain->next);
				945	kfree(domain);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	946	}
				947
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	948	kfree(iommu);
				949	}
				950
Alex Williamson	aa42931	2014-02-26 11:38:37 -0700	[diff] [blame]	951	static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
				952	{
				953	struct vfio_domain *domain;
				954	int ret = 1;
				955
				956	mutex_lock(&iommu->lock);
				957	list_for_each_entry(domain, &iommu->domain_list, next) {
				958	if (!(domain->prot & IOMMU_CACHE)) {
				959	ret = 0;
				960	break;
				961	}
				962	}
				963	mutex_unlock(&iommu->lock);
				964
				965	return ret;
				966	}
				967
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	968	static long vfio_iommu_type1_ioctl(void *iommu_data,
				969	unsigned int cmd, unsigned long arg)
				970	{
				971	struct vfio_iommu *iommu = iommu_data;
				972	unsigned long minsz;
				973
				974	if (cmd == VFIO_CHECK_EXTENSION) {
				975	switch (arg) {
				976	case VFIO_TYPE1_IOMMU:
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	977	case VFIO_TYPE1v2_IOMMU:
Will Deacon	f5c9ece	2014-09-29 10:06:19 -0600	[diff] [blame]	978	case VFIO_TYPE1_NESTING_IOMMU:
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	979	return 1;
Alex Williamson	aa42931	2014-02-26 11:38:37 -0700	[diff] [blame]	980	case VFIO_DMA_CC_IOMMU:
				981	if (!iommu)
				982	return 0;
				983	return vfio_domains_have_iommu_cache(iommu);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	984	default:
				985	return 0;
				986	}
				987	} else if (cmd == VFIO_IOMMU_GET_INFO) {
				988	struct vfio_iommu_type1_info info;
				989
				990	minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
				991
				992	if (copy_from_user(&info, (void __user *)arg, minsz))
				993	return -EFAULT;
				994
				995	if (info.argsz < minsz)
				996	return -EINVAL;
				997
Pierre Morel	d4f50ee	2015-12-23 13:08:05 +0100	[diff] [blame]	998	info.flags = VFIO_IOMMU_INFO_PGSIZES;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	999
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	1000	info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	1001
Michael S. Tsirkin	8160c4e	2016-02-28 16:31:39 +0200	[diff] [blame]	1002	return copy_to_user((void __user *)arg, &info, minsz) ?
				1003	-EFAULT : 0;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	1004
				1005	} else if (cmd == VFIO_IOMMU_MAP_DMA) {
				1006	struct vfio_iommu_type1_dma_map map;
				1007	uint32_t mask = VFIO_DMA_MAP_FLAG_READ \|
				1008	VFIO_DMA_MAP_FLAG_WRITE;
				1009
				1010	minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
				1011
				1012	if (copy_from_user(&map, (void __user *)arg, minsz))
				1013	return -EFAULT;
				1014
				1015	if (map.argsz < minsz \|\| map.flags & ~mask)
				1016	return -EINVAL;
				1017
				1018	return vfio_dma_do_map(iommu, &map);
				1019
				1020	} else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
				1021	struct vfio_iommu_type1_dma_unmap unmap;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	1022	long ret;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	1023
				1024	minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
				1025
				1026	if (copy_from_user(&unmap, (void __user *)arg, minsz))
				1027	return -EFAULT;
				1028
				1029	if (unmap.argsz < minsz \|\| unmap.flags)
				1030	return -EINVAL;
				1031
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	1032	ret = vfio_dma_do_unmap(iommu, &unmap);
				1033	if (ret)
				1034	return ret;
				1035
Michael S. Tsirkin	8160c4e	2016-02-28 16:31:39 +0200	[diff] [blame]	1036	return copy_to_user((void __user *)arg, &unmap, minsz) ?
				1037	-EFAULT : 0;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	1038	}
				1039
				1040	return -ENOTTY;
				1041	}
				1042
				1043	static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
				1044	.name = "vfio-iommu-type1",
				1045	.owner = THIS_MODULE,
				1046	.open = vfio_iommu_type1_open,
				1047	.release = vfio_iommu_type1_release,
				1048	.ioctl = vfio_iommu_type1_ioctl,
				1049	.attach_group = vfio_iommu_type1_attach_group,
				1050	.detach_group = vfio_iommu_type1_detach_group,
				1051	};
				1052
				1053	static int __init vfio_iommu_type1_init(void)
				1054	{
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	1055	return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1);
				1056	}
				1057
				1058	static void __exit vfio_iommu_type1_cleanup(void)
				1059	{
				1060	vfio_unregister_iommu_driver(&vfio_iommu_driver_ops_type1);
				1061	}
				1062
				1063	module_init(vfio_iommu_type1_init);
				1064	module_exit(vfio_iommu_type1_cleanup);
				1065
				1066	MODULE_VERSION(DRIVER_VERSION);
				1067	MODULE_LICENSE("GPL v2");
				1068	MODULE_AUTHOR(DRIVER_AUTHOR);
				1069	MODULE_DESCRIPTION(DRIVER_DESC);