Blame - drivers/vfio/vfio_iommu_type1.c - kernel/msm-4.9

blob: e6e7f155bdd9429dafb7c58653b942acae653a50 [file] [log] [blame]

Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	1	/*
				2	* VFIO: IOMMU DMA mapping support for Type1 IOMMU
				3	*
				4	* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
				5	* Author: Alex Williamson <alex.williamson@redhat.com>
				6	*
				7	* This program is free software; you can redistribute it and/or modify
				8	* it under the terms of the GNU General Public License version 2 as
				9	* published by the Free Software Foundation.
				10	*
				11	* Derived from original vfio:
				12	* Copyright 2010 Cisco Systems, Inc. All rights reserved.
				13	* Author: Tom Lyon, pugs@cisco.com
				14	*
				15	* We arbitrarily define a Type1 IOMMU as one matching the below code.
				16	* It could be called the x86 IOMMU as it's designed for AMD-Vi & Intel
				17	* VT-d, but that makes it harder to re-use as theoretically anyone
				18	* implementing a similar IOMMU could make use of this. We expect the
				19	* IOMMU to support the IOMMU API and have few to no restrictions around
				20	* the IOVA range that can be mapped. The Type1 IOMMU is currently
				21	* optimized for relatively static mappings of a userspace process with
				22	* userspace pages pinned into memory. We also assume devices and IOMMU
				23	* domains are PCI based as the IOMMU API is still centered around a
				24	* device/bus interface rather than a group interface.
				25	*/
				26
				27	#include <linux/compat.h>
				28	#include <linux/device.h>
				29	#include <linux/fs.h>
				30	#include <linux/iommu.h>
				31	#include <linux/module.h>
				32	#include <linux/mm.h>
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	33	#include <linux/rbtree.h>
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	34	#include <linux/sched.h>
				35	#include <linux/slab.h>
				36	#include <linux/uaccess.h>
				37	#include <linux/vfio.h>
				38	#include <linux/workqueue.h>
				39
				40	#define DRIVER_VERSION "0.2"
				41	#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
				42	#define DRIVER_DESC "Type1 IOMMU driver for VFIO"
				43
				44	static bool allow_unsafe_interrupts;
				45	module_param_named(allow_unsafe_interrupts,
				46	allow_unsafe_interrupts, bool, S_IRUGO \| S_IWUSR);
				47	MODULE_PARM_DESC(allow_unsafe_interrupts,
				48	"Enable VFIO IOMMU support for on platforms without interrupt remapping support.");
				49
Alex Williamson	5c6c2b2	2013-06-21 09:38:11 -0600	[diff] [blame]	50	static bool disable_hugepages;
				51	module_param_named(disable_hugepages,
				52	disable_hugepages, bool, S_IRUGO \| S_IWUSR);
				53	MODULE_PARM_DESC(disable_hugepages,
				54	"Disable VFIO IOMMU support for IOMMU hugepages.");
				55
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	56	struct vfio_iommu {
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	57	struct list_head domain_list;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	58	struct mutex lock;
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	59	struct rb_root dma_list;
Will Deacon	f5c9ece	2014-09-29 10:06:19 -0600	[diff] [blame]	60	bool v2;
				61	bool nesting;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	62	};
				63
				64	struct vfio_domain {
				65	struct iommu_domain *domain;
				66	struct list_head next;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	67	struct list_head group_list;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	68	int prot; /* IOMMU_CACHE */
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame^]	69	bool fgsp; /* Fine-grained super pages */
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	70	};
				71
				72	struct vfio_dma {
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	73	struct rb_node node;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	74	dma_addr_t iova; /* Device address */
				75	unsigned long vaddr; /* Process virtual addr */
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	76	size_t size; /* Map size (bytes) */
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	77	int prot; /* IOMMU_READ/WRITE */
				78	};
				79
				80	struct vfio_group {
				81	struct iommu_group *iommu_group;
				82	struct list_head next;
				83	};
				84
				85	/*
				86	* This code handles mapping and unmapping of user data buffers
				87	* into DMA'ble space using the IOMMU
				88	*/
				89
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	90	static struct vfio_dma vfio_find_dma(struct vfio_iommu iommu,
				91	dma_addr_t start, size_t size)
				92	{
				93	struct rb_node *node = iommu->dma_list.rb_node;
				94
				95	while (node) {
				96	struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
				97
				98	if (start + size <= dma->iova)
				99	node = node->rb_left;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	100	else if (start >= dma->iova + dma->size)
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	101	node = node->rb_right;
				102	else
				103	return dma;
				104	}
				105
				106	return NULL;
				107	}
				108
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	109	static void vfio_link_dma(struct vfio_iommu iommu, struct vfio_dma new)
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	110	{
				111	struct rb_node *link = &iommu->dma_list.rb_node, parent = NULL;
				112	struct vfio_dma *dma;
				113
				114	while (*link) {
				115	parent = *link;
				116	dma = rb_entry(parent, struct vfio_dma, node);
				117
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	118	if (new->iova + new->size <= dma->iova)
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	119	link = &(*link)->rb_left;
				120	else
				121	link = &(*link)->rb_right;
				122	}
				123
				124	rb_link_node(&new->node, parent, link);
				125	rb_insert_color(&new->node, &iommu->dma_list);
				126	}
				127
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	128	static void vfio_unlink_dma(struct vfio_iommu iommu, struct vfio_dma old)
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	129	{
				130	rb_erase(&old->node, &iommu->dma_list);
				131	}
				132
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	133	struct vwork {
				134	struct mm_struct *mm;
				135	long npage;
				136	struct work_struct work;
				137	};
				138
				139	/* delayed decrement/increment for locked_vm */
				140	static void vfio_lock_acct_bg(struct work_struct *work)
				141	{
				142	struct vwork *vwork = container_of(work, struct vwork, work);
				143	struct mm_struct *mm;
				144
				145	mm = vwork->mm;
				146	down_write(&mm->mmap_sem);
				147	mm->locked_vm += vwork->npage;
				148	up_write(&mm->mmap_sem);
				149	mmput(mm);
				150	kfree(vwork);
				151	}
				152
				153	static void vfio_lock_acct(long npage)
				154	{
				155	struct vwork *vwork;
				156	struct mm_struct *mm;
				157
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	158	if (!current->mm \|\| !npage)
				159	return; /* process exited or nothing to do */
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	160
				161	if (down_write_trylock(&current->mm->mmap_sem)) {
				162	current->mm->locked_vm += npage;
				163	up_write(&current->mm->mmap_sem);
				164	return;
				165	}
				166
				167	/*
				168	* Couldn't get mmap_sem lock, so must setup to update
				169	* mm->locked_vm later. If locked_vm were atomic, we
				170	* wouldn't need this silliness
				171	*/
				172	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
				173	if (!vwork)
				174	return;
				175	mm = get_task_mm(current);
				176	if (!mm) {
				177	kfree(vwork);
				178	return;
				179	}
				180	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
				181	vwork->mm = mm;
				182	vwork->npage = npage;
				183	schedule_work(&vwork->work);
				184	}
				185
				186	/*
				187	* Some mappings aren't backed by a struct page, for example an mmap'd
				188	* MMIO range for our own or another device. These use a different
				189	* pfn conversion and shouldn't be tracked as locked pages.
				190	*/
				191	static bool is_invalid_reserved_pfn(unsigned long pfn)
				192	{
				193	if (pfn_valid(pfn)) {
				194	bool reserved;
				195	struct page *tail = pfn_to_page(pfn);
David Rientjes	668f9abb	2014-03-03 15:38:18 -0800	[diff] [blame]	196	struct page *head = compound_head(tail);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	197	reserved = !!(PageReserved(head));
				198	if (head != tail) {
				199	/*
				200	* "head" is not a dangling pointer
David Rientjes	668f9abb	2014-03-03 15:38:18 -0800	[diff] [blame]	201	* (compound_head takes care of that)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	202	* but the hugepage may have been split
				203	* from under us (and we may not hold a
				204	* reference count on the head page so it can
				205	* be reused before we run PageReferenced), so
				206	* we've to check PageTail before returning
				207	* what we just read.
				208	*/
				209	smp_rmb();
				210	if (PageTail(tail))
				211	return reserved;
				212	}
				213	return PageReserved(tail);
				214	}
				215
				216	return true;
				217	}
				218
				219	static int put_pfn(unsigned long pfn, int prot)
				220	{
				221	if (!is_invalid_reserved_pfn(pfn)) {
				222	struct page *page = pfn_to_page(pfn);
				223	if (prot & IOMMU_WRITE)
				224	SetPageDirty(page);
				225	put_page(page);
				226	return 1;
				227	}
				228	return 0;
				229	}
				230
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	231	static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
				232	{
				233	struct page *page[1];
				234	struct vm_area_struct *vma;
				235	int ret = -EFAULT;
				236
				237	if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
				238	*pfn = page_to_pfn(page[0]);
				239	return 0;
				240	}
				241
				242	down_read(&current->mm->mmap_sem);
				243
				244	vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
				245
				246	if (vma && vma->vm_flags & VM_PFNMAP) {
				247	*pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
				248	if (is_invalid_reserved_pfn(*pfn))
				249	ret = 0;
				250	}
				251
				252	up_read(&current->mm->mmap_sem);
				253
				254	return ret;
				255	}
				256
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	257	/*
				258	* Attempt to pin pages. We really don't want to track all the pfns and
				259	* the iommu can only map chunks of consecutive pfns anyway, so get the
				260	* first page and all consecutive pages with the same locking.
				261	*/
				262	static long vfio_pin_pages(unsigned long vaddr, long npage,
				263	int prot, unsigned long *pfn_base)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	264	{
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	265	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
				266	bool lock_cap = capable(CAP_IPC_LOCK);
				267	long ret, i;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	268
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	269	if (!current->mm)
				270	return -ENODEV;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	271
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	272	ret = vaddr_get_pfn(vaddr, prot, pfn_base);
				273	if (ret)
				274	return ret;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	275
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	276	if (is_invalid_reserved_pfn(*pfn_base))
				277	return 1;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	278
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	279	if (!lock_cap && current->mm->locked_vm + 1 > limit) {
				280	put_pfn(*pfn_base, prot);
				281	pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
				282	limit << PAGE_SHIFT);
				283	return -ENOMEM;
				284	}
				285
Alex Williamson	5c6c2b2	2013-06-21 09:38:11 -0600	[diff] [blame]	286	if (unlikely(disable_hugepages)) {
				287	vfio_lock_acct(1);
				288	return 1;
				289	}
				290
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	291	/* Lock all the consecutive pages from pfn_base */
				292	for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	293	unsigned long pfn = 0;
				294
				295	ret = vaddr_get_pfn(vaddr, prot, &pfn);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	296	if (ret)
				297	break;
				298
				299	if (pfn != *pfn_base + i \|\| is_invalid_reserved_pfn(pfn)) {
				300	put_pfn(pfn, prot);
				301	break;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	302	}
				303
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	304	if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	305	put_pfn(pfn, prot);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	306	pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
				307	__func__, limit << PAGE_SHIFT);
				308	break;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	309	}
				310	}
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	311
				312	vfio_lock_acct(i);
				313
				314	return i;
				315	}
				316
				317	static long vfio_unpin_pages(unsigned long pfn, long npage,
				318	int prot, bool do_accounting)
				319	{
				320	unsigned long unlocked = 0;
				321	long i;
				322
				323	for (i = 0; i < npage; i++)
				324	unlocked += put_pfn(pfn++, prot);
				325
				326	if (do_accounting)
				327	vfio_lock_acct(-unlocked);
				328
				329	return unlocked;
				330	}
				331
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	332	static void vfio_unmap_unpin(struct vfio_iommu iommu, struct vfio_dma dma)
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	333	{
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	334	dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
				335	struct vfio_domain domain, d;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	336	long unlocked = 0;
				337
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	338	if (!dma->size)
				339	return;
				340	/*
				341	* We use the IOMMU to track the physical addresses, otherwise we'd
				342	* need a much more complicated tracking system. Unfortunately that
				343	* means we need to use one of the iommu domains to figure out the
				344	* pfns to unpin. The rest need to be unmapped in advance so we have
				345	* no iommu translations remaining when the pages are unpinned.
				346	*/
				347	domain = d = list_first_entry(&iommu->domain_list,
				348	struct vfio_domain, next);
				349
				350	list_for_each_entry_continue(d, &iommu->domain_list, next)
				351	iommu_unmap(d->domain, dma->iova, dma->size);
				352
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	353	while (iova < end) {
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame^]	354	size_t unmapped, len;
				355	phys_addr_t phys, next;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	356
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	357	phys = iommu_iova_to_phys(domain->domain, iova);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	358	if (WARN_ON(!phys)) {
				359	iova += PAGE_SIZE;
				360	continue;
				361	}
				362
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame^]	363	/*
				364	* To optimize for fewer iommu_unmap() calls, each of which
				365	* may require hardware cache flushing, try to find the
				366	* largest contiguous physical memory chunk to unmap.
				367	*/
				368	for (len = PAGE_SIZE;
				369	!domain->fgsp && iova + len < end; len += PAGE_SIZE) {
				370	next = iommu_iova_to_phys(domain->domain, iova + len);
				371	if (next != phys + len)
				372	break;
				373	}
				374
				375	unmapped = iommu_unmap(domain->domain, iova, len);
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	376	if (WARN_ON(!unmapped))
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	377	break;
				378
				379	unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
				380	unmapped >> PAGE_SHIFT,
				381	dma->prot, false);
				382	iova += unmapped;
				383	}
				384
				385	vfio_lock_acct(-unlocked);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	386	}
				387
/*
 * Tear down a tracked mapping completely: unmap and unpin its pages,
 * remove it from the DMA tree, and free the tracking structure.
 */
static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
{
	vfio_unmap_unpin(iommu, dma);
	vfio_unlink_dma(iommu, dma);
	kfree(dma);
}
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	394
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	395	static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
				396	{
				397	struct vfio_domain *domain;
				398	unsigned long bitmap = PAGE_MASK;
Alex Williamson	f5bfdbf	2013-06-25 16:01:44 -0600	[diff] [blame]	399
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	400	mutex_lock(&iommu->lock);
				401	list_for_each_entry(domain, &iommu->domain_list, next)
				402	bitmap &= domain->domain->ops->pgsize_bitmap;
				403	mutex_unlock(&iommu->lock);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	404
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	405	return bitmap;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	406	}
				407
				408	static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
				409	struct vfio_iommu_type1_dma_unmap *unmap)
				410	{
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	411	uint64_t mask;
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	412	struct vfio_dma *dma;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	413	size_t unmapped = 0;
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	414	int ret = 0;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	415
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	416	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	417
				418	if (unmap->iova & mask)
				419	return -EINVAL;
Alex Williamson	f5bfdbf	2013-06-25 16:01:44 -0600	[diff] [blame]	420	if (!unmap->size \|\| unmap->size & mask)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	421	return -EINVAL;
				422
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	423	WARN_ON(mask & PAGE_MASK);
				424
				425	mutex_lock(&iommu->lock);
				426
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	427	/*
				428	* vfio-iommu-type1 (v1) - User mappings were coalesced together to
				429	* avoid tracking individual mappings. This means that the granularity
				430	* of the original mapping was lost and the user was allowed to attempt
				431	* to unmap any range. Depending on the contiguousness of physical
				432	* memory and page sizes supported by the IOMMU, arbitrary unmaps may
				433	* or may not have worked. We only guaranteed unmap granularity
				434	* matching the original mapping; even though it was untracked here,
				435	* the original mappings are reflected in IOMMU mappings. This
				436	* resulted in a couple unusual behaviors. First, if a range is not
				437	* able to be unmapped, ex. a set of 4k pages that was mapped as a
				438	* 2M hugepage into the IOMMU, the unmap ioctl returns success but with
				439	* a zero sized unmap. Also, if an unmap request overlaps the first
				440	* address of a hugepage, the IOMMU will unmap the entire hugepage.
				441	* This also returns success and the returned unmap size reflects the
				442	* actual size unmapped.
				443	*
				444	* We attempt to maintain compatibility with this "v1" interface, but
				445	* we take control out of the hands of the IOMMU. Therefore, an unmap
				446	* request offset from the beginning of the original mapping will
				447	* return success with zero sized unmap. And an unmap request covering
				448	* the first iova of mapping will unmap the entire range.
				449	*
				450	* The v2 version of this interface intends to be more deterministic.
				451	* Unmap requests must fully cover previous mappings. Multiple
				452	* mappings may still be unmaped by specifying large ranges, but there
				453	* must not be any previous mappings bisected by the range. An error
				454	* will be returned if these conditions are not met. The v2 interface
				455	* will only return success and a size of zero if there were no
				456	* mappings within the range.
				457	*/
				458	if (iommu->v2) {
				459	dma = vfio_find_dma(iommu, unmap->iova, 0);
				460	if (dma && dma->iova != unmap->iova) {
				461	ret = -EINVAL;
				462	goto unlock;
				463	}
				464	dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
				465	if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
				466	ret = -EINVAL;
				467	goto unlock;
				468	}
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	469	}
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	470
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	471	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
				472	if (!iommu->v2 && unmap->iova > dma->iova)
				473	break;
				474	unmapped += dma->size;
				475	vfio_remove_dma(iommu, dma);
				476	}
				477
				478	unlock:
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	479	mutex_unlock(&iommu->lock);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	480
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	481	/* Report how much was unmapped */
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	482	unmap->size = unmapped;
				483
				484	return ret;
				485	}
				486
				487	/*
				488	* Turns out AMD IOMMU has a page table bug where it won't map large pages
				489	* to a region that previously mapped smaller pages. This should be fixed
				490	* soon, so this is just a temporary workaround to break mappings down into
				491	* PAGE_SIZE. Better to map smaller pages than nothing.
				492	*/
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	493	static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova,
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	494	unsigned long pfn, long npage, int prot)
				495	{
				496	long i;
				497	int ret;
				498
				499	for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	500	ret = iommu_map(domain->domain, iova,
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	501	(phys_addr_t)pfn << PAGE_SHIFT,
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	502	PAGE_SIZE, prot \| domain->prot);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	503	if (ret)
				504	break;
				505	}
				506
				507	for (; i < npage && i > 0; i--, iova -= PAGE_SIZE)
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	508	iommu_unmap(domain->domain, iova, PAGE_SIZE);
				509
				510	return ret;
				511	}
				512
				513	static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
				514	unsigned long pfn, long npage, int prot)
				515	{
				516	struct vfio_domain *d;
				517	int ret;
				518
				519	list_for_each_entry(d, &iommu->domain_list, next) {
				520	ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
				521	npage << PAGE_SHIFT, prot \| d->prot);
				522	if (ret) {
				523	if (ret != -EBUSY \|\|
				524	map_try_harder(d, iova, pfn, npage, prot))
				525	goto unwind;
				526	}
				527	}
				528
				529	return 0;
				530
				531	unwind:
				532	list_for_each_entry_continue_reverse(d, &iommu->domain_list, next)
				533	iommu_unmap(d->domain, iova, npage << PAGE_SHIFT);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	534
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	535	return ret;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	536	}
				537
				538	static int vfio_dma_do_map(struct vfio_iommu *iommu,
				539	struct vfio_iommu_type1_dma_map *map)
				540	{
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	541	dma_addr_t iova = map->iova;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	542	unsigned long vaddr = map->vaddr;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	543	size_t size = map->size;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	544	long npage;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	545	int ret = 0, prot = 0;
				546	uint64_t mask;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	547	struct vfio_dma *dma;
Antonios Motakis	d93b3ac	2013-10-11 10:40:46 -0600	[diff] [blame]	548	unsigned long pfn;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	549
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	550	/* Verify that none of our __u64 fields overflow */
				551	if (map->size != size \|\| map->vaddr != vaddr \|\| map->iova != iova)
				552	return -EINVAL;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	553
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	554	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	555
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	556	WARN_ON(mask & PAGE_MASK);
				557
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	558	/* READ/WRITE from device perspective */
				559	if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
				560	prot \|= IOMMU_WRITE;
				561	if (map->flags & VFIO_DMA_MAP_FLAG_READ)
				562	prot \|= IOMMU_READ;
				563
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	564	if (!prot \|\| !size \|\| (size \| iova \| vaddr) & mask)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	565	return -EINVAL;
				566
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	567	/* Don't allow IOVA or virtual address wrap */
				568	if (iova + size - 1 < iova \|\| vaddr + size - 1 < vaddr)
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	569	return -EINVAL;
				570
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	571	mutex_lock(&iommu->lock);
				572
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	573	if (vfio_find_dma(iommu, iova, size)) {
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	574	mutex_unlock(&iommu->lock);
				575	return -EEXIST;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	576	}
				577
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	578	dma = kzalloc(sizeof(*dma), GFP_KERNEL);
				579	if (!dma) {
				580	mutex_unlock(&iommu->lock);
				581	return -ENOMEM;
				582	}
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	583
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	584	dma->iova = iova;
				585	dma->vaddr = vaddr;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	586	dma->prot = prot;
				587
				588	/* Insert zero-sized and grow as we map chunks of it */
				589	vfio_link_dma(iommu, dma);
				590
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	591	while (size) {
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	592	/* Pin a contiguous chunk of memory */
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	593	npage = vfio_pin_pages(vaddr + dma->size,
				594	size >> PAGE_SHIFT, prot, &pfn);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	595	if (npage <= 0) {
				596	WARN_ON(!npage);
				597	ret = (int)npage;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	598	break;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	599	}
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	600
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	601	/* Map it! */
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	602	ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	603	if (ret) {
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	604	vfio_unpin_pages(pfn, npage, prot, true);
				605	break;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	606	}
				607
Alex Williamson	c8dbca1	2014-05-30 11:35:54 -0600	[diff] [blame]	608	size -= npage << PAGE_SHIFT;
				609	dma->size += npage << PAGE_SHIFT;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	610	}
				611
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	612	if (ret)
				613	vfio_remove_dma(iommu, dma);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	614
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	615	mutex_unlock(&iommu->lock);
				616	return ret;
				617	}
				618
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	619	static int vfio_bus_type(struct device dev, void data)
				620	{
				621	struct bus_type **bus = data;
				622
				623	if (bus && bus != dev->bus)
				624	return -EINVAL;
				625
				626	*bus = dev->bus;
				627
				628	return 0;
				629	}
				630
				631	static int vfio_iommu_replay(struct vfio_iommu *iommu,
				632	struct vfio_domain *domain)
				633	{
				634	struct vfio_domain *d;
				635	struct rb_node *n;
				636	int ret;
				637
				638	/* Arbitrarily pick the first domain in the list for lookups */
				639	d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
				640	n = rb_first(&iommu->dma_list);
				641
				642	/* If there's not a domain, there better not be any mappings */
				643	if (WARN_ON(n && !d))
				644	return -EINVAL;
				645
				646	for (; n; n = rb_next(n)) {
				647	struct vfio_dma *dma;
				648	dma_addr_t iova;
				649
				650	dma = rb_entry(n, struct vfio_dma, node);
				651	iova = dma->iova;
				652
				653	while (iova < dma->iova + dma->size) {
				654	phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
				655	size_t size;
				656
				657	if (WARN_ON(!phys)) {
				658	iova += PAGE_SIZE;
				659	continue;
				660	}
				661
				662	size = PAGE_SIZE;
				663
				664	while (iova + size < dma->iova + dma->size &&
				665	phys + size == iommu_iova_to_phys(d->domain,
				666	iova + size))
				667	size += PAGE_SIZE;
				668
				669	ret = iommu_map(domain->domain, iova, phys,
				670	size, dma->prot \| domain->prot);
				671	if (ret)
				672	return ret;
				673
				674	iova += size;
				675	}
				676	}
				677
				678	return 0;
				679	}
				680
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame^]	681	/*
				682	* We change our unmap behavior slightly depending on whether the IOMMU
				683	* supports fine-grained superpages. IOMMUs like AMD-Vi will use a superpage
				684	* for practically any contiguous power-of-two mapping we give it. This means
				685	* we don't need to look for contiguous chunks ourselves to make unmapping
				686	* more efficient. On IOMMUs with coarse-grained super pages, like Intel VT-d
				687	* with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
				688	* significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
				689	* hugetlbfs is in use.
				690	*/
				691	static void vfio_test_domain_fgsp(struct vfio_domain *domain)
				692	{
				693	struct page *pages;
				694	int ret, order = get_order(PAGE_SIZE * 2);
				695
				696	pages = alloc_pages(GFP_KERNEL \| __GFP_ZERO, order);
				697	if (!pages)
				698	return;
				699
				700	ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
				701	IOMMU_READ \| IOMMU_WRITE \| domain->prot);
				702	if (!ret) {
				703	size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
				704
				705	if (unmapped == PAGE_SIZE)
				706	iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
				707	else
				708	domain->fgsp = true;
				709	}
				710
				711	__free_pages(pages, order);
				712	}
				713
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	714	static int vfio_iommu_type1_attach_group(void *iommu_data,
				715	struct iommu_group *iommu_group)
				716	{
				717	struct vfio_iommu *iommu = iommu_data;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	718	struct vfio_group group, g;
				719	struct vfio_domain domain, d;
				720	struct bus_type *bus = NULL;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	721	int ret;
				722
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	723	mutex_lock(&iommu->lock);
				724
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	725	list_for_each_entry(d, &iommu->domain_list, next) {
				726	list_for_each_entry(g, &d->group_list, next) {
				727	if (g->iommu_group != iommu_group)
				728	continue;
				729
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	730	mutex_unlock(&iommu->lock);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	731	return -EINVAL;
				732	}
				733	}
				734
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	735	group = kzalloc(sizeof(*group), GFP_KERNEL);
				736	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
				737	if (!group \|\| !domain) {
				738	ret = -ENOMEM;
				739	goto out_free;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	740	}
				741
				742	group->iommu_group = iommu_group;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	743
				744	/* Determine bus_type in order to allocate a domain */
				745	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
				746	if (ret)
				747	goto out_free;
				748
				749	domain->domain = iommu_domain_alloc(bus);
				750	if (!domain->domain) {
				751	ret = -EIO;
				752	goto out_free;
				753	}
				754
Will Deacon	f5c9ece	2014-09-29 10:06:19 -0600	[diff] [blame]	755	if (iommu->nesting) {
				756	int attr = 1;
				757
				758	ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
				759	&attr);
				760	if (ret)
				761	goto out_domain;
				762	}
				763
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	764	ret = iommu_attach_group(domain->domain, iommu_group);
				765	if (ret)
				766	goto out_domain;
				767
				768	INIT_LIST_HEAD(&domain->group_list);
				769	list_add(&group->next, &domain->group_list);
				770
				771	if (!allow_unsafe_interrupts &&
Joerg Roedel	eb165f0	2014-09-05 10:56:05 +0200	[diff] [blame]	772	!iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	773	pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
				774	__func__);
				775	ret = -EPERM;
				776	goto out_detach;
				777	}
				778
Joerg Roedel	eb165f0	2014-09-05 10:56:05 +0200	[diff] [blame]	779	if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	780	domain->prot \|= IOMMU_CACHE;
				781
				782	/*
				783	* Try to match an existing compatible domain. We don't want to
				784	* preclude an IOMMU driver supporting multiple bus_types and being
				785	* able to include different bus_types in the same IOMMU domain, so
				786	* we test whether the domains use the same iommu_ops rather than
				787	* testing if they're on the same bus_type.
				788	*/
				789	list_for_each_entry(d, &iommu->domain_list, next) {
				790	if (d->domain->ops == domain->domain->ops &&
				791	d->prot == domain->prot) {
				792	iommu_detach_group(domain->domain, iommu_group);
				793	if (!iommu_attach_group(d->domain, iommu_group)) {
				794	list_add(&group->next, &d->group_list);
				795	iommu_domain_free(domain->domain);
				796	kfree(domain);
				797	mutex_unlock(&iommu->lock);
				798	return 0;
				799	}
				800
				801	ret = iommu_attach_group(domain->domain, iommu_group);
				802	if (ret)
				803	goto out_domain;
				804	}
				805	}
				806
Alex Williamson	6fe1010	2015-02-06 10:58:56 -0700	[diff] [blame^]	807	vfio_test_domain_fgsp(domain);
				808
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	809	/* replay mappings on new domains */
				810	ret = vfio_iommu_replay(iommu, domain);
				811	if (ret)
				812	goto out_detach;
				813
				814	list_add(&domain->next, &iommu->domain_list);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	815
				816	mutex_unlock(&iommu->lock);
				817
				818	return 0;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	819
				820	out_detach:
				821	iommu_detach_group(domain->domain, iommu_group);
				822	out_domain:
				823	iommu_domain_free(domain->domain);
				824	out_free:
				825	kfree(domain);
				826	kfree(group);
				827	mutex_unlock(&iommu->lock);
				828	return ret;
				829	}
				830
				831	static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
				832	{
				833	struct rb_node *node;
				834
				835	while ((node = rb_first(&iommu->dma_list)))
				836	vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	837	}
				838
				839	static void vfio_iommu_type1_detach_group(void *iommu_data,
				840	struct iommu_group *iommu_group)
				841	{
				842	struct vfio_iommu *iommu = iommu_data;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	843	struct vfio_domain *domain;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	844	struct vfio_group *group;
				845
				846	mutex_lock(&iommu->lock);
				847
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	848	list_for_each_entry(domain, &iommu->domain_list, next) {
				849	list_for_each_entry(group, &domain->group_list, next) {
				850	if (group->iommu_group != iommu_group)
				851	continue;
				852
				853	iommu_detach_group(domain->domain, iommu_group);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	854	list_del(&group->next);
				855	kfree(group);
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	856	/*
				857	* Group ownership provides privilege, if the group
				858	* list is empty, the domain goes away. If it's the
				859	* last domain, then all the mappings go away too.
				860	*/
				861	if (list_empty(&domain->group_list)) {
				862	if (list_is_singular(&iommu->domain_list))
				863	vfio_iommu_unmap_unpin_all(iommu);
				864	iommu_domain_free(domain->domain);
				865	list_del(&domain->next);
				866	kfree(domain);
				867	}
				868	goto done;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	869	}
				870	}
				871
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	872	done:
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	873	mutex_unlock(&iommu->lock);
				874	}
				875
				876	static void *vfio_iommu_type1_open(unsigned long arg)
				877	{
				878	struct vfio_iommu *iommu;
				879
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	880	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
				881	if (!iommu)
				882	return ERR_PTR(-ENOMEM);
				883
Will Deacon	f5c9ece	2014-09-29 10:06:19 -0600	[diff] [blame]	884	switch (arg) {
				885	case VFIO_TYPE1_IOMMU:
				886	break;
				887	case VFIO_TYPE1_NESTING_IOMMU:
				888	iommu->nesting = true;
				889	case VFIO_TYPE1v2_IOMMU:
				890	iommu->v2 = true;
				891	break;
				892	default:
				893	kfree(iommu);
				894	return ERR_PTR(-EINVAL);
				895	}
				896
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	897	INIT_LIST_HEAD(&iommu->domain_list);
Alex Williamson	cd9b226	2013-06-21 09:37:50 -0600	[diff] [blame]	898	iommu->dma_list = RB_ROOT;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	899	mutex_init(&iommu->lock);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	900
				901	return iommu;
				902	}
				903
				904	static void vfio_iommu_type1_release(void *iommu_data)
				905	{
				906	struct vfio_iommu *iommu = iommu_data;
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	907	struct vfio_domain domain, domain_tmp;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	908	struct vfio_group group, group_tmp;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	909
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	910	vfio_iommu_unmap_unpin_all(iommu);
				911
				912	list_for_each_entry_safe(domain, domain_tmp,
				913	&iommu->domain_list, next) {
				914	list_for_each_entry_safe(group, group_tmp,
				915	&domain->group_list, next) {
				916	iommu_detach_group(domain->domain, group->iommu_group);
				917	list_del(&group->next);
				918	kfree(group);
				919	}
				920	iommu_domain_free(domain->domain);
				921	list_del(&domain->next);
				922	kfree(domain);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	923	}
				924
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	925	kfree(iommu);
				926	}
				927
Alex Williamson	aa42931	2014-02-26 11:38:37 -0700	[diff] [blame]	928	static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
				929	{
				930	struct vfio_domain *domain;
				931	int ret = 1;
				932
				933	mutex_lock(&iommu->lock);
				934	list_for_each_entry(domain, &iommu->domain_list, next) {
				935	if (!(domain->prot & IOMMU_CACHE)) {
				936	ret = 0;
				937	break;
				938	}
				939	}
				940	mutex_unlock(&iommu->lock);
				941
				942	return ret;
				943	}
				944
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	945	static long vfio_iommu_type1_ioctl(void *iommu_data,
				946	unsigned int cmd, unsigned long arg)
				947	{
				948	struct vfio_iommu *iommu = iommu_data;
				949	unsigned long minsz;
				950
				951	if (cmd == VFIO_CHECK_EXTENSION) {
				952	switch (arg) {
				953	case VFIO_TYPE1_IOMMU:
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	954	case VFIO_TYPE1v2_IOMMU:
Will Deacon	f5c9ece	2014-09-29 10:06:19 -0600	[diff] [blame]	955	case VFIO_TYPE1_NESTING_IOMMU:
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	956	return 1;
Alex Williamson	aa42931	2014-02-26 11:38:37 -0700	[diff] [blame]	957	case VFIO_DMA_CC_IOMMU:
				958	if (!iommu)
				959	return 0;
				960	return vfio_domains_have_iommu_cache(iommu);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	961	default:
				962	return 0;
				963	}
				964	} else if (cmd == VFIO_IOMMU_GET_INFO) {
				965	struct vfio_iommu_type1_info info;
				966
				967	minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
				968
				969	if (copy_from_user(&info, (void __user *)arg, minsz))
				970	return -EFAULT;
				971
				972	if (info.argsz < minsz)
				973	return -EINVAL;
				974
				975	info.flags = 0;
				976
Alex Williamson	1ef3e2b	2014-02-26 11:38:36 -0700	[diff] [blame]	977	info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	978
				979	return copy_to_user((void __user *)arg, &info, minsz);
				980
				981	} else if (cmd == VFIO_IOMMU_MAP_DMA) {
				982	struct vfio_iommu_type1_dma_map map;
				983	uint32_t mask = VFIO_DMA_MAP_FLAG_READ \|
				984	VFIO_DMA_MAP_FLAG_WRITE;
				985
				986	minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
				987
				988	if (copy_from_user(&map, (void __user *)arg, minsz))
				989	return -EFAULT;
				990
				991	if (map.argsz < minsz \|\| map.flags & ~mask)
				992	return -EINVAL;
				993
				994	return vfio_dma_do_map(iommu, &map);
				995
				996	} else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
				997	struct vfio_iommu_type1_dma_unmap unmap;
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	998	long ret;
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	999
				1000	minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
				1001
				1002	if (copy_from_user(&unmap, (void __user *)arg, minsz))
				1003	return -EFAULT;
				1004
				1005	if (unmap.argsz < minsz \|\| unmap.flags)
				1006	return -EINVAL;
				1007
Alex Williamson	166fd7d	2013-06-21 09:38:02 -0600	[diff] [blame]	1008	ret = vfio_dma_do_unmap(iommu, &unmap);
				1009	if (ret)
				1010	return ret;
				1011
				1012	return copy_to_user((void __user *)arg, &unmap, minsz);
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	1013	}
				1014
				1015	return -ENOTTY;
				1016	}
				1017
				1018	static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
				1019	.name = "vfio-iommu-type1",
				1020	.owner = THIS_MODULE,
				1021	.open = vfio_iommu_type1_open,
				1022	.release = vfio_iommu_type1_release,
				1023	.ioctl = vfio_iommu_type1_ioctl,
				1024	.attach_group = vfio_iommu_type1_attach_group,
				1025	.detach_group = vfio_iommu_type1_detach_group,
				1026	};
				1027
				1028	static int __init vfio_iommu_type1_init(void)
				1029	{
Alex Williamson	73fa0d1	2012-07-31 08:16:23 -0600	[diff] [blame]	1030	return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1);
				1031	}
				1032
				1033	static void __exit vfio_iommu_type1_cleanup(void)
				1034	{
				1035	vfio_unregister_iommu_driver(&vfio_iommu_driver_ops_type1);
				1036	}
				1037
				1038	module_init(vfio_iommu_type1_init);
				1039	module_exit(vfio_iommu_type1_cleanup);
				1040
				1041	MODULE_VERSION(DRIVER_VERSION);
				1042	MODULE_LICENSE("GPL v2");
				1043	MODULE_AUTHOR(DRIVER_AUTHOR);
				1044	MODULE_DESCRIPTION(DRIVER_DESC);