/*
 * VFIO: IOMMU DMA mapping support for Type1 IOMMU
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 *
 * We arbitrarily define a Type1 IOMMU as one matching the below code.
 * It could be called the x86 IOMMU as it's designed for AMD-Vi & Intel
 * VT-d, but that makes it harder to re-use as theoretically anyone
 * implementing a similar IOMMU could make use of this. We expect the
 * IOMMU to support the IOMMU API and have few to no restrictions around
 * the IOVA range that can be mapped. The Type1 IOMMU is currently
 * optimized for relatively static mappings of a userspace process with
 * userspace pages pinned into memory. We also assume devices and IOMMU
 * domains are PCI based as the IOMMU API is still centered around a
 * device/bus interface rather than a group interface.
 */

#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/workqueue.h>

#define DRIVER_VERSION  "0.2"
#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC     "Type1 IOMMU driver for VFIO"

static bool allow_unsafe_interrupts;
module_param_named(allow_unsafe_interrupts,
                   allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(allow_unsafe_interrupts,
                 "Enable VFIO IOMMU support on platforms without interrupt remapping support.");

static bool disable_hugepages;
module_param_named(disable_hugepages,
                   disable_hugepages, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_hugepages,
                 "Disable VFIO IOMMU support for IOMMU hugepages.");

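/*
 * Usage note (illustrative, standard module parameter behavior): both
 * parameters can be set at load time, e.g.
 *
 *	modprobe vfio_iommu_type1 disable_hugepages=1
 *
 * or, since both are S_IWUSR, toggled at runtime through
 * /sys/module/vfio_iommu_type1/parameters/<name>.
 */
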
struct vfio_iommu {
        struct list_head        domain_list;
        struct mutex            lock;
        struct rb_root          dma_list;
        bool                    v2;
        bool                    nesting;
};

struct vfio_domain {
        struct iommu_domain     *domain;
        struct list_head        next;
        struct list_head        group_list;
        int                     prot;           /* IOMMU_CACHE */
        bool                    fgsp;           /* Fine-grained super pages */
};

struct vfio_dma {
        struct rb_node          node;
        dma_addr_t              iova;           /* Device address */
        unsigned long           vaddr;          /* Process virtual addr */
        size_t                  size;           /* Map size (bytes) */
        int                     prot;           /* IOMMU_READ/WRITE */
};

struct vfio_group {
        struct iommu_group      *iommu_group;
        struct list_head        next;
};

/*
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */
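
/*
 * For context, a minimal sketch of how userspace reaches this code
 * (the group number, buffer and error handling are illustrative, not
 * part of this file):
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *
 *	struct vfio_iommu_type1_dma_map map = {
 *		.argsz = sizeof(map),
 *		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *		.vaddr = (__u64)(uintptr_t)buf,	// page-aligned user buffer
 *		.iova  = 0,			// device address to map at
 *		.size  = size,			// page-aligned length
 *	};
 *	ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
 *
 * The MAP_DMA ioctl lands in vfio_dma_do_map() below via
 * vfio_iommu_type1_ioctl().
 */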

static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
                                      dma_addr_t start, size_t size)
{
        struct rb_node *node = iommu->dma_list.rb_node;

        while (node) {
                struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);

                if (start + size <= dma->iova)
                        node = node->rb_left;
                else if (start >= dma->iova + dma->size)
                        node = node->rb_right;
                else
                        return dma;
        }

        return NULL;
}

static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
{
        struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
        struct vfio_dma *dma;

        while (*link) {
                parent = *link;
                dma = rb_entry(parent, struct vfio_dma, node);

                if (new->iova + new->size <= dma->iova)
                        link = &(*link)->rb_left;
                else
                        link = &(*link)->rb_right;
        }

        rb_link_node(&new->node, parent, link);
        rb_insert_color(&new->node, &iommu->dma_list);
}

static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
{
        rb_erase(&old->node, &iommu->dma_list);
}

static int vfio_lock_acct(long npage, bool *lock_cap)
{
        int ret;

        if (!npage)
                return 0;

        if (!current->mm)
                return -ESRCH; /* process exited */

        ret = down_write_killable(&current->mm->mmap_sem);
        if (!ret) {
                if (npage > 0) {
                        if (lock_cap ? !*lock_cap : !capable(CAP_IPC_LOCK)) {
                                unsigned long limit;

                                limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

                                if (current->mm->locked_vm + npage > limit)
                                        ret = -ENOMEM;
                        }
                }

                if (!ret)
                        current->mm->locked_vm += npage;

                up_write(&current->mm->mmap_sem);
        }

        return ret;
}
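
/*
 * Worked example of the accounting above (numbers illustrative): with
 * "ulimit -l 64", RLIMIT_MEMLOCK is 65536 bytes, so limit is 16 pages
 * on a 4K PAGE_SIZE host. A task without CAP_IPC_LOCK whose locked_vm
 * is already 16 gets -ENOMEM from vfio_lock_acct(1, NULL), while
 * negative npage values (unpinning) never hit the limit check because
 * of the "npage > 0" guard.
 */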

/*
 * Some mappings aren't backed by a struct page, for example an mmap'd
 * MMIO range for our own or another device. These use a different
 * pfn conversion and shouldn't be tracked as locked pages.
 */
static bool is_invalid_reserved_pfn(unsigned long pfn)
{
        if (pfn_valid(pfn)) {
                bool reserved;
                struct page *tail = pfn_to_page(pfn);
                struct page *head = compound_head(tail);
                reserved = !!(PageReserved(head));
                if (head != tail) {
                        /*
                         * "head" is not a dangling pointer
                         * (compound_head takes care of that)
                         * but the hugepage may have been split
                         * from under us (and we may not hold a
                         * reference count on the head page so it can
                         * be reused before we run PageReferenced), so
                         * we have to check PageTail before returning
                         * what we just read.
                         */
                        smp_rmb();
                        if (PageTail(tail))
                                return reserved;
                }
                return PageReserved(tail);
        }

        return true;
}

static int put_pfn(unsigned long pfn, int prot)
{
        if (!is_invalid_reserved_pfn(pfn)) {
                struct page *page = pfn_to_page(pfn);
                if (prot & IOMMU_WRITE)
                        SetPageDirty(page);
                put_page(page);
                return 1;
        }
        return 0;
}

static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
{
        struct page *page[1];
        struct vm_area_struct *vma;
        int ret = -EFAULT;

        if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
                *pfn = page_to_pfn(page[0]);
                return 0;
        }

        down_read(&current->mm->mmap_sem);

        vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);

        if (vma && vma->vm_flags & VM_PFNMAP) {
                *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
                if (is_invalid_reserved_pfn(*pfn))
                        ret = 0;
        }

        up_read(&current->mm->mmap_sem);

        return ret;
}

/*
 * Attempt to pin pages. We really don't want to track all the pfns and
 * the iommu can only map chunks of consecutive pfns anyway, so get the
 * first page and all consecutive pages with the same locking.
 */
static long vfio_pin_pages(unsigned long vaddr, long npage,
                           int prot, unsigned long *pfn_base)
{
        unsigned long pfn = 0, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        bool lock_cap = capable(CAP_IPC_LOCK);
        long ret, i = 1;
        bool rsvd;

        if (!current->mm)
                return -ENODEV;

        ret = vaddr_get_pfn(vaddr, prot, pfn_base);
        if (ret)
                return ret;

        rsvd = is_invalid_reserved_pfn(*pfn_base);

        if (!rsvd && !lock_cap && current->mm->locked_vm + 1 > limit) {
                put_pfn(*pfn_base, prot);
                pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
                        limit << PAGE_SHIFT);
                return -ENOMEM;
        }

        if (unlikely(disable_hugepages))
                goto out;

        /* Lock all the consecutive pages from pfn_base */
        for (vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
                ret = vaddr_get_pfn(vaddr, prot, &pfn);
                if (ret)
                        break;

                if (pfn != *pfn_base + i ||
                    rsvd != is_invalid_reserved_pfn(pfn)) {
                        put_pfn(pfn, prot);
                        break;
                }

                if (!rsvd && !lock_cap &&
                    current->mm->locked_vm + i + 1 > limit) {
                        put_pfn(pfn, prot);
                        pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
                                __func__, limit << PAGE_SHIFT);
                        ret = -ENOMEM;
                        goto unpin_out;
                }
        }

out:
        if (!rsvd)
                ret = vfio_lock_acct(i, &lock_cap);

unpin_out:
        if (ret) {
                if (!rsvd) {
                        for (pfn = *pfn_base; i; pfn++, i--)
                                put_pfn(pfn, prot);
                }

                return ret;
        }

        return i;
}

static long vfio_unpin_pages(unsigned long pfn, long npage,
                             int prot, bool do_accounting)
{
        unsigned long unlocked = 0;
        long i;

        for (i = 0; i < npage; i++)
                unlocked += put_pfn(pfn++, prot);

        if (do_accounting)
                vfio_lock_acct(-unlocked, NULL);

        return unlocked;
}

static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
{
        dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
        struct vfio_domain *domain, *d;
        long unlocked = 0;

        if (!dma->size)
                return;
        /*
         * We use the IOMMU to track the physical addresses, otherwise we'd
         * need a much more complicated tracking system. Unfortunately that
         * means we need to use one of the iommu domains to figure out the
         * pfns to unpin. The rest need to be unmapped in advance so we have
         * no iommu translations remaining when the pages are unpinned.
         */
        domain = d = list_first_entry(&iommu->domain_list,
                                      struct vfio_domain, next);

        list_for_each_entry_continue(d, &iommu->domain_list, next) {
                iommu_unmap(d->domain, dma->iova, dma->size);
                cond_resched();
        }

        while (iova < end) {
                size_t unmapped, len;
                phys_addr_t phys, next;

                phys = iommu_iova_to_phys(domain->domain, iova);
                if (WARN_ON(!phys)) {
                        iova += PAGE_SIZE;
                        continue;
                }

                /*
                 * To optimize for fewer iommu_unmap() calls, each of which
                 * may require hardware cache flushing, try to find the
                 * largest contiguous physical memory chunk to unmap.
                 */
                for (len = PAGE_SIZE;
                     !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
                        next = iommu_iova_to_phys(domain->domain, iova + len);
                        if (next != phys + len)
                                break;
                }

                unmapped = iommu_unmap(domain->domain, iova, len);
                if (WARN_ON(!unmapped))
                        break;

                unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
                                             unmapped >> PAGE_SHIFT,
                                             dma->prot, false);
                iova += unmapped;

                cond_resched();
        }

        vfio_lock_acct(-unlocked, NULL);
}

static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
{
        vfio_unmap_unpin(iommu, dma);
        vfio_unlink_dma(iommu, dma);
        kfree(dma);
}

static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
{
        struct vfio_domain *domain;
        unsigned long bitmap = ULONG_MAX;

        mutex_lock(&iommu->lock);
        list_for_each_entry(domain, &iommu->domain_list, next)
                bitmap &= domain->domain->pgsize_bitmap;
        mutex_unlock(&iommu->lock);

        /*
         * In case the IOMMU supports page sizes smaller than PAGE_SIZE
         * we pretend PAGE_SIZE is supported and hide sub-PAGE_SIZE sizes.
         * That way the user will be able to map/unmap buffers whose size/
         * start address is aligned with PAGE_SIZE. Pinning code uses that
         * granularity while iommu driver can use the sub-PAGE_SIZE size
         * to map the buffer.
         */
        if (bitmap & ~PAGE_MASK) {
                bitmap &= PAGE_MASK;
                bitmap |= PAGE_SIZE;
        }

        return bitmap;
}
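
/*
 * Worked example of the masking above (values illustrative): on a 64K
 * PAGE_SIZE arm64 host whose IOMMU reports pgsize_bitmap 4K|2M|1G
 * (0x40201000), "bitmap & ~PAGE_MASK" is nonzero, so the 4K bit is
 * dropped and the 64K bit advertised instead, yielding 64K|2M|1G
 * (0x40210000). Userspace then sees a minimum granule it can actually
 * pin, while the IOMMU driver may still build the mapping out of 4K
 * leaf entries.
 */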

static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
                             struct vfio_iommu_type1_dma_unmap *unmap)
{
        uint64_t mask;
        struct vfio_dma *dma;
        size_t unmapped = 0;
        int ret = 0;

        mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;

        if (unmap->iova & mask)
                return -EINVAL;
        if (!unmap->size || unmap->size & mask)
                return -EINVAL;

        WARN_ON(mask & PAGE_MASK);

        mutex_lock(&iommu->lock);

        /*
         * vfio-iommu-type1 (v1) - User mappings were coalesced together to
         * avoid tracking individual mappings. This means that the granularity
         * of the original mapping was lost and the user was allowed to attempt
         * to unmap any range. Depending on the contiguousness of physical
         * memory and page sizes supported by the IOMMU, arbitrary unmaps may
         * or may not have worked. We only guaranteed unmap granularity
         * matching the original mapping; even though it was untracked here,
         * the original mappings are reflected in IOMMU mappings. This
         * resulted in a couple unusual behaviors. First, if a range is not
         * able to be unmapped, ex. a set of 4k pages that was mapped as a
         * 2M hugepage into the IOMMU, the unmap ioctl returns success but with
         * a zero sized unmap. Also, if an unmap request overlaps the first
         * address of a hugepage, the IOMMU will unmap the entire hugepage.
         * This also returns success and the returned unmap size reflects the
         * actual size unmapped.
         *
         * We attempt to maintain compatibility with this "v1" interface, but
         * we take control out of the hands of the IOMMU. Therefore, an unmap
         * request offset from the beginning of the original mapping will
         * return success with zero sized unmap. And an unmap request covering
         * the first iova of a mapping will unmap the entire range.
         *
         * The v2 version of this interface intends to be more deterministic.
         * Unmap requests must fully cover previous mappings. Multiple
         * mappings may still be unmapped by specifying large ranges, but there
         * must not be any previous mappings bisected by the range. An error
         * will be returned if these conditions are not met. The v2 interface
         * will only return success and a size of zero if there were no
         * mappings within the range.
         */
        if (iommu->v2) {
                dma = vfio_find_dma(iommu, unmap->iova, 0);
                if (dma && dma->iova != unmap->iova) {
                        ret = -EINVAL;
                        goto unlock;
                }
                dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
                if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
                        ret = -EINVAL;
                        goto unlock;
                }
        }

        while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
                if (!iommu->v2 && unmap->iova > dma->iova)
                        break;
                unmapped += dma->size;
                vfio_remove_dma(iommu, dma);
        }

unlock:
        mutex_unlock(&iommu->lock);

        /* Report how much was unmapped */
        unmap->size = unmapped;

        return ret;
}
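
/*
 * Concrete illustration of the v2 rules above (addresses hypothetical):
 * given existing mappings [0x0, 0x100000) and [0x100000, 0x200000),
 * unmapping iova=0 size=0x200000 removes both and reports size 0x200000;
 * unmapping iova=0x80000 size=0x80000 fails with -EINVAL because it
 * bisects the first mapping; and unmapping a hole, e.g. iova=0x400000
 * size=0x100000, succeeds with a reported size of zero.
 */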

/*
 * Turns out AMD IOMMU has a page table bug where it won't map large pages
 * to a region that previously mapped smaller pages. This should be fixed
 * soon, so this is just a temporary workaround to break mappings down into
 * PAGE_SIZE. Better to map smaller pages than nothing.
 */
static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova,
                          unsigned long pfn, long npage, int prot)
{
        long i;
        int ret = 0;

        for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
                ret = iommu_map(domain->domain, iova,
                                (phys_addr_t)pfn << PAGE_SHIFT,
                                PAGE_SIZE, prot | domain->prot);
                if (ret)
                        break;
        }

        for (; i < npage && i > 0; i--, iova -= PAGE_SIZE)
                iommu_unmap(domain->domain, iova, PAGE_SIZE);

        return ret;
}

static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
                          unsigned long pfn, long npage, int prot)
{
        struct vfio_domain *d;
        int ret;

        list_for_each_entry(d, &iommu->domain_list, next) {
                ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
                                npage << PAGE_SHIFT, prot | d->prot);
                if (ret) {
                        if (ret != -EBUSY ||
                            map_try_harder(d, iova, pfn, npage, prot))
                                goto unwind;
                }

                cond_resched();
        }

        return 0;

unwind:
        list_for_each_entry_continue_reverse(d, &iommu->domain_list, next)
                iommu_unmap(d->domain, iova, npage << PAGE_SHIFT);

        return ret;
}

static int vfio_dma_do_map(struct vfio_iommu *iommu,
                           struct vfio_iommu_type1_dma_map *map)
{
        dma_addr_t iova = map->iova;
        unsigned long vaddr = map->vaddr;
        size_t size = map->size;
        long npage;
        int ret = 0, prot = 0;
        uint64_t mask;
        struct vfio_dma *dma;
        unsigned long pfn;

        /* Verify that none of our __u64 fields overflow */
        if (map->size != size || map->vaddr != vaddr || map->iova != iova)
                return -EINVAL;

        mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;

        WARN_ON(mask & PAGE_MASK);

        /* READ/WRITE from device perspective */
        if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
                prot |= IOMMU_WRITE;
        if (map->flags & VFIO_DMA_MAP_FLAG_READ)
                prot |= IOMMU_READ;

        if (!prot || !size || (size | iova | vaddr) & mask)
                return -EINVAL;

        /* Don't allow IOVA or virtual address wrap */
        if (iova + size - 1 < iova || vaddr + size - 1 < vaddr)
                return -EINVAL;

        mutex_lock(&iommu->lock);

        if (vfio_find_dma(iommu, iova, size)) {
                mutex_unlock(&iommu->lock);
                return -EEXIST;
        }

        dma = kzalloc(sizeof(*dma), GFP_KERNEL);
        if (!dma) {
                mutex_unlock(&iommu->lock);
                return -ENOMEM;
        }

        dma->iova = iova;
        dma->vaddr = vaddr;
        dma->prot = prot;

        /* Insert zero-sized and grow as we map chunks of it */
        vfio_link_dma(iommu, dma);

        while (size) {
                /* Pin a contiguous chunk of memory */
                npage = vfio_pin_pages(vaddr + dma->size,
                                       size >> PAGE_SHIFT, prot, &pfn);
                if (npage <= 0) {
                        WARN_ON(!npage);
                        ret = (int)npage;
                        break;
                }

                /* Map it! */
                ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
                if (ret) {
                        vfio_unpin_pages(pfn, npage, prot, true);
                        break;
                }

                size -= npage << PAGE_SHIFT;
                dma->size += npage << PAGE_SHIFT;
        }

        if (ret)
                vfio_remove_dma(iommu, dma);

        mutex_unlock(&iommu->lock);
        return ret;
}
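
/*
 * Note on the mask check above (illustrative numbers): with a common 4K
 * minimum IOMMU page size, __ffs(pgsize_bitmap) is 12 and mask is 0xfff,
 * so iova, vaddr and size must all be 4K aligned. A request of
 * iova=0x1000, vaddr=0x7f0000201000, size=0x3000 passes; changing size
 * to 0x2800 makes "(size | iova | vaddr) & mask" nonzero and the ioctl
 * fails with -EINVAL before anything is pinned.
 */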

static int vfio_bus_type(struct device *dev, void *data)
{
        struct bus_type **bus = data;

        if (*bus && *bus != dev->bus)
                return -EINVAL;

        *bus = dev->bus;

        return 0;
}

static int vfio_iommu_replay(struct vfio_iommu *iommu,
                             struct vfio_domain *domain)
{
        struct vfio_domain *d;
        struct rb_node *n;
        int ret;

        /* Arbitrarily pick the first domain in the list for lookups */
        d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
        n = rb_first(&iommu->dma_list);

        /* If there's not a domain, there better not be any mappings */
        if (WARN_ON(n && !d))
                return -EINVAL;

        for (; n; n = rb_next(n)) {
                struct vfio_dma *dma;
                dma_addr_t iova;

                dma = rb_entry(n, struct vfio_dma, node);
                iova = dma->iova;

                while (iova < dma->iova + dma->size) {
                        phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
                        size_t size;

                        if (WARN_ON(!phys)) {
                                iova += PAGE_SIZE;
                                continue;
                        }

                        size = PAGE_SIZE;

                        while (iova + size < dma->iova + dma->size &&
                               phys + size == iommu_iova_to_phys(d->domain,
                                                                 iova + size))
                                size += PAGE_SIZE;

                        ret = iommu_map(domain->domain, iova, phys,
                                        size, dma->prot | domain->prot);
                        if (ret)
                                return ret;

                        iova += size;
                }
        }

        return 0;
}

/*
 * We change our unmap behavior slightly depending on whether the IOMMU
 * supports fine-grained superpages. IOMMUs like AMD-Vi will use a superpage
 * for practically any contiguous power-of-two mapping we give it. This means
 * we don't need to look for contiguous chunks ourselves to make unmapping
 * more efficient. On IOMMUs with coarse-grained super pages, like Intel VT-d
 * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
 * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
 * hugetlbfs is in use.
 */
static void vfio_test_domain_fgsp(struct vfio_domain *domain)
{
        struct page *pages;
        int ret, order = get_order(PAGE_SIZE * 2);

        pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
        if (!pages)
                return;

        ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
                        IOMMU_READ | IOMMU_WRITE | domain->prot);
        if (!ret) {
                size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);

                if (unmapped == PAGE_SIZE)
                        iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
                else
                        domain->fgsp = true;
        }

        __free_pages(pages, order);
}
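
/*
 * To unpack the probe above: two physically contiguous pages are mapped
 * with a single iommu_map() call, then one page is unmapped. A driver
 * with fine-grained superpage support (e.g. AMD-Vi, per the comment
 * preceding the function) will have promoted the pair to a superpage
 * PTE, so iommu_unmap() tears down the whole entry and returns
 * 2 * PAGE_SIZE rather than the PAGE_SIZE requested, which is what sets
 * fgsp; the promotion detail here is an inference from the check itself.
 */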

static int vfio_iommu_type1_attach_group(void *iommu_data,
                                         struct iommu_group *iommu_group)
{
        struct vfio_iommu *iommu = iommu_data;
        struct vfio_group *group, *g;
        struct vfio_domain *domain, *d;
        struct bus_type *bus = NULL;
        int ret;

        mutex_lock(&iommu->lock);

        list_for_each_entry(d, &iommu->domain_list, next) {
                list_for_each_entry(g, &d->group_list, next) {
                        if (g->iommu_group != iommu_group)
                                continue;

                        mutex_unlock(&iommu->lock);
                        return -EINVAL;
                }
        }

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!group || !domain) {
                ret = -ENOMEM;
                goto out_free;
        }

        group->iommu_group = iommu_group;

        /* Determine bus_type in order to allocate a domain */
        ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
        if (ret)
                goto out_free;

        domain->domain = iommu_domain_alloc(bus);
        if (!domain->domain) {
                ret = -EIO;
                goto out_free;
        }

        if (iommu->nesting) {
                int attr = 1;

                ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
                                            &attr);
                if (ret)
                        goto out_domain;
        }

        ret = iommu_attach_group(domain->domain, iommu_group);
        if (ret)
                goto out_domain;

        INIT_LIST_HEAD(&domain->group_list);
        list_add(&group->next, &domain->group_list);

        if (!allow_unsafe_interrupts &&
            !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {
                pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
                       __func__);
                ret = -EPERM;
                goto out_detach;
        }

        if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
                domain->prot |= IOMMU_CACHE;

        /*
         * Try to match an existing compatible domain. We don't want to
         * preclude an IOMMU driver supporting multiple bus_types and being
         * able to include different bus_types in the same IOMMU domain, so
         * we test whether the domains use the same iommu_ops rather than
         * testing if they're on the same bus_type.
         */
        list_for_each_entry(d, &iommu->domain_list, next) {
                if (d->domain->ops == domain->domain->ops &&
                    d->prot == domain->prot) {
                        iommu_detach_group(domain->domain, iommu_group);
                        if (!iommu_attach_group(d->domain, iommu_group)) {
                                list_add(&group->next, &d->group_list);
                                iommu_domain_free(domain->domain);
                                kfree(domain);
                                mutex_unlock(&iommu->lock);
                                return 0;
                        }

                        ret = iommu_attach_group(domain->domain, iommu_group);
                        if (ret)
                                goto out_domain;
                }
        }

        vfio_test_domain_fgsp(domain);

        /* replay mappings on new domains */
        ret = vfio_iommu_replay(iommu, domain);
        if (ret)
                goto out_detach;

        list_add(&domain->next, &iommu->domain_list);

        mutex_unlock(&iommu->lock);

        return 0;

out_detach:
        iommu_detach_group(domain->domain, iommu_group);
out_domain:
        iommu_domain_free(domain->domain);
out_free:
        kfree(domain);
        kfree(group);
        mutex_unlock(&iommu->lock);
        return ret;
}

static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
{
        struct rb_node *node;

        while ((node = rb_first(&iommu->dma_list)))
                vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
}

static void vfio_iommu_type1_detach_group(void *iommu_data,
                                          struct iommu_group *iommu_group)
{
        struct vfio_iommu *iommu = iommu_data;
        struct vfio_domain *domain;
        struct vfio_group *group;

        mutex_lock(&iommu->lock);

        list_for_each_entry(domain, &iommu->domain_list, next) {
                list_for_each_entry(group, &domain->group_list, next) {
                        if (group->iommu_group != iommu_group)
                                continue;

                        iommu_detach_group(domain->domain, iommu_group);
                        list_del(&group->next);
                        kfree(group);
                        /*
                         * Group ownership provides privilege, if the group
                         * list is empty, the domain goes away. If it's the
                         * last domain, then all the mappings go away too.
                         */
                        if (list_empty(&domain->group_list)) {
                                if (list_is_singular(&iommu->domain_list))
                                        vfio_iommu_unmap_unpin_all(iommu);
                                iommu_domain_free(domain->domain);
                                list_del(&domain->next);
                                kfree(domain);
                        }
                        goto done;
                }
        }

done:
        mutex_unlock(&iommu->lock);
}

static void *vfio_iommu_type1_open(unsigned long arg)
{
        struct vfio_iommu *iommu;

        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
                return ERR_PTR(-ENOMEM);

        switch (arg) {
        case VFIO_TYPE1_IOMMU:
                break;
        case VFIO_TYPE1_NESTING_IOMMU:
                iommu->nesting = true;
                /* fall through: nesting implies the v2 semantics as well */
        case VFIO_TYPE1v2_IOMMU:
                iommu->v2 = true;
                break;
        default:
                kfree(iommu);
                return ERR_PTR(-EINVAL);
        }

        INIT_LIST_HEAD(&iommu->domain_list);
        iommu->dma_list = RB_ROOT;
        mutex_init(&iommu->lock);

        return iommu;
}

static void vfio_iommu_type1_release(void *iommu_data)
{
        struct vfio_iommu *iommu = iommu_data;
        struct vfio_domain *domain, *domain_tmp;
        struct vfio_group *group, *group_tmp;

        vfio_iommu_unmap_unpin_all(iommu);

        list_for_each_entry_safe(domain, domain_tmp,
                                 &iommu->domain_list, next) {
                list_for_each_entry_safe(group, group_tmp,
                                         &domain->group_list, next) {
                        iommu_detach_group(domain->domain, group->iommu_group);
                        list_del(&group->next);
                        kfree(group);
                }
                iommu_domain_free(domain->domain);
                list_del(&domain->next);
                kfree(domain);
        }

        kfree(iommu);
}

static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
{
        struct vfio_domain *domain;
        int ret = 1;

        mutex_lock(&iommu->lock);
        list_for_each_entry(domain, &iommu->domain_list, next) {
                if (!(domain->prot & IOMMU_CACHE)) {
                        ret = 0;
                        break;
                }
        }
        mutex_unlock(&iommu->lock);

        return ret;
}

static long vfio_iommu_type1_ioctl(void *iommu_data,
                                   unsigned int cmd, unsigned long arg)
{
        struct vfio_iommu *iommu = iommu_data;
        unsigned long minsz;

        if (cmd == VFIO_CHECK_EXTENSION) {
                switch (arg) {
                case VFIO_TYPE1_IOMMU:
                case VFIO_TYPE1v2_IOMMU:
                case VFIO_TYPE1_NESTING_IOMMU:
                        return 1;
                case VFIO_DMA_CC_IOMMU:
                        if (!iommu)
                                return 0;
                        return vfio_domains_have_iommu_cache(iommu);
                default:
                        return 0;
                }
        } else if (cmd == VFIO_IOMMU_GET_INFO) {
                struct vfio_iommu_type1_info info;

                minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                info.flags = VFIO_IOMMU_INFO_PGSIZES;

                info.iova_pgsizes = vfio_pgsize_bitmap(iommu);

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;

        } else if (cmd == VFIO_IOMMU_MAP_DMA) {
                struct vfio_iommu_type1_dma_map map;
                uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
                                VFIO_DMA_MAP_FLAG_WRITE;

                minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

                if (copy_from_user(&map, (void __user *)arg, minsz))
                        return -EFAULT;

                if (map.argsz < minsz || map.flags & ~mask)
                        return -EINVAL;

                return vfio_dma_do_map(iommu, &map);

        } else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
                struct vfio_iommu_type1_dma_unmap unmap;
                long ret;

                minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);

                if (copy_from_user(&unmap, (void __user *)arg, minsz))
                        return -EFAULT;

                if (unmap.argsz < minsz || unmap.flags)
                        return -EINVAL;

                ret = vfio_dma_do_unmap(iommu, &unmap);
                if (ret)
                        return ret;

                return copy_to_user((void __user *)arg, &unmap, minsz) ?
                        -EFAULT : 0;
        }

        return -ENOTTY;
}

static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
        .name           = "vfio-iommu-type1",
        .owner          = THIS_MODULE,
        .open           = vfio_iommu_type1_open,
        .release        = vfio_iommu_type1_release,
        .ioctl          = vfio_iommu_type1_ioctl,
        .attach_group   = vfio_iommu_type1_attach_group,
        .detach_group   = vfio_iommu_type1_detach_group,
};

static int __init vfio_iommu_type1_init(void)
{
        return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1);
}

static void __exit vfio_iommu_type1_cleanup(void)
{
        vfio_unregister_iommu_driver(&vfio_iommu_driver_ops_type1);
}

module_init(vfio_iommu_type1_init);
module_exit(vfio_iommu_type1_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);