/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */
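
/*
 * A rough sketch of the expected userspace flow (names are from the VFIO
 * uAPI; see Documentation/vfio.txt for the authoritative example):
 *
 *	container = open("/dev/vfio/vfio", O_RDWR);
 *	group = open("/dev/vfio/<group id>", O_RDWR);
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
 *	ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
 *	ioctl(container, VFIO_IOMMU_ENABLE);
 *	ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
 *	...
 *	ioctl(container, VFIO_IOMMU_UNMAP_DMA, &unmap);
 *	ioctl(container, VFIO_IOMMU_DISABLE);
 */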

/*
 * The container descriptor supports only a single group per container.
 * Required by the API, as the container is not supplied with the IOMMU group
 * at the moment of initialization.
 */
struct tce_container {
	struct mutex lock;
	struct iommu_table *tbl;
	bool enabled;
};

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found.  Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

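/*
 * Account the entire DMA window as locked memory against RLIMIT_MEMLOCK
 * (see the comment below) and mark the container enabled.  Called via
 * the VFIO_IOMMU_ENABLE ioctl.
 */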
static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked, lock_limit, npages;
	struct iommu_table *tbl = container->tbl;

	if (!container->tbl)
		return -ENXIO;

	if (!current->mm)
		return -ESRCH; /* process exited */

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap.  For powerpc, the map/unmap
	 * paths can be very hot, though, and the accounting would kill
	 * performance, especially since it would be difficult or impossible
	 * to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the iommu is
	 * enabled and disabled.  The worst case upper bound on locked memory
	 * is the size of the whole iommu window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also, we don't have a nice way to fail on H_PUT_TCE due to ulimits;
	 * that would effectively kill the guest at random points, so it is
	 * much better to enforce the limit based on the maximum that the
	 * guest can map.
	 */
	down_write(&current->mm->mmap_sem);
	npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
	locked = current->mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
		pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
				rlimit(RLIMIT_MEMLOCK));
		ret = -ENOMEM;
	} else {
		current->mm->locked_vm += npages;
		container->enabled = true;
	}
	up_write(&current->mm->mmap_sem);

	return ret;
}

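/*
 * Undo the locked memory accounting done by tce_iommu_enable().  Called
 * via the VFIO_IOMMU_DISABLE ioctl and when a group is detached or the
 * container is released while still enabled.
 */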
static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	if (!container->tbl || !current->mm)
		return;

	down_write(&current->mm->mmap_sem);
	current->mm->locked_vm -= (container->tbl->it_size <<
			IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
	up_write(&current->mm->mmap_sem);
}

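/*
 * Allocate the per-container state.  The VFIO core calls this when
 * userspace sets the container's IOMMU model (VFIO_SET_IOMMU); any type
 * other than VFIO_SPAPR_TCE_IOMMU is rejected.
 */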
static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if (arg != VFIO_SPAPR_TCE_IOMMU) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);

	return container;
}

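/*
 * Release the container: drop the locked memory accounting and detach
 * any group that is still attached, then free the state.
 */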
static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;

	WARN_ON(container->tbl && !container->tbl->it_group);
	tce_iommu_disable(container);

	if (container->tbl && container->tbl->it_group)
		tce_iommu_detach_group(iommu_data, container->tbl->it_group);

	mutex_destroy(&container->lock);

	kfree(container);
}

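/*
 * Clear @pages TCE entries starting at @entry and release the pages they
 * referenced, marking them dirty if the TCE permitted DMA writes.
 */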
static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldtce;
	struct page *page;

	for ( ; pages; --pages, ++entry) {
		oldtce = iommu_clear_tce(tbl, entry);
		if (!oldtce)
			continue;

		page = pfn_to_page(oldtce >> PAGE_SHIFT);
		WARN_ON(!page);
		if (page) {
			if (oldtce & TCE_PCI_WRITE)
				SetPageDirty(page);
			put_page(page);
		}
	}

	return 0;
}

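/*
 * Pin @pages pages of userspace memory starting at @tce and program the
 * corresponding TCE entries starting at @entry.  On failure the partially
 * built range is torn down again via tce_iommu_clear().
 */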
static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages)
{
	long i, ret = 0;
	struct page *page = NULL;
	unsigned long hva;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = get_user_pages_fast(tce & PAGE_MASK, 1,
				direction != DMA_TO_DEVICE, &page);
		if (unlikely(ret != 1)) {
			ret = -EFAULT;
			break;
		}

		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			/*
			 * Release the page pinned above: its TCE has not been
			 * programmed, so tce_iommu_clear() below will not put it.
			 */
			put_page(page);
			ret = -EPERM;
			break;
		}

		hva = (unsigned long) page_address(page) + offset;

		ret = iommu_tce_build(tbl, entry + i, hva, direction);
		if (ret) {
			put_page(page);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}
		tce += IOMMU_PAGE_SIZE_4K;
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

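/*
 * Container ioctl handler: reports the DMA32 window (GET_INFO), maps and
 * unmaps DMA via the TCE table (MAP_DMA/UNMAP_DMA), toggles the locked
 * memory accounting (ENABLE/DISABLE) and forwards EEH operations.
 */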
static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;

	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct iommu_table *tbl = container->tbl;

		if (WARN_ON(!tbl))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K;
		info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K;
		info.flags = 0;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = container->tbl;
		unsigned long tce;

		if (!tbl)
			return -ENXIO;

		BUG_ON(!tbl->it_group);

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		if ((param.size & ~IOMMU_PAGE_MASK_4K) ||
				(param.vaddr & ~IOMMU_PAGE_MASK_4K))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		tce = param.vaddr;
		if (param.flags & VFIO_DMA_MAP_FLAG_READ)
			tce |= TCE_PCI_READ;
		if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
			tce |= TCE_PCI_WRITE;

		ret = iommu_tce_put_param_check(tbl, param.iova, tce);
		if (ret)
			return ret;

		ret = tce_iommu_build(container, tbl,
				param.iova >> IOMMU_PAGE_SHIFT_4K,
				tce, param.size >> IOMMU_PAGE_SHIFT_4K);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = container->tbl;

		if (WARN_ON(!tbl))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		if (param.size & ~IOMMU_PAGE_MASK_4K)
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> IOMMU_PAGE_SHIFT_4K);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> IOMMU_PAGE_SHIFT_4K,
				param.size >> IOMMU_PAGE_SHIFT_4K);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;
	case VFIO_EEH_PE_OP:
		if (!container->tbl || !container->tbl->it_group)
			return -ENODEV;

		return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
						  cmd, arg);
	}

	return -ENOTTY;
}

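/*
 * Attach an IOMMU group to the container by taking ownership of its
 * IOMMU table.  Only one group per container is supported.
 */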
static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

	BUG_ON(!tbl);
	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	if (container->tbl) {
		pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
				iommu_group_id(container->tbl->it_group),
				iommu_group_id(iommu_group));
		ret = -EBUSY;
	} else if (container->enabled) {
		pr_err("tce_vfio: attaching group #%u to enabled container\n",
				iommu_group_id(iommu_group));
		ret = -EBUSY;
	} else {
		ret = iommu_take_ownership(tbl);
		if (!ret)
			container->tbl = tbl;
	}

	mutex_unlock(&container->lock);

	return ret;
}

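/*
 * Detach the group: clear any remaining TCE entries and return the
 * IOMMU table to the platform code.
 */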
static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

	BUG_ON(!tbl);
	mutex_lock(&container->lock);
	if (tbl != container->tbl) {
		pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
				iommu_group_id(iommu_group),
				iommu_group_id(tbl->it_group));
	} else {
		if (container->enabled) {
			pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
					iommu_group_id(tbl->it_group));
			tce_iommu_disable(container);
		}

		/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
				iommu_group_id(iommu_group), iommu_group); */
		container->tbl = NULL;
		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		iommu_release_ownership(tbl);
	}
	mutex_unlock(&container->lock);
}

static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

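/* Module init/exit simply (un)register this driver with the VFIO core. */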
static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);