/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp. All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group);

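/*
 * Account for memory about to be pinned for DMA: bump the calling task's
 * locked_vm by @npages under mmap_sem and fail with -ENOMEM if that would
 * exceed RLIMIT_MEMLOCK (unless the task has CAP_IPC_LOCK).
 */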
static long try_increment_locked_vm(long npages)
{
        long ret = 0, locked, lock_limit;

        if (!current || !current->mm)
                return -ESRCH; /* process exited */

        if (!npages)
                return 0;

        down_write(&current->mm->mmap_sem);
        locked = current->mm->locked_vm + npages;
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                ret = -ENOMEM;
        else
                current->mm->locked_vm += npages;

        pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
                        npages << PAGE_SHIFT,
                        current->mm->locked_vm << PAGE_SHIFT,
                        rlimit(RLIMIT_MEMLOCK),
                        ret ? " - exceeded" : "");

        up_write(&current->mm->mmap_sem);

        return ret;
}

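/*
 * Undo the accounting done by try_increment_locked_vm(): decrease the calling
 * task's locked_vm by @npages, warning once and clamping if the counter is
 * unexpectedly smaller than the requested amount.
 */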
static void decrement_locked_vm(long npages)
{
        if (!current || !current->mm || !npages)
                return; /* process exited */

        down_write(&current->mm->mmap_sem);
        if (WARN_ON_ONCE(npages > current->mm->locked_vm))
                npages = current->mm->locked_vm;
        current->mm->locked_vm -= npages;
        pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
                        npages << PAGE_SHIFT,
                        current->mm->locked_vm << PAGE_SHIFT,
                        rlimit(RLIMIT_MEMLOCK));
        up_write(&current->mm->mmap_sem);
}

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

/*
 * The container descriptor supports only a single group per container.
 * Required by the API as the container is not supplied with the IOMMU group
 * at the moment of initialization.
 */
struct tce_container {
        struct mutex lock;
        struct iommu_table *tbl;
        bool enabled;
        unsigned long locked_pages;
};

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
        /*
         * Check that the TCE table granularity is not bigger than the size of
         * a page we just found. Otherwise the hardware can get access to
         * a bigger memory chunk than it should.
         */
        return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static int tce_iommu_enable(struct tce_container *container)
{
        int ret = 0;
        unsigned long locked;
        struct iommu_table *tbl = container->tbl;

        if (!container->tbl)
                return -ENXIO;

        if (!current->mm)
                return -ESRCH; /* process exited */

        if (container->enabled)
                return -EBUSY;

        /*
         * When userspace pages are mapped into the IOMMU, they are effectively
         * locked memory, so, theoretically, we need to update the accounting
         * of locked pages on each map and unmap. For powerpc, the map/unmap
         * paths can be very hot, though, and the accounting would kill
         * performance, especially since it would be difficult, if not
         * impossible, to handle the accounting in real mode only.
         *
         * To address that, rather than precisely accounting every page, we
         * instead account for a worst case on locked memory when the iommu is
         * enabled and disabled. The worst case upper bound on locked memory
         * is the size of the whole iommu window, which is usually relatively
         * small (compared to total memory sizes) on POWER hardware.
         *
         * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
         * which would effectively kill the guest at random points, so it is
         * much better to enforce the limit based on the maximum that the
         * guest can map.
         *
         * Unfortunately at the moment it counts whole tables, no matter how
         * much memory the guest has. I.e. for a 4GB guest and 4 IOMMU groups,
         * each with a 2GB DMA window, 8GB will be counted here. The reason for
         * this is that we cannot tell here the amount of RAM used by the guest
         * as this information is only available from KVM and VFIO is
         * KVM agnostic.
         */
        locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
        ret = try_increment_locked_vm(locked);
        if (ret)
                return ret;

        container->locked_pages = locked;

        container->enabled = true;

        return ret;
}

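/*
 * Disable the container: clear the enabled flag and release the
 * locked-memory accounting taken in tce_iommu_enable().
 */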
static void tce_iommu_disable(struct tce_container *container)
{
        if (!container->enabled)
                return;

        container->enabled = false;

        if (!current->mm)
                return;

        decrement_locked_vm(container->locked_pages);
}

static void *tce_iommu_open(unsigned long arg)
{
        struct tce_container *container;

        if (arg != VFIO_SPAPR_TCE_IOMMU) {
                pr_err("tce_vfio: Wrong IOMMU type\n");
                return ERR_PTR(-EINVAL);
        }

        container = kzalloc(sizeof(*container), GFP_KERNEL);
        if (!container)
                return ERR_PTR(-ENOMEM);

        mutex_init(&container->lock);

        return container;
}

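/*
 * Called when the container fd is released: detach the group still attached
 * (if any), drop the locked-memory accounting and free the container.
 */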
static void tce_iommu_release(void *iommu_data)
{
        struct tce_container *container = iommu_data;

        WARN_ON(container->tbl && !container->tbl->it_group);

        if (container->tbl && container->tbl->it_group)
                tce_iommu_detach_group(iommu_data, container->tbl->it_group);

        tce_iommu_disable(container);
        mutex_destroy(&container->lock);

        kfree(container);
}

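/*
 * Release the page referenced by a TCE entry: mark it dirty if the device
 * was allowed to write to it, then drop the reference taken at mapping time.
 */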
static void tce_iommu_unuse_page(struct tce_container *container,
                unsigned long oldtce)
{
        struct page *page;

        if (!(oldtce & (TCE_PCI_READ | TCE_PCI_WRITE)))
                return;

        page = pfn_to_page(oldtce >> PAGE_SHIFT);

        if (oldtce & TCE_PCI_WRITE)
                SetPageDirty(page);

        put_page(page);
}

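/*
 * Clear @pages TCE entries starting at @entry and release the pages they
 * pointed to.
 */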
static int tce_iommu_clear(struct tce_container *container,
                struct iommu_table *tbl,
                unsigned long entry, unsigned long pages)
{
        unsigned long oldtce;

        for ( ; pages; --pages, ++entry) {
                oldtce = iommu_clear_tce(tbl, entry);
                if (!oldtce)
                        continue;

                tce_iommu_unuse_page(container, oldtce);
        }

        return 0;
}

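/*
 * Pin the userspace page backing @tce with get_user_pages_fast() and return
 * its host physical address via @hpa.
 */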
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
        struct page *page = NULL;
        enum dma_data_direction direction = iommu_tce_direction(tce);

        if (get_user_pages_fast(tce & PAGE_MASK, 1,
                        direction != DMA_TO_DEVICE, &page) != 1)
                return -EFAULT;

        *hpa = __pa((unsigned long) page_address(page));

        return 0;
}

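/*
 * Pin userspace memory and program @pages TCE entries starting at @entry;
 * on failure, any entries already programmed are cleared again.
 */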
static long tce_iommu_build(struct tce_container *container,
                struct iommu_table *tbl,
                unsigned long entry, unsigned long tce, unsigned long pages)
{
        long i, ret = 0;
        struct page *page;
        unsigned long hpa;
        enum dma_data_direction direction = iommu_tce_direction(tce);

        for (i = 0; i < pages; ++i) {
                unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

                ret = tce_iommu_use_page(tce, &hpa);
                if (ret)
                        break;

                page = pfn_to_page(hpa >> PAGE_SHIFT);
                if (!tce_page_is_contained(page, tbl->it_page_shift)) {
                        ret = -EPERM;
                        break;
                }

                hpa |= offset;
                ret = iommu_tce_build(tbl, entry + i, (unsigned long) __va(hpa),
                                direction);
                if (ret) {
                        tce_iommu_unuse_page(container, hpa);
                        pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
                                        __func__, entry << tbl->it_page_shift,
                                        tce, ret);
                        break;
                }
                tce += IOMMU_PAGE_SIZE(tbl);
        }

        if (ret)
                tce_iommu_clear(container, tbl, entry, i);

        return ret;
}

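/*
 * VFIO ioctl handler for SPAPR TCE containers: extension checks, DMA window
 * queries, DMA map/unmap, container enable/disable and EEH operations.
 */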
static long tce_iommu_ioctl(void *iommu_data,
                                 unsigned int cmd, unsigned long arg)
{
        struct tce_container *container = iommu_data;
        unsigned long minsz;
        long ret;

        switch (cmd) {
        case VFIO_CHECK_EXTENSION:
                switch (arg) {
                case VFIO_SPAPR_TCE_IOMMU:
                        ret = 1;
                        break;
                default:
                        ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
                        break;
                }

                return (ret < 0) ? 0 : ret;

        case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
                struct vfio_iommu_spapr_tce_info info;
                struct iommu_table *tbl = container->tbl;

                if (WARN_ON(!tbl))
                        return -ENXIO;

                minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
                                dma32_window_size);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                info.dma32_window_start = tbl->it_offset << tbl->it_page_shift;
                info.dma32_window_size = tbl->it_size << tbl->it_page_shift;
                info.flags = 0;

                if (copy_to_user((void __user *)arg, &info, minsz))
                        return -EFAULT;

                return 0;
        }
        case VFIO_IOMMU_MAP_DMA: {
                struct vfio_iommu_type1_dma_map param;
                struct iommu_table *tbl = container->tbl;
                unsigned long tce;

                if (!container->enabled)
                        return -EPERM;

                if (!tbl)
                        return -ENXIO;

                BUG_ON(!tbl->it_group);

                minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

                if (copy_from_user(&param, (void __user *)arg, minsz))
                        return -EFAULT;

                if (param.argsz < minsz)
                        return -EINVAL;

                if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
                                VFIO_DMA_MAP_FLAG_WRITE))
                        return -EINVAL;

                if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
                                (param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
                        return -EINVAL;

                /* iova is checked by the IOMMU API */
                tce = param.vaddr;
                if (param.flags & VFIO_DMA_MAP_FLAG_READ)
                        tce |= TCE_PCI_READ;
                if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
                        tce |= TCE_PCI_WRITE;

                ret = iommu_tce_put_param_check(tbl, param.iova, tce);
                if (ret)
                        return ret;

                ret = tce_iommu_build(container, tbl,
                                param.iova >> tbl->it_page_shift,
                                tce, param.size >> tbl->it_page_shift);

                iommu_flush_tce(tbl);

                return ret;
        }
        case VFIO_IOMMU_UNMAP_DMA: {
                struct vfio_iommu_type1_dma_unmap param;
                struct iommu_table *tbl = container->tbl;

                if (!container->enabled)
                        return -EPERM;

                if (WARN_ON(!tbl))
                        return -ENXIO;

                minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
                                size);

                if (copy_from_user(&param, (void __user *)arg, minsz))
                        return -EFAULT;

                if (param.argsz < minsz)
                        return -EINVAL;

                /* No flag is supported now */
                if (param.flags)
                        return -EINVAL;

                if (param.size & ~IOMMU_PAGE_MASK(tbl))
                        return -EINVAL;

                ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
                                param.size >> tbl->it_page_shift);
                if (ret)
                        return ret;

                ret = tce_iommu_clear(container, tbl,
                                param.iova >> tbl->it_page_shift,
                                param.size >> tbl->it_page_shift);
                iommu_flush_tce(tbl);

                return ret;
        }
        case VFIO_IOMMU_ENABLE:
                mutex_lock(&container->lock);
                ret = tce_iommu_enable(container);
                mutex_unlock(&container->lock);
                return ret;

        case VFIO_IOMMU_DISABLE:
                mutex_lock(&container->lock);
                tce_iommu_disable(container);
                mutex_unlock(&container->lock);
                return 0;
        case VFIO_EEH_PE_OP:
                if (!container->tbl || !container->tbl->it_group)
                        return -ENODEV;

                return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
                                                  cmd, arg);
        }

        return -ENOTTY;
}

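/*
 * Attach an IOMMU group to the container by taking ownership of the group's
 * TCE table. Only a single group per container is supported.
 */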
static int tce_iommu_attach_group(void *iommu_data,
                struct iommu_group *iommu_group)
{
        int ret;
        struct tce_container *container = iommu_data;
        struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

        BUG_ON(!tbl);
        mutex_lock(&container->lock);

        /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
                        iommu_group_id(iommu_group), iommu_group); */
        if (container->tbl) {
                pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
                                iommu_group_id(container->tbl->it_group),
                                iommu_group_id(iommu_group));
                ret = -EBUSY;
                goto unlock_exit;
        }

        if (container->enabled) {
                pr_err("tce_vfio: attaching group #%u to enabled container\n",
                                iommu_group_id(iommu_group));
                ret = -EBUSY;
                goto unlock_exit;
        }

        ret = iommu_take_ownership(tbl);
        if (!ret)
                container->tbl = tbl;

unlock_exit:
        mutex_unlock(&container->lock);

        return ret;
}

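/*
 * Detach the IOMMU group from the container: force-disable the container if
 * needed, clear all TCE entries and return ownership of the TCE table to the
 * platform code.
 */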
static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group)
{
        struct tce_container *container = iommu_data;
        struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

        BUG_ON(!tbl);
        mutex_lock(&container->lock);
        if (tbl != container->tbl) {
                pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
                                iommu_group_id(iommu_group),
                                iommu_group_id(tbl->it_group));
                goto unlock_exit;
        }

        if (container->enabled) {
                pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
                                iommu_group_id(tbl->it_group));
                tce_iommu_disable(container);
        }

        /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
                        iommu_group_id(iommu_group), iommu_group); */
        container->tbl = NULL;
        tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
        iommu_release_ownership(tbl);

unlock_exit:
        mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
        .name           = "iommu-vfio-powerpc",
        .owner          = THIS_MODULE,
        .open           = tce_iommu_open,
        .release        = tce_iommu_release,
        .ioctl          = tce_iommu_ioctl,
        .attach_group   = tce_iommu_attach_group,
        .detach_group   = tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
        return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
        vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);