/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */
15
16#include <linux/cdev.h>
17#include <linux/compat.h>
18#include <linux/device.h>
19#include <linux/file.h>
20#include <linux/anon_inodes.h>
21#include <linux/fs.h>
22#include <linux/idr.h>
23#include <linux/iommu.h>
24#include <linux/list.h>
Alex Williamsond1099902013-12-19 10:17:13 -070025#include <linux/miscdevice.h>
Alex Williamsoncba33452012-07-31 08:16:22 -060026#include <linux/module.h>
27#include <linux/mutex.h>
Alex Williamson9587f442013-04-25 16:12:38 -060028#include <linux/rwsem.h>
Alex Williamsoncba33452012-07-31 08:16:22 -060029#include <linux/sched.h>
30#include <linux/slab.h>
Alex Williamson664e9382013-04-30 15:42:28 -060031#include <linux/stat.h>
Alex Williamsoncba33452012-07-31 08:16:22 -060032#include <linux/string.h>
33#include <linux/uaccess.h>
34#include <linux/vfio.h>
35#include <linux/wait.h>
36
37#define DRIVER_VERSION "0.3"
38#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
39#define DRIVER_DESC "VFIO - User Level meta-driver"
40
41static struct vfio {
42 struct class *class;
43 struct list_head iommu_drivers_list;
44 struct mutex iommu_drivers_lock;
45 struct list_head group_list;
46 struct idr group_idr;
47 struct mutex group_lock;
48 struct cdev group_cdev;
Alex Williamsond1099902013-12-19 10:17:13 -070049 dev_t group_devt;
Alex Williamsoncba33452012-07-31 08:16:22 -060050 wait_queue_head_t release_q;
51} vfio;
52
53struct vfio_iommu_driver {
54 const struct vfio_iommu_driver_ops *ops;
55 struct list_head vfio_next;
56};
57
58struct vfio_container {
59 struct kref kref;
60 struct list_head group_list;
Alex Williamson9587f442013-04-25 16:12:38 -060061 struct rw_semaphore group_lock;
Alex Williamsoncba33452012-07-31 08:16:22 -060062 struct vfio_iommu_driver *iommu_driver;
63 void *iommu_data;
64};
65
66struct vfio_group {
67 struct kref kref;
68 int minor;
69 atomic_t container_users;
70 struct iommu_group *iommu_group;
71 struct vfio_container *container;
72 struct list_head device_list;
73 struct mutex device_lock;
74 struct device *dev;
75 struct notifier_block nb;
76 struct list_head vfio_next;
77 struct list_head container_next;
Alex Williamson6d6768c2013-06-25 16:06:54 -060078 atomic_t opened;
Alex Williamsoncba33452012-07-31 08:16:22 -060079};
80
81struct vfio_device {
82 struct kref kref;
83 struct device *dev;
84 const struct vfio_device_ops *ops;
85 struct vfio_group *group;
86 struct list_head group_next;
87 void *device_data;
88};
89
90/**
91 * IOMMU driver registration
92 */
93int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
94{
95 struct vfio_iommu_driver *driver, *tmp;
96
97 driver = kzalloc(sizeof(*driver), GFP_KERNEL);
98 if (!driver)
99 return -ENOMEM;
100
101 driver->ops = ops;
102
103 mutex_lock(&vfio.iommu_drivers_lock);
104
105 /* Check for duplicates */
106 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
107 if (tmp->ops == ops) {
108 mutex_unlock(&vfio.iommu_drivers_lock);
109 kfree(driver);
110 return -EINVAL;
111 }
112 }
113
114 list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
115
116 mutex_unlock(&vfio.iommu_drivers_lock);
117
118 return 0;
119}
120EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
121
122void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
123{
124 struct vfio_iommu_driver *driver;
125
126 mutex_lock(&vfio.iommu_drivers_lock);
127 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
128 if (driver->ops == ops) {
129 list_del(&driver->vfio_next);
130 mutex_unlock(&vfio.iommu_drivers_lock);
131 kfree(driver);
132 return;
133 }
134 }
135 mutex_unlock(&vfio.iommu_drivers_lock);
136}
137EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
138
139/**
140 * Group minor allocation/free - both called with vfio.group_lock held
141 */
142static int vfio_alloc_group_minor(struct vfio_group *group)
143{
Alex Williamsond1099902013-12-19 10:17:13 -0700144 return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
Alex Williamsoncba33452012-07-31 08:16:22 -0600145}
146
147static void vfio_free_group_minor(int minor)
148{
149 idr_remove(&vfio.group_idr, minor);
150}
151
152static int vfio_iommu_group_notifier(struct notifier_block *nb,
153 unsigned long action, void *data);
154static void vfio_group_get(struct vfio_group *group);
155
156/**
157 * Container objects - containers are created when /dev/vfio/vfio is
158 * opened, but their lifecycle extends until the last user is done, so
159 * it's freed via kref. Must support container/group/device being
160 * closed in any order.
161 */
162static void vfio_container_get(struct vfio_container *container)
163{
164 kref_get(&container->kref);
165}
166
167static void vfio_container_release(struct kref *kref)
168{
169 struct vfio_container *container;
170 container = container_of(kref, struct vfio_container, kref);
171
172 kfree(container);
173}
174
175static void vfio_container_put(struct vfio_container *container)
176{
177 kref_put(&container->kref, vfio_container_release);
178}
179
Jiang Liu9df7b252012-12-07 13:43:50 -0700180static void vfio_group_unlock_and_free(struct vfio_group *group)
181{
182 mutex_unlock(&vfio.group_lock);
183 /*
184 * Unregister outside of lock. A spurious callback is harmless now
185 * that the group is no longer in vfio.group_list.
186 */
187 iommu_group_unregister_notifier(group->iommu_group, &group->nb);
188 kfree(group);
189}
190
Alex Williamsoncba33452012-07-31 08:16:22 -0600191/**
192 * Group objects - create, release, get, put, search
193 */
194static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
195{
196 struct vfio_group *group, *tmp;
197 struct device *dev;
198 int ret, minor;
199
200 group = kzalloc(sizeof(*group), GFP_KERNEL);
201 if (!group)
202 return ERR_PTR(-ENOMEM);
203
204 kref_init(&group->kref);
205 INIT_LIST_HEAD(&group->device_list);
206 mutex_init(&group->device_lock);
207 atomic_set(&group->container_users, 0);
Alex Williamson6d6768c2013-06-25 16:06:54 -0600208 atomic_set(&group->opened, 0);
Alex Williamsoncba33452012-07-31 08:16:22 -0600209 group->iommu_group = iommu_group;
210
211 group->nb.notifier_call = vfio_iommu_group_notifier;
212
213 /*
214 * blocking notifiers acquire a rwsem around registering and hold
215 * it around callback. Therefore, need to register outside of
216 * vfio.group_lock to avoid A-B/B-A contention. Our callback won't
217 * do anything unless it can find the group in vfio.group_list, so
218 * no harm in registering early.
219 */
220 ret = iommu_group_register_notifier(iommu_group, &group->nb);
221 if (ret) {
222 kfree(group);
223 return ERR_PTR(ret);
224 }
225
226 mutex_lock(&vfio.group_lock);
227
228 minor = vfio_alloc_group_minor(group);
229 if (minor < 0) {
Jiang Liu9df7b252012-12-07 13:43:50 -0700230 vfio_group_unlock_and_free(group);
Alex Williamsoncba33452012-07-31 08:16:22 -0600231 return ERR_PTR(minor);
232 }
233
234 /* Did we race creating this group? */
235 list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
236 if (tmp->iommu_group == iommu_group) {
237 vfio_group_get(tmp);
238 vfio_free_group_minor(minor);
Jiang Liu9df7b252012-12-07 13:43:50 -0700239 vfio_group_unlock_and_free(group);
Alex Williamsoncba33452012-07-31 08:16:22 -0600240 return tmp;
241 }
242 }
243
Alex Williamsond1099902013-12-19 10:17:13 -0700244 dev = device_create(vfio.class, NULL,
245 MKDEV(MAJOR(vfio.group_devt), minor),
Alex Williamsoncba33452012-07-31 08:16:22 -0600246 group, "%d", iommu_group_id(iommu_group));
247 if (IS_ERR(dev)) {
248 vfio_free_group_minor(minor);
Jiang Liu9df7b252012-12-07 13:43:50 -0700249 vfio_group_unlock_and_free(group);
Alex Williamsoncba33452012-07-31 08:16:22 -0600250 return (struct vfio_group *)dev; /* ERR_PTR */
251 }
252
253 group->minor = minor;
254 group->dev = dev;
255
256 list_add(&group->vfio_next, &vfio.group_list);
257
258 mutex_unlock(&vfio.group_lock);
259
260 return group;
261}
262
Al Viro6d2cd3c2012-08-17 21:27:32 -0400263/* called with vfio.group_lock held */
Alex Williamsoncba33452012-07-31 08:16:22 -0600264static void vfio_group_release(struct kref *kref)
265{
266 struct vfio_group *group = container_of(kref, struct vfio_group, kref);
267
268 WARN_ON(!list_empty(&group->device_list));
269
Alex Williamsond1099902013-12-19 10:17:13 -0700270 device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
Alex Williamsoncba33452012-07-31 08:16:22 -0600271 list_del(&group->vfio_next);
272 vfio_free_group_minor(group->minor);
Jiang Liu9df7b252012-12-07 13:43:50 -0700273 vfio_group_unlock_and_free(group);
Alex Williamsoncba33452012-07-31 08:16:22 -0600274}
275
276static void vfio_group_put(struct vfio_group *group)
277{
Al Viro6d2cd3c2012-08-17 21:27:32 -0400278 kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600279}
280
281/* Assume group_lock or group reference is held */
282static void vfio_group_get(struct vfio_group *group)
283{
284 kref_get(&group->kref);
285}
286
287/*
288 * Not really a try as we will sleep for mutex, but we need to make
289 * sure the group pointer is valid under lock and get a reference.
290 */
291static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
292{
293 struct vfio_group *target = group;
294
295 mutex_lock(&vfio.group_lock);
296 list_for_each_entry(group, &vfio.group_list, vfio_next) {
297 if (group == target) {
298 vfio_group_get(group);
299 mutex_unlock(&vfio.group_lock);
300 return group;
301 }
302 }
303 mutex_unlock(&vfio.group_lock);
304
305 return NULL;
306}
307
308static
309struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
310{
311 struct vfio_group *group;
312
313 mutex_lock(&vfio.group_lock);
314 list_for_each_entry(group, &vfio.group_list, vfio_next) {
315 if (group->iommu_group == iommu_group) {
316 vfio_group_get(group);
317 mutex_unlock(&vfio.group_lock);
318 return group;
319 }
320 }
321 mutex_unlock(&vfio.group_lock);
322
323 return NULL;
324}
325
326static struct vfio_group *vfio_group_get_from_minor(int minor)
327{
328 struct vfio_group *group;
329
330 mutex_lock(&vfio.group_lock);
331 group = idr_find(&vfio.group_idr, minor);
332 if (!group) {
333 mutex_unlock(&vfio.group_lock);
334 return NULL;
335 }
336 vfio_group_get(group);
337 mutex_unlock(&vfio.group_lock);
338
339 return group;
340}
341
342/**
343 * Device objects - create, release, get, put, search
344 */
345static
346struct vfio_device *vfio_group_create_device(struct vfio_group *group,
347 struct device *dev,
348 const struct vfio_device_ops *ops,
349 void *device_data)
350{
351 struct vfio_device *device;
352 int ret;
353
354 device = kzalloc(sizeof(*device), GFP_KERNEL);
355 if (!device)
356 return ERR_PTR(-ENOMEM);
357
358 kref_init(&device->kref);
359 device->dev = dev;
360 device->group = group;
361 device->ops = ops;
362 device->device_data = device_data;
363
364 ret = dev_set_drvdata(dev, device);
365 if (ret) {
366 kfree(device);
367 return ERR_PTR(ret);
368 }
369
370 /* No need to get group_lock, caller has group reference */
371 vfio_group_get(group);
372
373 mutex_lock(&group->device_lock);
374 list_add(&device->group_next, &group->device_list);
375 mutex_unlock(&group->device_lock);
376
377 return device;
378}
379
380static void vfio_device_release(struct kref *kref)
381{
382 struct vfio_device *device = container_of(kref,
383 struct vfio_device, kref);
384 struct vfio_group *group = device->group;
385
Alex Williamsoncba33452012-07-31 08:16:22 -0600386 list_del(&device->group_next);
387 mutex_unlock(&group->device_lock);
388
389 dev_set_drvdata(device->dev, NULL);
390
391 kfree(device);
392
393 /* vfio_del_group_dev may be waiting for this device */
394 wake_up(&vfio.release_q);
395}
396
397/* Device reference always implies a group reference */
Vijay Mohan Pandarathil44f50712013-03-11 09:28:44 -0600398void vfio_device_put(struct vfio_device *device)
Alex Williamsoncba33452012-07-31 08:16:22 -0600399{
Al Viro934ad4c2012-08-17 19:49:09 -0400400 struct vfio_group *group = device->group;
Al Viro90b12532012-08-17 21:29:06 -0400401 kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
Al Viro934ad4c2012-08-17 19:49:09 -0400402 vfio_group_put(group);
Alex Williamsoncba33452012-07-31 08:16:22 -0600403}
Vijay Mohan Pandarathil44f50712013-03-11 09:28:44 -0600404EXPORT_SYMBOL_GPL(vfio_device_put);
Alex Williamsoncba33452012-07-31 08:16:22 -0600405
406static void vfio_device_get(struct vfio_device *device)
407{
408 vfio_group_get(device->group);
409 kref_get(&device->kref);
410}
411
412static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
413 struct device *dev)
414{
415 struct vfio_device *device;
416
417 mutex_lock(&group->device_lock);
418 list_for_each_entry(device, &group->device_list, group_next) {
419 if (device->dev == dev) {
420 vfio_device_get(device);
421 mutex_unlock(&group->device_lock);
422 return device;
423 }
424 }
425 mutex_unlock(&group->device_lock);
426 return NULL;
427}
428
429/*
430 * Whitelist some drivers that we know are safe (no dma) or just sit on
431 * a device. It's not always practical to leave a device within a group
432 * driverless as it could get re-bound to something unsafe.
433 */
Alex Williamson2b489a42013-02-14 14:02:13 -0700434static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" };
Alex Williamsoncba33452012-07-31 08:16:22 -0600435
436static bool vfio_whitelisted_driver(struct device_driver *drv)
437{
438 int i;
439
440 for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
441 if (!strcmp(drv->name, vfio_driver_whitelist[i]))
442 return true;
443 }
444
445 return false;
446}
447
448/*
449 * A vfio group is viable for use by userspace if all devices are either
450 * driver-less or bound to a vfio or whitelisted driver. We test the
451 * latter by the existence of a struct vfio_device matching the dev.
452 */
453static int vfio_dev_viable(struct device *dev, void *data)
454{
455 struct vfio_group *group = data;
456 struct vfio_device *device;
Jiang Liude2b3ee2012-12-07 13:43:50 -0700457 struct device_driver *drv = ACCESS_ONCE(dev->driver);
Alex Williamsoncba33452012-07-31 08:16:22 -0600458
Jiang Liude2b3ee2012-12-07 13:43:50 -0700459 if (!drv || vfio_whitelisted_driver(drv))
Alex Williamsoncba33452012-07-31 08:16:22 -0600460 return 0;
461
462 device = vfio_group_get_device(group, dev);
463 if (device) {
464 vfio_device_put(device);
465 return 0;
466 }
467
468 return -EINVAL;
469}
470
471/**
472 * Async device support
473 */
474static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
475{
476 struct vfio_device *device;
477
478 /* Do we already know about it? We shouldn't */
479 device = vfio_group_get_device(group, dev);
480 if (WARN_ON_ONCE(device)) {
481 vfio_device_put(device);
482 return 0;
483 }
484
485 /* Nothing to do for idle groups */
486 if (!atomic_read(&group->container_users))
487 return 0;
488
489 /* TODO Prevent device auto probing */
490 WARN("Device %s added to live group %d!\n", dev_name(dev),
491 iommu_group_id(group->iommu_group));
492
493 return 0;
494}
495
Alex Williamsoncba33452012-07-31 08:16:22 -0600496static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
497{
498 /* We don't care what happens when the group isn't in use */
499 if (!atomic_read(&group->container_users))
500 return 0;
501
502 return vfio_dev_viable(dev, group);
503}
504
505static int vfio_iommu_group_notifier(struct notifier_block *nb,
506 unsigned long action, void *data)
507{
508 struct vfio_group *group = container_of(nb, struct vfio_group, nb);
509 struct device *dev = data;
510
511 /*
Alex Williamsonc6401932013-06-10 16:40:56 -0600512 * Need to go through a group_lock lookup to get a reference or we
513 * risk racing a group being removed. Ignore spurious notifies.
Alex Williamsoncba33452012-07-31 08:16:22 -0600514 */
515 group = vfio_group_try_get(group);
Alex Williamsonc6401932013-06-10 16:40:56 -0600516 if (!group)
Alex Williamsoncba33452012-07-31 08:16:22 -0600517 return NOTIFY_OK;
518
519 switch (action) {
520 case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
521 vfio_group_nb_add_dev(group, dev);
522 break;
523 case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
Alex Williamsonde9c7602013-06-10 16:40:56 -0600524 /*
525 * Nothing to do here. If the device is in use, then the
526 * vfio sub-driver should block the remove callback until
527 * it is unused. If the device is unused or attached to a
528 * stub driver, then it should be released and we don't
529 * care that it will be going away.
530 */
Alex Williamsoncba33452012-07-31 08:16:22 -0600531 break;
532 case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
533 pr_debug("%s: Device %s, group %d binding to driver\n",
534 __func__, dev_name(dev),
535 iommu_group_id(group->iommu_group));
536 break;
537 case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
538 pr_debug("%s: Device %s, group %d bound to driver %s\n",
539 __func__, dev_name(dev),
540 iommu_group_id(group->iommu_group), dev->driver->name);
541 BUG_ON(vfio_group_nb_verify(group, dev));
542 break;
543 case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
544 pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
545 __func__, dev_name(dev),
546 iommu_group_id(group->iommu_group), dev->driver->name);
547 break;
548 case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
549 pr_debug("%s: Device %s, group %d unbound from driver\n",
550 __func__, dev_name(dev),
551 iommu_group_id(group->iommu_group));
552 /*
553 * XXX An unbound device in a live group is ok, but we'd
554 * really like to avoid the above BUG_ON by preventing other
555 * drivers from binding to it. Once that occurs, we have to
556 * stop the system to maintain isolation. At a minimum, we'd
557 * want a toggle to disable driver auto probe for this device.
558 */
559 break;
560 }
561
562 vfio_group_put(group);
563 return NOTIFY_OK;
564}
565
566/**
567 * VFIO driver API
568 */
569int vfio_add_group_dev(struct device *dev,
570 const struct vfio_device_ops *ops, void *device_data)
571{
572 struct iommu_group *iommu_group;
573 struct vfio_group *group;
574 struct vfio_device *device;
575
576 iommu_group = iommu_group_get(dev);
577 if (!iommu_group)
578 return -EINVAL;
579
580 group = vfio_group_get_from_iommu(iommu_group);
581 if (!group) {
582 group = vfio_create_group(iommu_group);
583 if (IS_ERR(group)) {
584 iommu_group_put(iommu_group);
585 return PTR_ERR(group);
586 }
587 }
588
589 device = vfio_group_get_device(group, dev);
590 if (device) {
591 WARN(1, "Device %s already exists on group %d\n",
592 dev_name(dev), iommu_group_id(iommu_group));
593 vfio_device_put(device);
594 vfio_group_put(group);
595 iommu_group_put(iommu_group);
596 return -EBUSY;
597 }
598
599 device = vfio_group_create_device(group, dev, ops, device_data);
600 if (IS_ERR(device)) {
601 vfio_group_put(group);
602 iommu_group_put(iommu_group);
603 return PTR_ERR(device);
604 }
605
606 /*
607 * Added device holds reference to iommu_group and vfio_device
608 * (which in turn holds reference to vfio_group). Drop extra
609 * group reference used while acquiring device.
610 */
611 vfio_group_put(group);
612
613 return 0;
614}
615EXPORT_SYMBOL_GPL(vfio_add_group_dev);
616
Vijay Mohan Pandarathil44f50712013-03-11 09:28:44 -0600617/**
618 * Get a reference to the vfio_device for a device that is known to
619 * be bound to a vfio driver. The driver implicitly holds a
620 * vfio_device reference between vfio_add_group_dev and
621 * vfio_del_group_dev. We can therefore use drvdata to increment
622 * that reference from the struct device. This additional
623 * reference must be released by calling vfio_device_put.
624 */
625struct vfio_device *vfio_device_get_from_dev(struct device *dev)
626{
627 struct vfio_device *device = dev_get_drvdata(dev);
628
629 vfio_device_get(device);
630
631 return device;
632}
633EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
634
635/*
636 * Caller must hold a reference to the vfio_device
637 */
638void *vfio_device_data(struct vfio_device *device)
639{
640 return device->device_data;
641}
642EXPORT_SYMBOL_GPL(vfio_device_data);
643
Alex Williamsone014e942013-02-14 14:02:13 -0700644/* Given a referenced group, check if it contains the device */
645static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
Alex Williamsoncba33452012-07-31 08:16:22 -0600646{
Alex Williamsoncba33452012-07-31 08:16:22 -0600647 struct vfio_device *device;
648
Alex Williamsoncba33452012-07-31 08:16:22 -0600649 device = vfio_group_get_device(group, dev);
Alex Williamsone014e942013-02-14 14:02:13 -0700650 if (!device)
Alex Williamsoncba33452012-07-31 08:16:22 -0600651 return false;
Alex Williamsoncba33452012-07-31 08:16:22 -0600652
653 vfio_device_put(device);
Alex Williamsoncba33452012-07-31 08:16:22 -0600654 return true;
655}
656
657/*
658 * Decrement the device reference count and wait for the device to be
659 * removed. Open file descriptors for the device... */
660void *vfio_del_group_dev(struct device *dev)
661{
662 struct vfio_device *device = dev_get_drvdata(dev);
663 struct vfio_group *group = device->group;
664 struct iommu_group *iommu_group = group->iommu_group;
665 void *device_data = device->device_data;
666
Alex Williamsone014e942013-02-14 14:02:13 -0700667 /*
668 * The group exists so long as we have a device reference. Get
669 * a group reference and use it to scan for the device going away.
670 */
671 vfio_group_get(group);
672
Alex Williamsoncba33452012-07-31 08:16:22 -0600673 vfio_device_put(device);
674
675 /* TODO send a signal to encourage this to be released */
Alex Williamsone014e942013-02-14 14:02:13 -0700676 wait_event(vfio.release_q, !vfio_dev_present(group, dev));
677
678 vfio_group_put(group);
Alex Williamsoncba33452012-07-31 08:16:22 -0600679
680 iommu_group_put(iommu_group);
681
682 return device_data;
683}
684EXPORT_SYMBOL_GPL(vfio_del_group_dev);
685
686/**
687 * VFIO base fd, /dev/vfio/vfio
688 */
689static long vfio_ioctl_check_extension(struct vfio_container *container,
690 unsigned long arg)
691{
Alex Williamson0b43c082013-04-29 08:41:36 -0600692 struct vfio_iommu_driver *driver;
Alex Williamsoncba33452012-07-31 08:16:22 -0600693 long ret = 0;
694
Alex Williamson0b43c082013-04-29 08:41:36 -0600695 down_read(&container->group_lock);
696
697 driver = container->iommu_driver;
698
Alex Williamsoncba33452012-07-31 08:16:22 -0600699 switch (arg) {
700 /* No base extensions yet */
701 default:
702 /*
703 * If no driver is set, poll all registered drivers for
704 * extensions and return the first positive result. If
705 * a driver is already set, further queries will be passed
706 * only to that driver.
707 */
708 if (!driver) {
709 mutex_lock(&vfio.iommu_drivers_lock);
710 list_for_each_entry(driver, &vfio.iommu_drivers_list,
711 vfio_next) {
712 if (!try_module_get(driver->ops->owner))
713 continue;
714
715 ret = driver->ops->ioctl(NULL,
716 VFIO_CHECK_EXTENSION,
717 arg);
718 module_put(driver->ops->owner);
719 if (ret > 0)
720 break;
721 }
722 mutex_unlock(&vfio.iommu_drivers_lock);
723 } else
724 ret = driver->ops->ioctl(container->iommu_data,
725 VFIO_CHECK_EXTENSION, arg);
726 }
727
Alex Williamson0b43c082013-04-29 08:41:36 -0600728 up_read(&container->group_lock);
729
Alex Williamsoncba33452012-07-31 08:16:22 -0600730 return ret;
731}
732
Alex Williamson9587f442013-04-25 16:12:38 -0600733/* hold write lock on container->group_lock */
Alex Williamsoncba33452012-07-31 08:16:22 -0600734static int __vfio_container_attach_groups(struct vfio_container *container,
735 struct vfio_iommu_driver *driver,
736 void *data)
737{
738 struct vfio_group *group;
739 int ret = -ENODEV;
740
741 list_for_each_entry(group, &container->group_list, container_next) {
742 ret = driver->ops->attach_group(data, group->iommu_group);
743 if (ret)
744 goto unwind;
745 }
746
747 return ret;
748
749unwind:
750 list_for_each_entry_continue_reverse(group, &container->group_list,
751 container_next) {
752 driver->ops->detach_group(data, group->iommu_group);
753 }
754
755 return ret;
756}
757
758static long vfio_ioctl_set_iommu(struct vfio_container *container,
759 unsigned long arg)
760{
761 struct vfio_iommu_driver *driver;
762 long ret = -ENODEV;
763
Alex Williamson9587f442013-04-25 16:12:38 -0600764 down_write(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600765
766 /*
767 * The container is designed to be an unprivileged interface while
768 * the group can be assigned to specific users. Therefore, only by
769 * adding a group to a container does the user get the privilege of
770 * enabling the iommu, which may allocate finite resources. There
771 * is no unset_iommu, but by removing all the groups from a container,
772 * the container is deprivileged and returns to an unset state.
773 */
774 if (list_empty(&container->group_list) || container->iommu_driver) {
Alex Williamson9587f442013-04-25 16:12:38 -0600775 up_write(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600776 return -EINVAL;
777 }
778
779 mutex_lock(&vfio.iommu_drivers_lock);
780 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
781 void *data;
782
783 if (!try_module_get(driver->ops->owner))
784 continue;
785
786 /*
787 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
788 * so test which iommu driver reported support for this
789 * extension and call open on them. We also pass them the
790 * magic, allowing a single driver to support multiple
791 * interfaces if they'd like.
792 */
793 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
794 module_put(driver->ops->owner);
795 continue;
796 }
797
798 /* module reference holds the driver we're working on */
799 mutex_unlock(&vfio.iommu_drivers_lock);
800
801 data = driver->ops->open(arg);
802 if (IS_ERR(data)) {
803 ret = PTR_ERR(data);
804 module_put(driver->ops->owner);
805 goto skip_drivers_unlock;
806 }
807
808 ret = __vfio_container_attach_groups(container, driver, data);
809 if (!ret) {
810 container->iommu_driver = driver;
811 container->iommu_data = data;
812 } else {
813 driver->ops->release(data);
814 module_put(driver->ops->owner);
815 }
816
817 goto skip_drivers_unlock;
818 }
819
820 mutex_unlock(&vfio.iommu_drivers_lock);
821skip_drivers_unlock:
Alex Williamson9587f442013-04-25 16:12:38 -0600822 up_write(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600823
824 return ret;
825}
826
827static long vfio_fops_unl_ioctl(struct file *filep,
828 unsigned int cmd, unsigned long arg)
829{
830 struct vfio_container *container = filep->private_data;
831 struct vfio_iommu_driver *driver;
832 void *data;
833 long ret = -EINVAL;
834
835 if (!container)
836 return ret;
837
Alex Williamsoncba33452012-07-31 08:16:22 -0600838 switch (cmd) {
839 case VFIO_GET_API_VERSION:
840 ret = VFIO_API_VERSION;
841 break;
842 case VFIO_CHECK_EXTENSION:
843 ret = vfio_ioctl_check_extension(container, arg);
844 break;
845 case VFIO_SET_IOMMU:
846 ret = vfio_ioctl_set_iommu(container, arg);
847 break;
848 default:
Alex Williamson0b43c082013-04-29 08:41:36 -0600849 down_read(&container->group_lock);
850
851 driver = container->iommu_driver;
852 data = container->iommu_data;
853
Alex Williamsoncba33452012-07-31 08:16:22 -0600854 if (driver) /* passthrough all unrecognized ioctls */
855 ret = driver->ops->ioctl(data, cmd, arg);
Alex Williamson0b43c082013-04-29 08:41:36 -0600856
857 up_read(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600858 }
859
860 return ret;
861}
862
863#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: convert @arg with compat_ptr() and hand the
 * request to the native handler.
 */
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return vfio_fops_unl_ioctl(filep, cmd, native_arg);
}
870#endif /* CONFIG_COMPAT */
871
872static int vfio_fops_open(struct inode *inode, struct file *filep)
873{
874 struct vfio_container *container;
875
876 container = kzalloc(sizeof(*container), GFP_KERNEL);
877 if (!container)
878 return -ENOMEM;
879
880 INIT_LIST_HEAD(&container->group_list);
Alex Williamson9587f442013-04-25 16:12:38 -0600881 init_rwsem(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600882 kref_init(&container->kref);
883
884 filep->private_data = container;
885
886 return 0;
887}
888
889static int vfio_fops_release(struct inode *inode, struct file *filep)
890{
891 struct vfio_container *container = filep->private_data;
892
893 filep->private_data = NULL;
894
895 vfio_container_put(container);
896
897 return 0;
898}
899
900/*
901 * Once an iommu driver is set, we optionally pass read/write/mmap
902 * on to the driver, allowing management interfaces beyond ioctl.
903 */
904static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
905 size_t count, loff_t *ppos)
906{
907 struct vfio_container *container = filep->private_data;
Alex Williamson0b43c082013-04-29 08:41:36 -0600908 struct vfio_iommu_driver *driver;
909 ssize_t ret = -EINVAL;
Alex Williamsoncba33452012-07-31 08:16:22 -0600910
Alex Williamson0b43c082013-04-29 08:41:36 -0600911 down_read(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600912
Alex Williamson0b43c082013-04-29 08:41:36 -0600913 driver = container->iommu_driver;
914 if (likely(driver && driver->ops->read))
915 ret = driver->ops->read(container->iommu_data,
916 buf, count, ppos);
917
918 up_read(&container->group_lock);
919
920 return ret;
Alex Williamsoncba33452012-07-31 08:16:22 -0600921}
922
923static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
924 size_t count, loff_t *ppos)
925{
926 struct vfio_container *container = filep->private_data;
Alex Williamson0b43c082013-04-29 08:41:36 -0600927 struct vfio_iommu_driver *driver;
928 ssize_t ret = -EINVAL;
Alex Williamsoncba33452012-07-31 08:16:22 -0600929
Alex Williamson0b43c082013-04-29 08:41:36 -0600930 down_read(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600931
Alex Williamson0b43c082013-04-29 08:41:36 -0600932 driver = container->iommu_driver;
933 if (likely(driver && driver->ops->write))
934 ret = driver->ops->write(container->iommu_data,
935 buf, count, ppos);
936
937 up_read(&container->group_lock);
938
939 return ret;
Alex Williamsoncba33452012-07-31 08:16:22 -0600940}
941
942static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
943{
944 struct vfio_container *container = filep->private_data;
Alex Williamson0b43c082013-04-29 08:41:36 -0600945 struct vfio_iommu_driver *driver;
946 int ret = -EINVAL;
Alex Williamsoncba33452012-07-31 08:16:22 -0600947
Alex Williamson0b43c082013-04-29 08:41:36 -0600948 down_read(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600949
Alex Williamson0b43c082013-04-29 08:41:36 -0600950 driver = container->iommu_driver;
951 if (likely(driver && driver->ops->mmap))
952 ret = driver->ops->mmap(container->iommu_data, vma);
953
954 up_read(&container->group_lock);
955
956 return ret;
Alex Williamsoncba33452012-07-31 08:16:22 -0600957}
958
959static const struct file_operations vfio_fops = {
960 .owner = THIS_MODULE,
961 .open = vfio_fops_open,
962 .release = vfio_fops_release,
963 .read = vfio_fops_read,
964 .write = vfio_fops_write,
965 .unlocked_ioctl = vfio_fops_unl_ioctl,
966#ifdef CONFIG_COMPAT
967 .compat_ioctl = vfio_fops_compat_ioctl,
968#endif
969 .mmap = vfio_fops_mmap,
970};
971
972/**
973 * VFIO Group fd, /dev/vfio/$GROUP
974 */
975static void __vfio_group_unset_container(struct vfio_group *group)
976{
977 struct vfio_container *container = group->container;
978 struct vfio_iommu_driver *driver;
979
Alex Williamson9587f442013-04-25 16:12:38 -0600980 down_write(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600981
982 driver = container->iommu_driver;
983 if (driver)
984 driver->ops->detach_group(container->iommu_data,
985 group->iommu_group);
986
987 group->container = NULL;
988 list_del(&group->container_next);
989
990 /* Detaching the last group deprivileges a container, remove iommu */
991 if (driver && list_empty(&container->group_list)) {
992 driver->ops->release(container->iommu_data);
993 module_put(driver->ops->owner);
994 container->iommu_driver = NULL;
995 container->iommu_data = NULL;
996 }
997
Alex Williamson9587f442013-04-25 16:12:38 -0600998 up_write(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600999
1000 vfio_container_put(container);
1001}
1002
1003/*
1004 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
1005 * if there was no container to unset. Since the ioctl is called on
1006 * the group, we know that still exists, therefore the only valid
1007 * transition here is 1->0.
1008 */
1009static int vfio_group_unset_container(struct vfio_group *group)
1010{
1011 int users = atomic_cmpxchg(&group->container_users, 1, 0);
1012
1013 if (!users)
1014 return -EINVAL;
1015 if (users != 1)
1016 return -EBUSY;
1017
1018 __vfio_group_unset_container(group);
1019
1020 return 0;
1021}
1022
1023/*
1024 * When removing container users, anything that removes the last user
1025 * implicitly removes the group from the container. That is, if the
1026 * group file descriptor is closed, as well as any device file descriptors,
1027 * the group is free.
1028 */
1029static void vfio_group_try_dissolve_container(struct vfio_group *group)
1030{
1031 if (0 == atomic_dec_if_positive(&group->container_users))
1032 __vfio_group_unset_container(group);
1033}
1034
1035static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1036{
Al Viro2903ff02012-08-28 12:52:22 -04001037 struct fd f;
Alex Williamsoncba33452012-07-31 08:16:22 -06001038 struct vfio_container *container;
1039 struct vfio_iommu_driver *driver;
Al Viro2903ff02012-08-28 12:52:22 -04001040 int ret = 0;
Alex Williamsoncba33452012-07-31 08:16:22 -06001041
1042 if (atomic_read(&group->container_users))
1043 return -EINVAL;
1044
Al Viro2903ff02012-08-28 12:52:22 -04001045 f = fdget(container_fd);
1046 if (!f.file)
Alex Williamsoncba33452012-07-31 08:16:22 -06001047 return -EBADF;
1048
1049 /* Sanity check, is this really our fd? */
Al Viro2903ff02012-08-28 12:52:22 -04001050 if (f.file->f_op != &vfio_fops) {
1051 fdput(f);
Alex Williamsoncba33452012-07-31 08:16:22 -06001052 return -EINVAL;
1053 }
1054
Al Viro2903ff02012-08-28 12:52:22 -04001055 container = f.file->private_data;
Alex Williamsoncba33452012-07-31 08:16:22 -06001056 WARN_ON(!container); /* fget ensures we don't race vfio_release */
1057
Alex Williamson9587f442013-04-25 16:12:38 -06001058 down_write(&container->group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -06001059
1060 driver = container->iommu_driver;
1061 if (driver) {
1062 ret = driver->ops->attach_group(container->iommu_data,
1063 group->iommu_group);
1064 if (ret)
1065 goto unlock_out;
1066 }
1067
1068 group->container = container;
1069 list_add(&group->container_next, &container->group_list);
1070
1071 /* Get a reference on the container and mark a user within the group */
1072 vfio_container_get(container);
1073 atomic_inc(&group->container_users);
1074
1075unlock_out:
Alex Williamson9587f442013-04-25 16:12:38 -06001076 up_write(&container->group_lock);
Al Viro2903ff02012-08-28 12:52:22 -04001077 fdput(f);
Alex Williamsoncba33452012-07-31 08:16:22 -06001078 return ret;
1079}
1080
1081static bool vfio_group_viable(struct vfio_group *group)
1082{
1083 return (iommu_group_for_each_dev(group->iommu_group,
1084 group, vfio_dev_viable) == 0);
1085}
1086
1087static const struct file_operations vfio_device_fops;
1088
1089static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
1090{
1091 struct vfio_device *device;
1092 struct file *filep;
1093 int ret = -ENODEV;
1094
1095 if (0 == atomic_read(&group->container_users) ||
1096 !group->container->iommu_driver || !vfio_group_viable(group))
1097 return -EINVAL;
1098
1099 mutex_lock(&group->device_lock);
1100 list_for_each_entry(device, &group->device_list, group_next) {
1101 if (strcmp(dev_name(device->dev), buf))
1102 continue;
1103
1104 ret = device->ops->open(device->device_data);
1105 if (ret)
1106 break;
1107 /*
1108 * We can't use anon_inode_getfd() because we need to modify
1109 * the f_mode flags directly to allow more than just ioctls
1110 */
Alex Williamson5d042fb2013-08-22 10:33:41 -06001111 ret = get_unused_fd_flags(O_CLOEXEC);
Alex Williamsoncba33452012-07-31 08:16:22 -06001112 if (ret < 0) {
1113 device->ops->release(device->device_data);
1114 break;
1115 }
1116
1117 filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
1118 device, O_RDWR);
1119 if (IS_ERR(filep)) {
1120 put_unused_fd(ret);
1121 ret = PTR_ERR(filep);
1122 device->ops->release(device->device_data);
1123 break;
1124 }
1125
1126 /*
1127 * TODO: add an anon_inode interface to do this.
1128 * Appears to be missing by lack of need rather than
1129 * explicitly prevented. Now there's need.
1130 */
1131 filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1132
Alex Williamsoncba33452012-07-31 08:16:22 -06001133 vfio_device_get(device);
1134 atomic_inc(&group->container_users);
Al Viro31605de2012-08-17 21:32:56 -04001135
1136 fd_install(ret, filep);
Alex Williamsoncba33452012-07-31 08:16:22 -06001137 break;
1138 }
1139 mutex_unlock(&group->device_lock);
1140
1141 return ret;
1142}
1143
1144static long vfio_group_fops_unl_ioctl(struct file *filep,
1145 unsigned int cmd, unsigned long arg)
1146{
1147 struct vfio_group *group = filep->private_data;
1148 long ret = -ENOTTY;
1149
1150 switch (cmd) {
1151 case VFIO_GROUP_GET_STATUS:
1152 {
1153 struct vfio_group_status status;
1154 unsigned long minsz;
1155
1156 minsz = offsetofend(struct vfio_group_status, flags);
1157
1158 if (copy_from_user(&status, (void __user *)arg, minsz))
1159 return -EFAULT;
1160
1161 if (status.argsz < minsz)
1162 return -EINVAL;
1163
1164 status.flags = 0;
1165
1166 if (vfio_group_viable(group))
1167 status.flags |= VFIO_GROUP_FLAGS_VIABLE;
1168
1169 if (group->container)
1170 status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;
1171
1172 if (copy_to_user((void __user *)arg, &status, minsz))
1173 return -EFAULT;
1174
1175 ret = 0;
1176 break;
1177 }
1178 case VFIO_GROUP_SET_CONTAINER:
1179 {
1180 int fd;
1181
1182 if (get_user(fd, (int __user *)arg))
1183 return -EFAULT;
1184
1185 if (fd < 0)
1186 return -EINVAL;
1187
1188 ret = vfio_group_set_container(group, fd);
1189 break;
1190 }
1191 case VFIO_GROUP_UNSET_CONTAINER:
1192 ret = vfio_group_unset_container(group);
1193 break;
1194 case VFIO_GROUP_GET_DEVICE_FD:
1195 {
1196 char *buf;
1197
1198 buf = strndup_user((const char __user *)arg, PAGE_SIZE);
1199 if (IS_ERR(buf))
1200 return PTR_ERR(buf);
1201
1202 ret = vfio_group_get_device_fd(group, buf);
1203 kfree(buf);
1204 break;
1205 }
1206 }
1207
1208 return ret;
1209}
1210
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for the group fd: converts the argument with
 * compat_ptr() and hands it to the native handler.
 */
static long vfio_group_fops_compat_ioctl(struct file *filep,
					 unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return vfio_group_fops_unl_ioctl(filep, cmd, native_arg);
}
#endif	/* CONFIG_COMPAT */
1219
1220static int vfio_group_fops_open(struct inode *inode, struct file *filep)
1221{
1222 struct vfio_group *group;
Alex Williamson6d6768c2013-06-25 16:06:54 -06001223 int opened;
Alex Williamsoncba33452012-07-31 08:16:22 -06001224
1225 group = vfio_group_get_from_minor(iminor(inode));
1226 if (!group)
1227 return -ENODEV;
1228
Alex Williamson6d6768c2013-06-25 16:06:54 -06001229 /* Do we need multiple instances of the group open? Seems not. */
1230 opened = atomic_cmpxchg(&group->opened, 0, 1);
1231 if (opened) {
1232 vfio_group_put(group);
1233 return -EBUSY;
1234 }
1235
1236 /* Is something still in use from a previous open? */
Alex Williamsoncba33452012-07-31 08:16:22 -06001237 if (group->container) {
Alex Williamson6d6768c2013-06-25 16:06:54 -06001238 atomic_dec(&group->opened);
Alex Williamsoncba33452012-07-31 08:16:22 -06001239 vfio_group_put(group);
1240 return -EBUSY;
1241 }
1242
1243 filep->private_data = group;
1244
1245 return 0;
1246}
1247
1248static int vfio_group_fops_release(struct inode *inode, struct file *filep)
1249{
1250 struct vfio_group *group = filep->private_data;
1251
1252 filep->private_data = NULL;
1253
1254 vfio_group_try_dissolve_container(group);
1255
Alex Williamson6d6768c2013-06-25 16:06:54 -06001256 atomic_dec(&group->opened);
1257
Alex Williamsoncba33452012-07-31 08:16:22 -06001258 vfio_group_put(group);
1259
1260 return 0;
1261}
1262
1263static const struct file_operations vfio_group_fops = {
1264 .owner = THIS_MODULE,
1265 .unlocked_ioctl = vfio_group_fops_unl_ioctl,
1266#ifdef CONFIG_COMPAT
1267 .compat_ioctl = vfio_group_fops_compat_ioctl,
1268#endif
1269 .open = vfio_group_fops_open,
1270 .release = vfio_group_fops_release,
1271};
1272
1273/**
1274 * VFIO Device fd
1275 */
1276static int vfio_device_fops_release(struct inode *inode, struct file *filep)
1277{
1278 struct vfio_device *device = filep->private_data;
1279
1280 device->ops->release(device->device_data);
1281
1282 vfio_group_try_dissolve_container(device->group);
1283
1284 vfio_device_put(device);
1285
1286 return 0;
1287}
1288
1289static long vfio_device_fops_unl_ioctl(struct file *filep,
1290 unsigned int cmd, unsigned long arg)
1291{
1292 struct vfio_device *device = filep->private_data;
1293
1294 if (unlikely(!device->ops->ioctl))
1295 return -EINVAL;
1296
1297 return device->ops->ioctl(device->device_data, cmd, arg);
1298}
1299
1300static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
1301 size_t count, loff_t *ppos)
1302{
1303 struct vfio_device *device = filep->private_data;
1304
1305 if (unlikely(!device->ops->read))
1306 return -EINVAL;
1307
1308 return device->ops->read(device->device_data, buf, count, ppos);
1309}
1310
1311static ssize_t vfio_device_fops_write(struct file *filep,
1312 const char __user *buf,
1313 size_t count, loff_t *ppos)
1314{
1315 struct vfio_device *device = filep->private_data;
1316
1317 if (unlikely(!device->ops->write))
1318 return -EINVAL;
1319
1320 return device->ops->write(device->device_data, buf, count, ppos);
1321}
1322
1323static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1324{
1325 struct vfio_device *device = filep->private_data;
1326
1327 if (unlikely(!device->ops->mmap))
1328 return -EINVAL;
1329
1330 return device->ops->mmap(device->device_data, vma);
1331}
1332
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for a device fd: converts the argument with
 * compat_ptr() and hands it to the native handler.
 */
static long vfio_device_fops_compat_ioctl(struct file *filep,
					  unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return vfio_device_fops_unl_ioctl(filep, cmd, native_arg);
}
#endif	/* CONFIG_COMPAT */
1341
1342static const struct file_operations vfio_device_fops = {
1343 .owner = THIS_MODULE,
1344 .release = vfio_device_fops_release,
1345 .read = vfio_device_fops_read,
1346 .write = vfio_device_fops_write,
1347 .unlocked_ioctl = vfio_device_fops_unl_ioctl,
1348#ifdef CONFIG_COMPAT
1349 .compat_ioctl = vfio_device_fops_compat_ioctl,
1350#endif
1351 .mmap = vfio_device_fops_mmap,
1352};
1353
1354/**
Alexey Kardashevskiy6cdd9782013-08-05 10:52:36 -06001355 * External user API, exported by symbols to be linked dynamically.
1356 *
1357 * The protocol includes:
1358 * 1. do normal VFIO init operation:
1359 * - opening a new container;
1360 * - attaching group(s) to it;
1361 * - setting an IOMMU driver for a container.
1362 * When IOMMU is set for a container, all groups in it are
1363 * considered ready to use by an external user.
1364 *
1365 * 2. User space passes a group fd to an external user.
1366 * The external user calls vfio_group_get_external_user()
1367 * to verify that:
1368 * - the group is initialized;
1369 * - IOMMU is set for it.
1370 * If both checks passed, vfio_group_get_external_user()
1371 * increments the container user counter to prevent
1372 * the VFIO group from disposal before KVM exits.
1373 *
1374 * 3. The external user calls vfio_external_user_iommu_id()
1375 * to know an IOMMU ID.
1376 *
1377 * 4. When the external KVM finishes, it calls
1378 * vfio_group_put_external_user() to release the VFIO group.
1379 * This call decrements the container user counter.
1380 */
1381struct vfio_group *vfio_group_get_external_user(struct file *filep)
1382{
1383 struct vfio_group *group = filep->private_data;
1384
1385 if (filep->f_op != &vfio_group_fops)
1386 return ERR_PTR(-EINVAL);
1387
1388 if (!atomic_inc_not_zero(&group->container_users))
1389 return ERR_PTR(-EINVAL);
1390
1391 if (!group->container->iommu_driver ||
1392 !vfio_group_viable(group)) {
1393 atomic_dec(&group->container_users);
1394 return ERR_PTR(-EINVAL);
1395 }
1396
1397 vfio_group_get(group);
1398
1399 return group;
1400}
1401EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
1402
/*
 * Release a group obtained with vfio_group_get_external_user().
 *
 * Undoes both things the get side took: the container user count and
 * the group reference.  The container user is released first because
 * vfio_group_try_dissolve_container() dereferences @group; if the group
 * reference were dropped first and happened to be the last one, @group
 * could no longer be valid by the time it is used.  This is the same
 * order the group and device release handlers use.
 */
void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
1409
1410int vfio_external_user_iommu_id(struct vfio_group *group)
1411{
1412 return iommu_group_id(group->iommu_group);
1413}
1414EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);
1415
1416/**
Alex Williamsoncba33452012-07-31 08:16:22 -06001417 * Module/class support
1418 */
1419static char *vfio_devnode(struct device *dev, umode_t *mode)
1420{
1421 return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
1422}
1423
Alex Williamsond1099902013-12-19 10:17:13 -07001424static struct miscdevice vfio_dev = {
1425 .minor = VFIO_MINOR,
1426 .name = "vfio",
1427 .fops = &vfio_fops,
1428 .nodename = "vfio/vfio",
1429 .mode = S_IRUGO | S_IWUGO,
1430};
1431
Alex Williamsoncba33452012-07-31 08:16:22 -06001432static int __init vfio_init(void)
1433{
1434 int ret;
1435
1436 idr_init(&vfio.group_idr);
1437 mutex_init(&vfio.group_lock);
1438 mutex_init(&vfio.iommu_drivers_lock);
1439 INIT_LIST_HEAD(&vfio.group_list);
1440 INIT_LIST_HEAD(&vfio.iommu_drivers_list);
1441 init_waitqueue_head(&vfio.release_q);
1442
Alex Williamsond1099902013-12-19 10:17:13 -07001443 ret = misc_register(&vfio_dev);
1444 if (ret) {
1445 pr_err("vfio: misc device register failed\n");
1446 return ret;
1447 }
1448
1449 /* /dev/vfio/$GROUP */
Alex Williamsoncba33452012-07-31 08:16:22 -06001450 vfio.class = class_create(THIS_MODULE, "vfio");
1451 if (IS_ERR(vfio.class)) {
1452 ret = PTR_ERR(vfio.class);
1453 goto err_class;
1454 }
1455
1456 vfio.class->devnode = vfio_devnode;
1457
Alex Williamsond1099902013-12-19 10:17:13 -07001458 ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio");
Alex Williamsoncba33452012-07-31 08:16:22 -06001459 if (ret)
Alex Williamsond1099902013-12-19 10:17:13 -07001460 goto err_alloc_chrdev;
Alex Williamsoncba33452012-07-31 08:16:22 -06001461
Alex Williamsoncba33452012-07-31 08:16:22 -06001462 cdev_init(&vfio.group_cdev, &vfio_group_fops);
Alex Williamsond1099902013-12-19 10:17:13 -07001463 ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK);
Alex Williamsoncba33452012-07-31 08:16:22 -06001464 if (ret)
Alex Williamsond1099902013-12-19 10:17:13 -07001465 goto err_cdev_add;
Alex Williamsoncba33452012-07-31 08:16:22 -06001466
1467 pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
1468
Alex Williamson73fa0d12012-07-31 08:16:23 -06001469 /*
1470 * Attempt to load known iommu-drivers. This gives us a working
1471 * environment without the user needing to explicitly load iommu
1472 * drivers.
1473 */
1474 request_module_nowait("vfio_iommu_type1");
Alexey Kardashevskiy5ffd2292013-05-21 13:33:10 +10001475 request_module_nowait("vfio_iommu_spapr_tce");
Alex Williamson73fa0d12012-07-31 08:16:23 -06001476
Alex Williamsoncba33452012-07-31 08:16:22 -06001477 return 0;
1478
Alex Williamsond1099902013-12-19 10:17:13 -07001479err_cdev_add:
1480 unregister_chrdev_region(vfio.group_devt, MINORMASK);
1481err_alloc_chrdev:
Alex Williamsoncba33452012-07-31 08:16:22 -06001482 class_destroy(vfio.class);
1483 vfio.class = NULL;
1484err_class:
Alex Williamsond1099902013-12-19 10:17:13 -07001485 misc_deregister(&vfio_dev);
Alex Williamsoncba33452012-07-31 08:16:22 -06001486 return ret;
1487}
1488
1489static void __exit vfio_cleanup(void)
1490{
1491 WARN_ON(!list_empty(&vfio.group_list));
1492
1493 idr_destroy(&vfio.group_idr);
1494 cdev_del(&vfio.group_cdev);
Alex Williamsond1099902013-12-19 10:17:13 -07001495 unregister_chrdev_region(vfio.group_devt, MINORMASK);
Alex Williamsoncba33452012-07-31 08:16:22 -06001496 class_destroy(vfio.class);
1497 vfio.class = NULL;
Alex Williamsond1099902013-12-19 10:17:13 -07001498 misc_deregister(&vfio_dev);
Alex Williamsoncba33452012-07-31 08:16:22 -06001499}
1500
1501module_init(vfio_init);
1502module_exit(vfio_cleanup);
1503
1504MODULE_VERSION(DRIVER_VERSION);
1505MODULE_LICENSE("GPL v2");
1506MODULE_AUTHOR(DRIVER_AUTHOR);
1507MODULE_DESCRIPTION(DRIVER_DESC);
Alex Williamsond1099902013-12-19 10:17:13 -07001508MODULE_ALIAS_MISCDEV(VFIO_MINOR);
1509MODULE_ALIAS("devname:vfio/vfio");