/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct idr			group_idr;
	struct mutex			group_lock;
	struct cdev			group_cdev;
	dev_t				group_devt;
	wait_queue_head_t		release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

struct vfio_unbound_dev {
	struct device			*dev;
	struct list_head		unbound_next;
};

struct vfio_group {
	struct kref			kref;
	int				minor;
	atomic_t			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct device			*dev;
	struct notifier_block		nb;
	struct list_head		vfio_next;
	struct list_head		container_next;
	struct list_head		unbound_list;
	struct mutex			unbound_lock;
	atomic_t			opened;
	bool				noiommu;
};

struct vfio_device {
	struct kref			kref;
	struct device			*dev;
	const struct vfio_device_ops	*ops;
	struct vfio_group		*group;
	struct list_head		group_next;
	void				*device_data;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif

/*
 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
 * and remove functions; any use case other than acquiring the first
 * reference for the purpose of calling vfio_add_group_dev() or removing
 * that symmetric reference after vfio_del_group_dev() should use the raw
 * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
 * removes the device from the dummy group and cannot be nested.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
	struct iommu_group *group;
	int __maybe_unused ret;

	group = iommu_group_get(dev);

#ifdef CONFIG_VFIO_NOIOMMU
	/*
	 * With noiommu enabled, an IOMMU group will be created for a device
	 * that doesn't already have one and doesn't have iommu_ops on its
	 * bus.  We use iommu_present() again in the main code to detect
	 * these fake groups.
	 */
	if (group || !noiommu || iommu_present(dev->bus))
		return group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return NULL;

	iommu_group_set_name(group, "vfio-noiommu");
	ret = iommu_group_add_device(group, dev);
	iommu_group_put(group);
	if (ret)
		return NULL;

	/*
	 * Where to taint?  At this point we've added an IOMMU group for a
	 * device that is not backed by iommu_ops, therefore any iommu_
	 * callback using iommu_ops can legitimately Oops.  So, while we may
	 * be about to give a DMA capable device to a user without IOMMU
	 * protection, which is clearly taint-worthy, let's go ahead and do
	 * it here.
	 */
	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif

	return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);

void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
	if (!iommu_present(dev->bus))
		iommu_group_remove_device(dev);
#endif

	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);

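/*
 * Example (illustrative only, not part of this file): a minimal sketch of
 * how a VFIO bus driver's probe/remove path is expected to pair the calls
 * above with vfio_add_group_dev()/vfio_del_group_dev() defined later in
 * this file.  The names my_probe, my_remove, my_vfio_ops and my_device
 * are hypothetical.
 *
 *	static int my_probe(struct device *dev)
 *	{
 *		struct iommu_group *group;
 *		int ret;
 *
 *		group = vfio_iommu_group_get(dev);
 *		if (!group)
 *			return -EINVAL;
 *
 *		ret = vfio_add_group_dev(dev, &my_vfio_ops, my_device);
 *		if (ret)
 *			vfio_iommu_group_put(group, dev);
 *
 *		return ret;
 *	}
 *
 *	static void my_remove(struct device *dev)
 *	{
 *		void *device_data = vfio_del_group_dev(dev);
 *
 *		vfio_iommu_group_put(dev->iommu_group, dev);
 *		kfree(device_data);
 *	}
 */
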
169#ifdef CONFIG_VFIO_NOIOMMU
170static void *vfio_noiommu_open(unsigned long arg)
171{
172 if (arg != VFIO_NOIOMMU_IOMMU)
173 return ERR_PTR(-EINVAL);
174 if (!capable(CAP_SYS_RAWIO))
175 return ERR_PTR(-EPERM);
176
177 return NULL;
178}
179
180static void vfio_noiommu_release(void *iommu_data)
181{
182}
183
184static long vfio_noiommu_ioctl(void *iommu_data,
185 unsigned int cmd, unsigned long arg)
186{
187 if (cmd == VFIO_CHECK_EXTENSION)
188 return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
189
190 return -ENOTTY;
191}
192
193static int vfio_iommu_present(struct device *dev, void *unused)
194{
195 return iommu_present(dev->bus) ? 1 : 0;
196}
197
198static int vfio_noiommu_attach_group(void *iommu_data,
199 struct iommu_group *iommu_group)
200{
201 return iommu_group_for_each_dev(iommu_group, NULL,
202 vfio_iommu_present) ? -EINVAL : 0;
203}
204
205static void vfio_noiommu_detach_group(void *iommu_data,
206 struct iommu_group *iommu_group)
207{
208}
209
210static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
211 .name = "vfio-noiommu",
212 .owner = THIS_MODULE,
213 .open = vfio_noiommu_open,
214 .release = vfio_noiommu_release,
215 .ioctl = vfio_noiommu_ioctl,
216 .attach_group = vfio_noiommu_attach_group,
217 .detach_group = vfio_noiommu_detach_group,
218};
219#endif
220
221
Alex Williamsoncba33452012-07-31 08:16:22 -0600222/**
223 * IOMMU driver registration
224 */
225int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
226{
227 struct vfio_iommu_driver *driver, *tmp;
228
229 driver = kzalloc(sizeof(*driver), GFP_KERNEL);
230 if (!driver)
231 return -ENOMEM;
232
233 driver->ops = ops;
234
235 mutex_lock(&vfio.iommu_drivers_lock);
236
237 /* Check for duplicates */
238 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
239 if (tmp->ops == ops) {
240 mutex_unlock(&vfio.iommu_drivers_lock);
241 kfree(driver);
242 return -EINVAL;
243 }
244 }
245
246 list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
247
248 mutex_unlock(&vfio.iommu_drivers_lock);
249
250 return 0;
251}
252EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
253
254void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
255{
256 struct vfio_iommu_driver *driver;
257
258 mutex_lock(&vfio.iommu_drivers_lock);
259 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
260 if (driver->ops == ops) {
261 list_del(&driver->vfio_next);
262 mutex_unlock(&vfio.iommu_drivers_lock);
263 kfree(driver);
264 return;
265 }
266 }
267 mutex_unlock(&vfio.iommu_drivers_lock);
268}
269EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
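
/*
 * Example (illustrative only): an IOMMU backend such as vfio_iommu_type1
 * registers itself through the interface above, typically from its
 * module_init().  vfio_noiommu_ops earlier in this file is a real,
 * in-tree instance of the same pattern; the my_* identifiers below are
 * hypothetical.
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_driver_ops = {
 *		.name		= "my-iommu-backend",
 *		.owner		= THIS_MODULE,
 *		.open		= my_open,	// void *(*)(unsigned long arg)
 *		.release	= my_release,	// void (*)(void *iommu_data)
 *		.ioctl		= my_ioctl,
 *		.attach_group	= my_attach_group,
 *		.detach_group	= my_detach_group,
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_driver_ops);
 *	}
 *
 *	static void __exit my_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_driver_ops);
 *	}
 */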

/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * they're freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock.  A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
					    bool iommu_present)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	INIT_LIST_HEAD(&group->unbound_list);
	mutex_init(&group->unbound_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	group->iommu_group = iommu_group;
	group->noiommu = !iommu_present;

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * Blocking notifiers acquire a rwsem around registering and hold
	 * it around the callback.  Therefore, we need to register outside
	 * of vfio.group_lock to avoid A-B/B-A contention.  Our callback
	 * won't do anything unless it can find the group in
	 * vfio.group_list, so no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%s%d", group->noiommu ? "noiommu-" : "",
			    iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
		vfio_group_unlock_and_free(group);
		return (struct vfio_group *)dev; /* ERR_PTR */
	}

	group->minor = minor;
	group->dev = dev;

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);

	return group;
}

/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);
	struct vfio_unbound_dev *unbound, *tmp;
	struct iommu_group *iommu_group = group->iommu_group;

	WARN_ON(!list_empty(&group->device_list));

	list_for_each_entry_safe(unbound, tmp,
				 &group->unbound_list, unbound_next) {
		list_del(&unbound->unbound_next);
		kfree(unbound);
	}

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);
	vfio_group_unlock_and_free(group);
	iommu_group_put(iommu_group);
}

static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}

/*
 * Not really a try as we will sleep for the mutex, but we need to make
 * sure the group pointer is valid under lock and get a reference.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	dev_set_drvdata(dev, device);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, ie. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge.  Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only does so for user owned devices downstream.  Note
 * that error notification via MSI can be affected for platforms that handle
 * MSI within the same IOVA space as DMA.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };

static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{
	int i;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
			return true;
	}

	for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
		if (!strcmp(drv->name, vfio_driver_whitelist[i]))
			return true;
	}

	return false;
}

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver.  The first is to test whether the device exists in the vfio
 * group.  The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	struct device_driver *drv = ACCESS_ONCE(dev->driver);
	struct vfio_unbound_dev *unbound;
	int ret = -EINVAL;

	mutex_lock(&group->unbound_lock);
	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
		if (dev == unbound->dev) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&group->unbound_lock);

	if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return ret;
}

/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
	     iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Need to go through a group_lock lookup to get a reference or we
	 * risk racing a group being removed.  Ignore spurious notifies.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/*
		 * Nothing to do here.  If the device is in use, then the
		 * vfio sub-driver should block the remove callback until
		 * it is unused.  If the device is unused or attached to a
		 * stub driver, then it should be released and we don't
		 * care that it will be going away.
		 */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		pr_debug("%s: Device %s, group %d binding to driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		pr_debug("%s: Device %s, group %d bound to driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		pr_debug("%s: Device %s, group %d unbound from driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */

		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	vfio_group_put(group);
	return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group, iommu_present(dev->bus));
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group.  A created vfio_group keeps the reference.
		 */
		iommu_group_put(iommu_group);
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop all but the vfio_device reference.  The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);

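/*
 * Example (illustrative only): the shape of the vfio_device_ops that a bus
 * driver passes to vfio_add_group_dev().  Every callback receives the
 * opaque device_data registered above; .request is the hook used by
 * vfio_del_group_dev() below to ask the user to release a busy device.
 * All my_* names are hypothetical.
 *
 *	static const struct vfio_device_ops my_vfio_ops = {
 *		.name		= "my-vfio-driver",
 *		.open		= my_open,	// int (*)(void *device_data)
 *		.release	= my_release,	// void (*)(void *device_data)
 *		.ioctl		= my_ioctl,
 *		.read		= my_read,
 *		.write		= my_write,
 *		.mmap		= my_mmap,
 *		.request	= my_request,	// void (*)(void *, unsigned int count)
 *	};
 */
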
/**
 * Get a reference to the vfio_device for a device.  Even if the
 * caller thinks they own the device, they could be racing with a
 * release call path, so we can't trust drvdata for the shortcut.
 * Go the long way around, from the iommu_group to the vfio_group
 * to the vfio_device.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return NULL;

	group = vfio_group_get_from_iommu(iommu_group);
	iommu_group_put(iommu_group);
	if (!group)
		return NULL;

	device = vfio_group_get_device(group, dev);
	vfio_group_put(group);

	return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = NULL;

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		if (!strcmp(dev_name(it->dev), buf)) {
			device = it;
			vfio_device_get(device);
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);

/* Given a referenced group, check if it contains the device */
static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	device = vfio_group_get_device(group, dev);
	if (!device)
		return false;

	vfio_device_put(device);
	return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	void *device_data = device->device_data;
	struct vfio_unbound_dev *unbound;
	unsigned int i = 0;
	long ret;
	bool interrupted = false;

	/*
	 * The group exists so long as we have a device reference.  Get
	 * a group reference and use it to scan for the device going away.
	 */
	vfio_group_get(group);

	/*
	 * When the device is removed from the group, the group suddenly
	 * becomes non-viable; the device has a driver (until the unbind
	 * completes), but it's not present in the group.  This is bad news
	 * for any external users that need to re-acquire a group reference
	 * in order to match and release their existing reference.  To
	 * solve this, we track such devices on the unbound_list to bridge
	 * the gap until they're fully unbound.
	 */
	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
	if (unbound) {
		unbound->dev = dev;
		mutex_lock(&group->unbound_lock);
		list_add(&unbound->unbound_next, &group->unbound_list);
		mutex_unlock(&group->unbound_lock);
	}
	WARN_ON(!unbound);

	vfio_device_put(device);

	/*
	 * If the device is still present in the group after the above
	 * 'put', then it is in use and we need to request it from the
	 * bus driver.  The driver may in turn need to request the
	 * device from the user.  We send the request on an arbitrary
	 * interval with counter to allow the driver to take escalating
	 * measures to release the device if it has the ability to do so.
	 */
	do {
		device = vfio_group_get_device(group, dev);
		if (!device)
			break;

		if (device->ops->request)
			device->ops->request(device_data, i++);

		vfio_device_put(device);

		if (interrupted) {
			ret = wait_event_timeout(vfio.release_q,
					!vfio_dev_present(group, dev), HZ * 10);
		} else {
			ret = wait_event_interruptible_timeout(vfio.release_q,
					!vfio_dev_present(group, dev), HZ * 10);
			if (ret == -ERESTARTSYS) {
				interrupted = true;
				dev_warn(dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	} while (ret <= 0);

	vfio_group_put(group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
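
/*
 * Example (illustrative only): a sketch of how a bus driver's .request
 * callback might use the escalating count supplied by the loop above,
 * loosely modeled on vfio-pci's behavior.  my_device and its req_trigger
 * eventfd are hypothetical.
 *
 *	static void my_request(void *device_data, unsigned int count)
 *	{
 *		struct my_device *mdev = device_data;
 *
 *		if (mdev->req_trigger) {
 *			// Ask the user to release the device via the
 *			// eventfd it registered for device requests.
 *			eventfd_signal(mdev->req_trigger, 1);
 *		} else if (count == 0) {
 *			// No channel to the user; log once and keep waiting.
 *			dev_notice(mdev->dev,
 *				   "No device request channel registered, blocked until released by user\n");
 *		}
 *	}
 */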

/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

#ifdef CONFIG_VFIO_NOIOMMU
				if (!list_empty(&container->group_list) &&
				    (container->noiommu !=
				     (driver->ops == &vfio_noiommu_ops)))
					continue;
#endif

				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

#ifdef CONFIG_VFIO_NOIOMMU
		/*
		 * Only noiommu containers can use vfio-noiommu and noiommu
		 * containers can only use vfio-noiommu.
		 */
		if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
			continue;
#endif

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		/* module reference holds the driver we're working on */
		mutex_unlock(&vfio.iommu_drivers_lock);

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			goto skip_drivers_unlock;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (!ret) {
			container->iommu_driver = driver;
			container->iommu_data = data;
		} else {
			driver->ops->release(data);
			module_put(driver->ops->owner);
		}

		goto skip_drivers_unlock;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
skip_drivers_unlock:
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		down_read(&container->group_lock);

		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);

		up_read(&container->group_lock);
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->read))
		ret = driver->ops->read(container->iommu_data,
					buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->write))
		ret = driver->ops->write(container->iommu_data,
					 buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	int ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->mmap))
		ret = driver->ops->mmap(container->iommu_data, vma);

	up_read(&container->group_lock);

	return ret;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_fops_compat_ioctl,
#endif
	.mmap		= vfio_fops_mmap,
};

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know the group still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, once the
 * group file descriptor and all device file descriptors for devices
 * in the group are closed, the group is freed.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
		ret = -EPERM;
		goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	container->noiommu = group->noiommu;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
	if (!device)
		return -ENODEV;

	ret = device->ops->open(device->device_data);
	if (ret) {
		vfio_device_put(device);
		return ret;
	}

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	ret = get_unused_fd_flags(O_CLOEXEC);
	if (ret < 0) {
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		put_unused_fd(ret);
		ret = PTR_ERR(filep);
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	atomic_inc(&group->container_users);

	fd_install(ret, filep);

	if (group->noiommu)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));

	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_group_fops_compat_ioctl(struct file *filep,
					 unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_group_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
		vfio_group_put(group);
		return -EPERM;
	}

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	vfio_group_try_dissolve_container(group);

	atomic_dec(&group->opened);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_group_fops_compat_ioctl,
#endif
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
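
/*
 * Example (illustrative only): the userspace sequence that exercises the
 * container and group fds implemented above, following the flow described
 * in Documentation/vfio.txt.  The group number and device name are
 * hypothetical, and error checking is largely omitted for brevity.
 *
 *	int container, group, device;
 *
 *	container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		return -1;	// unknown API version
 *
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		return -1;	// type1 iommu backend not available
 *
 *	group = open("/dev/vfio/26", O_RDWR);
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */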

/**
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

#ifdef CONFIG_COMPAT
static long vfio_device_fops_compat_ioctl(struct file *filep,
					  unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_device_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_device_fops_compat_ioctl,
#endif
	.mmap		= vfio_device_fops_mmap,
};
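
/*
 * Illustrative sketch, not part of this driver: each fop above is a thin
 * trampoline into the bus driver's struct vfio_device_ops.  A typical
 * first step for userspace, once VFIO_GROUP_GET_DEVICE_FD has returned a
 * device fd, is to query the device; the ioctl below reaches the bus
 * driver (e.g. vfio-pci) through device->ops->ioctl:
 *
 *	struct vfio_device_info info = { .argsz = sizeof(info) };
 *
 *	ioctl(device, VFIO_DEVICE_GET_INFO, &info);
 *	(info.num_regions and info.num_irqs then bound the follow-up
 *	 VFIO_DEVICE_GET_REGION_INFO / VFIO_DEVICE_GET_IRQ_INFO queries)
 */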

/**
 * External user API, exported as symbols to be linked dynamically.
 *
 * The protocol is as follows:
 *
 * 1. Perform the normal VFIO initialization:
 *	- open a new container;
 *	- attach group(s) to it;
 *	- set an IOMMU driver for the container.
 *    Once an IOMMU is set for a container, all groups in it are
 *    considered ready for use by an external user.
 *
 * 2. Userspace passes a group fd to an external user.
 *    The external user calls vfio_group_get_external_user()
 *    to verify that:
 *	- the group is initialized;
 *	- an IOMMU is set for it.
 *    If both checks pass, vfio_group_get_external_user()
 *    increments the container user counter to prevent the VFIO
 *    group from being disposed of before the external user exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 *    to obtain the IOMMU group ID.
 *
 * 4. When the external user (e.g. KVM) is finished, it calls
 *    vfio_group_put_external_user() to release the VFIO group.
 *    This call decrements the container user counter.
 */
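
/*
 * Illustrative sketch, not part of this driver: an in-kernel consumer
 * such as KVM, handed a group fd by userspace, might follow the protocol
 * above roughly like this (error handling trimmed; fdget()/fdput() are
 * one plausible way to resolve the fd):
 *
 *	struct fd f = fdget(group_fd);
 *	struct vfio_group *grp = vfio_group_get_external_user(f.file);
 *
 *	if (IS_ERR(grp))
 *		return PTR_ERR(grp);	(not a viable, IOMMU-backed group)
 *
 *	iommu_id = vfio_external_user_iommu_id(grp);
 *	... use iommu_id while holding the group reference ...
 *
 *	vfio_group_put_external_user(grp);
 *	fdput(f);
 */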

struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	if (!atomic_inc_not_zero(&group->container_users))
		return ERR_PTR(-EINVAL);

	if (group->noiommu) {
		atomic_dec(&group->container_users);
		return ERR_PTR(-EPERM);
	}

	if (!group->container->iommu_driver ||
			!vfio_group_viable(group)) {
		atomic_dec(&group->container_users);
		return ERR_PTR(-EINVAL);
	}

	vfio_group_get(group);

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);

void vfio_group_put_external_user(struct vfio_group *group)
{
	/*
	 * Dissolve the container reference before dropping the group
	 * reference: vfio_group_put() may release the final reference
	 * and free @group, so calling it first would leave
	 * vfio_group_try_dissolve_container() touching freed memory.
	 */
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
	return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);

/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

	/*
	 * Attempt to load known iommu-drivers.  This gives us a working
	 * environment without the user needing to explicitly load iommu
	 * drivers.
	 */
	request_module_nowait("vfio_iommu_type1");
	request_module_nowait("vfio_iommu_spapr_tce");

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	return 0;

err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");