/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class *class;
	struct list_head iommu_drivers_list;
	struct mutex iommu_drivers_lock;
	struct list_head group_list;
	struct idr group_idr;
	struct mutex group_lock;
	struct cdev group_cdev;
	dev_t group_devt;
	wait_queue_head_t release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops *ops;
	struct list_head vfio_next;
};

struct vfio_container {
	struct kref kref;
	struct list_head group_list;
	struct rw_semaphore group_lock;
	struct vfio_iommu_driver *iommu_driver;
	void *iommu_data;
	bool noiommu;
};

struct vfio_unbound_dev {
	struct device *dev;
	struct list_head unbound_next;
};

struct vfio_group {
	struct kref kref;
	int minor;
	atomic_t container_users;
	struct iommu_group *iommu_group;
	struct vfio_container *container;
	struct list_head device_list;
	struct mutex device_lock;
	struct device *dev;
	struct notifier_block nb;
	struct list_head vfio_next;
	struct list_head container_next;
	struct list_head unbound_list;
	struct mutex unbound_lock;
	atomic_t opened;
	bool noiommu;
};

struct vfio_device {
	struct kref kref;
	struct device *dev;
	const struct vfio_device_ops *ops;
	struct vfio_group *group;
	struct list_head group_next;
	void *device_data;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif

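/*
 * Usage sketch (not part of this file): with CONFIG_VFIO_NOIOMMU built in,
 * the parameter above can be set at load time or, because of S_IWUSR,
 * flipped at runtime through sysfs:
 *
 *	modprobe vfio enable_unsafe_noiommu_mode=1
 *	echo 1 > /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
 *
 * Both paths follow from the module_param_named() declaration above.
 */
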
/*
 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
 * and remove functions; any use case other than acquiring the first
 * reference for the purpose of calling vfio_add_group_dev() or removing
 * that symmetric reference after vfio_del_group_dev() should use the raw
 * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
 * removes the device from the dummy group and cannot be nested.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
	struct iommu_group *group;
	int __maybe_unused ret;

	group = iommu_group_get(dev);

#ifdef CONFIG_VFIO_NOIOMMU
	/*
	 * With noiommu enabled, an IOMMU group will be created for a device
	 * that doesn't already have one and doesn't have an iommu_ops on its
	 * bus. We set iommudata simply to be able to identify these groups
	 * as special use and for reclamation later.
	 */
	if (group || !noiommu || iommu_present(dev->bus))
		return group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return NULL;

	iommu_group_set_name(group, "vfio-noiommu");
	iommu_group_set_iommudata(group, &noiommu, NULL);
	ret = iommu_group_add_device(group, dev);
	iommu_group_put(group);
	if (ret)
		return NULL;

	/*
	 * Where to taint? At this point we've added an IOMMU group for a
	 * device that is not backed by iommu_ops, therefore any iommu_
	 * callback using iommu_ops can legitimately Oops. So, while we may
	 * be about to give a DMA capable device to a user without IOMMU
	 * protection, which is clearly taint-worthy, let's go ahead and do
	 * it here.
	 */
	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif

	return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);

void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
	if (iommu_group_get_iommudata(group) == &noiommu)
		iommu_group_remove_device(dev);
#endif

	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);

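/*
 * Usage sketch (hypothetical bus driver, not from this file): the helpers
 * above pair across probe and remove, bracketing vfio_add_group_dev() and
 * vfio_del_group_dev() as the comment preceding vfio_iommu_group_get()
 * requires:
 *
 *	static int foo_probe(struct device *dev)
 *	{
 *		struct iommu_group *group = vfio_iommu_group_get(dev);
 *
 *		if (!group)
 *			return -EINVAL;
 *
 *		if (vfio_add_group_dev(dev, &foo_vfio_ops, foo_data)) {
 *			vfio_iommu_group_put(group, dev);
 *			return -EINVAL;
 *		}
 *		return 0;
 *	}
 *
 *	static void foo_remove(struct device *dev)
 *	{
 *		vfio_del_group_dev(dev);
 *		vfio_iommu_group_put(dev->iommu_group, dev);
 *	}
 *
 * foo_vfio_ops and foo_data stand in for a real vfio_device_ops and its
 * private data.
 */
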
#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
				     struct iommu_group *iommu_group)
{
	return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
#endif


/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);

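/*
 * Usage sketch (hypothetical backend, not from this file): an IOMMU
 * backend supplies a vfio_iommu_driver_ops (vfio_noiommu_ops above is a
 * minimal example) and registers it from its module init, mirroring what
 * real backends such as vfio_iommu_type1 do:
 *
 *	static int __init foo_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&foo_iommu_ops);
 *	}
 *
 *	static void __exit foo_iommu_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&foo_iommu_ops);
 *	}
 *
 * foo_iommu_ops stands in for a real backend's ops structure.
 */
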
/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * they're freed via kref. Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock. A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	INIT_LIST_HEAD(&group->unbound_list);
	mutex_init(&group->unbound_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	group->iommu_group = iommu_group;
#ifdef CONFIG_VFIO_NOIOMMU
	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
#endif

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * Blocking notifiers acquire a rwsem around registering and hold
	 * it around callbacks. Therefore, we need to register outside of
	 * vfio.group_lock to avoid A-B/B-A contention. Our callback won't
	 * do anything unless it can find the group in vfio.group_list, so
	 * no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%s%d", group->noiommu ? "noiommu-" : "",
			    iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
		vfio_group_unlock_and_free(group);
		return (struct vfio_group *)dev; /* ERR_PTR */
	}

	group->minor = minor;
	group->dev = dev;

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);

	return group;
}

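/*
 * Illustrative note: with the "%s%d" format above, the resulting chardev
 * appears as /dev/vfio/$GROUP (e.g. /dev/vfio/26), or /dev/vfio/noiommu-26
 * for a noiommu group, where the number is the iommu_group_id(); the group
 * number shown is illustrative.
 */
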
/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);
	struct vfio_unbound_dev *unbound, *tmp;
	struct iommu_group *iommu_group = group->iommu_group;

	WARN_ON(!list_empty(&group->device_list));

	list_for_each_entry_safe(unbound, tmp,
				 &group->unbound_list, unbound_next) {
		list_del(&unbound->unbound_next);
		kfree(unbound);
	}

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);
	vfio_group_unlock_and_free(group);
	iommu_group_put(iommu_group);
}

static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}

struct vfio_group_put_work {
	struct work_struct work;
	struct vfio_group *group;
};

static void vfio_group_put_bg(struct work_struct *work)
{
	struct vfio_group_put_work *do_work;

	do_work = container_of(work, struct vfio_group_put_work, work);

	vfio_group_put(do_work->group);
	kfree(do_work);
}

static void vfio_group_schedule_put(struct vfio_group *group)
{
	struct vfio_group_put_work *do_work;

	do_work = kmalloc(sizeof(*do_work), GFP_KERNEL);
	if (WARN_ON(!do_work))
		return;

	INIT_WORK(&do_work->work, vfio_group_put_bg);
	do_work->group = group;
	schedule_work(&do_work->work);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}

/*
 * Not really a try as we will sleep on the mutex, but we need to make
 * sure the group pointer is valid under lock and get a reference.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	dev_set_drvdata(dev, device);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, i.e. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge. Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only does so for user owned devices downstream. Note
 * that error notification via MSI can be affected for platforms that handle
 * MSI within the same IOVA space as DMA.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };

static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{
	int i;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
			return true;
	}

	for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
		if (!strcmp(drv->name, vfio_driver_whitelist[i]))
			return true;
	}

	return false;
}

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver. The first is to test whether the device exists in the vfio
 * group. The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	struct device_driver *drv = ACCESS_ONCE(dev->driver);
	struct vfio_unbound_dev *unbound;
	int ret = -EINVAL;

	mutex_lock(&group->unbound_lock);
	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
		if (dev == unbound->dev) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&group->unbound_lock);

	if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return ret;
}

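/*
 * Concrete example of the rules above: a group with one device bound to
 * vfio-pci and a sibling bound to pci-stub is viable; if instead the
 * sibling is bound to a native host driver, vfio_dev_viable() returns
 * -EINVAL for it and the whole group is unusable from userspace. (Driver
 * names here are illustrative.)
 */
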
/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it? We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
	     iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Need to go through a group_lock lookup to get a reference or we
	 * risk racing a group being removed. Ignore spurious notifies.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/*
		 * Nothing to do here. If the device is in use, then the
		 * vfio sub-driver should block the remove callback until
		 * it is unused. If the device is unused or attached to a
		 * stub driver, then it should be released and we don't
		 * care that it will be going away.
		 */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		pr_debug("%s: Device %s, group %d binding to driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		pr_debug("%s: Device %s, group %d bound to driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		pr_debug("%s: Device %s, group %d unbound from driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it. Once that occurs, we have to
		 * stop the system to maintain isolation. At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */

		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	/*
	 * If we're the last reference to the group, the group will be
	 * released, which includes unregistering the iommu group notifier.
	 * We hold a read-lock on that notifier list, unregistering needs
	 * a write-lock... deadlock. Release our reference asynchronously
	 * to avoid that situation.
	 */
	vfio_group_schedule_put(group);
	return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group. A created vfio_group keeps the reference.
		 */
		iommu_group_put(iommu_group);
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop all but the vfio_device reference. The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);

/**
 * Get a reference to the vfio_device for a device. Even if the
 * caller thinks they own the device, they could be racing with a
 * release call path, so we can't trust drvdata for the shortcut.
 * Go the long way around, from the iommu_group to the vfio_group
 * to the vfio_device.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return NULL;

	group = vfio_group_get_from_iommu(iommu_group);
	iommu_group_put(iommu_group);
	if (!group)
		return NULL;

	device = vfio_group_get_device(group, dev);
	vfio_group_put(group);

	return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = NULL;

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		if (!strcmp(dev_name(it->dev), buf)) {
			device = it;
			vfio_device_get(device);
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);

/* Given a referenced group, check if it contains the device */
static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	device = vfio_group_get_device(group, dev);
	if (!device)
		return false;

	vfio_device_put(device);
	return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed. Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	void *device_data = device->device_data;
	struct vfio_unbound_dev *unbound;
	unsigned int i = 0;
	long ret;
	bool interrupted = false;

	/*
	 * The group exists so long as we have a device reference. Get
	 * a group reference and use it to scan for the device going away.
	 */
	vfio_group_get(group);

	/*
	 * When the device is removed from the group, the group suddenly
	 * becomes non-viable; the device has a driver (until the unbind
	 * completes), but it's not present in the group. This is bad news
	 * for any external users that need to re-acquire a group reference
	 * in order to match and release their existing reference. To
	 * solve this, we track such devices on the unbound_list to bridge
	 * the gap until they're fully unbound.
	 */
	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
	if (unbound) {
		unbound->dev = dev;
		mutex_lock(&group->unbound_lock);
		list_add(&unbound->unbound_next, &group->unbound_list);
		mutex_unlock(&group->unbound_lock);
	}
	WARN_ON(!unbound);

	vfio_device_put(device);

	/*
	 * If the device is still present in the group after the above
	 * 'put', then it is in use and we need to request it from the
	 * bus driver. The driver may in turn need to request the
	 * device from the user. We send the request on an arbitrary
	 * interval with a counter to allow the driver to take escalating
	 * measures to release the device if it has the ability to do so.
	 */
	do {
		device = vfio_group_get_device(group, dev);
		if (!device)
			break;

		if (device->ops->request)
			device->ops->request(device_data, i++);

		vfio_device_put(device);

		if (interrupted) {
			ret = wait_event_timeout(vfio.release_q,
						 !vfio_dev_present(group, dev), HZ * 10);
		} else {
			ret = wait_event_interruptible_timeout(vfio.release_q,
							       !vfio_dev_present(group, dev), HZ * 10);
			if (ret == -ERESTARTSYS) {
				interrupted = true;
				dev_warn(dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	} while (ret <= 0);

	vfio_group_put(group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);

/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result. If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

#ifdef CONFIG_VFIO_NOIOMMU
				if (!list_empty(&container->group_list) &&
				    (container->noiommu !=
				     (driver->ops == &vfio_noiommu_ops)))
					continue;
#endif

				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

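/*
 * Userspace sketch (not from this file): before choosing an IOMMU model,
 * a user typically probes the container fd, which routes the query through
 * the polling loop above until a driver claims it:
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) > 0)
 *		// the Type1 backend is available
 *
 * VFIO_TYPE1_IOMMU is defined in <linux/vfio.h>.
 */
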
/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users. Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources. There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

#ifdef CONFIG_VFIO_NOIOMMU
		/*
		 * Only noiommu containers can use vfio-noiommu and noiommu
		 * containers can only use vfio-noiommu.
		 */
		if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
			continue;
#endif

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on it. We also pass it the magic,
		 * allowing a single driver to support multiple interfaces
		 * if it'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

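/*
 * Userspace sketch (assumed flow, per the privilege model described above):
 * a group must be attached to the container before VFIO_SET_IOMMU succeeds:
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);   // group number illustrative
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *
 * With no groups attached, the list_empty() check above fails the ioctl
 * with -EINVAL.
 */
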
static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		down_read(&container->group_lock);

		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);

		up_read(&container->group_lock);
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->read))
		ret = driver->ops->read(container->iommu_data,
					buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->write))
		ret = driver->ops->write(container->iommu_data,
					 buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	int ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->mmap))
		ret = driver->ops->mmap(container->iommu_data, vma);

	up_read(&container->group_lock);

	return ret;
}

static const struct file_operations vfio_fops = {
	.owner = THIS_MODULE,
	.open = vfio_fops_open,
	.release = vfio_fops_release,
	.read = vfio_fops_read,
	.write = vfio_fops_write,
	.unlocked_ioctl = vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = vfio_fops_compat_ioctl,
#endif
	.mmap = vfio_fops_mmap,
};

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset. Since the ioctl is called on
 * the group, we know the group still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container. That is, once the
 * group file descriptor and any device file descriptors are closed,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
		ret = -EPERM;
		goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	container->noiommu = group->noiommu;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
	if (!device)
		return -ENODEV;

	ret = device->ops->open(device->device_data);
	if (ret) {
		vfio_device_put(device);
		return ret;
	}

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	ret = get_unused_fd_flags(O_CLOEXEC);
	if (ret < 0) {
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		put_unused_fd(ret);
		ret = PTR_ERR(filep);
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented. Now there's need.
	 */
	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	atomic_inc(&group->container_users);

	fd_install(ret, filep);

	if (group->noiommu)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));

	return ret;
}

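/*
 * Userspace sketch (not from this file): with the group attached to a
 * configured container, a device fd is requested by name:
 *
 *	int fd = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 *
 * The string is whatever dev_name() reports for the device; the PCI
 * address shown is illustrative.
 */
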
static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_group_fops_compat_ioctl(struct file *filep,
					 unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_group_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
		vfio_group_put(group);
		return -EPERM;
	}

	/* Do we need multiple instances of the group open? Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	vfio_group_try_dissolve_container(group);

	atomic_dec(&group->opened);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = vfio_group_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = vfio_group_fops_compat_ioctl,
#endif
	.open = vfio_group_fops_open,
	.release = vfio_group_fops_release,
};

1612/**
1613 * VFIO Device fd
1614 */
1615static int vfio_device_fops_release(struct inode *inode, struct file *filep)
1616{
1617 struct vfio_device *device = filep->private_data;
1618
1619 device->ops->release(device->device_data);
1620
1621 vfio_group_try_dissolve_container(device->group);
1622
1623 vfio_device_put(device);
1624
1625 return 0;
1626}
1627
1628static long vfio_device_fops_unl_ioctl(struct file *filep,
1629 unsigned int cmd, unsigned long arg)
1630{
1631 struct vfio_device *device = filep->private_data;
1632
1633 if (unlikely(!device->ops->ioctl))
1634 return -EINVAL;
1635
1636 return device->ops->ioctl(device->device_data, cmd, arg);
1637}
1638
1639static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
1640 size_t count, loff_t *ppos)
1641{
1642 struct vfio_device *device = filep->private_data;
1643
1644 if (unlikely(!device->ops->read))
1645 return -EINVAL;
1646
1647 return device->ops->read(device->device_data, buf, count, ppos);
1648}
1649
1650static ssize_t vfio_device_fops_write(struct file *filep,
1651 const char __user *buf,
1652 size_t count, loff_t *ppos)
1653{
1654 struct vfio_device *device = filep->private_data;
1655
1656 if (unlikely(!device->ops->write))
1657 return -EINVAL;
1658
1659 return device->ops->write(device->device_data, buf, count, ppos);
1660}
1661
1662static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1663{
1664 struct vfio_device *device = filep->private_data;
1665
1666 if (unlikely(!device->ops->mmap))
1667 return -EINVAL;
1668
1669 return device->ops->mmap(device->device_data, vma);
1670}
1671
1672#ifdef CONFIG_COMPAT
1673static long vfio_device_fops_compat_ioctl(struct file *filep,
1674 unsigned int cmd, unsigned long arg)
1675{
1676 arg = (unsigned long)compat_ptr(arg);
1677 return vfio_device_fops_unl_ioctl(filep, cmd, arg);
1678}
1679#endif /* CONFIG_COMPAT */
1680
1681static const struct file_operations vfio_device_fops = {
1682 .owner = THIS_MODULE,
1683 .release = vfio_device_fops_release,
1684 .read = vfio_device_fops_read,
1685 .write = vfio_device_fops_write,
1686 .unlocked_ioctl = vfio_device_fops_unl_ioctl,
1687#ifdef CONFIG_COMPAT
1688 .compat_ioctl = vfio_device_fops_compat_ioctl,
1689#endif
1690 .mmap = vfio_device_fops_mmap,
1691};
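
/*
 * A minimal sketch of the driver side these fops dispatch to: a vfio
 * bus driver (vfio-pci, for instance) supplies a vfio_device_ops and an
 * opaque device_data when it registers a device.  The mydrv_* callbacks
 * and struct mydev here are assumptions for illustration only:
 *
 *	static long mydrv_ioctl(void *device_data, unsigned int cmd,
 *				unsigned long arg)
 *	{
 *		struct mydev *mdev = device_data;
 *		...
 *	}
 *
 *	static const struct vfio_device_ops mydrv_ops = {
 *		.name		= "mydrv",
 *		.open		= mydrv_open,
 *		.release	= mydrv_release,
 *		.ioctl		= mydrv_ioctl,
 *		.read		= mydrv_read,
 *		.write		= mydrv_write,
 *		.mmap		= mydrv_mmap,
 *	};
 *
 *	ret = vfio_add_group_dev(dev, &mydrv_ops, mdev);
 */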

/**
 * External user API, exported by symbols to be linked dynamically.
 *
 * The protocol includes:
 *  1. Do the normal VFIO init sequence:
 *	- opening a new container;
 *	- attaching group(s) to it;
 *	- setting an IOMMU driver for a container.
 * When an IOMMU is set for a container, all groups in it are
 * considered ready to use by an external user.
 *
 * 2. User space passes a group fd to an external user.
 * The external user calls vfio_group_get_external_user()
 * to verify that:
 *	- the group is initialized;
 *	- an IOMMU is set for it.
 * If both checks pass, vfio_group_get_external_user()
 * increments the container user counter to prevent the VFIO
 * group from being disposed of before the external user exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 * to obtain the IOMMU group ID.
 *
 * 4. When the external user (KVM, for instance) finishes, it
 * calls vfio_group_put_external_user() to release the VFIO
 * group.  This call decrements the container user counter.
 */
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	if (!atomic_inc_not_zero(&group->container_users))
		return ERR_PTR(-EINVAL);

	if (group->noiommu) {
		atomic_dec(&group->container_users);
		return ERR_PTR(-EPERM);
	}

	if (!group->container->iommu_driver ||
	    !vfio_group_viable(group)) {
		atomic_dec(&group->container_users);
		return ERR_PTR(-EINVAL);
	}

	vfio_group_get(group);

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);

void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
	return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);
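
/*
 * A minimal sketch of a kernel-side consumer of the external user API
 * above, following the pattern KVM's kvm-vfio device uses; the fd
 * handling shown is an illustrative assumption, not code in this file:
 *
 *	struct fd f = fdget(group_fd);
 *	struct vfio_group *grp;
 *
 *	if (!f.file)
 *		return -EBADF;
 *
 *	grp = vfio_group_get_external_user(f.file);
 *	fdput(f);
 *	if (IS_ERR(grp))
 *		return PTR_ERR(grp);
 *
 *	... vfio_external_user_iommu_id(grp) names the iommu_group ...
 *
 *	vfio_group_put_external_user(grp);
 */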

/**
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities: allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail; vfio_info_cap_shift() should be called to fix up
 * the next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;

	for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
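
/*
 * A hedged sketch of how a sub-module might use the two helpers above
 * while answering a *_GET_INFO ioctl; struct foo_cap, FOO_CAP_ID and
 * the surrounding ioctl plumbing are assumptions for illustration:
 *
 *	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
 *	struct vfio_info_cap_header *header;
 *	struct foo_cap *cap;
 *
 *	header = vfio_info_cap_add(&caps, sizeof(*cap), FOO_CAP_ID, 1);
 *	if (IS_ERR(header))
 *		return PTR_ERR(header);
 *
 *	cap = container_of(header, struct foo_cap, header);
 *	cap->value = 42;
 *
 *	The chain is built based at offset 0; shift the next offsets to
 *	where the chain lands in the user's info struct, then copy out:
 *
 *	vfio_info_cap_shift(&caps, sizeof(info));
 *	copy_to_user((void __user *)arg + sizeof(info), caps.buf,
 *		     caps.size);
 *	kfree(caps.buf);
 */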

/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

	/*
	 * Attempt to load known iommu-drivers.  This gives us a working
	 * environment without the user needing to explicitly load iommu
	 * drivers.
	 */
	request_module_nowait("vfio_iommu_type1");
	request_module_nowait("vfio_iommu_spapr_tce");

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	return 0;

err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");