/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct idr			group_idr;
	struct mutex			group_lock;
	struct cdev			group_cdev;
	dev_t				group_devt;
	wait_queue_head_t		release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

struct vfio_unbound_dev {
	struct device			*dev;
	struct list_head		unbound_next;
};

struct vfio_group {
	struct kref			kref;
	int				minor;
	atomic_t			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct device			*dev;
	struct notifier_block		nb;
	struct list_head		vfio_next;
	struct list_head		container_next;
	struct list_head		unbound_list;
	struct mutex			unbound_lock;
	atomic_t			opened;
	bool				noiommu;
	struct kvm			*kvm;
	struct blocking_notifier_head	notifier;
};

struct vfio_device {
	struct kref			kref;
	struct device			*dev;
	const struct vfio_device_ops	*ops;
	struct vfio_group		*group;
	struct list_head		group_next;
	void				*device_data;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif
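
/*
 * Usage sketch (assuming vfio is built as a module): the option above can
 * be enabled at load time with "modprobe vfio enable_unsafe_noiommu_mode=1"
 * or, since the parameter is S_IWUSR, toggled at runtime through
 * /sys/module/vfio/parameters/enable_unsafe_noiommu_mode.
 */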

/*
 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
 * and remove functions; any use case other than acquiring the first
 * reference for the purpose of calling vfio_add_group_dev() or removing
 * that symmetric reference after vfio_del_group_dev() should use the raw
 * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
 * removes the device from the dummy group and cannot be nested.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
	struct iommu_group *group;
	int __maybe_unused ret;

	group = iommu_group_get(dev);

#ifdef CONFIG_VFIO_NOIOMMU
	/*
	 * With noiommu enabled, an IOMMU group will be created for a device
	 * that doesn't already have one and doesn't have an iommu_ops on its
	 * bus.  We set iommudata simply to be able to identify these groups
	 * as special use and for reclamation later.
	 */
	if (group || !noiommu || iommu_present(dev->bus))
		return group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return NULL;

	iommu_group_set_name(group, "vfio-noiommu");
	iommu_group_set_iommudata(group, &noiommu, NULL);
	ret = iommu_group_add_device(group, dev);
	iommu_group_put(group);
	if (ret)
		return NULL;

	/*
	 * Where to taint?  At this point we've added an IOMMU group for a
	 * device that is not backed by iommu_ops, therefore any iommu_
	 * callback using iommu_ops can legitimately Oops.  So, while we may
	 * be about to give a DMA capable device to a user without IOMMU
	 * protection, which is clearly taint-worthy, let's go ahead and do
	 * it here.
	 */
	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif

	return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);

void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
	if (iommu_group_get_iommudata(group) == &noiommu)
		iommu_group_remove_device(dev);
#endif

	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
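
/*
 * A minimal sketch of how a VFIO bus driver pairs the helpers above with
 * vfio_add_group_dev()/vfio_del_group_dev() below; the foo_* names and
 * ops are illustrative, not from a real driver:
 *
 *	static int foo_probe(struct device *dev)
 *	{
 *		int ret;
 *
 *		if (!vfio_iommu_group_get(dev))
 *			return -EINVAL;
 *
 *		ret = vfio_add_group_dev(dev, &foo_vfio_dev_ops, foo_data);
 *		if (ret)
 *			vfio_iommu_group_put(dev->iommu_group, dev);
 *		return ret;
 *	}
 *
 *	static void foo_remove(struct device *dev)
 *	{
 *		void *data = vfio_del_group_dev(dev);
 *
 *		vfio_iommu_group_put(dev->iommu_group, dev);
 *		kfree(data);
 *	}
 */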

#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
				     struct iommu_group *iommu_group)
{
	return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
#endif


/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
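
/*
 * An IOMMU backend module (e.g. vfio_iommu_type1, or the vfio_noiommu_ops
 * above) plugs into the core through the registration pair above,
 * typically from its module init/exit; a minimal sketch with illustrative
 * foo_* callbacks:
 *
 *	static const struct vfio_iommu_driver_ops foo_ops = {
 *		.name		= "vfio-foo",
 *		.owner		= THIS_MODULE,
 *		.open		= foo_open,
 *		.release	= foo_release,
 *		.ioctl		= foo_ioctl,
 *		.attach_group	= foo_attach_group,
 *		.detach_group	= foo_detach_group,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return vfio_register_iommu_driver(&foo_ops);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&foo_ops);
 *	}
 */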

/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock.  A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	INIT_LIST_HEAD(&group->unbound_list);
	mutex_init(&group->unbound_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	group->iommu_group = iommu_group;
#ifdef CONFIG_VFIO_NOIOMMU
	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
#endif
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * Blocking notifiers acquire a rwsem around registering and hold
	 * it around the callback.  Therefore, we need to register outside
	 * of vfio.group_lock to avoid A-B/B-A contention.  Our callback
	 * won't do anything unless it can find the group in vfio.group_list,
	 * so there's no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%s%d", group->noiommu ? "noiommu-" : "",
			    iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
		vfio_group_unlock_and_free(group);
		return (struct vfio_group *)dev; /* ERR_PTR */
	}

	group->minor = minor;
	group->dev = dev;

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);

	return group;
}

/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);
	struct vfio_unbound_dev *unbound, *tmp;
	struct iommu_group *iommu_group = group->iommu_group;

	WARN_ON(!list_empty(&group->device_list));

	list_for_each_entry_safe(unbound, tmp,
				 &group->unbound_list, unbound_next) {
		list_del(&unbound->unbound_next);
		kfree(unbound);
	}

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);
	vfio_group_unlock_and_free(group);
	iommu_group_put(iommu_group);
}

static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}

/*
 * Not really a "try" since we will sleep on the mutex, but we need to
 * make sure the group pointer is valid under lock and get a reference.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return NULL;

	group = vfio_group_get_from_iommu(iommu_group);
	iommu_group_put(iommu_group);

	return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	dev_set_drvdata(dev, device);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, i.e. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge.  Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only does so for user owned devices downstream.  Note
 * that error notification via MSI can be affected for platforms that handle
 * MSI within the same IOVA space as DMA.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };
Alex Williamsoncba33452012-07-31 08:16:22 -0600598
Alex Williamson5f096b12015-10-27 14:53:04 -0600599static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
Alex Williamsoncba33452012-07-31 08:16:22 -0600600{
601 int i;
602
Alex Williamson5f096b12015-10-27 14:53:04 -0600603 if (dev_is_pci(dev)) {
604 struct pci_dev *pdev = to_pci_dev(dev);
605
606 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
607 return true;
608 }
609
Alex Williamsoncba33452012-07-31 08:16:22 -0600610 for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
611 if (!strcmp(drv->name, vfio_driver_whitelist[i]))
612 return true;
613 }
614
615 return false;
616}
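
/*
 * For example, a user wanting to keep a group viable without handing
 * every device to vfio can park the extra devices on pci-stub using the
 * standard PCI sysfs interfaces (the vendor/device ID and address below
 * are placeholders):
 *
 *	# echo "8086 10ca" > /sys/bus/pci/drivers/pci-stub/new_id
 *	# echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind
 *	# echo 0000:06:0d.0 > /sys/bus/pci/drivers/pci-stub/bind
 */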

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver.  The first is to test whether the device exists in the vfio
 * group.  The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	struct device_driver *drv = ACCESS_ONCE(dev->driver);
	struct vfio_unbound_dev *unbound;
	int ret = -EINVAL;

	mutex_lock(&group->unbound_lock);
	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
		if (dev == unbound->dev) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&group->unbound_lock);

	if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return ret;
}

/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
	     iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Need to go through a group_lock lookup to get a reference or we
	 * risk racing a group being removed.  Ignore spurious notifies.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/*
		 * Nothing to do here.  If the device is in use, then the
		 * vfio sub-driver should block the remove callback until
		 * it is unused.  If the device is unused or attached to a
		 * stub driver, then it should be released and we don't
		 * care that it will be going away.
		 */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		pr_debug("%s: Device %s, group %d binding to driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		pr_debug("%s: Device %s, group %d bound to driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		pr_debug("%s: Device %s, group %d unbound from driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */

		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	vfio_group_put(group);
	return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group.  A created vfio_group keeps the reference.
		 */
		iommu_group_put(iommu_group);
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop all but the vfio_device reference.  The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);
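
/*
 * The vfio_device_ops supplied above back the vfio_device_fops at the
 * bottom of this file.  As a rough map, inferred from the call sites in
 * this file (see include/linux/vfio.h for the authoritative definition):
 *
 *	open/release	- first use via VFIO_GROUP_GET_DEVICE_FD and final
 *			  release of the device file descriptor
 *	read/write/mmap	- forwarded directly from the device fd
 *	ioctl		- all device fd ioctls are passed through
 *	request		- called repeatedly by vfio_del_group_dev() to ask
 *			  the user to release a device blocking an unbind
 */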

/**
 * Get a reference to the vfio_device for a device.  Even if the
 * caller thinks they own the device, they could be racing with a
 * release call path, so we can't trust drvdata for the shortcut.
 * Go the long way around, from the iommu_group to the vfio_group
 * to the vfio_device.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
	struct vfio_group *group;
	struct vfio_device *device;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return NULL;

	device = vfio_group_get_device(group, dev);
	vfio_group_put(group);

	return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = NULL;

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		if (!strcmp(dev_name(it->dev), buf)) {
			device = it;
			vfio_device_get(device);
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);

/* Given a referenced group, check if it contains the device */
static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	device = vfio_group_get_device(group, dev);
	if (!device)
		return false;

	vfio_device_put(device);
	return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	void *device_data = device->device_data;
	struct vfio_unbound_dev *unbound;
	unsigned int i = 0;
	long ret;
	bool interrupted = false;

	/*
	 * The group exists so long as we have a device reference.  Get
	 * a group reference and use it to scan for the device going away.
	 */
	vfio_group_get(group);

	/*
	 * When the device is removed from the group, the group suddenly
	 * becomes non-viable; the device has a driver (until the unbind
	 * completes), but it's not present in the group.  This is bad news
	 * for any external users that need to re-acquire a group reference
	 * in order to match and release their existing reference.  To
	 * solve this, we track such devices on the unbound_list to bridge
	 * the gap until they're fully unbound.
	 */
	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
	if (unbound) {
		unbound->dev = dev;
		mutex_lock(&group->unbound_lock);
		list_add(&unbound->unbound_next, &group->unbound_list);
		mutex_unlock(&group->unbound_lock);
	}
	WARN_ON(!unbound);

	vfio_device_put(device);

	/*
	 * If the device is still present in the group after the above
	 * 'put', then it is in use and we need to request it from the
	 * bus driver.  The driver may in turn need to request the
	 * device from the user.  We send the request on an arbitrary
	 * interval with counter to allow the driver to take escalating
	 * measures to release the device if it has the ability to do so.
	 */
	do {
		device = vfio_group_get_device(group, dev);
		if (!device)
			break;

		if (device->ops->request)
			device->ops->request(device_data, i++);

		vfio_device_put(device);

		if (interrupted) {
			ret = wait_event_timeout(vfio.release_q,
					!vfio_dev_present(group, dev), HZ * 10);
		} else {
			ret = wait_event_interruptible_timeout(vfio.release_q,
					!vfio_dev_present(group, dev), HZ * 10);
			if (ret == -ERESTARTSYS) {
				interrupted = true;
				dev_warn(dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	} while (ret <= 0);

	vfio_group_put(group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);

/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

#ifdef CONFIG_VFIO_NOIOMMU
				if (!list_empty(&container->group_list) &&
				    (container->noiommu !=
				     (driver->ops == &vfio_noiommu_ops)))
					continue;
#endif

				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
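
/*
 * From userspace the extension check looks like the following sketch,
 * with VFIO_TYPE1_IOMMU as an example extension (error handling omitted):
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		// unknown API version
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		// the type1 backend is not available
 */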

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

#ifdef CONFIG_VFIO_NOIOMMU
		/*
		 * Only noiommu containers can use vfio-noiommu and noiommu
		 * containers can only use vfio-noiommu.
		 */
		if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
			continue;
#endif

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		down_read(&container->group_lock);

		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);

		up_read(&container->group_lock);
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->read))
		ret = driver->ops->read(container->iommu_data,
					buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->write))
		ret = driver->ops->write(container->iommu_data,
					 buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	int ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->mmap))
		ret = driver->ops->mmap(container->iommu_data, vma);

	up_read(&container->group_lock);

	return ret;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_fops_compat_ioctl,
#endif
	.mmap		= vfio_fops_mmap,
};
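
/*
 * A typical userspace sequence against the container fd above and the
 * group fd below, as a sketch; the group number and device name are
 * placeholders and error handling is omitted:
 *
 *	int container, group, device;
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *
 *	container = open("/dev/vfio/vfio", O_RDWR);
 *	group = open("/dev/vfio/26", O_RDWR);
 *
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	// expect status.flags & VFIO_GROUP_FLAGS_VIABLE
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */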

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know the group still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
		ret = -EPERM;
		goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	container->noiommu = group->noiommu;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static int vfio_group_add_container_user(struct vfio_group *group)
{
	if (!atomic_inc_not_zero(&group->container_users))
		return -EINVAL;

	if (group->noiommu) {
		atomic_dec(&group->container_users);
		return -EPERM;
	}
	if (!group->container->iommu_driver || !vfio_group_viable(group)) {
		atomic_dec(&group->container_users);
		return -EINVAL;
	}

	return 0;
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
	if (!device)
		return -ENODEV;

	ret = device->ops->open(device->device_data);
	if (ret) {
		vfio_device_put(device);
		return ret;
	}

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	ret = get_unused_fd_flags(O_CLOEXEC);
	if (ret < 0) {
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		put_unused_fd(ret);
		ret = PTR_ERR(filep);
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	atomic_inc(&group->container_users);

	fd_install(ret, filep);

	if (group->noiommu)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));

	return ret;
}
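
/*
 * Note that a successful open above takes an extra container_users
 * reference; it is dropped in vfio_device_fops_release() via
 * vfio_group_try_dissolve_container(), so the group cannot silently
 * lose its container while a device fd remains open.
 */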

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_group_fops_compat_ioctl(struct file *filep,
					 unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_group_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
		vfio_group_put(group);
		return -EPERM;
	}

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	/* Did any user fail to unregister a notifier? */
	WARN_ON(group->notifier.head);
1589
Alex Williamsoncba33452012-07-31 08:16:22 -06001590 vfio_group_try_dissolve_container(group);
1591
Alex Williamson6d6768c2013-06-25 16:06:54 -06001592 atomic_dec(&group->opened);
1593
Alex Williamsoncba33452012-07-31 08:16:22 -06001594 vfio_group_put(group);
1595
1596 return 0;
1597}
1598
1599static const struct file_operations vfio_group_fops = {
1600 .owner = THIS_MODULE,
1601 .unlocked_ioctl = vfio_group_fops_unl_ioctl,
1602#ifdef CONFIG_COMPAT
1603 .compat_ioctl = vfio_group_fops_compat_ioctl,
1604#endif
1605 .open = vfio_group_fops_open,
1606 .release = vfio_group_fops_release,
1607};
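
/*
 * Illustrative user-space sketch (an editorial addition, not part of
 * this driver): the typical ioctl sequence against a group fd served by
 * the fops above, following the documented VFIO usage model.  The group
 * number "26" and the device name "0000:06:0d.0" are hypothetical
 * placeholders.
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
 *		return;		group not viable: not all devices bound to vfio
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *	int device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */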

/**
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

#ifdef CONFIG_COMPAT
static long vfio_device_fops_compat_ioctl(struct file *filep,
					  unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_device_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_device_fops_compat_ioctl,
#endif
	.mmap		= vfio_device_fops_mmap,
};

/**
 * External user API, exported via symbols that other modules link
 * against dynamically.
 *
 * The protocol includes:
 * 1. do normal VFIO init operation:
 *	- opening a new container;
 *	- attaching group(s) to it;
 *	- setting an IOMMU driver for a container.
 * When IOMMU is set for a container, all groups in it are
 * considered ready to use by an external user.
 *
 * 2. User space passes a group fd to an external user.
 * The external user calls vfio_group_get_external_user()
 * to verify that:
 *	- the group is initialized;
 *	- IOMMU is set for it.
 * If both checks pass, vfio_group_get_external_user()
 * increments the container user counter to prevent
 * the VFIO group from being disposed of before the external
 * user (e.g. KVM) exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 * to obtain the IOMMU group ID.
 *
 * 4. When the external user finishes using the group, it calls
 * vfio_group_put_external_user() to release the VFIO group.
 * This call decrements the container user counter.
 */
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;
	int ret;

	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	ret = vfio_group_add_container_user(group);
	if (ret)
		return ERR_PTR(ret);

	vfio_group_get(group);

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);

void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
	return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);
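
/*
 * A minimal in-kernel sketch (editorial addition; the function name, fd
 * argument and pr_info() are hypothetical) of the protocol above from
 * the external user's side, i.e. what a KVM-like consumer might do with
 * a group fd received from user space.
 */
static int __maybe_unused vfio_external_user_example(int group_fd)
{
	struct file *filep;
	struct vfio_group *group;

	filep = fget(group_fd);
	if (!filep)
		return -EBADF;

	/* Fails unless the group is initialized and an IOMMU is set */
	group = vfio_group_get_external_user(filep);
	fput(filep);	/* the group reference outlives the file reference */
	if (IS_ERR(group))
		return PTR_ERR(group);

	pr_info("using iommu group %d\n", vfio_external_user_iommu_id(group));

	/* ... external use of the group ... */

	vfio_group_put_external_user(group);
	return 0;
}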

/**
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities: allocate or
 * reallocate the buffer with additional @size, filling in @id and @version
 * of the new capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail; vfio_info_cap_shift() should be called to fix up the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);
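
/*
 * A minimal sketch (editorial addition) of how a bus driver might build
 * a capability chain with the helpers above when answering a region
 * info ioctl.  The function name, the type/subtype values and the @arg
 * layout are hypothetical; the shift-then-copy pattern mirrors the
 * in-tree vfio-pci usage.
 */
static int __maybe_unused vfio_caps_usage_example(struct vfio_region_info *info,
						  void __user *arg)
{
	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
	struct vfio_region_info_cap_type cap_type = {
		.type = 1, .subtype = 1,	/* hypothetical values */
	};
	int ret;

	ret = vfio_info_add_capability(&caps, VFIO_REGION_INFO_CAP_TYPE,
				       &cap_type);
	if (ret)
		return ret;

	if (caps.size) {
		info->flags |= VFIO_REGION_INFO_FLAG_CAPS;
		/* chain offsets are buffer-relative; rebase past the info */
		vfio_info_cap_shift(&caps, sizeof(*info));
		info->cap_offset = sizeof(*info);

		if (copy_to_user(arg + sizeof(*info), caps.buf, caps.size))
			ret = -EFAULT;
	}

	kfree(caps.buf);
	return ret;
}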

static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
{
	struct vfio_info_cap_header *header;
	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
	size_t size;

	size = sizeof(*sparse) + sparse->nr_areas * sizeof(*sparse->areas);
	header = vfio_info_cap_add(caps, size,
				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
	if (IS_ERR(header))
		return PTR_ERR(header);

	sparse_cap = container_of(header,
			struct vfio_region_info_cap_sparse_mmap, header);
	sparse_cap->nr_areas = sparse->nr_areas;
	memcpy(sparse_cap->areas, sparse->areas,
	       sparse->nr_areas * sizeof(*sparse->areas));
	return 0;
}

static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
{
	struct vfio_info_cap_header *header;
	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;

	header = vfio_info_cap_add(caps, sizeof(*cap),
				   VFIO_REGION_INFO_CAP_TYPE, 1);
	if (IS_ERR(header))
		return PTR_ERR(header);

	type_cap = container_of(header, struct vfio_region_info_cap_type,
				header);
	type_cap->type = cap->type;
	type_cap->subtype = cap->subtype;
	return 0;
}

int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
			     void *cap_type)
{
	int ret = -EINVAL;

	if (!cap_type)
		return 0;

	switch (cap_type_id) {
	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
		ret = sparse_mmap_cap(caps, cap_type);
		break;

	case VFIO_REGION_INFO_CAP_TYPE:
		ret = region_type_cap(caps, cap_type);
		break;
	}

	return ret;
}
EXPORT_SYMBOL(vfio_info_add_capability);

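/*
 * Validate a VFIO_DEVICE_SET_IRQS header on behalf of a bus driver:
 * check argsz, the index against @max_irq_type, the start/count range
 * against @num_irqs and the flag combinations, then report through
 * @data_size how many bytes of per-IRQ payload follow the header
 * (none for DATA_NONE, a uint8_t per IRQ for DATA_BOOL, an int32_t
 * eventfd per IRQ for DATA_EVENTFD).
 */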
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);

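/*
 * A minimal sketch (editorial addition) of how a vendor driver's
 * VFIO_DEVICE_SET_IRQS ioctl path might use the helper above; the
 * function name, the @arg handling and the per-device IRQ count are
 * hypothetical.
 */
static long __maybe_unused vfio_set_irqs_usage_example(unsigned long arg)
{
	struct vfio_irq_set hdr;
	size_t data_size = 0;
	u8 *data = NULL;
	int num_irqs = 4;	/* hypothetical per-device IRQ count */
	int ret;

	if (copy_from_user(&hdr, (void __user *)arg, sizeof(hdr)))
		return -EFAULT;

	ret = vfio_set_irqs_validate_and_prepare(&hdr, num_irqs,
						 VFIO_PCI_NUM_IRQS,
						 &data_size);
	if (ret)
		return ret;

	/* payload, if any, immediately follows the header */
	if (data_size) {
		data = memdup_user((void __user *)(arg + sizeof(hdr)),
				   data_size);
		if (IS_ERR(data))
			return PTR_ERR(data);
	}

	/* ... hand hdr and data to the device's IRQ setup ... */

	kfree(data);
	return 0;
}
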
/*
 * Pin a set of guest PFNs and return their associated host PFNs for local
 * domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be pinned.
 * @npage [in]   : count of elements in user_pfn array.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]    : protection flags
 * @phys_pfn[out]: array of host PFNs
 * Return error or number of pages pinned.
 */
int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
		   int prot, unsigned long *phys_pfn)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !phys_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_pin_pages;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
					     npage, prot, phys_pfn);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

err_pin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_pin_pages);

/*
 * Unpin a set of host PFNs for local domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be unpinned.  Number of
 *		   user/guest PFNs should not be greater than
 *		   VFIO_PIN_PAGES_MAX_ENTRIES.
 * @npage [in]   : count of elements in user_pfn array.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * Return error or number of pages unpinned.
 */
int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_unpin_pages;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unpin_pages))
		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
					       npage);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

err_unpin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unpin_pages);
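
/*
 * A minimal sketch (editorial addition) of how an mdev vendor driver
 * might pair the two calls above for a single page; the function name,
 * the device pointer and the guest pfn argument are hypothetical.
 */
static int __maybe_unused vfio_pin_usage_example(struct device *dev,
						 unsigned long gfn)
{
	unsigned long user_pfn = gfn;
	unsigned long phys_pfn;
	int ret;

	ret = vfio_pin_pages(dev, &user_pfn, 1, IOMMU_READ | IOMMU_WRITE,
			     &phys_pfn);
	if (ret != 1)	/* returns pages pinned or a negative errno */
		return ret < 0 ? ret : -EFAULT;

	/* ... access the host page behind phys_pfn ... */

	ret = vfio_unpin_pages(dev, &user_pfn, 1);
	return ret == 1 ? 0 : ret;
}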

static int vfio_register_iommu_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->register_notifier))
		ret = driver->ops->register_notifier(container->iommu_data,
						     events, nb);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_iommu_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unregister_notifier))
		ret = driver->ops->unregister_notifier(container->iommu_data,
						       nb);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

	return ret;
}

void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
{
	group->kvm = kvm;
	blocking_notifier_call_chain(&group->notifier,
				     VFIO_GROUP_NOTIFY_SET_KVM, kvm);
}
EXPORT_SYMBOL_GPL(vfio_group_set_kvm);

static int vfio_register_group_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	struct vfio_container *container;
	int ret;
	bool set_kvm = false;

	if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
		set_kvm = true;

	/* clear known events */
	*events &= ~VFIO_GROUP_NOTIFY_SET_KVM;

	/* refuse to continue if any unknown events remain */
	if (*events)
		return -EINVAL;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	down_read(&container->group_lock);

	ret = blocking_notifier_chain_register(&group->notifier, nb);

	/*
	 * The kvm pointer may already have been set before this
	 * registration, so replay the event once here.
	 */
	if (!ret && set_kvm && group->kvm)
		blocking_notifier_call_chain(&group->notifier,
					     VFIO_GROUP_NOTIFY_SET_KVM,
					     group->kvm);

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_group_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	struct vfio_container *container;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	down_read(&container->group_lock);

	ret = blocking_notifier_chain_unregister(&group->notifier, nb);

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

	return ret;
}

int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
			   unsigned long *events, struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb || !events || (*events == 0))
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_register_iommu_notifier(group, events, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_register_group_notifier(group, events, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_register_notifier);

int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
			     struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb)
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_unregister_iommu_notifier(group, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_unregister_group_notifier(group, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unregister_notifier);
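
/*
 * A minimal sketch (editorial addition) of a vendor driver registering
 * for the KVM pointer via the group notifier above; the callback and
 * function names are hypothetical.
 */
static int vfio_example_group_notifier(struct notifier_block *nb,
				       unsigned long action, void *data)
{
	if (action == VFIO_GROUP_NOTIFY_SET_KVM && data)
		pr_debug("vfio group bound to kvm %p\n", data);

	return NOTIFY_OK;
}

static int __maybe_unused vfio_notifier_usage_example(struct device *dev)
{
	static struct notifier_block nb = {
		.notifier_call = vfio_example_group_notifier,
	};
	unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM;

	/*
	 * @events is consumed: bits the core recognizes are cleared, and
	 * any remainder fails the registration with -EINVAL.
	 */
	return vfio_register_notifier(dev, VFIO_GROUP_NOTIFY, &events, &nb);
}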

/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	return 0;

err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");