Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1 | /* |
| 2 | * VFIO core |
| 3 | * |
| 4 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. |
| 5 | * Author: Alex Williamson <alex.williamson@redhat.com> |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or modify |
| 8 | * it under the terms of the GNU General Public License version 2 as |
| 9 | * published by the Free Software Foundation. |
| 10 | * |
| 11 | * Derived from original vfio: |
| 12 | * Copyright 2010 Cisco Systems, Inc. All rights reserved. |
| 13 | * Author: Tom Lyon, pugs@cisco.com |
| 14 | */ |
| 15 | |
| 16 | #include <linux/cdev.h> |
| 17 | #include <linux/compat.h> |
| 18 | #include <linux/device.h> |
| 19 | #include <linux/file.h> |
| 20 | #include <linux/anon_inodes.h> |
| 21 | #include <linux/fs.h> |
| 22 | #include <linux/idr.h> |
| 23 | #include <linux/iommu.h> |
| 24 | #include <linux/list.h> |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 25 | #include <linux/miscdevice.h> |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 26 | #include <linux/module.h> |
| 27 | #include <linux/mutex.h> |
Alex Williamson | 5f096b1 | 2015-10-27 14:53:04 -0600 | [diff] [blame] | 28 | #include <linux/pci.h> |
Alex Williamson | 9587f44 | 2013-04-25 16:12:38 -0600 | [diff] [blame] | 29 | #include <linux/rwsem.h> |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 30 | #include <linux/sched.h> |
| 31 | #include <linux/slab.h> |
Alex Williamson | 664e938 | 2013-04-30 15:42:28 -0600 | [diff] [blame] | 32 | #include <linux/stat.h> |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 33 | #include <linux/string.h> |
| 34 | #include <linux/uaccess.h> |
| 35 | #include <linux/vfio.h> |
| 36 | #include <linux/wait.h> |
| 37 | |
| 38 | #define DRIVER_VERSION "0.3" |
| 39 | #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" |
| 40 | #define DRIVER_DESC "VFIO - User Level meta-driver" |
| 41 | |
/* Module-wide singleton state, instantiated once as 'vfio'. */
static struct vfio {
	struct class		*class;			/* device class used by device_create() for groups */
	struct list_head	iommu_drivers_list;	/* registered IOMMU backend drivers */
	struct mutex		iommu_drivers_lock;	/* protects iommu_drivers_list */
	struct list_head	group_list;		/* all live vfio_group objects */
	struct idr		group_idr;		/* group minor number -> vfio_group */
	struct mutex		group_lock;		/* protects group_list and group_idr */
	struct cdev		group_cdev;		/* group character device (registration not in view) */
	dev_t			group_devt;		/* base dev_t; MAJOR() combined with idr minors */
	wait_queue_head_t	release_q;		/* woken by vfio_device_release() */
} vfio;
| 53 | |
/* Node linking a registered IOMMU backend's ops into vfio.iommu_drivers_list. */
struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;		/* backend callbacks */
	struct list_head			vfio_next;	/* entry on vfio.iommu_drivers_list */
};
| 58 | |
/*
 * Container: created when /dev/vfio/vfio is opened; refcounted so it
 * survives until the last user (container/group/device in any order).
 */
struct vfio_container {
	struct kref			kref;		/* freed via vfio_container_release() */
	struct list_head		group_list;	/* groups attached to this container */
	struct rw_semaphore		group_lock;	/* protects group_list / iommu binding */
	struct vfio_iommu_driver	*iommu_driver;	/* bound backend (set outside this chunk) */
	void				*iommu_data;	/* backend-private state from ops->open */
	bool				noiommu;	/* container is in no-IOMMU mode */
};
| 67 | |
/*
 * Tracks a device mid-transition from a vfio bus driver to driver-less;
 * vfio_dev_viable() treats devices on this list as still viable.
 */
struct vfio_unbound_dev {
	struct device		*dev;		/* device being unbound */
	struct list_head	unbound_next;	/* entry on vfio_group::unbound_list */
};
| 72 | |
/* Userspace-visible unit of ownership, 1:1 with an iommu_group. */
struct vfio_group {
	struct kref			kref;		/* freed via vfio_group_release() */
	int				minor;		/* chardev minor from vfio.group_idr */
	atomic_t			container_users; /* users pinning the container attachment */
	struct iommu_group		*iommu_group;	/* backing IOMMU group (holds a reference) */
	struct vfio_container		*container;	/* attached container, if any */
	struct list_head		device_list;	/* vfio_devices in this group */
	struct mutex			device_lock;	/* protects device_list */
	struct device			*dev;		/* sysfs/chardev device from device_create() */
	struct notifier_block		nb;		/* iommu_group notifier (vfio_iommu_group_notifier) */
	struct list_head		vfio_next;	/* entry on vfio.group_list */
	struct list_head		container_next;	/* entry on container->group_list */
	struct list_head		unbound_list;	/* vfio_unbound_dev entries */
	struct mutex			unbound_lock;	/* protects unbound_list */
	atomic_t			opened;		/* open count of the group chardev */
	bool				noiommu;	/* group created via the no-IOMMU path */
};
| 90 | |
/* Per-device object registered by a vfio bus driver (e.g. vfio-pci). */
struct vfio_device {
	struct kref			kref;		/* freed via vfio_device_release() */
	struct device			*dev;		/* underlying struct device */
	const struct vfio_device_ops	*ops;		/* bus-driver callbacks */
	struct vfio_group		*group;		/* owning group (reference held) */
	struct list_head		group_next;	/* entry on group->device_list */
	void				*device_data;	/* bus-driver private data */
};
| 99 | |
#ifdef CONFIG_VFIO_NOIOMMU
/*
 * Opt-in knob enabling operation without IOMMU protection.  The variable's
 * address also doubles as the iommudata cookie marking no-IOMMU groups
 * (see vfio_iommu_group_get()).  Root may toggle it at runtime
 * (S_IRUGO | S_IWUSR).
 */
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif
| 106 | |
/*
 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
 * and remove functions, any use cases other than acquiring the first
 * reference for the purpose of calling vfio_add_group_dev() or removing
 * that symmetric reference after vfio_del_group_dev() should use the raw
 * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
 * removes the device from the dummy group and cannot be nested.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
	struct iommu_group *group;
	int __maybe_unused ret;	/* only consumed under CONFIG_VFIO_NOIOMMU */

	group = iommu_group_get(dev);

#ifdef CONFIG_VFIO_NOIOMMU
	/*
	 * With noiommu enabled, an IOMMU group will be created for a device
	 * that doesn't already have one and doesn't have an iommu_ops on their
	 * bus.  We set iommudata simply to be able to identify these groups
	 * as special use and for reclamation later.
	 */
	if (group || !noiommu || iommu_present(dev->bus))
		return group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return NULL;

	iommu_group_set_name(group, "vfio-noiommu");
	/* &noiommu is the cookie vfio_iommu_group_put() checks for. */
	iommu_group_set_iommudata(group, &noiommu, NULL);
	ret = iommu_group_add_device(group, dev);
	/* Drop the alloc reference; the device now holds the group. */
	iommu_group_put(group);
	if (ret)
		return NULL;

	/*
	 * Where to taint?  At this point we've added an IOMMU group for a
	 * device that is not backed by iommu_ops, therefore any iommu_
	 * callback using iommu_ops can legitimately Oops.  So, while we may
	 * be about to give a DMA capable device to a user without IOMMU
	 * protection, which is clearly taint-worthy, let's go ahead and do
	 * it here.
	 */
	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif

	return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
| 158 | |
/*
 * Symmetric release for vfio_iommu_group_get().  For groups fabricated by
 * the no-IOMMU path (identified by the &noiommu iommudata cookie), first
 * remove the device from the dummy group; then drop the group reference.
 */
void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
	if (iommu_group_get_iommudata(group) == &noiommu)
		iommu_group_remove_device(dev);
#endif

	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
| 169 | |
#ifdef CONFIG_VFIO_NOIOMMU
/*
 * Minimal IOMMU backend for no-IOMMU mode: performs no mapping at all,
 * it only gates access (CAP_SYS_RAWIO) and restricts attachment to
 * groups created by the no-IOMMU path above.
 */

/* Validate the requested extension and privilege; no per-open state. */
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	/* NULL iommu_data: this backend keeps no state. */
	return NULL;
}

/* Nothing to tear down; see vfio_noiommu_open(). */
static void vfio_noiommu_release(void *iommu_data)
{
}

/* Only VFIO_CHECK_EXTENSION is supported, and only for the noiommu IOMMU. */
static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

/* Refuse any group not marked with the &noiommu iommudata cookie. */
static int vfio_noiommu_attach_group(void *iommu_data,
				     struct iommu_group *iommu_group)
{
	return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
}

/* No attach-time state, so nothing to undo. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
#endif
| 215 | |
| 216 | |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 217 | /** |
| 218 | * IOMMU driver registration |
| 219 | */ |
| 220 | int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) |
| 221 | { |
| 222 | struct vfio_iommu_driver *driver, *tmp; |
| 223 | |
| 224 | driver = kzalloc(sizeof(*driver), GFP_KERNEL); |
| 225 | if (!driver) |
| 226 | return -ENOMEM; |
| 227 | |
| 228 | driver->ops = ops; |
| 229 | |
| 230 | mutex_lock(&vfio.iommu_drivers_lock); |
| 231 | |
| 232 | /* Check for duplicates */ |
| 233 | list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { |
| 234 | if (tmp->ops == ops) { |
| 235 | mutex_unlock(&vfio.iommu_drivers_lock); |
| 236 | kfree(driver); |
| 237 | return -EINVAL; |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | list_add(&driver->vfio_next, &vfio.iommu_drivers_list); |
| 242 | |
| 243 | mutex_unlock(&vfio.iommu_drivers_lock); |
| 244 | |
| 245 | return 0; |
| 246 | } |
| 247 | EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); |
| 248 | |
| 249 | void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) |
| 250 | { |
| 251 | struct vfio_iommu_driver *driver; |
| 252 | |
| 253 | mutex_lock(&vfio.iommu_drivers_lock); |
| 254 | list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { |
| 255 | if (driver->ops == ops) { |
| 256 | list_del(&driver->vfio_next); |
| 257 | mutex_unlock(&vfio.iommu_drivers_lock); |
| 258 | kfree(driver); |
| 259 | return; |
| 260 | } |
| 261 | } |
| 262 | mutex_unlock(&vfio.iommu_drivers_lock); |
| 263 | } |
| 264 | EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); |
| 265 | |
/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	/* Allocate a minor in [0, MINORMASK] mapping back to @group. */
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}
| 273 | |
/* Return @minor to the idr; caller holds vfio.group_lock (see above). */
static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}
| 278 | |
| 279 | static int vfio_iommu_group_notifier(struct notifier_block *nb, |
| 280 | unsigned long action, void *data); |
| 281 | static void vfio_group_get(struct vfio_group *group); |
| 282 | |
/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}
| 293 | |
/* kref release callback: last reference dropped, free the container. */
static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}
| 301 | |
/* Drop a container reference; frees it via vfio_container_release() on zero. */
static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
| 306 | |
/*
 * Drop vfio.group_lock (which the caller must hold), then unregister the
 * group's iommu notifier and free it.  Note the lock is intentionally
 * released before unregistering -- see comment below.
 */
static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock.  A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}
| 317 | |
/**
 * Group objects - create, release, get, put, search
 */

/*
 * Create a vfio_group for @iommu_group: allocate and initialize the object,
 * register the iommu notifier, allocate a chardev minor and create the
 * /dev/vfio/<id> (or noiommu-<id>) device, then publish the group on
 * vfio.group_list.  If another thread raced us to create the same group,
 * return the existing one with a reference instead.  Returns ERR_PTR on
 * failure.
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	INIT_LIST_HEAD(&group->unbound_list);
	mutex_init(&group->unbound_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	group->iommu_group = iommu_group;
#ifdef CONFIG_VFIO_NOIOMMU
	/* The &noiommu iommudata cookie marks groups built by the noiommu path. */
	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
#endif

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * blocking notifiers acquire a rwsem around registering and hold
	 * it around callback.  Therefore, need to register outside of
	 * vfio.group_lock to avoid A-B/B-A contention.  Our callback won't
	 * do anything unless it can find the group in vfio.group_list, so
	 * no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			/* Discard our duplicate; drops vfio.group_lock too. */
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	/* Chardev name: "<id>" normally, "noiommu-<id>" for noiommu groups. */
	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%s%d", group->noiommu ? "noiommu-" : "",
			    iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
		vfio_group_unlock_and_free(group);
		return (struct vfio_group *)dev; /* ERR_PTR */
	}

	group->minor = minor;
	group->dev = dev;

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);

	return group;
}
| 394 | |
/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);
	struct vfio_unbound_dev *unbound, *tmp;
	/* Save the iommu_group; 'group' is freed before we drop its reference. */
	struct iommu_group *iommu_group = group->iommu_group;

	/* All devices must have been removed before the last group put. */
	WARN_ON(!list_empty(&group->device_list));

	/* Flush any leftover unbound-device tracking entries. */
	list_for_each_entry_safe(unbound, tmp,
				 &group->unbound_list, unbound_next) {
		list_del(&unbound->unbound_next);
		kfree(unbound);
	}

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);
	/* Drops vfio.group_lock, unregisters the notifier, frees 'group'. */
	vfio_group_unlock_and_free(group);
	iommu_group_put(iommu_group);
}
| 416 | |
/*
 * Drop a group reference.  On the final put, vfio.group_lock is acquired
 * and vfio_group_release() runs (which itself releases the lock).
 */
static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}
| 421 | |
/* Carrier for deferring a vfio_group_put() to the system workqueue. */
struct vfio_group_put_work {
	struct work_struct work;
	struct vfio_group *group;
};

/* Workqueue callback: perform the deferred put and free the carrier. */
static void vfio_group_put_bg(struct work_struct *work)
{
	struct vfio_group_put_work *do_work;

	do_work = container_of(work, struct vfio_group_put_work, work);

	vfio_group_put(do_work->group);
	kfree(do_work);
}

/*
 * Defer a group put to process context via the system workqueue --
 * presumably for callers that cannot take vfio.group_lock (which
 * vfio_group_put() may acquire on the final reference); confirm with
 * call sites.  On allocation failure we WARN and return, leaking the
 * reference rather than risking the caller's context.
 */
static void vfio_group_schedule_put(struct vfio_group *group)
{
	struct vfio_group_put_work *do_work;

	do_work = kmalloc(sizeof(*do_work), GFP_KERNEL);
	if (WARN_ON(!do_work))
		return;

	INIT_WORK(&do_work->work, vfio_group_put_bg);
	do_work->group = group;
	schedule_work(&do_work->work);
}
| 449 | |
/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}
| 455 | |
| 456 | /* |
| 457 | * Not really a try as we will sleep for mutex, but we need to make |
| 458 | * sure the group pointer is valid under lock and get a reference. |
| 459 | */ |
| 460 | static struct vfio_group *vfio_group_try_get(struct vfio_group *group) |
| 461 | { |
| 462 | struct vfio_group *target = group; |
| 463 | |
| 464 | mutex_lock(&vfio.group_lock); |
| 465 | list_for_each_entry(group, &vfio.group_list, vfio_next) { |
| 466 | if (group == target) { |
| 467 | vfio_group_get(group); |
| 468 | mutex_unlock(&vfio.group_lock); |
| 469 | return group; |
| 470 | } |
| 471 | } |
| 472 | mutex_unlock(&vfio.group_lock); |
| 473 | |
| 474 | return NULL; |
| 475 | } |
| 476 | |
| 477 | static |
| 478 | struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group) |
| 479 | { |
| 480 | struct vfio_group *group; |
| 481 | |
| 482 | mutex_lock(&vfio.group_lock); |
| 483 | list_for_each_entry(group, &vfio.group_list, vfio_next) { |
| 484 | if (group->iommu_group == iommu_group) { |
| 485 | vfio_group_get(group); |
| 486 | mutex_unlock(&vfio.group_lock); |
| 487 | return group; |
| 488 | } |
| 489 | } |
| 490 | mutex_unlock(&vfio.group_lock); |
| 491 | |
| 492 | return NULL; |
| 493 | } |
| 494 | |
| 495 | static struct vfio_group *vfio_group_get_from_minor(int minor) |
| 496 | { |
| 497 | struct vfio_group *group; |
| 498 | |
| 499 | mutex_lock(&vfio.group_lock); |
| 500 | group = idr_find(&vfio.group_idr, minor); |
| 501 | if (!group) { |
| 502 | mutex_unlock(&vfio.group_lock); |
| 503 | return NULL; |
| 504 | } |
| 505 | vfio_group_get(group); |
| 506 | mutex_unlock(&vfio.group_lock); |
| 507 | |
| 508 | return group; |
| 509 | } |
| 510 | |
/**
 * Device objects - create, release, get, put, search
 */

/*
 * Allocate a vfio_device for @dev within @group, stash it in drvdata,
 * take a group reference on its behalf, and link it onto the group's
 * device list.  Returns ERR_PTR(-ENOMEM) on allocation failure.
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	/* Cleared again in vfio_device_release(). */
	dev_set_drvdata(dev, device);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}
| 542 | |
/*
 * kref release callback.  Entered with group->device_lock HELD (taken by
 * kref_put_mutex() in vfio_device_put()); unlinks the device, drops the
 * lock, clears drvdata and frees the object, then wakes release_q.
 */
static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}
| 559 | |
/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	/* Cache the group: 'device' may be freed by the release callback. */
	struct vfio_group *group = device->group;
	/* Final put runs vfio_device_release() with device_lock held. */
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 568 | |
/* Take a device reference plus the group reference it implies. */
static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}
| 574 | |
| 575 | static struct vfio_device *vfio_group_get_device(struct vfio_group *group, |
| 576 | struct device *dev) |
| 577 | { |
| 578 | struct vfio_device *device; |
| 579 | |
| 580 | mutex_lock(&group->device_lock); |
| 581 | list_for_each_entry(device, &group->device_list, group_next) { |
| 582 | if (device->dev == dev) { |
| 583 | vfio_device_get(device); |
| 584 | mutex_unlock(&group->device_lock); |
| 585 | return device; |
| 586 | } |
| 587 | } |
| 588 | mutex_unlock(&group->device_lock); |
| 589 | return NULL; |
| 590 | } |
| 591 | |
/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, ie. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge.  Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only does so for user owned devices downstream.  Note
 * that error notification via MSI can be affected for platforms that handle
 * MSI within the same IOVA space as DMA.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };

/*
 * True if @dev bound to @drv is acceptable inside a user-owned group:
 * either it's a PCI device with a non-normal header type (bridge/
 * interconnect) or @drv's name is in vfio_driver_whitelist[].
 */
static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{
	int i;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
			return true;
	}

	for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
		if (!strcmp(drv->name, vfio_driver_whitelist[i]))
			return true;
	}

	return false;
}
| 627 | |
/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver.  The first is to test whether the device exists in the vfio
 * group.  The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 *
 * Returns 0 if @dev is viable, -EINVAL otherwise (suitable as an
 * iommu_group_for_each_dev() callback; @data is the vfio_group).
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	/* Snapshot the driver pointer; it can change under us. */
	struct device_driver *drv = ACCESS_ONCE(dev->driver);
	struct vfio_unbound_dev *unbound;
	int ret = -EINVAL;

	/* ret becomes 0 if the device is mid-unbind from a vfio driver. */
	mutex_lock(&group->unbound_lock);
	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
		if (dev == unbound->dev) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&group->unbound_lock);

	/* Unbinding, driver-less, or whitelisted: viable. */
	if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
		return 0;

	/* Bound to a vfio driver: the device is in our group list. */
	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return ret;
}
| 670 | |
/**
 * Async device support
 */

/*
 * Notifier helper for IOMMU_GROUP_NOTIFY_ADD_DEVICE: a new device showing
 * up in a group we already track.  Harmless for idle groups; a device
 * appearing in a live (in-use) group is a hole we can only WARN about
 * until auto-probing can be prevented.  Always returns 0.
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
	     iommu_group_id(group->iommu_group));

	return 0;
}
| 695 | |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 696 | static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev) |
| 697 | { |
| 698 | /* We don't care what happens when the group isn't in use */ |
| 699 | if (!atomic_read(&group->container_users)) |
| 700 | return 0; |
| 701 | |
| 702 | return vfio_dev_viable(dev, group); |
| 703 | } |
| 704 | |
/*
 * Notifier callback for iommu group events (device add/del, driver
 * bind/unbind) on any device within the group, whether or not that
 * device is bound to a vfio bus driver.
 */
static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Need to go through a group_lock lookup to get a reference or we
	 * risk racing a group being removed. Ignore spurious notifies.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/*
		 * Nothing to do here.  If the device is in use, then the
		 * vfio sub-driver should block the remove callback until
		 * it is unused.  If the device is unused or attached to a
		 * stub driver, then it should be released and we don't
		 * care that it will be going away.
		 */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		pr_debug("%s: Device %s, group %d binding to driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		pr_debug("%s: Device %s, group %d bound to driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		/* A device in a live group binding to an unknown driver
		 * breaks isolation; vfio_group_nb_verify() must pass. */
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		pr_debug("%s: Device %s, group %d unbound from driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */

		/* Unbind is complete; drop the device from the unbound
		 * list added by vfio_del_group_dev(). */
		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	/*
	 * If we're the last reference to the group, the group will be
	 * released, which includes unregistering the iommu group notifier.
	 * We hold a read-lock on that notifier list, unregistering needs
	 * a write-lock... deadlock.  Release our reference asynchronously
	 * to avoid that situation.
	 */
	vfio_group_schedule_put(group);
	return NOTIFY_OK;
}
| 784 | |
| 785 | /** |
| 786 | * VFIO driver API |
| 787 | */ |
/*
 * Register a device with vfio: find or create the vfio_group for the
 * device's iommu_group and create a vfio_device for it, associating
 * @ops and the bus driver private @device_data.
 * Returns 0 on success or a negative errno.
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	/* A device without an iommu_group cannot be handled by vfio */
	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group. A created vfio_group keeps the reference.
		 */
		iommu_group_put(iommu_group);
	}

	/* Registering the same device twice is a caller bug */
	device = vfio_group_get_device(group, dev);
	if (device) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop all but the vfio_device reference. The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);
| 839 | |
Vijay Mohan Pandarathil | 44f5071 | 2013-03-11 09:28:44 -0600 | [diff] [blame] | 840 | /** |
Alex Williamson | 20f3001 | 2015-06-09 10:08:57 -0600 | [diff] [blame] | 841 | * Get a reference to the vfio_device for a device. Even if the |
| 842 | * caller thinks they own the device, they could be racing with a |
| 843 | * release call path, so we can't trust drvdata for the shortcut. |
| 844 | * Go the long way around, from the iommu_group to the vfio_group |
| 845 | * to the vfio_device. |
Vijay Mohan Pandarathil | 44f5071 | 2013-03-11 09:28:44 -0600 | [diff] [blame] | 846 | */ |
| 847 | struct vfio_device *vfio_device_get_from_dev(struct device *dev) |
| 848 | { |
Alex Williamson | 20f3001 | 2015-06-09 10:08:57 -0600 | [diff] [blame] | 849 | struct iommu_group *iommu_group; |
| 850 | struct vfio_group *group; |
| 851 | struct vfio_device *device; |
Vijay Mohan Pandarathil | 44f5071 | 2013-03-11 09:28:44 -0600 | [diff] [blame] | 852 | |
Alex Williamson | 20f3001 | 2015-06-09 10:08:57 -0600 | [diff] [blame] | 853 | iommu_group = iommu_group_get(dev); |
| 854 | if (!iommu_group) |
| 855 | return NULL; |
| 856 | |
| 857 | group = vfio_group_get_from_iommu(iommu_group); |
| 858 | iommu_group_put(iommu_group); |
| 859 | if (!group) |
| 860 | return NULL; |
| 861 | |
| 862 | device = vfio_group_get_device(group, dev); |
| 863 | vfio_group_put(group); |
Vijay Mohan Pandarathil | 44f5071 | 2013-03-11 09:28:44 -0600 | [diff] [blame] | 864 | |
| 865 | return device; |
| 866 | } |
| 867 | EXPORT_SYMBOL_GPL(vfio_device_get_from_dev); |
| 868 | |
Alex Williamson | 4bc94d5 | 2015-07-24 15:14:04 -0600 | [diff] [blame] | 869 | static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, |
| 870 | char *buf) |
| 871 | { |
Joerg Roedel | e324fc8 | 2015-11-04 13:53:26 +0100 | [diff] [blame] | 872 | struct vfio_device *it, *device = NULL; |
Alex Williamson | 4bc94d5 | 2015-07-24 15:14:04 -0600 | [diff] [blame] | 873 | |
| 874 | mutex_lock(&group->device_lock); |
Joerg Roedel | e324fc8 | 2015-11-04 13:53:26 +0100 | [diff] [blame] | 875 | list_for_each_entry(it, &group->device_list, group_next) { |
| 876 | if (!strcmp(dev_name(it->dev), buf)) { |
| 877 | device = it; |
Alex Williamson | 4bc94d5 | 2015-07-24 15:14:04 -0600 | [diff] [blame] | 878 | vfio_device_get(device); |
| 879 | break; |
| 880 | } |
| 881 | } |
| 882 | mutex_unlock(&group->device_lock); |
| 883 | |
| 884 | return device; |
| 885 | } |
| 886 | |
Vijay Mohan Pandarathil | 44f5071 | 2013-03-11 09:28:44 -0600 | [diff] [blame] | 887 | /* |
| 888 | * Caller must hold a reference to the vfio_device |
| 889 | */ |
| 890 | void *vfio_device_data(struct vfio_device *device) |
| 891 | { |
| 892 | return device->device_data; |
| 893 | } |
| 894 | EXPORT_SYMBOL_GPL(vfio_device_data); |
| 895 | |
Alex Williamson | e014e94 | 2013-02-14 14:02:13 -0700 | [diff] [blame] | 896 | /* Given a referenced group, check if it contains the device */ |
| 897 | static bool vfio_dev_present(struct vfio_group *group, struct device *dev) |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 898 | { |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 899 | struct vfio_device *device; |
| 900 | |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 901 | device = vfio_group_get_device(group, dev); |
Alex Williamson | e014e94 | 2013-02-14 14:02:13 -0700 | [diff] [blame] | 902 | if (!device) |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 903 | return false; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 904 | |
| 905 | vfio_device_put(device); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 906 | return true; |
| 907 | } |
| 908 | |
| 909 | /* |
| 910 | * Decrement the device reference count and wait for the device to be |
| 911 | * removed. Open file descriptors for the device... */ |
| 912 | void *vfio_del_group_dev(struct device *dev) |
| 913 | { |
| 914 | struct vfio_device *device = dev_get_drvdata(dev); |
| 915 | struct vfio_group *group = device->group; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 916 | void *device_data = device->device_data; |
Alex Williamson | 60720a0 | 2015-02-06 15:05:06 -0700 | [diff] [blame] | 917 | struct vfio_unbound_dev *unbound; |
Alex Williamson | 13060b6 | 2015-02-06 15:05:07 -0700 | [diff] [blame] | 918 | unsigned int i = 0; |
Alex Williamson | db7d4d7 | 2015-05-01 16:31:41 -0600 | [diff] [blame] | 919 | long ret; |
| 920 | bool interrupted = false; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 921 | |
Alex Williamson | e014e94 | 2013-02-14 14:02:13 -0700 | [diff] [blame] | 922 | /* |
| 923 | * The group exists so long as we have a device reference. Get |
| 924 | * a group reference and use it to scan for the device going away. |
| 925 | */ |
| 926 | vfio_group_get(group); |
| 927 | |
Alex Williamson | 60720a0 | 2015-02-06 15:05:06 -0700 | [diff] [blame] | 928 | /* |
| 929 | * When the device is removed from the group, the group suddenly |
| 930 | * becomes non-viable; the device has a driver (until the unbind |
| 931 | * completes), but it's not present in the group. This is bad news |
| 932 | * for any external users that need to re-acquire a group reference |
| 933 | * in order to match and release their existing reference. To |
| 934 | * solve this, we track such devices on the unbound_list to bridge |
| 935 | * the gap until they're fully unbound. |
| 936 | */ |
| 937 | unbound = kzalloc(sizeof(*unbound), GFP_KERNEL); |
| 938 | if (unbound) { |
| 939 | unbound->dev = dev; |
| 940 | mutex_lock(&group->unbound_lock); |
| 941 | list_add(&unbound->unbound_next, &group->unbound_list); |
| 942 | mutex_unlock(&group->unbound_lock); |
| 943 | } |
| 944 | WARN_ON(!unbound); |
| 945 | |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 946 | vfio_device_put(device); |
| 947 | |
Alex Williamson | 13060b6 | 2015-02-06 15:05:07 -0700 | [diff] [blame] | 948 | /* |
| 949 | * If the device is still present in the group after the above |
| 950 | * 'put', then it is in use and we need to request it from the |
| 951 | * bus driver. The driver may in turn need to request the |
| 952 | * device from the user. We send the request on an arbitrary |
| 953 | * interval with counter to allow the driver to take escalating |
| 954 | * measures to release the device if it has the ability to do so. |
| 955 | */ |
| 956 | do { |
| 957 | device = vfio_group_get_device(group, dev); |
| 958 | if (!device) |
| 959 | break; |
| 960 | |
| 961 | if (device->ops->request) |
| 962 | device->ops->request(device_data, i++); |
| 963 | |
| 964 | vfio_device_put(device); |
| 965 | |
Alex Williamson | db7d4d7 | 2015-05-01 16:31:41 -0600 | [diff] [blame] | 966 | if (interrupted) { |
| 967 | ret = wait_event_timeout(vfio.release_q, |
| 968 | !vfio_dev_present(group, dev), HZ * 10); |
| 969 | } else { |
| 970 | ret = wait_event_interruptible_timeout(vfio.release_q, |
| 971 | !vfio_dev_present(group, dev), HZ * 10); |
| 972 | if (ret == -ERESTARTSYS) { |
| 973 | interrupted = true; |
| 974 | dev_warn(dev, |
| 975 | "Device is currently in use, task" |
| 976 | " \"%s\" (%d) " |
| 977 | "blocked until device is released", |
| 978 | current->comm, task_pid_nr(current)); |
| 979 | } |
| 980 | } |
| 981 | } while (ret <= 0); |
Alex Williamson | e014e94 | 2013-02-14 14:02:13 -0700 | [diff] [blame] | 982 | |
| 983 | vfio_group_put(group); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 984 | |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 985 | return device_data; |
| 986 | } |
| 987 | EXPORT_SYMBOL_GPL(vfio_del_group_dev); |
| 988 | |
| 989 | /** |
| 990 | * VFIO base fd, /dev/vfio/vfio |
| 991 | */ |
/*
 * VFIO_CHECK_EXTENSION handler for the container fd.  Before an iommu
 * driver is set, all registered drivers are polled and the first
 * positive result wins; once a driver is set, only it is queried.
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result. If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

#ifdef CONFIG_VFIO_NOIOMMU
				/*
				 * Once groups are attached, a noiommu
				 * container may only match the noiommu
				 * driver, and vice versa.
				 */
				if (!list_empty(&container->group_list) &&
				    (container->noiommu !=
				     (driver->ops == &vfio_noiommu_ops)))
					continue;
#endif

				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
| 1043 | |
Alex Williamson | 9587f44 | 2013-04-25 16:12:38 -0600 | [diff] [blame] | 1044 | /* hold write lock on container->group_lock */ |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1045 | static int __vfio_container_attach_groups(struct vfio_container *container, |
| 1046 | struct vfio_iommu_driver *driver, |
| 1047 | void *data) |
| 1048 | { |
| 1049 | struct vfio_group *group; |
| 1050 | int ret = -ENODEV; |
| 1051 | |
| 1052 | list_for_each_entry(group, &container->group_list, container_next) { |
| 1053 | ret = driver->ops->attach_group(data, group->iommu_group); |
| 1054 | if (ret) |
| 1055 | goto unwind; |
| 1056 | } |
| 1057 | |
| 1058 | return ret; |
| 1059 | |
| 1060 | unwind: |
| 1061 | list_for_each_entry_continue_reverse(group, &container->group_list, |
| 1062 | container_next) { |
| 1063 | driver->ops->detach_group(data, group->iommu_group); |
| 1064 | } |
| 1065 | |
| 1066 | return ret; |
| 1067 | } |
| 1068 | |
/*
 * VFIO_SET_IOMMU handler.  Walks the registered iommu drivers looking
 * for one that claims support for the extension magic in @arg, opens
 * it, and attaches all of the container's groups to it.  A container
 * must hold at least one group before an iommu can be set, and an
 * iommu can only be set once; it is cleared implicitly when the last
 * group is removed (see __vfio_group_unset_container()).
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users. Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources. There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

#ifdef CONFIG_VFIO_NOIOMMU
		/*
		 * Only noiommu containers can use vfio-noiommu and noiommu
		 * containers can only use vfio-noiommu.
		 */
		if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
			continue;
#endif

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them. We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		/* Every group must attach, or we move on to the next driver */
		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
| 1142 | |
/* ioctl handler for the container fd, /dev/vfio/vfio */
static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		/* Sample driver and data under the read lock; the driver
		 * may not be set yet, in which case -EINVAL stands. */
		down_read(&container->group_lock);

		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);

		up_read(&container->group_lock);
	}

	return ret;
}
| 1178 | |
#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	/* Convert the compat pointer argument and reuse the native handler */
	return vfio_fops_unl_ioctl(filep, cmd,
				   (unsigned long)compat_ptr(arg));
}
#endif	/* CONFIG_COMPAT */
| 1187 | |
| 1188 | static int vfio_fops_open(struct inode *inode, struct file *filep) |
| 1189 | { |
| 1190 | struct vfio_container *container; |
| 1191 | |
| 1192 | container = kzalloc(sizeof(*container), GFP_KERNEL); |
| 1193 | if (!container) |
| 1194 | return -ENOMEM; |
| 1195 | |
| 1196 | INIT_LIST_HEAD(&container->group_list); |
Alex Williamson | 9587f44 | 2013-04-25 16:12:38 -0600 | [diff] [blame] | 1197 | init_rwsem(&container->group_lock); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1198 | kref_init(&container->kref); |
| 1199 | |
| 1200 | filep->private_data = container; |
| 1201 | |
| 1202 | return 0; |
| 1203 | } |
| 1204 | |
| 1205 | static int vfio_fops_release(struct inode *inode, struct file *filep) |
| 1206 | { |
| 1207 | struct vfio_container *container = filep->private_data; |
| 1208 | |
| 1209 | filep->private_data = NULL; |
| 1210 | |
| 1211 | vfio_container_put(container); |
| 1212 | |
| 1213 | return 0; |
| 1214 | } |
| 1215 | |
| 1216 | /* |
| 1217 | * Once an iommu driver is set, we optionally pass read/write/mmap |
| 1218 | * on to the driver, allowing management interfaces beyond ioctl. |
| 1219 | */ |
| 1220 | static ssize_t vfio_fops_read(struct file *filep, char __user *buf, |
| 1221 | size_t count, loff_t *ppos) |
| 1222 | { |
| 1223 | struct vfio_container *container = filep->private_data; |
Alex Williamson | 0b43c08 | 2013-04-29 08:41:36 -0600 | [diff] [blame] | 1224 | struct vfio_iommu_driver *driver; |
| 1225 | ssize_t ret = -EINVAL; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1226 | |
Alex Williamson | 0b43c08 | 2013-04-29 08:41:36 -0600 | [diff] [blame] | 1227 | down_read(&container->group_lock); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1228 | |
Alex Williamson | 0b43c08 | 2013-04-29 08:41:36 -0600 | [diff] [blame] | 1229 | driver = container->iommu_driver; |
| 1230 | if (likely(driver && driver->ops->read)) |
| 1231 | ret = driver->ops->read(container->iommu_data, |
| 1232 | buf, count, ppos); |
| 1233 | |
| 1234 | up_read(&container->group_lock); |
| 1235 | |
| 1236 | return ret; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1237 | } |
| 1238 | |
| 1239 | static ssize_t vfio_fops_write(struct file *filep, const char __user *buf, |
| 1240 | size_t count, loff_t *ppos) |
| 1241 | { |
| 1242 | struct vfio_container *container = filep->private_data; |
Alex Williamson | 0b43c08 | 2013-04-29 08:41:36 -0600 | [diff] [blame] | 1243 | struct vfio_iommu_driver *driver; |
| 1244 | ssize_t ret = -EINVAL; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1245 | |
Alex Williamson | 0b43c08 | 2013-04-29 08:41:36 -0600 | [diff] [blame] | 1246 | down_read(&container->group_lock); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1247 | |
Alex Williamson | 0b43c08 | 2013-04-29 08:41:36 -0600 | [diff] [blame] | 1248 | driver = container->iommu_driver; |
| 1249 | if (likely(driver && driver->ops->write)) |
| 1250 | ret = driver->ops->write(container->iommu_data, |
| 1251 | buf, count, ppos); |
| 1252 | |
| 1253 | up_read(&container->group_lock); |
| 1254 | |
| 1255 | return ret; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1256 | } |
| 1257 | |
| 1258 | static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma) |
| 1259 | { |
| 1260 | struct vfio_container *container = filep->private_data; |
Alex Williamson | 0b43c08 | 2013-04-29 08:41:36 -0600 | [diff] [blame] | 1261 | struct vfio_iommu_driver *driver; |
| 1262 | int ret = -EINVAL; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1263 | |
Alex Williamson | 0b43c08 | 2013-04-29 08:41:36 -0600 | [diff] [blame] | 1264 | down_read(&container->group_lock); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1265 | |
Alex Williamson | 0b43c08 | 2013-04-29 08:41:36 -0600 | [diff] [blame] | 1266 | driver = container->iommu_driver; |
| 1267 | if (likely(driver && driver->ops->mmap)) |
| 1268 | ret = driver->ops->mmap(container->iommu_data, vma); |
| 1269 | |
| 1270 | up_read(&container->group_lock); |
| 1271 | |
| 1272 | return ret; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1273 | } |
| 1274 | |
/* File operations for the container chardev, /dev/vfio/vfio */
static const struct file_operations vfio_fops = {
	.owner = THIS_MODULE,
	.open = vfio_fops_open,
	.release = vfio_fops_release,
	.read = vfio_fops_read,
	.write = vfio_fops_write,
	.unlocked_ioctl = vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = vfio_fops_compat_ioctl,
#endif
	.mmap = vfio_fops_mmap,
};
| 1287 | |
| 1288 | /** |
| 1289 | * VFIO Group fd, /dev/vfio/$GROUP |
| 1290 | */ |
/*
 * Remove @group from its container: detach it from the iommu driver,
 * unlink it from the container's group list, and if it was the last
 * group, release the iommu driver (deprivileging the container).
 * Drops the container reference taken in vfio_group_set_container().
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}
| 1318 | |
| 1319 | /* |
| 1320 | * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or |
| 1321 | * if there was no container to unset. Since the ioctl is called on |
| 1322 | * the group, we know that still exists, therefore the only valid |
| 1323 | * transition here is 1->0. |
| 1324 | */ |
| 1325 | static int vfio_group_unset_container(struct vfio_group *group) |
| 1326 | { |
| 1327 | int users = atomic_cmpxchg(&group->container_users, 1, 0); |
| 1328 | |
| 1329 | if (!users) |
| 1330 | return -EINVAL; |
| 1331 | if (users != 1) |
| 1332 | return -EBUSY; |
| 1333 | |
| 1334 | __vfio_group_unset_container(group); |
| 1335 | |
| 1336 | return 0; |
| 1337 | } |
| 1338 | |
| 1339 | /* |
| 1340 | * When removing container users, anything that removes the last user |
| 1341 | * implicitly removes the group from the container. That is, if the |
| 1342 | * group file descriptor is closed, as well as any device file descriptors, |
| 1343 | * the group is free. |
| 1344 | */ |
| 1345 | static void vfio_group_try_dissolve_container(struct vfio_group *group) |
| 1346 | { |
| 1347 | if (0 == atomic_dec_if_positive(&group->container_users)) |
| 1348 | __vfio_group_unset_container(group); |
| 1349 | } |
| 1350 | |
/*
 * VFIO_GROUP_SET_CONTAINER: attach @group to the container behind
 * @container_fd.  On success the group holds a container reference
 * and is counted as a container user.
 */
static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	/* A group may only be attached to one container at a time */
	if (atomic_read(&group->container_users))
		return -EINVAL;

	/* noiommu groups are restricted to sufficiently privileged users */
	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
		ret = -EPERM;
	goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		/* Container already has an iommu set; attach the group now */
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	container->noiommu = group->noiommu;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}
| 1407 | |
| 1408 | static bool vfio_group_viable(struct vfio_group *group) |
| 1409 | { |
| 1410 | return (iommu_group_for_each_dev(group->iommu_group, |
| 1411 | group, vfio_dev_viable) == 0); |
| 1412 | } |
| 1413 | |
| 1414 | static const struct file_operations vfio_device_fops; |
| 1415 | |
| 1416 | static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) |
| 1417 | { |
| 1418 | struct vfio_device *device; |
| 1419 | struct file *filep; |
Alex Williamson | 4bc94d5 | 2015-07-24 15:14:04 -0600 | [diff] [blame] | 1420 | int ret; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1421 | |
| 1422 | if (0 == atomic_read(&group->container_users) || |
| 1423 | !group->container->iommu_driver || !vfio_group_viable(group)) |
| 1424 | return -EINVAL; |
| 1425 | |
Alex Williamson | 03a76b6 | 2015-12-21 15:13:33 -0700 | [diff] [blame] | 1426 | if (group->noiommu && !capable(CAP_SYS_RAWIO)) |
| 1427 | return -EPERM; |
| 1428 | |
Alex Williamson | 4bc94d5 | 2015-07-24 15:14:04 -0600 | [diff] [blame] | 1429 | device = vfio_device_get_from_name(group, buf); |
| 1430 | if (!device) |
| 1431 | return -ENODEV; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1432 | |
Alex Williamson | 4bc94d5 | 2015-07-24 15:14:04 -0600 | [diff] [blame] | 1433 | ret = device->ops->open(device->device_data); |
| 1434 | if (ret) { |
| 1435 | vfio_device_put(device); |
| 1436 | return ret; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1437 | } |
Alex Williamson | 4bc94d5 | 2015-07-24 15:14:04 -0600 | [diff] [blame] | 1438 | |
| 1439 | /* |
| 1440 | * We can't use anon_inode_getfd() because we need to modify |
| 1441 | * the f_mode flags directly to allow more than just ioctls |
| 1442 | */ |
| 1443 | ret = get_unused_fd_flags(O_CLOEXEC); |
| 1444 | if (ret < 0) { |
| 1445 | device->ops->release(device->device_data); |
| 1446 | vfio_device_put(device); |
| 1447 | return ret; |
| 1448 | } |
| 1449 | |
| 1450 | filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, |
| 1451 | device, O_RDWR); |
| 1452 | if (IS_ERR(filep)) { |
| 1453 | put_unused_fd(ret); |
| 1454 | ret = PTR_ERR(filep); |
| 1455 | device->ops->release(device->device_data); |
| 1456 | vfio_device_put(device); |
| 1457 | return ret; |
| 1458 | } |
| 1459 | |
| 1460 | /* |
| 1461 | * TODO: add an anon_inode interface to do this. |
| 1462 | * Appears to be missing by lack of need rather than |
| 1463 | * explicitly prevented. Now there's need. |
| 1464 | */ |
| 1465 | filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); |
| 1466 | |
| 1467 | atomic_inc(&group->container_users); |
| 1468 | |
| 1469 | fd_install(ret, filep); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1470 | |
Alex Williamson | 03a76b6 | 2015-12-21 15:13:33 -0700 | [diff] [blame] | 1471 | if (group->noiommu) |
| 1472 | dev_warn(device->dev, "vfio-noiommu device opened by user " |
| 1473 | "(%s:%d)\n", current->comm, task_pid_nr(current)); |
| 1474 | |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1475 | return ret; |
| 1476 | } |
| 1477 | |
/* ioctl handler for group fds: status query, container binding, device fds */
static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;	/* unknown commands fall through unchanged */

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		/* Only copy the fields we know about; allows later extension */
		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		/* Device name string from userspace, bounded at PAGE_SIZE */
		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}
| 1544 | |
| 1545 | #ifdef CONFIG_COMPAT |
static long vfio_group_fops_compat_ioctl(struct file *filep,
					 unsigned int cmd, unsigned long arg)
{
	/* 32-bit userspace: widen the pointer argument, then share the path */
	return vfio_group_fops_unl_ioctl(filep, cmd,
					 (unsigned long)compat_ptr(arg));
}
| 1552 | #endif /* CONFIG_COMPAT */ |
| 1553 | |
/*
 * Open /dev/vfio/$GROUP.  Only one open of a group is permitted at a
 * time, and not while container state from a previous open lingers.
 */
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	/* no-iommu groups are unsafe; require raw I/O privilege */
	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
		vfio_group_put(group);
		return -EPERM;
	}

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		/* Roll back the opened claim taken above */
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	filep->private_data = group;

	return 0;
}
| 1586 | |
/* Release for /dev/vfio/$GROUP: drop container user, free the open slot */
static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	/* This open's implicit container user reference goes away */
	vfio_group_try_dissolve_container(group);

	/* Pairs with the atomic_cmpxchg() in vfio_group_fops_open() */
	atomic_dec(&group->opened);

	vfio_group_put(group);

	return 0;
}
| 1601 | |
/* File operations for /dev/vfio/$GROUP character devices */
static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_group_fops_compat_ioctl,
#endif
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
| 1611 | |
| 1612 | /** |
| 1613 | * VFIO Device fd |
| 1614 | */ |
/* Release for a device fd: driver release first, then drop container user */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	/* Drop the container user taken in vfio_group_get_device_fd() */
	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}
| 1627 | |
| 1628 | static long vfio_device_fops_unl_ioctl(struct file *filep, |
| 1629 | unsigned int cmd, unsigned long arg) |
| 1630 | { |
| 1631 | struct vfio_device *device = filep->private_data; |
| 1632 | |
| 1633 | if (unlikely(!device->ops->ioctl)) |
| 1634 | return -EINVAL; |
| 1635 | |
| 1636 | return device->ops->ioctl(device->device_data, cmd, arg); |
| 1637 | } |
| 1638 | |
| 1639 | static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, |
| 1640 | size_t count, loff_t *ppos) |
| 1641 | { |
| 1642 | struct vfio_device *device = filep->private_data; |
| 1643 | |
| 1644 | if (unlikely(!device->ops->read)) |
| 1645 | return -EINVAL; |
| 1646 | |
| 1647 | return device->ops->read(device->device_data, buf, count, ppos); |
| 1648 | } |
| 1649 | |
| 1650 | static ssize_t vfio_device_fops_write(struct file *filep, |
| 1651 | const char __user *buf, |
| 1652 | size_t count, loff_t *ppos) |
| 1653 | { |
| 1654 | struct vfio_device *device = filep->private_data; |
| 1655 | |
| 1656 | if (unlikely(!device->ops->write)) |
| 1657 | return -EINVAL; |
| 1658 | |
| 1659 | return device->ops->write(device->device_data, buf, count, ppos); |
| 1660 | } |
| 1661 | |
| 1662 | static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) |
| 1663 | { |
| 1664 | struct vfio_device *device = filep->private_data; |
| 1665 | |
| 1666 | if (unlikely(!device->ops->mmap)) |
| 1667 | return -EINVAL; |
| 1668 | |
| 1669 | return device->ops->mmap(device->device_data, vma); |
| 1670 | } |
| 1671 | |
| 1672 | #ifdef CONFIG_COMPAT |
static long vfio_device_fops_compat_ioctl(struct file *filep,
					  unsigned int cmd, unsigned long arg)
{
	/* 32-bit userspace: widen the pointer argument, then share the path */
	return vfio_device_fops_unl_ioctl(filep, cmd,
					  (unsigned long)compat_ptr(arg));
}
| 1679 | #endif /* CONFIG_COMPAT */ |
| 1680 | |
/* File operations for device fds handed out by VFIO_GROUP_GET_DEVICE_FD */
static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_device_fops_compat_ioctl,
#endif
	.mmap		= vfio_device_fops_mmap,
};
| 1692 | |
| 1693 | /** |
Alexey Kardashevskiy | 6cdd978 | 2013-08-05 10:52:36 -0600 | [diff] [blame] | 1694 | * External user API, exported by symbols to be linked dynamically. |
| 1695 | * |
| 1696 | * The protocol includes: |
| 1697 | * 1. do normal VFIO init operation: |
| 1698 | * - opening a new container; |
| 1699 | * - attaching group(s) to it; |
| 1700 | * - setting an IOMMU driver for a container. |
| 1701 | * When IOMMU is set for a container, all groups in it are |
| 1702 | * considered ready to use by an external user. |
| 1703 | * |
| 1704 | * 2. User space passes a group fd to an external user. |
| 1705 | * The external user calls vfio_group_get_external_user() |
| 1706 | * to verify that: |
| 1707 | * - the group is initialized; |
| 1708 | * - IOMMU is set for it. |
| 1709 | * If both checks passed, vfio_group_get_external_user() |
| 1710 | * increments the container user counter to prevent |
| 1711 | * the VFIO group from disposal before KVM exits. |
| 1712 | * |
| 1713 | * 3. The external user calls vfio_external_user_iommu_id() |
| 1714 | * to know an IOMMU ID. |
| 1715 | * |
| 1716 | * 4. When the external KVM finishes, it calls |
| 1717 | * vfio_group_put_external_user() to release the VFIO group. |
| 1718 | * This call decrements the container user counter. |
| 1719 | */ |
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	/* Refuse anything that is not a VFIO group fd */
	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	/* Only succeed if the group is already attached to a container */
	if (!atomic_inc_not_zero(&group->container_users))
		return ERR_PTR(-EINVAL);

	/* External users may not drive unisolated no-iommu groups */
	if (group->noiommu) {
		atomic_dec(&group->container_users);
		return ERR_PTR(-EPERM);
	}

	/* Require an IOMMU backend on the container and a viable group */
	if (!group->container->iommu_driver ||
			!vfio_group_viable(group)) {
		atomic_dec(&group->container_users);
		return ERR_PTR(-EINVAL);
	}

	vfio_group_get(group);

	return group;
}
| 1745 | EXPORT_SYMBOL_GPL(vfio_group_get_external_user); |
| 1746 | |
void vfio_group_put_external_user(struct vfio_group *group)
{
	/* Drop the container user taken at get time, then the group ref */
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
| 1752 | EXPORT_SYMBOL_GPL(vfio_group_put_external_user); |
| 1753 | |
/* Expose the backing IOMMU group ID to external users (protocol step 3) */
int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
| 1758 | EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id); |
| 1759 | |
Alex Williamson | 88d7ab8 | 2014-02-26 11:38:39 -0700 | [diff] [blame] | 1760 | long vfio_external_check_extension(struct vfio_group *group, unsigned long arg) |
| 1761 | { |
| 1762 | return vfio_ioctl_check_extension(group->container, arg); |
| 1763 | } |
| 1764 | EXPORT_SYMBOL_GPL(vfio_external_check_extension); |
| 1765 | |
Alexey Kardashevskiy | 6cdd978 | 2013-08-05 10:52:36 -0600 | [diff] [blame] | 1766 | /** |
Alex Williamson | d7a8d5e | 2016-02-22 16:02:33 -0700 | [diff] [blame] | 1767 | * Sub-module support |
| 1768 | */ |
| 1769 | /* |
| 1770 | * Helper for managing a buffer of info chain capabilities, allocate or |
| 1771 | * reallocate a buffer with additional @size, filling in @id and @version |
| 1772 | * of the capability. A pointer to the new capability is returned. |
| 1773 | * |
| 1774 | * NB. The chain is based at the head of the buffer, so new entries are |
| 1775 | * added to the tail, vfio_info_cap_shift() should be called to fixup the |
| 1776 | * next offsets prior to copying to the user buffer. |
| 1777 | */ |
| 1778 | struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, |
| 1779 | size_t size, u16 id, u16 version) |
| 1780 | { |
| 1781 | void *buf; |
| 1782 | struct vfio_info_cap_header *header, *tmp; |
| 1783 | |
| 1784 | buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); |
| 1785 | if (!buf) { |
| 1786 | kfree(caps->buf); |
| 1787 | caps->size = 0; |
| 1788 | return ERR_PTR(-ENOMEM); |
| 1789 | } |
| 1790 | |
| 1791 | caps->buf = buf; |
| 1792 | header = buf + caps->size; |
| 1793 | |
| 1794 | /* Eventually copied to user buffer, zero */ |
| 1795 | memset(header, 0, size); |
| 1796 | |
| 1797 | header->id = id; |
| 1798 | header->version = version; |
| 1799 | |
| 1800 | /* Add to the end of the capability chain */ |
| 1801 | for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next) |
| 1802 | ; /* nothing */ |
| 1803 | |
| 1804 | tmp->next = caps->size; |
| 1805 | caps->size += size; |
| 1806 | |
| 1807 | return header; |
| 1808 | } |
| 1809 | EXPORT_SYMBOL_GPL(vfio_info_cap_add); |
| 1810 | |
/*
 * Rebase every next offset in the chain by @offset, e.g. so offsets
 * become relative to the start of the user-visible info structure.
 */
void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;

	/*
	 * The loop body has already added @offset to tmp->next, so the
	 * increment subtracts it back off to keep walking the chain with
	 * buffer-relative offsets.  The terminating next == 0 is never
	 * shifted, preserving the end-of-chain marker.
	 */
	for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset)
		tmp->next += offset;
}
| 1818 | EXPORT_SYMBOL_GPL(vfio_info_cap_shift); |
| 1819 | |
| 1820 | /** |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1821 | * Module/class support |
| 1822 | */ |
| 1823 | static char *vfio_devnode(struct device *dev, umode_t *mode) |
| 1824 | { |
| 1825 | return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); |
| 1826 | } |
| 1827 | |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 1828 | static struct miscdevice vfio_dev = { |
| 1829 | .minor = VFIO_MINOR, |
| 1830 | .name = "vfio", |
| 1831 | .fops = &vfio_fops, |
| 1832 | .nodename = "vfio/vfio", |
| 1833 | .mode = S_IRUGO | S_IWUGO, |
| 1834 | }; |
| 1835 | |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1836 | static int __init vfio_init(void) |
| 1837 | { |
| 1838 | int ret; |
| 1839 | |
| 1840 | idr_init(&vfio.group_idr); |
| 1841 | mutex_init(&vfio.group_lock); |
| 1842 | mutex_init(&vfio.iommu_drivers_lock); |
| 1843 | INIT_LIST_HEAD(&vfio.group_list); |
| 1844 | INIT_LIST_HEAD(&vfio.iommu_drivers_list); |
| 1845 | init_waitqueue_head(&vfio.release_q); |
| 1846 | |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 1847 | ret = misc_register(&vfio_dev); |
| 1848 | if (ret) { |
| 1849 | pr_err("vfio: misc device register failed\n"); |
| 1850 | return ret; |
| 1851 | } |
| 1852 | |
| 1853 | /* /dev/vfio/$GROUP */ |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1854 | vfio.class = class_create(THIS_MODULE, "vfio"); |
| 1855 | if (IS_ERR(vfio.class)) { |
| 1856 | ret = PTR_ERR(vfio.class); |
| 1857 | goto err_class; |
| 1858 | } |
| 1859 | |
| 1860 | vfio.class->devnode = vfio_devnode; |
| 1861 | |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 1862 | ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio"); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1863 | if (ret) |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 1864 | goto err_alloc_chrdev; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1865 | |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1866 | cdev_init(&vfio.group_cdev, &vfio_group_fops); |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 1867 | ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1868 | if (ret) |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 1869 | goto err_cdev_add; |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1870 | |
| 1871 | pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); |
| 1872 | |
Alex Williamson | 73fa0d1 | 2012-07-31 08:16:23 -0600 | [diff] [blame] | 1873 | /* |
| 1874 | * Attempt to load known iommu-drivers. This gives us a working |
| 1875 | * environment without the user needing to explicitly load iommu |
| 1876 | * drivers. |
| 1877 | */ |
| 1878 | request_module_nowait("vfio_iommu_type1"); |
Alexey Kardashevskiy | 5ffd229 | 2013-05-21 13:33:10 +1000 | [diff] [blame] | 1879 | request_module_nowait("vfio_iommu_spapr_tce"); |
Alex Williamson | 73fa0d1 | 2012-07-31 08:16:23 -0600 | [diff] [blame] | 1880 | |
Alex Williamson | 03a76b6 | 2015-12-21 15:13:33 -0700 | [diff] [blame] | 1881 | #ifdef CONFIG_VFIO_NOIOMMU |
| 1882 | vfio_register_iommu_driver(&vfio_noiommu_ops); |
| 1883 | #endif |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1884 | return 0; |
| 1885 | |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 1886 | err_cdev_add: |
| 1887 | unregister_chrdev_region(vfio.group_devt, MINORMASK); |
| 1888 | err_alloc_chrdev: |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1889 | class_destroy(vfio.class); |
| 1890 | vfio.class = NULL; |
| 1891 | err_class: |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 1892 | misc_deregister(&vfio_dev); |
Alex Williamson | cba3345 | 2012-07-31 08:16:22 -0600 | [diff] [blame] | 1893 | return ret; |
| 1894 | } |
| 1895 | |
static void __exit vfio_cleanup(void)
{
	/* All groups should have been released before module unload */
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	/* Tear down in reverse order of vfio_init() */
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}
| 1910 | |
| 1911 | module_init(vfio_init); |
| 1912 | module_exit(vfio_cleanup); |
| 1913 | |
| 1914 | MODULE_VERSION(DRIVER_VERSION); |
| 1915 | MODULE_LICENSE("GPL v2"); |
| 1916 | MODULE_AUTHOR(DRIVER_AUTHOR); |
| 1917 | MODULE_DESCRIPTION(DRIVER_DESC); |
Alex Williamson | d109990 | 2013-12-19 10:17:13 -0700 | [diff] [blame] | 1918 | MODULE_ALIAS_MISCDEV(VFIO_MINOR); |
| 1919 | MODULE_ALIAS("devname:vfio/vfio"); |