| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 1 | /* | 
 | 2 |  * kvm eventfd support - use eventfd objects to signal various KVM events | 
 | 3 |  * | 
 | 4 |  * Copyright 2009 Novell.  All Rights Reserved. | 
| Avi Kivity | 221d059 | 2010-05-23 18:37:00 +0300 | [diff] [blame] | 5 |  * Copyright 2010 Red Hat, Inc. and/or its affiliates. | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 6 |  * | 
 | 7 |  * Author: | 
 | 8 |  *	Gregory Haskins <ghaskins@novell.com> | 
 | 9 |  * | 
 | 10 |  * This file is free software; you can redistribute it and/or modify | 
 | 11 |  * it under the terms of version 2 of the GNU General Public License | 
 | 12 |  * as published by the Free Software Foundation. | 
 | 13 |  * | 
 | 14 |  * This program is distributed in the hope that it will be useful, | 
 | 15 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 16 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the | 
 | 17 |  * GNU General Public License for more details. | 
 | 18 |  * | 
 | 19 |  * You should have received a copy of the GNU General Public License | 
 | 20 |  * along with this program; if not, write to the Free Software Foundation, | 
 | 21 |  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | 
 | 22 |  */ | 
 | 23 |  | 
 | 24 | #include <linux/kvm_host.h> | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 25 | #include <linux/kvm.h> | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 26 | #include <linux/workqueue.h> | 
 | 27 | #include <linux/syscalls.h> | 
 | 28 | #include <linux/wait.h> | 
 | 29 | #include <linux/poll.h> | 
 | 30 | #include <linux/file.h> | 
 | 31 | #include <linux/list.h> | 
 | 32 | #include <linux/eventfd.h> | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 33 | #include <linux/kernel.h> | 
| Tejun Heo | 5a0e3ad | 2010-03-24 17:04:11 +0900 | [diff] [blame] | 34 | #include <linux/slab.h> | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 35 |  | 
 | 36 | #include "iodev.h" | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 37 |  | 
 | 38 | /* | 
 | 39 |  * -------------------------------------------------------------------- | 
 | 40 |  * irqfd: Allows an fd to be used to inject an interrupt to the guest | 
 | 41 |  * | 
 | 42 |  * Credit goes to Avi Kivity for the original idea. | 
 | 43 |  * -------------------------------------------------------------------- | 
 | 44 |  */ | 
 | 45 |  | 
 | 46 | struct _irqfd { | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 47 | 	/* Used for MSI fast-path */ | 
 | 48 | 	struct kvm *kvm; | 
 | 49 | 	wait_queue_t wait; | 
 | 50 | 	/* Update side is protected by irqfds.lock */ | 
 | 51 | 	struct kvm_kernel_irq_routing_entry __rcu *irq_entry; | 
 | 52 | 	/* Used for level IRQ fast-path */ | 
 | 53 | 	int gsi; | 
 | 54 | 	struct work_struct inject; | 
 | 55 | 	/* Used for setup/shutdown */ | 
 | 56 | 	struct eventfd_ctx *eventfd; | 
 | 57 | 	struct list_head list; | 
 | 58 | 	poll_table pt; | 
 | 59 | 	struct work_struct shutdown; | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 60 | }; | 
 | 61 |  | 
 | 62 | static struct workqueue_struct *irqfd_cleanup_wq; | 
 | 63 |  | 
 | 64 | static void | 
 | 65 | irqfd_inject(struct work_struct *work) | 
 | 66 | { | 
 | 67 | 	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 
 | 68 | 	struct kvm *kvm = irqfd->kvm; | 
 | 69 |  | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 70 | 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | 
 | 71 | 	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 72 | } | 
 | 73 |  | 
 | 74 | /* | 
 | 75 |  * Race-free decouple logic (ordering is critical) | 
 | 76 |  */ | 
 | 77 | static void | 
 | 78 | irqfd_shutdown(struct work_struct *work) | 
 | 79 | { | 
 | 80 | 	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); | 
| Michael S. Tsirkin | b6a114d | 2010-01-13 19:12:30 +0200 | [diff] [blame] | 81 | 	u64 cnt; | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 82 |  | 
 | 83 | 	/* | 
 | 84 | 	 * Synchronize with the wait-queue and unhook ourselves to prevent | 
 | 85 | 	 * further events. | 
 | 86 | 	 */ | 
| Michael S. Tsirkin | b6a114d | 2010-01-13 19:12:30 +0200 | [diff] [blame] | 87 | 	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt); | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 88 |  | 
 | 89 | 	/* | 
 | 90 | 	 * We know no new events will be scheduled at this point, so block | 
 | 91 | 	 * until all previously outstanding events have completed | 
 | 92 | 	 */ | 
| Michael S. Tsirkin | 9e02fb9 | 2011-03-17 10:53:33 +0200 | [diff] [blame] | 93 | 	flush_work_sync(&irqfd->inject); | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 94 |  | 
 | 95 | 	/* | 
 | 96 | 	 * It is now safe to release the object's resources | 
 | 97 | 	 */ | 
 | 98 | 	eventfd_ctx_put(irqfd->eventfd); | 
 | 99 | 	kfree(irqfd); | 
 | 100 | } | 
 | 101 |  | 
 | 102 |  | 
 | 103 | /* assumes kvm->irqfds.lock is held */ | 
 | 104 | static bool | 
 | 105 | irqfd_is_active(struct _irqfd *irqfd) | 
 | 106 | { | 
 | 107 | 	return list_empty(&irqfd->list) ? false : true; | 
 | 108 | } | 
 | 109 |  | 
 | 110 | /* | 
 | 111 |  * Mark the irqfd as inactive and schedule it for removal | 
 | 112 |  * | 
 | 113 |  * assumes kvm->irqfds.lock is held | 
 | 114 |  */ | 
 | 115 | static void | 
 | 116 | irqfd_deactivate(struct _irqfd *irqfd) | 
 | 117 | { | 
 | 118 | 	BUG_ON(!irqfd_is_active(irqfd)); | 
 | 119 |  | 
 | 120 | 	list_del_init(&irqfd->list); | 
 | 121 |  | 
 | 122 | 	queue_work(irqfd_cleanup_wq, &irqfd->shutdown); | 
 | 123 | } | 
 | 124 |  | 
 | 125 | /* | 
 | 126 |  * Called with wqh->lock held and interrupts disabled | 
 | 127 |  */ | 
 | 128 | static int | 
 | 129 | irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | 
 | 130 | { | 
 | 131 | 	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); | 
 | 132 | 	unsigned long flags = (unsigned long)key; | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 133 | 	struct kvm_kernel_irq_routing_entry *irq; | 
 | 134 | 	struct kvm *kvm = irqfd->kvm; | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 135 |  | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 136 | 	if (flags & POLLIN) { | 
 | 137 | 		rcu_read_lock(); | 
 | 138 | 		irq = rcu_dereference(irqfd->irq_entry); | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 139 | 		/* An event has been signaled, inject an interrupt */ | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 140 | 		if (irq) | 
 | 141 | 			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); | 
 | 142 | 		else | 
 | 143 | 			schedule_work(&irqfd->inject); | 
 | 144 | 		rcu_read_unlock(); | 
 | 145 | 	} | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 146 |  | 
 | 147 | 	if (flags & POLLHUP) { | 
 | 148 | 		/* The eventfd is closing, detach from KVM */ | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 149 | 		unsigned long flags; | 
 | 150 |  | 
 | 151 | 		spin_lock_irqsave(&kvm->irqfds.lock, flags); | 
 | 152 |  | 
 | 153 | 		/* | 
 | 154 | 		 * We must check if someone deactivated the irqfd before | 
 | 155 | 		 * we could acquire the irqfds.lock since the item is | 
 | 156 | 		 * deactivated from the KVM side before it is unhooked from | 
 | 157 | 		 * the wait-queue.  If it is already deactivated, we can | 
 | 158 | 		 * simply return knowing the other side will cleanup for us. | 
 | 159 | 		 * We cannot race against the irqfd going away since the | 
 | 160 | 		 * other side is required to acquire wqh->lock, which we hold | 
 | 161 | 		 */ | 
 | 162 | 		if (irqfd_is_active(irqfd)) | 
 | 163 | 			irqfd_deactivate(irqfd); | 
 | 164 |  | 
 | 165 | 		spin_unlock_irqrestore(&kvm->irqfds.lock, flags); | 
 | 166 | 	} | 
 | 167 |  | 
 | 168 | 	return 0; | 
 | 169 | } | 
 | 170 |  | 
 | 171 | static void | 
 | 172 | irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, | 
 | 173 | 			poll_table *pt) | 
 | 174 | { | 
 | 175 | 	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 176 | 	add_wait_queue(wqh, &irqfd->wait); | 
 | 177 | } | 
 | 178 |  | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 179 | /* Must be called under irqfds.lock */ | 
 | 180 | static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, | 
 | 181 | 			 struct kvm_irq_routing_table *irq_rt) | 
 | 182 | { | 
 | 183 | 	struct kvm_kernel_irq_routing_entry *e; | 
 | 184 | 	struct hlist_node *n; | 
 | 185 |  | 
 | 186 | 	if (irqfd->gsi >= irq_rt->nr_rt_entries) { | 
 | 187 | 		rcu_assign_pointer(irqfd->irq_entry, NULL); | 
 | 188 | 		return; | 
 | 189 | 	} | 
 | 190 |  | 
 | 191 | 	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) { | 
 | 192 | 		/* Only fast-path MSI. */ | 
 | 193 | 		if (e->type == KVM_IRQ_ROUTING_MSI) | 
 | 194 | 			rcu_assign_pointer(irqfd->irq_entry, e); | 
 | 195 | 		else | 
 | 196 | 			rcu_assign_pointer(irqfd->irq_entry, NULL); | 
 | 197 | 	} | 
 | 198 | } | 
 | 199 |  | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 200 | static int | 
 | 201 | kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) | 
 | 202 | { | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 203 | 	struct kvm_irq_routing_table *irq_rt; | 
| Michael S. Tsirkin | f1d1c30 | 2010-01-13 18:58:09 +0200 | [diff] [blame] | 204 | 	struct _irqfd *irqfd, *tmp; | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 205 | 	struct file *file = NULL; | 
 | 206 | 	struct eventfd_ctx *eventfd = NULL; | 
 | 207 | 	int ret; | 
 | 208 | 	unsigned int events; | 
 | 209 |  | 
 | 210 | 	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); | 
 | 211 | 	if (!irqfd) | 
 | 212 | 		return -ENOMEM; | 
 | 213 |  | 
 | 214 | 	irqfd->kvm = kvm; | 
 | 215 | 	irqfd->gsi = gsi; | 
 | 216 | 	INIT_LIST_HEAD(&irqfd->list); | 
 | 217 | 	INIT_WORK(&irqfd->inject, irqfd_inject); | 
 | 218 | 	INIT_WORK(&irqfd->shutdown, irqfd_shutdown); | 
 | 219 |  | 
 | 220 | 	file = eventfd_fget(fd); | 
 | 221 | 	if (IS_ERR(file)) { | 
 | 222 | 		ret = PTR_ERR(file); | 
 | 223 | 		goto fail; | 
 | 224 | 	} | 
 | 225 |  | 
 | 226 | 	eventfd = eventfd_ctx_fileget(file); | 
 | 227 | 	if (IS_ERR(eventfd)) { | 
 | 228 | 		ret = PTR_ERR(eventfd); | 
 | 229 | 		goto fail; | 
 | 230 | 	} | 
 | 231 |  | 
 | 232 | 	irqfd->eventfd = eventfd; | 
 | 233 |  | 
 | 234 | 	/* | 
 | 235 | 	 * Install our own custom wake-up handling so we are notified via | 
 | 236 | 	 * a callback whenever someone signals the underlying eventfd | 
 | 237 | 	 */ | 
 | 238 | 	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); | 
 | 239 | 	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc); | 
 | 240 |  | 
| Michael S. Tsirkin | f1d1c30 | 2010-01-13 18:58:09 +0200 | [diff] [blame] | 241 | 	spin_lock_irq(&kvm->irqfds.lock); | 
 | 242 |  | 
 | 243 | 	ret = 0; | 
 | 244 | 	list_for_each_entry(tmp, &kvm->irqfds.items, list) { | 
 | 245 | 		if (irqfd->eventfd != tmp->eventfd) | 
 | 246 | 			continue; | 
 | 247 | 		/* This fd is used for another irq already. */ | 
 | 248 | 		ret = -EBUSY; | 
 | 249 | 		spin_unlock_irq(&kvm->irqfds.lock); | 
 | 250 | 		goto fail; | 
 | 251 | 	} | 
 | 252 |  | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 253 | 	irq_rt = rcu_dereference_protected(kvm->irq_routing, | 
 | 254 | 					   lockdep_is_held(&kvm->irqfds.lock)); | 
 | 255 | 	irqfd_update(kvm, irqfd, irq_rt); | 
 | 256 |  | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 257 | 	events = file->f_op->poll(file, &irqfd->pt); | 
 | 258 |  | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 259 | 	list_add_tail(&irqfd->list, &kvm->irqfds.items); | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 260 |  | 
 | 261 | 	/* | 
 | 262 | 	 * Check if there was an event already pending on the eventfd | 
 | 263 | 	 * before we registered, and trigger it as if we didn't miss it. | 
 | 264 | 	 */ | 
 | 265 | 	if (events & POLLIN) | 
 | 266 | 		schedule_work(&irqfd->inject); | 
 | 267 |  | 
| Michael S. Tsirkin | 6bbfb26 | 2010-09-19 19:02:31 +0200 | [diff] [blame] | 268 | 	spin_unlock_irq(&kvm->irqfds.lock); | 
 | 269 |  | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 270 | 	/* | 
 | 271 | 	 * do not drop the file until the irqfd is fully initialized, otherwise | 
 | 272 | 	 * we might race against the POLLHUP | 
 | 273 | 	 */ | 
 | 274 | 	fput(file); | 
 | 275 |  | 
 | 276 | 	return 0; | 
 | 277 |  | 
 | 278 | fail: | 
 | 279 | 	if (eventfd && !IS_ERR(eventfd)) | 
 | 280 | 		eventfd_ctx_put(eventfd); | 
 | 281 |  | 
| Julia Lawall | 6223011 | 2009-07-28 17:53:24 +0200 | [diff] [blame] | 282 | 	if (!IS_ERR(file)) | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 283 | 		fput(file); | 
 | 284 |  | 
 | 285 | 	kfree(irqfd); | 
 | 286 | 	return ret; | 
 | 287 | } | 
 | 288 |  | 
 | 289 | void | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 290 | kvm_eventfd_init(struct kvm *kvm) | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 291 | { | 
 | 292 | 	spin_lock_init(&kvm->irqfds.lock); | 
 | 293 | 	INIT_LIST_HEAD(&kvm->irqfds.items); | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 294 | 	INIT_LIST_HEAD(&kvm->ioeventfds); | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 295 | } | 
 | 296 |  | 
 | 297 | /* | 
 | 298 |  * shutdown any irqfd's that match fd+gsi | 
 | 299 |  */ | 
 | 300 | static int | 
 | 301 | kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) | 
 | 302 | { | 
 | 303 | 	struct _irqfd *irqfd, *tmp; | 
 | 304 | 	struct eventfd_ctx *eventfd; | 
 | 305 |  | 
 | 306 | 	eventfd = eventfd_ctx_fdget(fd); | 
 | 307 | 	if (IS_ERR(eventfd)) | 
 | 308 | 		return PTR_ERR(eventfd); | 
 | 309 |  | 
 | 310 | 	spin_lock_irq(&kvm->irqfds.lock); | 
 | 311 |  | 
 | 312 | 	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 313 | 		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) { | 
 | 314 | 			/* | 
 | 315 | 			 * This rcu_assign_pointer is needed for when | 
| Michael S. Tsirkin | c8ce057 | 2011-03-06 13:03:26 +0200 | [diff] [blame] | 316 | 			 * another thread calls kvm_irq_routing_update before | 
 | 317 | 			 * we flush workqueue below (we synchronize with | 
 | 318 | 			 * kvm_irq_routing_update using irqfds.lock). | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 319 | 			 * It is paired with synchronize_rcu done by caller | 
 | 320 | 			 * of that function. | 
 | 321 | 			 */ | 
 | 322 | 			rcu_assign_pointer(irqfd->irq_entry, NULL); | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 323 | 			irqfd_deactivate(irqfd); | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 324 | 		} | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 325 | 	} | 
 | 326 |  | 
 | 327 | 	spin_unlock_irq(&kvm->irqfds.lock); | 
 | 328 | 	eventfd_ctx_put(eventfd); | 
 | 329 |  | 
 | 330 | 	/* | 
 | 331 | 	 * Block until we know all outstanding shutdown jobs have completed | 
 | 332 | 	 * so that we guarantee there will not be any more interrupts on this | 
 | 333 | 	 * gsi once this deassign function returns. | 
 | 334 | 	 */ | 
 | 335 | 	flush_workqueue(irqfd_cleanup_wq); | 
 | 336 |  | 
 | 337 | 	return 0; | 
 | 338 | } | 
 | 339 |  | 
 | 340 | int | 
 | 341 | kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) | 
 | 342 | { | 
 | 343 | 	if (flags & KVM_IRQFD_FLAG_DEASSIGN) | 
 | 344 | 		return kvm_irqfd_deassign(kvm, fd, gsi); | 
 | 345 |  | 
 | 346 | 	return kvm_irqfd_assign(kvm, fd, gsi); | 
 | 347 | } | 
 | 348 |  | 
 | 349 | /* | 
 | 350 |  * This function is called as the kvm VM fd is being released. Shutdown all | 
 | 351 |  * irqfds that still remain open | 
 | 352 |  */ | 
 | 353 | void | 
 | 354 | kvm_irqfd_release(struct kvm *kvm) | 
 | 355 | { | 
 | 356 | 	struct _irqfd *irqfd, *tmp; | 
 | 357 |  | 
 | 358 | 	spin_lock_irq(&kvm->irqfds.lock); | 
 | 359 |  | 
 | 360 | 	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) | 
 | 361 | 		irqfd_deactivate(irqfd); | 
 | 362 |  | 
 | 363 | 	spin_unlock_irq(&kvm->irqfds.lock); | 
 | 364 |  | 
 | 365 | 	/* | 
 | 366 | 	 * Block until we know all outstanding shutdown jobs have completed | 
 | 367 | 	 * since we do not take a kvm* reference. | 
 | 368 | 	 */ | 
 | 369 | 	flush_workqueue(irqfd_cleanup_wq); | 
 | 370 |  | 
 | 371 | } | 
 | 372 |  | 
 | 373 | /* | 
| Michael S. Tsirkin | bd2b53b | 2010-11-18 19:09:08 +0200 | [diff] [blame] | 374 |  * Change irq_routing and irqfd. | 
 | 375 |  * Caller must invoke synchronize_rcu afterwards. | 
 | 376 |  */ | 
 | 377 | void kvm_irq_routing_update(struct kvm *kvm, | 
 | 378 | 			    struct kvm_irq_routing_table *irq_rt) | 
 | 379 | { | 
 | 380 | 	struct _irqfd *irqfd; | 
 | 381 |  | 
 | 382 | 	spin_lock_irq(&kvm->irqfds.lock); | 
 | 383 |  | 
 | 384 | 	rcu_assign_pointer(kvm->irq_routing, irq_rt); | 
 | 385 |  | 
 | 386 | 	list_for_each_entry(irqfd, &kvm->irqfds.items, list) | 
 | 387 | 		irqfd_update(kvm, irqfd, irq_rt); | 
 | 388 |  | 
 | 389 | 	spin_unlock_irq(&kvm->irqfds.lock); | 
 | 390 | } | 
 | 391 |  | 
 | 392 | /* | 
| Gregory Haskins | 721eecbf | 2009-05-20 10:30:49 -0400 | [diff] [blame] | 393 |  * create a host-wide workqueue for issuing deferred shutdown requests | 
 | 394 |  * aggregated from all vm* instances. We need our own isolated single-thread | 
 | 395 |  * queue to prevent deadlock against flushing the normal work-queue. | 
 | 396 |  */ | 
 | 397 | static int __init irqfd_module_init(void) | 
 | 398 | { | 
 | 399 | 	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); | 
 | 400 | 	if (!irqfd_cleanup_wq) | 
 | 401 | 		return -ENOMEM; | 
 | 402 |  | 
 | 403 | 	return 0; | 
 | 404 | } | 
 | 405 |  | 
 | 406 | static void __exit irqfd_module_exit(void) | 
 | 407 | { | 
 | 408 | 	destroy_workqueue(irqfd_cleanup_wq); | 
 | 409 | } | 
 | 410 |  | 
 | 411 | module_init(irqfd_module_init); | 
 | 412 | module_exit(irqfd_module_exit); | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 413 |  | 
 | 414 | /* | 
 | 415 |  * -------------------------------------------------------------------- | 
 | 416 |  * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal. | 
 | 417 |  * | 
 | 418 |  * userspace can register a PIO/MMIO address with an eventfd for receiving | 
 | 419 |  * notification when the memory has been touched. | 
 | 420 |  * -------------------------------------------------------------------- | 
 | 421 |  */ | 
 | 422 |  | 
 | 423 | struct _ioeventfd { | 
 | 424 | 	struct list_head     list; | 
 | 425 | 	u64                  addr; | 
 | 426 | 	int                  length; | 
 | 427 | 	struct eventfd_ctx  *eventfd; | 
 | 428 | 	u64                  datamatch; | 
 | 429 | 	struct kvm_io_device dev; | 
 | 430 | 	bool                 wildcard; | 
 | 431 | }; | 
 | 432 |  | 
 | 433 | static inline struct _ioeventfd * | 
 | 434 | to_ioeventfd(struct kvm_io_device *dev) | 
 | 435 | { | 
 | 436 | 	return container_of(dev, struct _ioeventfd, dev); | 
 | 437 | } | 
 | 438 |  | 
 | 439 | static void | 
 | 440 | ioeventfd_release(struct _ioeventfd *p) | 
 | 441 | { | 
 | 442 | 	eventfd_ctx_put(p->eventfd); | 
 | 443 | 	list_del(&p->list); | 
 | 444 | 	kfree(p); | 
 | 445 | } | 
 | 446 |  | 
 | 447 | static bool | 
 | 448 | ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) | 
 | 449 | { | 
 | 450 | 	u64 _val; | 
 | 451 |  | 
 | 452 | 	if (!(addr == p->addr && len == p->length)) | 
 | 453 | 		/* address-range must be precise for a hit */ | 
 | 454 | 		return false; | 
 | 455 |  | 
 | 456 | 	if (p->wildcard) | 
 | 457 | 		/* all else equal, wildcard is always a hit */ | 
 | 458 | 		return true; | 
 | 459 |  | 
 | 460 | 	/* otherwise, we have to actually compare the data */ | 
 | 461 |  | 
 | 462 | 	BUG_ON(!IS_ALIGNED((unsigned long)val, len)); | 
 | 463 |  | 
 | 464 | 	switch (len) { | 
 | 465 | 	case 1: | 
 | 466 | 		_val = *(u8 *)val; | 
 | 467 | 		break; | 
 | 468 | 	case 2: | 
 | 469 | 		_val = *(u16 *)val; | 
 | 470 | 		break; | 
 | 471 | 	case 4: | 
 | 472 | 		_val = *(u32 *)val; | 
 | 473 | 		break; | 
 | 474 | 	case 8: | 
 | 475 | 		_val = *(u64 *)val; | 
 | 476 | 		break; | 
 | 477 | 	default: | 
 | 478 | 		return false; | 
 | 479 | 	} | 
 | 480 |  | 
 | 481 | 	return _val == p->datamatch ? true : false; | 
 | 482 | } | 
 | 483 |  | 
 | 484 | /* MMIO/PIO writes trigger an event if the addr/val match */ | 
 | 485 | static int | 
 | 486 | ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, | 
 | 487 | 		const void *val) | 
 | 488 | { | 
 | 489 | 	struct _ioeventfd *p = to_ioeventfd(this); | 
 | 490 |  | 
 | 491 | 	if (!ioeventfd_in_range(p, addr, len, val)) | 
 | 492 | 		return -EOPNOTSUPP; | 
 | 493 |  | 
 | 494 | 	eventfd_signal(p->eventfd, 1); | 
 | 495 | 	return 0; | 
 | 496 | } | 
 | 497 |  | 
 | 498 | /* | 
 | 499 |  * This function is called as KVM is completely shutting down.  We do not | 
 | 500 |  * need to worry about locking just nuke anything we have as quickly as possible | 
 | 501 |  */ | 
 | 502 | static void | 
 | 503 | ioeventfd_destructor(struct kvm_io_device *this) | 
 | 504 | { | 
 | 505 | 	struct _ioeventfd *p = to_ioeventfd(this); | 
 | 506 |  | 
 | 507 | 	ioeventfd_release(p); | 
 | 508 | } | 
 | 509 |  | 
 | 510 | static const struct kvm_io_device_ops ioeventfd_ops = { | 
 | 511 | 	.write      = ioeventfd_write, | 
 | 512 | 	.destructor = ioeventfd_destructor, | 
 | 513 | }; | 
 | 514 |  | 
 | 515 | /* assumes kvm->slots_lock held */ | 
 | 516 | static bool | 
 | 517 | ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) | 
 | 518 | { | 
 | 519 | 	struct _ioeventfd *_p; | 
 | 520 |  | 
 | 521 | 	list_for_each_entry(_p, &kvm->ioeventfds, list) | 
 | 522 | 		if (_p->addr == p->addr && _p->length == p->length && | 
 | 523 | 		    (_p->wildcard || p->wildcard || | 
 | 524 | 		     _p->datamatch == p->datamatch)) | 
 | 525 | 			return true; | 
 | 526 |  | 
 | 527 | 	return false; | 
 | 528 | } | 
 | 529 |  | 
 | 530 | static int | 
 | 531 | kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 
 | 532 | { | 
 | 533 | 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | 
| Marcelo Tosatti | e93f8a0 | 2009-12-23 14:35:24 -0200 | [diff] [blame] | 534 | 	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 535 | 	struct _ioeventfd        *p; | 
 | 536 | 	struct eventfd_ctx       *eventfd; | 
 | 537 | 	int                       ret; | 
 | 538 |  | 
 | 539 | 	/* must be natural-word sized */ | 
 | 540 | 	switch (args->len) { | 
 | 541 | 	case 1: | 
 | 542 | 	case 2: | 
 | 543 | 	case 4: | 
 | 544 | 	case 8: | 
 | 545 | 		break; | 
 | 546 | 	default: | 
 | 547 | 		return -EINVAL; | 
 | 548 | 	} | 
 | 549 |  | 
 | 550 | 	/* check for range overflow */ | 
 | 551 | 	if (args->addr + args->len < args->addr) | 
 | 552 | 		return -EINVAL; | 
 | 553 |  | 
 | 554 | 	/* check for extra flags that we don't understand */ | 
 | 555 | 	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) | 
 | 556 | 		return -EINVAL; | 
 | 557 |  | 
 | 558 | 	eventfd = eventfd_ctx_fdget(args->fd); | 
 | 559 | 	if (IS_ERR(eventfd)) | 
 | 560 | 		return PTR_ERR(eventfd); | 
 | 561 |  | 
 | 562 | 	p = kzalloc(sizeof(*p), GFP_KERNEL); | 
 | 563 | 	if (!p) { | 
 | 564 | 		ret = -ENOMEM; | 
 | 565 | 		goto fail; | 
 | 566 | 	} | 
 | 567 |  | 
 | 568 | 	INIT_LIST_HEAD(&p->list); | 
 | 569 | 	p->addr    = args->addr; | 
 | 570 | 	p->length  = args->len; | 
 | 571 | 	p->eventfd = eventfd; | 
 | 572 |  | 
 | 573 | 	/* The datamatch feature is optional, otherwise this is a wildcard */ | 
 | 574 | 	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH) | 
 | 575 | 		p->datamatch = args->datamatch; | 
 | 576 | 	else | 
 | 577 | 		p->wildcard = true; | 
 | 578 |  | 
| Marcelo Tosatti | 79fac95 | 2009-12-23 14:35:26 -0200 | [diff] [blame] | 579 | 	mutex_lock(&kvm->slots_lock); | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 580 |  | 
| Lucas De Marchi | 25985ed | 2011-03-30 22:57:33 -0300 | [diff] [blame] | 581 | 	/* Verify that there isn't a match already */ | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 582 | 	if (ioeventfd_check_collision(kvm, p)) { | 
 | 583 | 		ret = -EEXIST; | 
 | 584 | 		goto unlock_fail; | 
 | 585 | 	} | 
 | 586 |  | 
 | 587 | 	kvm_iodevice_init(&p->dev, &ioeventfd_ops); | 
 | 588 |  | 
| Sasha Levin | 743eeb0 | 2011-07-27 16:00:48 +0300 | [diff] [blame] | 589 | 	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length, | 
 | 590 | 				      &p->dev); | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 591 | 	if (ret < 0) | 
 | 592 | 		goto unlock_fail; | 
 | 593 |  | 
 | 594 | 	list_add_tail(&p->list, &kvm->ioeventfds); | 
 | 595 |  | 
| Marcelo Tosatti | 79fac95 | 2009-12-23 14:35:26 -0200 | [diff] [blame] | 596 | 	mutex_unlock(&kvm->slots_lock); | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 597 |  | 
 | 598 | 	return 0; | 
 | 599 |  | 
 | 600 | unlock_fail: | 
| Marcelo Tosatti | 79fac95 | 2009-12-23 14:35:26 -0200 | [diff] [blame] | 601 | 	mutex_unlock(&kvm->slots_lock); | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 602 |  | 
 | 603 | fail: | 
 | 604 | 	kfree(p); | 
 | 605 | 	eventfd_ctx_put(eventfd); | 
 | 606 |  | 
 | 607 | 	return ret; | 
 | 608 | } | 
 | 609 |  | 
 | 610 | static int | 
 | 611 | kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 
 | 612 | { | 
 | 613 | 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | 
| Marcelo Tosatti | e93f8a0 | 2009-12-23 14:35:24 -0200 | [diff] [blame] | 614 | 	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 615 | 	struct _ioeventfd        *p, *tmp; | 
 | 616 | 	struct eventfd_ctx       *eventfd; | 
 | 617 | 	int                       ret = -ENOENT; | 
 | 618 |  | 
 | 619 | 	eventfd = eventfd_ctx_fdget(args->fd); | 
 | 620 | 	if (IS_ERR(eventfd)) | 
 | 621 | 		return PTR_ERR(eventfd); | 
 | 622 |  | 
| Marcelo Tosatti | 79fac95 | 2009-12-23 14:35:26 -0200 | [diff] [blame] | 623 | 	mutex_lock(&kvm->slots_lock); | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 624 |  | 
 | 625 | 	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { | 
 | 626 | 		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); | 
 | 627 |  | 
 | 628 | 		if (p->eventfd != eventfd  || | 
 | 629 | 		    p->addr != args->addr  || | 
 | 630 | 		    p->length != args->len || | 
 | 631 | 		    p->wildcard != wildcard) | 
 | 632 | 			continue; | 
 | 633 |  | 
 | 634 | 		if (!p->wildcard && p->datamatch != args->datamatch) | 
 | 635 | 			continue; | 
 | 636 |  | 
| Marcelo Tosatti | e93f8a0 | 2009-12-23 14:35:24 -0200 | [diff] [blame] | 637 | 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 638 | 		ioeventfd_release(p); | 
 | 639 | 		ret = 0; | 
 | 640 | 		break; | 
 | 641 | 	} | 
 | 642 |  | 
| Marcelo Tosatti | 79fac95 | 2009-12-23 14:35:26 -0200 | [diff] [blame] | 643 | 	mutex_unlock(&kvm->slots_lock); | 
| Gregory Haskins | d34e6b1 | 2009-07-07 17:08:49 -0400 | [diff] [blame] | 644 |  | 
 | 645 | 	eventfd_ctx_put(eventfd); | 
 | 646 |  | 
 | 647 | 	return ret; | 
 | 648 | } | 
 | 649 |  | 
 | 650 | int | 
 | 651 | kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 
 | 652 | { | 
 | 653 | 	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN) | 
 | 654 | 		return kvm_deassign_ioeventfd(kvm, args); | 
 | 655 |  | 
 | 656 | 	return kvm_assign_ioeventfd(kvm, args); | 
 | 657 | } |