Timur Tabi | 6db7199 | 2011-06-09 15:52:06 -0500 | [diff] [blame] | 1 | /* |
| 2 | * Freescale Hypervisor Management Driver |
| 3 | |
| 4 | * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. |
| 5 | * Author: Timur Tabi <timur@freescale.com> |
| 6 | * |
| 7 | * This file is licensed under the terms of the GNU General Public License |
| 8 | * version 2. This program is licensed "as is" without any warranty of any |
| 9 | * kind, whether express or implied. |
| 10 | * |
| 11 | * The Freescale hypervisor management driver provides several services to |
| 12 | * drivers and applications related to the Freescale hypervisor: |
| 13 | * |
| 14 | * 1. An ioctl interface for querying and managing partitions. |
| 15 | * |
| 16 | * 2. A file interface for reading incoming doorbells. |
| 17 | * |
| 18 | * 3. An interrupt handler for shutting down the partition upon receiving the |
| 19 | * shutdown doorbell from a manager partition. |
| 20 | * |
| 21 | * 4. A kernel interface for receiving callbacks when a managed partition |
| 22 | * shuts down. |
| 23 | */ |
| 24 | |
| 25 | #include <linux/kernel.h> |
| 26 | #include <linux/module.h> |
| 27 | #include <linux/init.h> |
| 28 | #include <linux/types.h> |
| 29 | #include <linux/err.h> |
| 30 | #include <linux/fs.h> |
| 31 | #include <linux/miscdevice.h> |
| 32 | #include <linux/mm.h> |
| 33 | #include <linux/pagemap.h> |
| 34 | #include <linux/slab.h> |
| 35 | #include <linux/poll.h> |
| 36 | #include <linux/of.h> |
| 37 | #include <linux/reboot.h> |
| 38 | #include <linux/uaccess.h> |
| 39 | #include <linux/notifier.h> |
Timur Tabi | f1f4ee0 | 2011-07-19 15:45:51 -0500 | [diff] [blame] | 40 | #include <linux/interrupt.h> |
Timur Tabi | 6db7199 | 2011-06-09 15:52:06 -0500 | [diff] [blame] | 41 | |
| 42 | #include <linux/io.h> |
| 43 | #include <asm/fsl_hcalls.h> |
| 44 | |
| 45 | #include <linux/fsl_hypervisor.h> |
| 46 | |
/*
 * Notifier chain of clients registered through fsl_hv_failover_register();
 * it is invoked from fsl_hv_state_change_thread().
 */
static BLOCKING_NOTIFIER_HEAD(failover_subscribers);
| 48 | |
| 49 | /* |
| 50 | * Ioctl interface for FSL_HV_IOCTL_PARTITION_RESTART |
| 51 | * |
| 52 | * Restart a running partition |
| 53 | */ |
| 54 | static long ioctl_restart(struct fsl_hv_ioctl_restart __user *p) |
| 55 | { |
| 56 | struct fsl_hv_ioctl_restart param; |
| 57 | |
| 58 | /* Get the parameters from the user */ |
| 59 | if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_restart))) |
| 60 | return -EFAULT; |
| 61 | |
| 62 | param.ret = fh_partition_restart(param.partition); |
| 63 | |
| 64 | if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) |
| 65 | return -EFAULT; |
| 66 | |
| 67 | return 0; |
| 68 | } |
| 69 | |
| 70 | /* |
| 71 | * Ioctl interface for FSL_HV_IOCTL_PARTITION_STATUS |
| 72 | * |
| 73 | * Query the status of a partition |
| 74 | */ |
| 75 | static long ioctl_status(struct fsl_hv_ioctl_status __user *p) |
| 76 | { |
| 77 | struct fsl_hv_ioctl_status param; |
| 78 | u32 status; |
| 79 | |
| 80 | /* Get the parameters from the user */ |
| 81 | if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_status))) |
| 82 | return -EFAULT; |
| 83 | |
| 84 | param.ret = fh_partition_get_status(param.partition, &status); |
| 85 | if (!param.ret) |
| 86 | param.status = status; |
| 87 | |
| 88 | if (copy_to_user(p, ¶m, sizeof(struct fsl_hv_ioctl_status))) |
| 89 | return -EFAULT; |
| 90 | |
| 91 | return 0; |
| 92 | } |
| 93 | |
| 94 | /* |
| 95 | * Ioctl interface for FSL_HV_IOCTL_PARTITION_START |
| 96 | * |
| 97 | * Start a stopped partition. |
| 98 | */ |
| 99 | static long ioctl_start(struct fsl_hv_ioctl_start __user *p) |
| 100 | { |
| 101 | struct fsl_hv_ioctl_start param; |
| 102 | |
| 103 | /* Get the parameters from the user */ |
| 104 | if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_start))) |
| 105 | return -EFAULT; |
| 106 | |
| 107 | param.ret = fh_partition_start(param.partition, param.entry_point, |
| 108 | param.load); |
| 109 | |
| 110 | if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) |
| 111 | return -EFAULT; |
| 112 | |
| 113 | return 0; |
| 114 | } |
| 115 | |
| 116 | /* |
| 117 | * Ioctl interface for FSL_HV_IOCTL_PARTITION_STOP |
| 118 | * |
| 119 | * Stop a running partition |
| 120 | */ |
| 121 | static long ioctl_stop(struct fsl_hv_ioctl_stop __user *p) |
| 122 | { |
| 123 | struct fsl_hv_ioctl_stop param; |
| 124 | |
| 125 | /* Get the parameters from the user */ |
| 126 | if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_stop))) |
| 127 | return -EFAULT; |
| 128 | |
| 129 | param.ret = fh_partition_stop(param.partition); |
| 130 | |
| 131 | if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) |
| 132 | return -EFAULT; |
| 133 | |
| 134 | return 0; |
| 135 | } |
| 136 | |
| 137 | /* |
| 138 | * Ioctl interface for FSL_HV_IOCTL_MEMCPY |
| 139 | * |
| 140 | * The FH_MEMCPY hypercall takes an array of address/address/size structures |
| 141 | * to represent the data being copied. As a convenience to the user, this |
| 142 | * ioctl takes a user-create buffer and a pointer to a guest physically |
| 143 | * contiguous buffer in the remote partition, and creates the |
| 144 | * address/address/size array for the hypercall. |
| 145 | */ |
| 146 | static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p) |
| 147 | { |
| 148 | struct fsl_hv_ioctl_memcpy param; |
| 149 | |
| 150 | struct page **pages = NULL; |
| 151 | void *sg_list_unaligned = NULL; |
| 152 | struct fh_sg_list *sg_list = NULL; |
| 153 | |
| 154 | unsigned int num_pages; |
| 155 | unsigned long lb_offset; /* Offset within a page of the local buffer */ |
| 156 | |
| 157 | unsigned int i; |
| 158 | long ret = 0; |
| 159 | int num_pinned; /* return value from get_user_pages() */ |
| 160 | phys_addr_t remote_paddr; /* The next address in the remote buffer */ |
| 161 | uint32_t count; /* The number of bytes left to copy */ |
| 162 | |
| 163 | /* Get the parameters from the user */ |
| 164 | if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_memcpy))) |
| 165 | return -EFAULT; |
| 166 | |
| 167 | /* |
| 168 | * One partition must be local, the other must be remote. In other |
| 169 | * words, if source and target are both -1, or are both not -1, then |
| 170 | * return an error. |
| 171 | */ |
| 172 | if ((param.source == -1) == (param.target == -1)) |
| 173 | return -EINVAL; |
| 174 | |
| 175 | /* |
| 176 | * The array of pages returned by get_user_pages() covers only |
| 177 | * page-aligned memory. Since the user buffer is probably not |
| 178 | * page-aligned, we need to handle the discrepancy. |
| 179 | * |
| 180 | * We calculate the offset within a page of the S/G list, and make |
| 181 | * adjustments accordingly. This will result in a page list that looks |
| 182 | * like this: |
| 183 | * |
| 184 | * ---- <-- first page starts before the buffer |
| 185 | * | | |
| 186 | * |////|-> ---- |
| 187 | * |////| | | |
| 188 | * ---- | | |
| 189 | * | | |
| 190 | * ---- | | |
| 191 | * |////| | | |
| 192 | * |////| | | |
| 193 | * |////| | | |
| 194 | * ---- | | |
| 195 | * | | |
| 196 | * ---- | | |
| 197 | * |////| | | |
| 198 | * |////| | | |
| 199 | * |////| | | |
| 200 | * ---- | | |
| 201 | * | | |
| 202 | * ---- | | |
| 203 | * |////| | | |
| 204 | * |////|-> ---- |
| 205 | * | | <-- last page ends after the buffer |
| 206 | * ---- |
| 207 | * |
| 208 | * The distance between the start of the first page and the start of the |
| 209 | * buffer is lb_offset. The hashed (///) areas are the parts of the |
| 210 | * page list that contain the actual buffer. |
| 211 | * |
| 212 | * The advantage of this approach is that the number of pages is |
| 213 | * equal to the number of entries in the S/G list that we give to the |
| 214 | * hypervisor. |
| 215 | */ |
| 216 | lb_offset = param.local_vaddr & (PAGE_SIZE - 1); |
| 217 | num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| 218 | |
| 219 | /* Allocate the buffers we need */ |
| 220 | |
| 221 | /* |
| 222 | * 'pages' is an array of struct page pointers that's initialized by |
| 223 | * get_user_pages(). |
| 224 | */ |
| 225 | pages = kzalloc(num_pages * sizeof(struct page *), GFP_KERNEL); |
| 226 | if (!pages) { |
| 227 | pr_debug("fsl-hv: could not allocate page list\n"); |
| 228 | return -ENOMEM; |
| 229 | } |
| 230 | |
| 231 | /* |
| 232 | * sg_list is the list of fh_sg_list objects that we pass to the |
| 233 | * hypervisor. |
| 234 | */ |
| 235 | sg_list_unaligned = kmalloc(num_pages * sizeof(struct fh_sg_list) + |
| 236 | sizeof(struct fh_sg_list) - 1, GFP_KERNEL); |
| 237 | if (!sg_list_unaligned) { |
| 238 | pr_debug("fsl-hv: could not allocate S/G list\n"); |
| 239 | ret = -ENOMEM; |
| 240 | goto exit; |
| 241 | } |
| 242 | sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list)); |
| 243 | |
| 244 | /* Get the physical addresses of the source buffer */ |
| 245 | down_read(¤t->mm->mmap_sem); |
| 246 | num_pinned = get_user_pages(current, current->mm, |
| 247 | param.local_vaddr - lb_offset, num_pages, |
| 248 | (param.source == -1) ? READ : WRITE, |
| 249 | 0, pages, NULL); |
| 250 | up_read(¤t->mm->mmap_sem); |
| 251 | |
| 252 | if (num_pinned != num_pages) { |
| 253 | /* get_user_pages() failed */ |
| 254 | pr_debug("fsl-hv: could not lock source buffer\n"); |
| 255 | ret = (num_pinned < 0) ? num_pinned : -EFAULT; |
| 256 | goto exit; |
| 257 | } |
| 258 | |
| 259 | /* |
| 260 | * Build the fh_sg_list[] array. The first page is special |
| 261 | * because it's misaligned. |
| 262 | */ |
| 263 | if (param.source == -1) { |
| 264 | sg_list[0].source = page_to_phys(pages[0]) + lb_offset; |
| 265 | sg_list[0].target = param.remote_paddr; |
| 266 | } else { |
| 267 | sg_list[0].source = param.remote_paddr; |
| 268 | sg_list[0].target = page_to_phys(pages[0]) + lb_offset; |
| 269 | } |
| 270 | sg_list[0].size = min_t(uint64_t, param.count, PAGE_SIZE - lb_offset); |
| 271 | |
| 272 | remote_paddr = param.remote_paddr + sg_list[0].size; |
| 273 | count = param.count - sg_list[0].size; |
| 274 | |
| 275 | for (i = 1; i < num_pages; i++) { |
| 276 | if (param.source == -1) { |
| 277 | /* local to remote */ |
| 278 | sg_list[i].source = page_to_phys(pages[i]); |
| 279 | sg_list[i].target = remote_paddr; |
| 280 | } else { |
| 281 | /* remote to local */ |
| 282 | sg_list[i].source = remote_paddr; |
| 283 | sg_list[i].target = page_to_phys(pages[i]); |
| 284 | } |
| 285 | sg_list[i].size = min_t(uint64_t, count, PAGE_SIZE); |
| 286 | |
| 287 | remote_paddr += sg_list[i].size; |
| 288 | count -= sg_list[i].size; |
| 289 | } |
| 290 | |
| 291 | param.ret = fh_partition_memcpy(param.source, param.target, |
| 292 | virt_to_phys(sg_list), num_pages); |
| 293 | |
| 294 | exit: |
| 295 | if (pages) { |
| 296 | for (i = 0; i < num_pages; i++) |
| 297 | if (pages[i]) |
| 298 | put_page(pages[i]); |
| 299 | } |
| 300 | |
| 301 | kfree(sg_list_unaligned); |
| 302 | kfree(pages); |
| 303 | |
| 304 | if (!ret) |
| 305 | if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) |
| 306 | return -EFAULT; |
| 307 | |
| 308 | return ret; |
| 309 | } |
| 310 | |
| 311 | /* |
| 312 | * Ioctl interface for FSL_HV_IOCTL_DOORBELL |
| 313 | * |
| 314 | * Ring a doorbell |
| 315 | */ |
| 316 | static long ioctl_doorbell(struct fsl_hv_ioctl_doorbell __user *p) |
| 317 | { |
| 318 | struct fsl_hv_ioctl_doorbell param; |
| 319 | |
| 320 | /* Get the parameters from the user. */ |
| 321 | if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_doorbell))) |
| 322 | return -EFAULT; |
| 323 | |
| 324 | param.ret = ev_doorbell_send(param.doorbell); |
| 325 | |
| 326 | if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) |
| 327 | return -EFAULT; |
| 328 | |
| 329 | return 0; |
| 330 | } |
| 331 | |
| 332 | static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set) |
| 333 | { |
| 334 | struct fsl_hv_ioctl_prop param; |
| 335 | char __user *upath, *upropname; |
| 336 | void __user *upropval; |
| 337 | char *path = NULL, *propname = NULL; |
| 338 | void *propval = NULL; |
| 339 | int ret = 0; |
| 340 | |
| 341 | /* Get the parameters from the user. */ |
| 342 | if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_prop))) |
| 343 | return -EFAULT; |
| 344 | |
| 345 | upath = (char __user *)(uintptr_t)param.path; |
| 346 | upropname = (char __user *)(uintptr_t)param.propname; |
| 347 | upropval = (void __user *)(uintptr_t)param.propval; |
| 348 | |
| 349 | path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN); |
| 350 | if (IS_ERR(path)) { |
| 351 | ret = PTR_ERR(path); |
| 352 | goto out; |
| 353 | } |
| 354 | |
| 355 | propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN); |
| 356 | if (IS_ERR(propname)) { |
| 357 | ret = PTR_ERR(propname); |
| 358 | goto out; |
| 359 | } |
| 360 | |
| 361 | if (param.proplen > FH_DTPROP_MAX_PROPLEN) { |
| 362 | ret = -EINVAL; |
| 363 | goto out; |
| 364 | } |
| 365 | |
| 366 | propval = kmalloc(param.proplen, GFP_KERNEL); |
| 367 | if (!propval) { |
| 368 | ret = -ENOMEM; |
| 369 | goto out; |
| 370 | } |
| 371 | |
| 372 | if (set) { |
| 373 | if (copy_from_user(propval, upropval, param.proplen)) { |
| 374 | ret = -EFAULT; |
| 375 | goto out; |
| 376 | } |
| 377 | |
| 378 | param.ret = fh_partition_set_dtprop(param.handle, |
| 379 | virt_to_phys(path), |
| 380 | virt_to_phys(propname), |
| 381 | virt_to_phys(propval), |
| 382 | param.proplen); |
| 383 | } else { |
| 384 | param.ret = fh_partition_get_dtprop(param.handle, |
| 385 | virt_to_phys(path), |
| 386 | virt_to_phys(propname), |
| 387 | virt_to_phys(propval), |
| 388 | ¶m.proplen); |
| 389 | |
| 390 | if (param.ret == 0) { |
| 391 | if (copy_to_user(upropval, propval, param.proplen) || |
| 392 | put_user(param.proplen, &p->proplen)) { |
| 393 | ret = -EFAULT; |
| 394 | goto out; |
| 395 | } |
| 396 | } |
| 397 | } |
| 398 | |
| 399 | if (put_user(param.ret, &p->ret)) |
| 400 | ret = -EFAULT; |
| 401 | |
| 402 | out: |
| 403 | kfree(path); |
| 404 | kfree(propval); |
| 405 | kfree(propname); |
| 406 | |
| 407 | return ret; |
| 408 | } |
| 409 | |
| 410 | /* |
| 411 | * Ioctl main entry point |
| 412 | */ |
| 413 | static long fsl_hv_ioctl(struct file *file, unsigned int cmd, |
| 414 | unsigned long argaddr) |
| 415 | { |
| 416 | void __user *arg = (void __user *)argaddr; |
| 417 | long ret; |
| 418 | |
| 419 | switch (cmd) { |
| 420 | case FSL_HV_IOCTL_PARTITION_RESTART: |
| 421 | ret = ioctl_restart(arg); |
| 422 | break; |
| 423 | case FSL_HV_IOCTL_PARTITION_GET_STATUS: |
| 424 | ret = ioctl_status(arg); |
| 425 | break; |
| 426 | case FSL_HV_IOCTL_PARTITION_START: |
| 427 | ret = ioctl_start(arg); |
| 428 | break; |
| 429 | case FSL_HV_IOCTL_PARTITION_STOP: |
| 430 | ret = ioctl_stop(arg); |
| 431 | break; |
| 432 | case FSL_HV_IOCTL_MEMCPY: |
| 433 | ret = ioctl_memcpy(arg); |
| 434 | break; |
| 435 | case FSL_HV_IOCTL_DOORBELL: |
| 436 | ret = ioctl_doorbell(arg); |
| 437 | break; |
| 438 | case FSL_HV_IOCTL_GETPROP: |
| 439 | ret = ioctl_dtprop(arg, 0); |
| 440 | break; |
| 441 | case FSL_HV_IOCTL_SETPROP: |
| 442 | ret = ioctl_dtprop(arg, 1); |
| 443 | break; |
| 444 | default: |
| 445 | pr_debug("fsl-hv: bad ioctl dir=%u type=%u cmd=%u size=%u\n", |
| 446 | _IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), |
| 447 | _IOC_SIZE(cmd)); |
| 448 | return -ENOTTY; |
| 449 | } |
| 450 | |
| 451 | return ret; |
| 452 | } |
| 453 | |
| 454 | /* Linked list of processes that have us open */ |
| 455 | static struct list_head db_list; |
| 456 | |
| 457 | /* spinlock for db_list */ |
| 458 | static DEFINE_SPINLOCK(db_list_lock); |
| 459 | |
/*
 * Number of slots in a doorbell event queue.  nextp() relies on this being
 * a power of two.
 */
#define QSIZE	16

/* Advance a head/tail index by one slot, wrapping at the end of the queue */
#define nextp(x)	((1 + (x)) & (QSIZE - 1))
| 465 | |
| 466 | /* Per-open data structure */ |
| 467 | struct doorbell_queue { |
| 468 | struct list_head list; |
| 469 | spinlock_t lock; |
| 470 | wait_queue_head_t wait; |
| 471 | unsigned int head; |
| 472 | unsigned int tail; |
| 473 | uint32_t q[QSIZE]; |
| 474 | }; |
| 475 | |
| 476 | /* Linked list of ISRs that we registered */ |
| 477 | struct list_head isr_list; |
| 478 | |
| 479 | /* Per-ISR data structure */ |
| 480 | struct doorbell_isr { |
| 481 | struct list_head list; |
| 482 | unsigned int irq; |
| 483 | uint32_t doorbell; /* The doorbell handle */ |
| 484 | uint32_t partition; /* The partition handle, if used */ |
| 485 | }; |
| 486 | |
| 487 | /* |
| 488 | * Add a doorbell to all of the doorbell queues |
| 489 | */ |
| 490 | static void fsl_hv_queue_doorbell(uint32_t doorbell) |
| 491 | { |
| 492 | struct doorbell_queue *dbq; |
| 493 | unsigned long flags; |
| 494 | |
| 495 | /* Prevent another core from modifying db_list */ |
| 496 | spin_lock_irqsave(&db_list_lock, flags); |
| 497 | |
| 498 | list_for_each_entry(dbq, &db_list, list) { |
| 499 | if (dbq->head != nextp(dbq->tail)) { |
| 500 | dbq->q[dbq->tail] = doorbell; |
| 501 | /* |
| 502 | * This memory barrier eliminates the need to grab |
| 503 | * the spinlock for dbq. |
| 504 | */ |
| 505 | smp_wmb(); |
| 506 | dbq->tail = nextp(dbq->tail); |
| 507 | wake_up_interruptible(&dbq->wait); |
| 508 | } |
| 509 | } |
| 510 | |
| 511 | spin_unlock_irqrestore(&db_list_lock, flags); |
| 512 | } |
| 513 | |
| 514 | /* |
| 515 | * Interrupt handler for all doorbells |
| 516 | * |
| 517 | * We use the same interrupt handler for all doorbells. Whenever a doorbell |
| 518 | * is rung, and we receive an interrupt, we just put the handle for that |
| 519 | * doorbell (passed to us as *data) into all of the queues. |
| 520 | */ |
| 521 | static irqreturn_t fsl_hv_isr(int irq, void *data) |
| 522 | { |
| 523 | fsl_hv_queue_doorbell((uintptr_t) data); |
| 524 | |
| 525 | return IRQ_HANDLED; |
| 526 | } |
| 527 | |
| 528 | /* |
| 529 | * State change thread function |
| 530 | * |
| 531 | * The state change notification arrives in an interrupt, but we can't call |
| 532 | * blocking_notifier_call_chain() in an interrupt handler. We could call |
| 533 | * atomic_notifier_call_chain(), but that would require the clients' call-back |
| 534 | * function to run in interrupt context. Since we don't want to impose that |
| 535 | * restriction on the clients, we use a threaded IRQ to process the |
| 536 | * notification in kernel context. |
| 537 | */ |
| 538 | static irqreturn_t fsl_hv_state_change_thread(int irq, void *data) |
| 539 | { |
| 540 | struct doorbell_isr *dbisr = data; |
| 541 | |
| 542 | blocking_notifier_call_chain(&failover_subscribers, dbisr->partition, |
| 543 | NULL); |
| 544 | |
| 545 | return IRQ_HANDLED; |
| 546 | } |
| 547 | |
| 548 | /* |
| 549 | * Interrupt handler for state-change doorbells |
| 550 | */ |
| 551 | static irqreturn_t fsl_hv_state_change_isr(int irq, void *data) |
| 552 | { |
| 553 | unsigned int status; |
| 554 | struct doorbell_isr *dbisr = data; |
| 555 | int ret; |
| 556 | |
| 557 | /* It's still a doorbell, so add it to all the queues. */ |
| 558 | fsl_hv_queue_doorbell(dbisr->doorbell); |
| 559 | |
| 560 | /* Determine the new state, and if it's stopped, notify the clients. */ |
| 561 | ret = fh_partition_get_status(dbisr->partition, &status); |
| 562 | if (!ret && (status == FH_PARTITION_STOPPED)) |
| 563 | return IRQ_WAKE_THREAD; |
| 564 | |
| 565 | return IRQ_HANDLED; |
| 566 | } |
| 567 | |
| 568 | /* |
| 569 | * Returns a bitmask indicating whether a read will block |
| 570 | */ |
| 571 | static unsigned int fsl_hv_poll(struct file *filp, struct poll_table_struct *p) |
| 572 | { |
| 573 | struct doorbell_queue *dbq = filp->private_data; |
| 574 | unsigned long flags; |
| 575 | unsigned int mask; |
| 576 | |
| 577 | spin_lock_irqsave(&dbq->lock, flags); |
| 578 | |
| 579 | poll_wait(filp, &dbq->wait, p); |
| 580 | mask = (dbq->head == dbq->tail) ? 0 : (POLLIN | POLLRDNORM); |
| 581 | |
| 582 | spin_unlock_irqrestore(&dbq->lock, flags); |
| 583 | |
| 584 | return mask; |
| 585 | } |
| 586 | |
| 587 | /* |
| 588 | * Return the handles for any incoming doorbells |
| 589 | * |
| 590 | * If there are doorbell handles in the queue for this open instance, then |
| 591 | * return them to the caller as an array of 32-bit integers. Otherwise, |
| 592 | * block until there is at least one handle to return. |
| 593 | */ |
| 594 | static ssize_t fsl_hv_read(struct file *filp, char __user *buf, size_t len, |
| 595 | loff_t *off) |
| 596 | { |
| 597 | struct doorbell_queue *dbq = filp->private_data; |
| 598 | uint32_t __user *p = (uint32_t __user *) buf; /* for put_user() */ |
| 599 | unsigned long flags; |
| 600 | ssize_t count = 0; |
| 601 | |
| 602 | /* Make sure we stop when the user buffer is full. */ |
| 603 | while (len >= sizeof(uint32_t)) { |
| 604 | uint32_t dbell; /* Local copy of doorbell queue data */ |
| 605 | |
| 606 | spin_lock_irqsave(&dbq->lock, flags); |
| 607 | |
| 608 | /* |
| 609 | * If the queue is empty, then either we're done or we need |
| 610 | * to block. If the application specified O_NONBLOCK, then |
| 611 | * we return the appropriate error code. |
| 612 | */ |
| 613 | if (dbq->head == dbq->tail) { |
| 614 | spin_unlock_irqrestore(&dbq->lock, flags); |
| 615 | if (count) |
| 616 | break; |
| 617 | if (filp->f_flags & O_NONBLOCK) |
| 618 | return -EAGAIN; |
| 619 | if (wait_event_interruptible(dbq->wait, |
| 620 | dbq->head != dbq->tail)) |
| 621 | return -ERESTARTSYS; |
| 622 | continue; |
| 623 | } |
| 624 | |
| 625 | /* |
| 626 | * Even though we have an smp_wmb() in the ISR, the core |
| 627 | * might speculatively execute the "dbell = ..." below while |
| 628 | * it's evaluating the if-statement above. In that case, the |
| 629 | * value put into dbell could be stale if the core accepts the |
| 630 | * speculation. To prevent that, we need a read memory barrier |
| 631 | * here as well. |
| 632 | */ |
| 633 | smp_rmb(); |
| 634 | |
| 635 | /* Copy the data to a temporary local buffer, because |
| 636 | * we can't call copy_to_user() from inside a spinlock |
| 637 | */ |
| 638 | dbell = dbq->q[dbq->head]; |
| 639 | dbq->head = nextp(dbq->head); |
| 640 | |
| 641 | spin_unlock_irqrestore(&dbq->lock, flags); |
| 642 | |
| 643 | if (put_user(dbell, p)) |
| 644 | return -EFAULT; |
| 645 | p++; |
| 646 | count += sizeof(uint32_t); |
| 647 | len -= sizeof(uint32_t); |
| 648 | } |
| 649 | |
| 650 | return count; |
| 651 | } |
| 652 | |
| 653 | /* |
| 654 | * Open the driver and prepare for reading doorbells. |
| 655 | * |
| 656 | * Every time an application opens the driver, we create a doorbell queue |
| 657 | * for that file handle. This queue is used for any incoming doorbells. |
| 658 | */ |
| 659 | static int fsl_hv_open(struct inode *inode, struct file *filp) |
| 660 | { |
| 661 | struct doorbell_queue *dbq; |
| 662 | unsigned long flags; |
| 663 | int ret = 0; |
| 664 | |
| 665 | dbq = kzalloc(sizeof(struct doorbell_queue), GFP_KERNEL); |
| 666 | if (!dbq) { |
| 667 | pr_err("fsl-hv: out of memory\n"); |
| 668 | return -ENOMEM; |
| 669 | } |
| 670 | |
| 671 | spin_lock_init(&dbq->lock); |
| 672 | init_waitqueue_head(&dbq->wait); |
| 673 | |
| 674 | spin_lock_irqsave(&db_list_lock, flags); |
| 675 | list_add(&dbq->list, &db_list); |
| 676 | spin_unlock_irqrestore(&db_list_lock, flags); |
| 677 | |
| 678 | filp->private_data = dbq; |
| 679 | |
| 680 | return ret; |
| 681 | } |
| 682 | |
| 683 | /* |
| 684 | * Close the driver |
| 685 | */ |
| 686 | static int fsl_hv_close(struct inode *inode, struct file *filp) |
| 687 | { |
| 688 | struct doorbell_queue *dbq = filp->private_data; |
| 689 | unsigned long flags; |
| 690 | |
| 691 | int ret = 0; |
| 692 | |
| 693 | spin_lock_irqsave(&db_list_lock, flags); |
| 694 | list_del(&dbq->list); |
| 695 | spin_unlock_irqrestore(&db_list_lock, flags); |
| 696 | |
| 697 | kfree(dbq); |
| 698 | |
| 699 | return ret; |
| 700 | } |
| 701 | |
| 702 | static const struct file_operations fsl_hv_fops = { |
| 703 | .owner = THIS_MODULE, |
| 704 | .open = fsl_hv_open, |
| 705 | .release = fsl_hv_close, |
| 706 | .poll = fsl_hv_poll, |
| 707 | .read = fsl_hv_read, |
| 708 | .unlocked_ioctl = fsl_hv_ioctl, |
| 709 | }; |
| 710 | |
| 711 | static struct miscdevice fsl_hv_misc_dev = { |
| 712 | MISC_DYNAMIC_MINOR, |
| 713 | "fsl-hv", |
| 714 | &fsl_hv_fops |
| 715 | }; |
| 716 | |
| 717 | static irqreturn_t fsl_hv_shutdown_isr(int irq, void *data) |
| 718 | { |
| 719 | orderly_poweroff(false); |
| 720 | |
| 721 | return IRQ_HANDLED; |
| 722 | } |
| 723 | |
| 724 | /* |
| 725 | * Returns the handle of the parent of the given node |
| 726 | * |
| 727 | * The handle is the value of the 'hv-handle' property |
| 728 | */ |
| 729 | static int get_parent_handle(struct device_node *np) |
| 730 | { |
| 731 | struct device_node *parent; |
| 732 | const uint32_t *prop; |
| 733 | uint32_t handle; |
| 734 | int len; |
| 735 | |
| 736 | parent = of_get_parent(np); |
| 737 | if (!parent) |
| 738 | /* It's not really possible for this to fail */ |
| 739 | return -ENODEV; |
| 740 | |
| 741 | /* |
| 742 | * The proper name for the handle property is "hv-handle", but some |
| 743 | * older versions of the hypervisor used "reg". |
| 744 | */ |
| 745 | prop = of_get_property(parent, "hv-handle", &len); |
| 746 | if (!prop) |
| 747 | prop = of_get_property(parent, "reg", &len); |
| 748 | |
| 749 | if (!prop || (len != sizeof(uint32_t))) { |
| 750 | /* This can happen only if the node is malformed */ |
| 751 | of_node_put(parent); |
| 752 | return -ENODEV; |
| 753 | } |
| 754 | |
| 755 | handle = be32_to_cpup(prop); |
| 756 | of_node_put(parent); |
| 757 | |
| 758 | return handle; |
| 759 | } |
| 760 | |
| 761 | /* |
| 762 | * Register a callback for failover events |
| 763 | * |
| 764 | * This function is called by device drivers to register their callback |
| 765 | * functions for fail-over events. |
| 766 | */ |
| 767 | int fsl_hv_failover_register(struct notifier_block *nb) |
| 768 | { |
| 769 | return blocking_notifier_chain_register(&failover_subscribers, nb); |
| 770 | } |
| 771 | EXPORT_SYMBOL(fsl_hv_failover_register); |
| 772 | |
| 773 | /* |
| 774 | * Unregister a callback for failover events |
| 775 | */ |
| 776 | int fsl_hv_failover_unregister(struct notifier_block *nb) |
| 777 | { |
| 778 | return blocking_notifier_chain_unregister(&failover_subscribers, nb); |
| 779 | } |
| 780 | EXPORT_SYMBOL(fsl_hv_failover_unregister); |
| 781 | |
| 782 | /* |
| 783 | * Return TRUE if we're running under FSL hypervisor |
| 784 | * |
| 785 | * This function checks to see if we're running under the Freescale |
| 786 | * hypervisor, and returns zero if we're not, or non-zero if we are. |
| 787 | * |
| 788 | * First, it checks if MSR[GS]==1, which means we're running under some |
| 789 | * hypervisor. Then it checks if there is a hypervisor node in the device |
| 790 | * tree. Currently, that means there needs to be a node in the root called |
| 791 | * "hypervisor" and which has a property named "fsl,hv-version". |
| 792 | */ |
| 793 | static int has_fsl_hypervisor(void) |
| 794 | { |
| 795 | struct device_node *node; |
| 796 | int ret; |
| 797 | |
| 798 | if (!(mfmsr() & MSR_GS)) |
| 799 | return 0; |
| 800 | |
| 801 | node = of_find_node_by_path("/hypervisor"); |
| 802 | if (!node) |
| 803 | return 0; |
| 804 | |
| 805 | ret = of_find_property(node, "fsl,hv-version", NULL) != NULL; |
| 806 | |
| 807 | of_node_put(node); |
| 808 | |
| 809 | return ret; |
| 810 | } |
| 811 | |
| 812 | /* |
| 813 | * Freescale hypervisor management driver init |
| 814 | * |
| 815 | * This function is called when this module is loaded. |
| 816 | * |
| 817 | * Register ourselves as a miscellaneous driver. This will register the |
| 818 | * fops structure and create the right sysfs entries for udev. |
| 819 | */ |
| 820 | static int __init fsl_hypervisor_init(void) |
| 821 | { |
| 822 | struct device_node *np; |
| 823 | struct doorbell_isr *dbisr, *n; |
| 824 | int ret; |
| 825 | |
| 826 | pr_info("Freescale hypervisor management driver\n"); |
| 827 | |
| 828 | if (!has_fsl_hypervisor()) { |
| 829 | pr_info("fsl-hv: no hypervisor found\n"); |
| 830 | return -ENODEV; |
| 831 | } |
| 832 | |
| 833 | ret = misc_register(&fsl_hv_misc_dev); |
| 834 | if (ret) { |
| 835 | pr_err("fsl-hv: cannot register device\n"); |
| 836 | return ret; |
| 837 | } |
| 838 | |
| 839 | INIT_LIST_HEAD(&db_list); |
| 840 | INIT_LIST_HEAD(&isr_list); |
| 841 | |
| 842 | for_each_compatible_node(np, NULL, "epapr,hv-receive-doorbell") { |
| 843 | unsigned int irq; |
| 844 | const uint32_t *handle; |
| 845 | |
| 846 | handle = of_get_property(np, "interrupts", NULL); |
| 847 | irq = irq_of_parse_and_map(np, 0); |
| 848 | if (!handle || (irq == NO_IRQ)) { |
| 849 | pr_err("fsl-hv: no 'interrupts' property in %s node\n", |
| 850 | np->full_name); |
| 851 | continue; |
| 852 | } |
| 853 | |
| 854 | dbisr = kzalloc(sizeof(*dbisr), GFP_KERNEL); |
| 855 | if (!dbisr) |
| 856 | goto out_of_memory; |
| 857 | |
| 858 | dbisr->irq = irq; |
| 859 | dbisr->doorbell = be32_to_cpup(handle); |
| 860 | |
| 861 | if (of_device_is_compatible(np, "fsl,hv-shutdown-doorbell")) { |
| 862 | /* The shutdown doorbell gets its own ISR */ |
| 863 | ret = request_irq(irq, fsl_hv_shutdown_isr, 0, |
| 864 | np->name, NULL); |
| 865 | } else if (of_device_is_compatible(np, |
| 866 | "fsl,hv-state-change-doorbell")) { |
| 867 | /* |
| 868 | * The state change doorbell triggers a notification if |
| 869 | * the state of the managed partition changes to |
| 870 | * "stopped". We need a separate interrupt handler for |
| 871 | * that, and we also need to know the handle of the |
| 872 | * target partition, not just the handle of the |
| 873 | * doorbell. |
| 874 | */ |
| 875 | dbisr->partition = ret = get_parent_handle(np); |
| 876 | if (ret < 0) { |
| 877 | pr_err("fsl-hv: node %s has missing or " |
| 878 | "malformed parent\n", np->full_name); |
| 879 | kfree(dbisr); |
| 880 | continue; |
| 881 | } |
| 882 | ret = request_threaded_irq(irq, fsl_hv_state_change_isr, |
| 883 | fsl_hv_state_change_thread, |
| 884 | 0, np->name, dbisr); |
| 885 | } else |
| 886 | ret = request_irq(irq, fsl_hv_isr, 0, np->name, dbisr); |
| 887 | |
| 888 | if (ret < 0) { |
| 889 | pr_err("fsl-hv: could not request irq %u for node %s\n", |
| 890 | irq, np->full_name); |
| 891 | kfree(dbisr); |
| 892 | continue; |
| 893 | } |
| 894 | |
| 895 | list_add(&dbisr->list, &isr_list); |
| 896 | |
| 897 | pr_info("fsl-hv: registered handler for doorbell %u\n", |
| 898 | dbisr->doorbell); |
| 899 | } |
| 900 | |
| 901 | return 0; |
| 902 | |
| 903 | out_of_memory: |
| 904 | list_for_each_entry_safe(dbisr, n, &isr_list, list) { |
| 905 | free_irq(dbisr->irq, dbisr); |
| 906 | list_del(&dbisr->list); |
| 907 | kfree(dbisr); |
| 908 | } |
| 909 | |
| 910 | misc_deregister(&fsl_hv_misc_dev); |
| 911 | |
| 912 | return -ENOMEM; |
| 913 | } |
| 914 | |
| 915 | /* |
| 916 | * Freescale hypervisor management driver termination |
| 917 | * |
| 918 | * This function is called when this driver is unloaded. |
| 919 | */ |
| 920 | static void __exit fsl_hypervisor_exit(void) |
| 921 | { |
| 922 | struct doorbell_isr *dbisr, *n; |
| 923 | |
| 924 | list_for_each_entry_safe(dbisr, n, &isr_list, list) { |
| 925 | free_irq(dbisr->irq, dbisr); |
| 926 | list_del(&dbisr->list); |
| 927 | kfree(dbisr); |
| 928 | } |
| 929 | |
| 930 | misc_deregister(&fsl_hv_misc_dev); |
| 931 | } |
| 932 | |
| 933 | module_init(fsl_hypervisor_init); |
| 934 | module_exit(fsl_hypervisor_exit); |
| 935 | |
| 936 | MODULE_AUTHOR("Timur Tabi <timur@freescale.com>"); |
| 937 | MODULE_DESCRIPTION("Freescale hypervisor management driver"); |
| 938 | MODULE_LICENSE("GPL v2"); |