| /* |
| * KVMGT - the implementation of Intel mediated pass-through framework for KVM |
| * |
| * Copyright(c) 2014-2016 Intel Corporation. All rights reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| * |
| * Authors: |
| * Kevin Tian <kevin.tian@intel.com> |
| * Jike Song <jike.song@intel.com> |
| * Xiaoguang Chen <xiaoguang.chen@intel.com> |
| */ |
| |
| #include <linux/init.h> |
| #include <linux/device.h> |
| #include <linux/mm.h> |
| #include <linux/types.h> |
| #include <linux/list.h> |
| #include <linux/rbtree.h> |
| #include <linux/spinlock.h> |
| #include <linux/eventfd.h> |
| #include <linux/uuid.h> |
| #include <linux/kvm_host.h> |
| #include <linux/vfio.h> |
| |
| #include "i915_drv.h" |
| #include "gvt.h" |
| |
| #if IS_ENABLED(CONFIG_VFIO_MDEV) |
| #include <linux/mdev.h> |
| #else |
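/*
 * Stubs for builds without VFIO mdev support: they report zero pages
 * pinned/unpinned, so the callers below (which expect a return value of
 * 1 per page) treat every pin attempt as a failure.
 */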
| static inline long vfio_pin_pages(struct device *dev, unsigned long *user_pfn, |
| long npage, int prot, unsigned long *phys_pfn) |
| { |
| return 0; |
| } |
| static inline long vfio_unpin_pages(struct device *dev, unsigned long *pfn, |
| long npage) |
| { |
| return 0; |
| } |
| #endif |
| |
| static const struct intel_gvt_ops *intel_gvt_ops; |
| |
| /* helper macros copied from vfio-pci */ |
| #define VFIO_PCI_OFFSET_SHIFT 40 |
#define VFIO_PCI_OFFSET_TO_INDEX(off)	((off) >> VFIO_PCI_OFFSET_SHIFT)
| #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) |
| #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) |
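/*
 * As in vfio-pci, the region index is packed into the bits above the
 * 40-bit offset, so each region gets 1TB of addressable space.  For
 * example, an access at offset X within region 2 arrives on the file as
 * VFIO_PCI_INDEX_TO_OFFSET(2) | (X & VFIO_PCI_OFFSET_MASK).
 */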
| |
| struct vfio_region { |
| u32 type; |
| u32 subtype; |
| size_t size; |
| u32 flags; |
| }; |
| |
| struct kvmgt_pgfn { |
| gfn_t gfn; |
| struct hlist_node hnode; |
| }; |
| |
| struct kvmgt_guest_info { |
| struct kvm *kvm; |
| struct intel_vgpu *vgpu; |
| struct kvm_page_track_notifier_node track_node; |
| #define NR_BKT (1 << 18) |
| struct hlist_head ptable[NR_BKT]; |
| #undef NR_BKT |
| }; |
| |
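/*
 * One entry of the per-vGPU gfn->pfn cache: an rb-tree node keyed by
 * guest frame number, recording the host pfn that vfio_pin_pages()
 * returned for it.  Entries stay pinned until gvt_cache_remove() or
 * gvt_cache_destroy() drops them.
 */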
| struct gvt_dma { |
| struct rb_node node; |
| gfn_t gfn; |
| kvm_pfn_t pfn; |
| }; |
| |
static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.cache.rb_node;

	while (node) {
		struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else
			return itr;
	}

	return NULL;
}
| |
| static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) |
| { |
| struct gvt_dma *entry; |
| |
| mutex_lock(&vgpu->vdev.cache_lock); |
| entry = __gvt_cache_find(vgpu, gfn); |
| mutex_unlock(&vgpu->vdev.cache_lock); |
| |
| return entry == NULL ? 0 : entry->pfn; |
| } |
| |
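/*
 * Insert a gfn->pfn mapping using the usual rb-tree insertion pattern:
 * walk down to the leaf link where the new node belongs, then link and
 * rebalance.  A duplicate gfn is silently dropped, since the cached pfn
 * for it is already valid.
 */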
| static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn) |
| { |
| struct gvt_dma *new, *itr; |
| struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL; |
| |
| new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); |
| if (!new) |
| return; |
| |
| new->gfn = gfn; |
| new->pfn = pfn; |
| |
| mutex_lock(&vgpu->vdev.cache_lock); |
| while (*link) { |
| parent = *link; |
| itr = rb_entry(parent, struct gvt_dma, node); |
| |
| if (gfn == itr->gfn) |
| goto out; |
| else if (gfn < itr->gfn) |
| link = &parent->rb_left; |
| else |
| link = &parent->rb_right; |
| } |
| |
| rb_link_node(&new->node, parent, link); |
| rb_insert_color(&new->node, &vgpu->vdev.cache); |
| mutex_unlock(&vgpu->vdev.cache_lock); |
| return; |
| |
| out: |
| mutex_unlock(&vgpu->vdev.cache_lock); |
| kfree(new); |
| } |
| |
| static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, |
| struct gvt_dma *entry) |
| { |
| rb_erase(&entry->node, &vgpu->vdev.cache); |
| kfree(entry); |
| } |
| |
| static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn) |
| { |
| struct device *dev = vgpu->vdev.mdev; |
| struct gvt_dma *this; |
| unsigned long pfn; |
| |
| mutex_lock(&vgpu->vdev.cache_lock); |
| this = __gvt_cache_find(vgpu, gfn); |
| if (!this) { |
| mutex_unlock(&vgpu->vdev.cache_lock); |
| return; |
| } |
| |
| pfn = this->pfn; |
	WARN_ON(vfio_unpin_pages(dev, &pfn, 1) != 1);
| __gvt_cache_remove_entry(vgpu, this); |
| mutex_unlock(&vgpu->vdev.cache_lock); |
| } |
| |
| static void gvt_cache_init(struct intel_vgpu *vgpu) |
| { |
| vgpu->vdev.cache = RB_ROOT; |
| mutex_init(&vgpu->vdev.cache_lock); |
| } |
| |
| static void gvt_cache_destroy(struct intel_vgpu *vgpu) |
| { |
| struct gvt_dma *dma; |
| struct rb_node *node = NULL; |
| struct device *dev = vgpu->vdev.mdev; |
| unsigned long pfn; |
| |
| mutex_lock(&vgpu->vdev.cache_lock); |
| while ((node = rb_first(&vgpu->vdev.cache))) { |
| dma = rb_entry(node, struct gvt_dma, node); |
| pfn = dma->pfn; |
| |
| vfio_unpin_pages(dev, &pfn, 1); |
| __gvt_cache_remove_entry(vgpu, dma); |
| } |
| mutex_unlock(&vgpu->vdev.cache_lock); |
| } |
| |
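/*
 * The type name handed in is expected to look like "<driver>-<type>"
 * (e.g. something like "i915-GVTg_V4_1"), so skip past the driver
 * prefix and the separating dash before comparing against the vGPU
 * type table.
 */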
| static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt, |
| const char *name) |
| { |
| int i; |
| struct intel_vgpu_type *t; |
| const char *driver_name = dev_driver_string( |
| &gvt->dev_priv->drm.pdev->dev); |
| |
| for (i = 0; i < gvt->num_types; i++) { |
| t = &gvt->types[i]; |
| if (!strncmp(t->name, name + strlen(driver_name) + 1, |
| sizeof(t->name))) |
| return t; |
| } |
| |
| return NULL; |
| } |
| |
| static struct attribute *type_attrs[] = { |
| NULL, |
| }; |
| |
| static struct attribute_group *intel_vgpu_type_groups[] = { |
| [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, |
| }; |
| |
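/*
 * Build one sysfs attribute group per vGPU type, named after the type.
 * These are the groups a future mdev registration would expose under
 * the parent device's mdev_supported_types directory.
 */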
| static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) |
| { |
| int i, j; |
| struct intel_vgpu_type *type; |
| struct attribute_group *group; |
| |
| for (i = 0; i < gvt->num_types; i++) { |
| type = &gvt->types[i]; |
| |
| group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); |
| if (WARN_ON(!group)) |
| goto unwind; |
| |
| group->name = type->name; |
| group->attrs = type_attrs; |
| intel_vgpu_type_groups[i] = group; |
| } |
| |
| return true; |
| |
| unwind: |
| for (j = 0; j < i; j++) { |
| group = intel_vgpu_type_groups[j]; |
| kfree(group); |
| } |
| |
| return false; |
| } |
| |
| static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) |
| { |
| int i; |
| struct attribute_group *group; |
| |
| for (i = 0; i < gvt->num_types; i++) { |
| group = intel_vgpu_type_groups[i]; |
| kfree(group); |
| } |
| } |
| |
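/*
 * The protect table is a hash (keyed by gfn) of the guest pages KVMGT
 * has asked KVM to write-protect.  It lets the page-track callbacks
 * below decide quickly whether a trapped write belongs to us.
 */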
| static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) |
| { |
| hash_init(info->ptable); |
| } |
| |
| static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info) |
| { |
| struct kvmgt_pgfn *p; |
| struct hlist_node *tmp; |
| int i; |
| |
| hash_for_each_safe(info->ptable, i, tmp, p, hnode) { |
| hash_del(&p->hnode); |
| kfree(p); |
| } |
| } |
| |
| static struct kvmgt_pgfn * |
| __kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn) |
| { |
| struct kvmgt_pgfn *p, *res = NULL; |
| |
| hash_for_each_possible(info->ptable, p, hnode, gfn) { |
| if (gfn == p->gfn) { |
| res = p; |
| break; |
| } |
| } |
| |
| return res; |
| } |
| |
| static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info, |
| gfn_t gfn) |
| { |
| struct kvmgt_pgfn *p; |
| |
| p = __kvmgt_protect_table_find(info, gfn); |
| return !!p; |
| } |
| |
| static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn) |
| { |
| struct kvmgt_pgfn *p; |
| |
| if (kvmgt_gfn_is_write_protected(info, gfn)) |
| return; |
| |
| p = kmalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC); |
| if (WARN(!p, "gfn: 0x%llx\n", gfn)) |
| return; |
| |
| p->gfn = gfn; |
| hash_add(info->ptable, &p->hnode, gfn); |
| } |
| |
| static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, |
| gfn_t gfn) |
| { |
| struct kvmgt_pgfn *p; |
| |
| p = __kvmgt_protect_table_find(info, gfn); |
| if (p) { |
| hash_del(&p->hnode); |
| kfree(p); |
| } |
| } |
| |
| static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) |
| { |
| if (!intel_gvt_init_vgpu_type_groups(gvt)) |
| return -EFAULT; |
| |
| intel_gvt_ops = ops; |
| |
| /* MDEV is not yet available */ |
| return -ENODEV; |
| } |
| |
| static void kvmgt_host_exit(struct device *dev, void *gvt) |
| { |
| intel_gvt_cleanup_vgpu_type_groups(gvt); |
| } |
| |
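/*
 * Write-protect a guest page via KVM's page-track framework.  The SRCU
 * read lock keeps the memslot stable while mmu_lock serializes against
 * the tracking machinery; the protect table makes the operation
 * idempotent.
 */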
| static int kvmgt_write_protect_add(unsigned long handle, u64 gfn) |
| { |
| struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; |
| struct kvm *kvm = info->kvm; |
| struct kvm_memory_slot *slot; |
| int idx; |
| |
| idx = srcu_read_lock(&kvm->srcu); |
| slot = gfn_to_memslot(kvm, gfn); |
| |
| spin_lock(&kvm->mmu_lock); |
| |
| if (kvmgt_gfn_is_write_protected(info, gfn)) |
| goto out; |
| |
| kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); |
| kvmgt_protect_table_add(info, gfn); |
| |
| out: |
| spin_unlock(&kvm->mmu_lock); |
| srcu_read_unlock(&kvm->srcu, idx); |
| return 0; |
| } |
| |
| static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn) |
| { |
| struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; |
| struct kvm *kvm = info->kvm; |
| struct kvm_memory_slot *slot; |
| int idx; |
| |
| idx = srcu_read_lock(&kvm->srcu); |
| slot = gfn_to_memslot(kvm, gfn); |
| |
| spin_lock(&kvm->mmu_lock); |
| |
| if (!kvmgt_gfn_is_write_protected(info, gfn)) |
| goto out; |
| |
| kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); |
| kvmgt_protect_table_del(info, gfn); |
| |
| out: |
| spin_unlock(&kvm->mmu_lock); |
| srcu_read_unlock(&kvm->srcu, idx); |
| return 0; |
| } |
| |
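/*
 * Page-track notifier callback, invoked by KVM when a vCPU writes to a
 * tracked page.  If the gfn is one of ours, forward the write to the
 * GVT-g device model for emulation.
 */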
| static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
| const u8 *val, int len, |
| struct kvm_page_track_notifier_node *node) |
| { |
| struct kvmgt_guest_info *info = container_of(node, |
| struct kvmgt_guest_info, track_node); |
| |
| if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa))) |
| intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa, |
| (void *)val, len); |
| } |
| |
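/*
 * Memslot flush callback: when KVM drops a memslot, remove any
 * write-protection we had installed on pages inside it, so the protect
 * table does not go stale.
 */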
| static void kvmgt_page_track_flush_slot(struct kvm *kvm, |
| struct kvm_memory_slot *slot, |
| struct kvm_page_track_notifier_node *node) |
| { |
| int i; |
| gfn_t gfn; |
| struct kvmgt_guest_info *info = container_of(node, |
| struct kvmgt_guest_info, track_node); |
| |
| spin_lock(&kvm->mmu_lock); |
| for (i = 0; i < slot->npages; i++) { |
| gfn = slot->base_gfn + i; |
| if (kvmgt_gfn_is_write_protected(info, gfn)) { |
| kvm_slot_page_track_remove_page(kvm, slot, gfn, |
| KVM_PAGE_TRACK_WRITE); |
| kvmgt_protect_table_del(info, gfn); |
| } |
| } |
| spin_unlock(&kvm->mmu_lock); |
| } |
| |
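/*
 * Detect whether we are running inside a KVM guest by reading the
 * hypervisor signature: CPUID leaf KVM_CPUID_SIGNATURE (0x40000000)
 * returns "KVMKVMKVM\0\0\0" spread across EBX, ECX and EDX.
 */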
| static bool kvmgt_check_guest(void) |
| { |
| unsigned int eax, ebx, ecx, edx; |
| char s[12]; |
| unsigned int *i; |
| |
| eax = KVM_CPUID_SIGNATURE; |
| ebx = ecx = edx = 0; |
| |
| asm volatile ("cpuid" |
| : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) |
| : |
| : "cc", "memory"); |
| i = (unsigned int *)s; |
| i[0] = ebx; |
| i[1] = ecx; |
| i[2] = edx; |
| |
| return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM")); |
| } |
| |
| /** |
| * NOTE: |
| * It's actually impossible to check if we are running in KVM host, |
| * since the "KVM host" is simply native. So we only dectect guest here. |
| */ |
| static int kvmgt_detect_host(void) |
| { |
| #ifdef CONFIG_INTEL_IOMMU |
| if (intel_iommu_gfx_mapped) { |
| gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n"); |
| return -ENODEV; |
| } |
| #endif |
| return kvmgt_check_guest() ? -ENODEV : 0; |
| } |
| |
| static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle) |
| { |
| /* nothing to do here */ |
| return 0; |
| } |
| |
| static void kvmgt_detach_vgpu(unsigned long handle) |
| { |
| /* nothing to do here */ |
| } |
| |
| static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) |
| { |
| struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; |
| struct intel_vgpu *vgpu = info->vgpu; |
| |
| if (vgpu->vdev.msi_trigger) |
| return eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1; |
| |
| return false; |
| } |
| |
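/*
 * Translate a guest frame to a host pfn, pinning on demand: check the
 * per-vGPU cache first, and only on a miss ask VFIO to pin the page
 * with read/write permission, then cache the result.
 */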
| static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) |
| { |
| unsigned long pfn; |
| struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; |
| int rc; |
| |
| pfn = gvt_cache_find(info->vgpu, gfn); |
| if (pfn != 0) |
| return pfn; |
| |
| rc = vfio_pin_pages(info->vgpu->vdev.mdev, &gfn, 1, |
| IOMMU_READ | IOMMU_WRITE, &pfn); |
| if (rc != 1) { |
| gvt_err("vfio_pin_pages failed for gfn: 0x%lx\n", gfn); |
| return 0; |
| } |
| |
| gvt_cache_add(info->vgpu, gfn, pfn); |
| return pfn; |
| } |
| |
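/*
 * Turn a guest physical address into a host virtual one.  Note that
 * pfn_to_kaddr() assumes the pinned page sits in the kernel direct map
 * (i.e. is not highmem), which holds on x86-64.
 */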
| static void *kvmgt_gpa_to_hva(unsigned long handle, unsigned long gpa) |
| { |
| unsigned long pfn; |
| gfn_t gfn = gpa_to_gfn(gpa); |
| |
| pfn = kvmgt_gfn_to_pfn(handle, gfn); |
| if (!pfn) |
| return NULL; |
| |
| return (char *)pfn_to_kaddr(pfn) + offset_in_page(gpa); |
| } |
| |
| static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, |
| void *buf, unsigned long len, bool write) |
| { |
| void *hva = NULL; |
| |
| hva = kvmgt_gpa_to_hva(handle, gpa); |
| if (!hva) |
| return -EFAULT; |
| |
| if (write) |
| memcpy(hva, buf, len); |
| else |
| memcpy(buf, hva, len); |
| |
| return 0; |
| } |
| |
| static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa, |
| void *buf, unsigned long len) |
| { |
| return kvmgt_rw_gpa(handle, gpa, buf, len, false); |
| } |
| |
| static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa, |
| void *buf, unsigned long len) |
| { |
| return kvmgt_rw_gpa(handle, gpa, buf, len, true); |
| } |
| |
| static unsigned long kvmgt_virt_to_pfn(void *addr) |
| { |
| return PFN_DOWN(__pa(addr)); |
| } |
| |
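/*
 * The mediated pass-through (MPT) operations that hook KVMGT into the
 * GVT-g core: host detection and setup, MSI injection, address
 * translation, guest memory access, and write-protection control.
 */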
| struct intel_gvt_mpt kvmgt_mpt = { |
| .detect_host = kvmgt_detect_host, |
| .host_init = kvmgt_host_init, |
| .host_exit = kvmgt_host_exit, |
| .attach_vgpu = kvmgt_attach_vgpu, |
| .detach_vgpu = kvmgt_detach_vgpu, |
| .inject_msi = kvmgt_inject_msi, |
| .from_virt_to_mfn = kvmgt_virt_to_pfn, |
| .set_wp_page = kvmgt_write_protect_add, |
| .unset_wp_page = kvmgt_write_protect_remove, |
| .read_gpa = kvmgt_read_gpa, |
| .write_gpa = kvmgt_write_gpa, |
| .gfn_to_mfn = kvmgt_gfn_to_pfn, |
| }; |
| EXPORT_SYMBOL_GPL(kvmgt_mpt); |
| |
| static int __init kvmgt_init(void) |
| { |
| return 0; |
| } |
| |
| static void __exit kvmgt_exit(void) |
| { |
| } |
| |
| module_init(kvmgt_init); |
| module_exit(kvmgt_exit); |
| |
| MODULE_LICENSE("GPL and additional rights"); |
| MODULE_AUTHOR("Intel Corporation"); |