KVM: MMU: Optimize pte permission checks
The permission checks in walk_addr_generic() are a maze of branchy code
that is executed four times per lookup. They depend on the type of access,
efer.nxe, cr0.wp, cr4.smep, and, in the near future, cr4.smap.
Optimize this away by precalculating all variants and storing them in a
bitmap. The bitmap is recalculated when rarely-changing variables change
(cr0, cr4) and is indexed by the often-changing variables (page fault error
code, pte access permissions).
The permission check is moved to the end of the loop; otherwise an SMEP
fault could be falsely reported when PDE.U=1 but PTE.U=0 (noted by
Xiao Guangrong).
The result is short, branch-free code.
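
To make the mechanism concrete, here is a minimal standalone sketch of the
same idea, using simplified stand-ins for the ACC_ and PFERR_ masks and
plain globals in place of struct kvm_mmu state (illustration only, not the
patched kernel code itself):

/*
 * Standalone illustration only: simplified stand-ins for the kernel's
 * ACC_ and PFERR_ masks, with plain globals instead of struct kvm_mmu.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ACC_EXEC_MASK		1u
#define ACC_WRITE_MASK		2u
#define ACC_USER_MASK		4u

#define PFERR_WRITE_MASK	(1u << 1)
#define PFERR_USER_MASK		(1u << 2)
#define PFERR_FETCH_MASK	(1u << 4)

/* One byte per error code (present bit stripped), one bit per pte access. */
static uint8_t permissions[16];

/* Slow path: recompute whenever the cr0/cr4/efer-derived inputs change. */
static void update_permission_bitmask(bool nx, bool cr0_wp, bool smep)
{
	unsigned bit, byte, pfec;

	for (byte = 0; byte < 16; ++byte) {
		uint8_t map = 0;
		pfec = byte << 1;
		bool wf = pfec & PFERR_WRITE_MASK;
		bool uf = pfec & PFERR_USER_MASK;
		bool ff = pfec & PFERR_FETCH_MASK;

		for (bit = 0; bit < 8; ++bit) {
			bool x = bit & ACC_EXEC_MASK;
			bool w = bit & ACC_WRITE_MASK;
			bool u = bit & ACC_USER_MASK;

			x |= !nx;                 /* no NX: fetches never fault here */
			w |= !cr0_wp && !uf;      /* supervisor may write r/o pages if !cr0.wp */
			x &= !(smep && u && !uf); /* smep: no supervisor fetch of user pages */

			bool fault = (ff && !x) || (uf && !u) || (wf && !w);
			map |= fault << bit;
		}
		permissions[byte] = map;
	}
}

/* Fast path: a single table lookup, no branches. */
static bool permission_fault(unsigned pte_access, unsigned pfec)
{
	return (permissions[pfec >> 1] >> pte_access) & 1;
}

int main(void)
{
	update_permission_bitmask(true, true, true);

	/* User fetch from a user-executable page: prints 0 (no fault). */
	printf("%d\n", permission_fault(ACC_EXEC_MASK | ACC_USER_MASK,
					PFERR_FETCH_MASK | PFERR_USER_MASK));
	/* Supervisor fetch from the same page: prints 1 (SMEP fault). */
	printf("%d\n", permission_fault(ACC_EXEC_MASK | ACC_USER_MASK,
					PFERR_FETCH_MASK));
	return 0;
}

With nxe, cr0.wp and cr4.smep all set, the first lookup (a user fetch from a
user-executable page) prints 0 and the second (a supervisor fetch from the
same page) prints 1, i.e. the SMEP rule is folded into the precomputed table.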
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f297a2c..9c61889 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3516,6 +3516,38 @@
}
}
+static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	unsigned bit, byte, pfec;
+	u8 map;
+	bool fault, x, w, u, wf, uf, ff, smep;
+
+	smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
+		pfec = byte << 1;
+		map = 0;
+		wf = pfec & PFERR_WRITE_MASK;
+		uf = pfec & PFERR_USER_MASK;
+		ff = pfec & PFERR_FETCH_MASK;
+		for (bit = 0; bit < 8; ++bit) {
+			x = bit & ACC_EXEC_MASK;
+			w = bit & ACC_WRITE_MASK;
+			u = bit & ACC_USER_MASK;
+
+			/* Not really needed: !nx will cause pte.nx to fault */
+			x |= !mmu->nx;
+			/* Allow supervisor writes if !cr0.wp */
+			w |= !is_write_protection(vcpu) && !uf;
+			/* Disallow supervisor fetches of user code if cr4.smep */
+			x &= !(smep && u && !uf);
+
+			fault = (ff && !x) || (uf && !u) || (wf && !w);
+			map |= fault << bit;
+		}
+		mmu->permissions[byte] = map;
+	}
+}
+
static int paging64_init_context_common(struct kvm_vcpu *vcpu,
struct kvm_mmu *context,
int level)
@@ -3524,6 +3556,7 @@
context->root_level = level;
reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
ASSERT(is_pae(vcpu));
context->new_cr3 = paging_new_cr3;
@@ -3552,6 +3585,7 @@
context->root_level = PT32_ROOT_LEVEL;
reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
context->new_cr3 = paging_new_cr3;
context->page_fault = paging32_page_fault;
@@ -3612,6 +3646,8 @@
context->gva_to_gpa = paging32_gva_to_gpa;
}
+	update_permission_bitmask(vcpu, context);
+
return 0;
}
@@ -3687,6 +3723,8 @@
g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
}
+	update_permission_bitmask(vcpu, g_context);
+
return 0;
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2832081..5846607 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -89,17 +89,14 @@
return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
}
-static inline bool check_write_user_access(struct kvm_vcpu *vcpu,
-					   bool write_fault, bool user_fault,
-					   unsigned long pte)
+/*
+ * Will a fault with a given page-fault error code (pfec) cause a permission
+ * fault with the given access (in ACC_* format)?
+ */
+static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
+				    unsigned pfec)
{
-	if (unlikely(write_fault && !is_writable_pte(pte)
-		     && (user_fault || is_write_protection(vcpu))))
-		return false;
-
-	if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-		return false;
-
-	return true;
+	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
}
+
#endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 35a05dd..8f6c59f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -169,7 +169,7 @@
pt_element_t pte;
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
-	unsigned index, pt_access, uninitialized_var(pte_access);
+	unsigned index, pt_access, pte_access;
gpa_t pte_gpa;
bool eperm, last_gpte;
int offset;
@@ -237,24 +237,9 @@
goto error;
}
-		if (!check_write_user_access(vcpu, write_fault, user_fault,
-					     pte))
-			eperm = true;
-
-#if PTTYPE == 64
-		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
-			eperm = true;
-#endif
+		pte_access = pt_access & gpte_access(vcpu, pte);
last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
-		if (last_gpte) {
-			pte_access = pt_access & gpte_access(vcpu, pte);
-			/* check if the kernel is fetching from user page */
-			if (unlikely(pte_access & PT_USER_MASK) &&
-			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
-				if (fetch_fault && !user_fault)
-					eperm = true;
-		}
walker->ptes[walker->level - 1] = pte;
@@ -284,10 +269,11 @@
break;
}
-		pt_access &= gpte_access(vcpu, pte);
+		pt_access &= pte_access;
--walker->level;
}
+	eperm |= permission_fault(mmu, pte_access, access);
if (unlikely(eperm)) {
errcode |= PFERR_PRESENT_MASK;
goto error;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19047ea..497226e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3672,20 +3672,17 @@
gpa_t *gpa, struct x86_exception *exception,
bool write)
{
-	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
+		| (write ? PFERR_WRITE_MASK : 0);
-	if (vcpu_match_mmio_gva(vcpu, gva) &&
-	    check_write_user_access(vcpu, write, access,
-				    vcpu->arch.access)) {
+	if (vcpu_match_mmio_gva(vcpu, gva)
+	    && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
(gva & (PAGE_SIZE - 1));
trace_vcpu_match_mmio(gva, *gpa, write, false);
return 1;
}
-	if (write)
-		access |= PFERR_WRITE_MASK;
-
*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
if (*gpa == UNMAPPED_GVA)