KVM, pkeys: add pkeys support for permission_fault
Protection keys define a new 4-bit protection key field (PKEY) in bits
62:59 of leaf entries of the page tables, the PKEY is an index to PKRU
register(16 domains), every domain has 2 bits(write disable bit, access
disable bit).
Static logic has been produced in update_pkru_bitmask, dynamic logic need
read pkey from page table entries, get pkru value, and deduce the correct
result.
[ Huaitong: Xiao helps to modify many sections. ]
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 8b2b3df..b70df72 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -10,10 +10,11 @@
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
#define PT_WRITABLE_SHIFT 1
+#define PT_USER_SHIFT 2
#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
+#define PT_USER_MASK (1ULL << PT_USER_SHIFT)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_SHIFT 5
@@ -149,7 +150,8 @@
* if the access faults.
*/
static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- unsigned pte_access, unsigned pfec)
+ unsigned pte_access, unsigned pte_pkey,
+ unsigned pfec)
{
int cpl = kvm_x86_ops->get_cpl(vcpu);
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -170,11 +172,32 @@
unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+ bool fault = (mmu->permissions[index] >> pte_access) & 1;
- WARN_ON(pfec & PFERR_RSVD_MASK);
-
+ WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
pfec |= PFERR_PRESENT_MASK;
- return -((mmu->permissions[index] >> pte_access) & 1) & pfec;
+
+ if (unlikely(mmu->pkru_mask)) {
+ u32 pkru_bits, offset;
+
+ /*
+ * PKRU defines 32 bits, there are 16 domains and 2
+ * attribute bits per domain in pkru. pte_pkey is the
+ * index of the protection domain, so pte_pkey * 2 is
+ * is the index of the first bit for the domain.
+ */
+ pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+
+ /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
+ offset = pfec - 1 +
+ ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
+
+ pkru_bits &= mmu->pkru_mask >> offset;
+ pfec |= -pkru_bits & PFERR_PK_MASK;
+ fault |= (pkru_bits != 0);
+ }
+
+ return -(uint32_t)fault & pfec;
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);