KVM: x86: MMU: Encapsulate the type of rmap-chain head in a new struct
New struct kvm_rmap_head makes the code type-safe to some extent.
Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f608e17..8140077 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -214,6 +214,10 @@
};
};
+struct kvm_rmap_head {
+ unsigned long val;
+};
+
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
@@ -231,7 +235,7 @@
bool unsync;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
- unsigned long parent_ptes; /* Reverse mapping for parent_pte */
+ struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
/* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */
unsigned long mmu_valid_gen;
@@ -606,7 +610,7 @@
};
struct kvm_arch_memory_slot {
- unsigned long *rmap[KVM_NR_PAGE_SIZES];
+ struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 276d2f2..d9a6801 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -909,36 +909,35 @@
}
/*
- * Pte mapping structures:
+ * About rmap_head encoding:
*
- * If pte_list bit zero is zero, then pte_list point to the spte.
- *
- * If pte_list bit zero is one, (then pte_list & ~1) points to a struct
+ * If the bit zero of rmap_head->val is clear, then it points to the only spte
+ * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
* pte_list_desc containing more mappings.
- *
- * Returns the number of pte entries before the spte was added or zero if
- * the spte was not added.
- *
+ */
+
+/*
+ * Returns the number of pointers in the rmap chain, not counting the new one.
*/
static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
- unsigned long *pte_list)
+ struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
int i, count = 0;
- if (!*pte_list) {
+ if (!rmap_head->val) {
rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte);
- *pte_list = (unsigned long)spte;
- } else if (!(*pte_list & 1)) {
+ rmap_head->val = (unsigned long)spte;
+ } else if (!(rmap_head->val & 1)) {
rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte);
desc = mmu_alloc_pte_list_desc(vcpu);
- desc->sptes[0] = (u64 *)*pte_list;
+ desc->sptes[0] = (u64 *)rmap_head->val;
desc->sptes[1] = spte;
- *pte_list = (unsigned long)desc | 1;
+ rmap_head->val = (unsigned long)desc | 1;
++count;
} else {
rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
- desc = (struct pte_list_desc *)(*pte_list & ~1ul);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
while (desc->sptes[PTE_LIST_EXT-1] && desc->more) {
desc = desc->more;
count += PTE_LIST_EXT;
@@ -955,8 +954,9 @@
}
static void
-pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
- int i, struct pte_list_desc *prev_desc)
+pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
+ struct pte_list_desc *desc, int i,
+ struct pte_list_desc *prev_desc)
{
int j;
@@ -967,43 +967,43 @@
if (j != 0)
return;
if (!prev_desc && !desc->more)
- *pte_list = (unsigned long)desc->sptes[0];
+ rmap_head->val = (unsigned long)desc->sptes[0];
else
if (prev_desc)
prev_desc->more = desc->more;
else
- *pte_list = (unsigned long)desc->more | 1;
+ rmap_head->val = (unsigned long)desc->more | 1;
mmu_free_pte_list_desc(desc);
}
-static void pte_list_remove(u64 *spte, unsigned long *pte_list)
+static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
struct pte_list_desc *prev_desc;
int i;
- if (!*pte_list) {
+ if (!rmap_head->val) {
printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte);
BUG();
- } else if (!(*pte_list & 1)) {
+ } else if (!(rmap_head->val & 1)) {
rmap_printk("pte_list_remove: %p 1->0\n", spte);
- if ((u64 *)*pte_list != spte) {
+ if ((u64 *)rmap_head->val != spte) {
printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte);
BUG();
}
- *pte_list = 0;
+ rmap_head->val = 0;
} else {
rmap_printk("pte_list_remove: %p many->many\n", spte);
- desc = (struct pte_list_desc *)(*pte_list & ~1ul);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
prev_desc = NULL;
while (desc) {
- for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
+ for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
if (desc->sptes[i] == spte) {
- pte_list_desc_remove_entry(pte_list,
- desc, i,
- prev_desc);
+ pte_list_desc_remove_entry(rmap_head,
+ desc, i, prev_desc);
return;
}
+ }
prev_desc = desc;
desc = desc->more;
}
@@ -1013,18 +1013,18 @@
}
typedef void (*pte_list_walk_fn) (u64 *spte);
-static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
+static void pte_list_walk(struct kvm_rmap_head *rmap_head, pte_list_walk_fn fn)
{
struct pte_list_desc *desc;
int i;
- if (!*pte_list)
+ if (!rmap_head->val)
return;
- if (!(*pte_list & 1))
- return fn((u64 *)*pte_list);
+ if (!(rmap_head->val & 1))
+ return fn((u64 *)rmap_head->val);
- desc = (struct pte_list_desc *)(*pte_list & ~1ul);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
while (desc) {
for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
fn(desc->sptes[i]);
@@ -1032,8 +1032,8 @@
}
}
-static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
- struct kvm_memory_slot *slot)
+static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
+ struct kvm_memory_slot *slot)
{
unsigned long idx;
@@ -1041,10 +1041,8 @@
return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
}
-/*
- * Take gfn and return the reverse mapping to it.
- */
-static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp)
+static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn,
+ struct kvm_mmu_page *sp)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
@@ -1065,24 +1063,24 @@
static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
{
struct kvm_mmu_page *sp;
- unsigned long *rmapp;
+ struct kvm_rmap_head *rmap_head;
sp = page_header(__pa(spte));
kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
- rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
- return pte_list_add(vcpu, spte, rmapp);
+ rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
+ return pte_list_add(vcpu, spte, rmap_head);
}
static void rmap_remove(struct kvm *kvm, u64 *spte)
{
struct kvm_mmu_page *sp;
gfn_t gfn;
- unsigned long *rmapp;
+ struct kvm_rmap_head *rmap_head;
sp = page_header(__pa(spte));
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
- rmapp = gfn_to_rmap(kvm, gfn, sp);
- pte_list_remove(spte, rmapp);
+ rmap_head = gfn_to_rmap(kvm, gfn, sp);
+ pte_list_remove(spte, rmap_head);
}
/*
@@ -1102,17 +1100,18 @@
*
* Returns sptep if found, NULL otherwise.
*/
-static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
+static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
+ struct rmap_iterator *iter)
{
- if (!rmap)
+ if (!rmap_head->val)
return NULL;
- if (!(rmap & 1)) {
+ if (!(rmap_head->val & 1)) {
iter->desc = NULL;
- return (u64 *)rmap;
+ return (u64 *)rmap_head->val;
}
- iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
+ iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
iter->pos = 0;
return iter->desc->sptes[iter->pos];
}
@@ -1146,10 +1145,10 @@
return NULL;
}
-#define for_each_rmap_spte(_rmap_, _iter_, _spte_) \
- for (_spte_ = rmap_get_first(*_rmap_, _iter_); \
- _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \
- _spte_ = rmap_get_next(_iter_))
+#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \
+ for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
+ _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \
+ _spte_ = rmap_get_next(_iter_))
static void drop_spte(struct kvm *kvm, u64 *sptep)
{
@@ -1207,14 +1206,15 @@
return mmu_spte_update(sptep, spte);
}
-static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
+static bool __rmap_write_protect(struct kvm *kvm,
+ struct kvm_rmap_head *rmap_head,
bool pt_protect)
{
u64 *sptep;
struct rmap_iterator iter;
bool flush = false;
- for_each_rmap_spte(rmapp, &iter, sptep)
+ for_each_rmap_spte(rmap_head, &iter, sptep)
flush |= spte_write_protect(kvm, sptep, pt_protect);
return flush;
@@ -1231,13 +1231,13 @@
return mmu_spte_update(sptep, spte);
}
-static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
+static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
struct rmap_iterator iter;
bool flush = false;
- for_each_rmap_spte(rmapp, &iter, sptep)
+ for_each_rmap_spte(rmap_head, &iter, sptep)
flush |= spte_clear_dirty(kvm, sptep);
return flush;
@@ -1254,13 +1254,13 @@
return mmu_spte_update(sptep, spte);
}
-static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp)
+static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
struct rmap_iterator iter;
bool flush = false;
- for_each_rmap_spte(rmapp, &iter, sptep)
+ for_each_rmap_spte(rmap_head, &iter, sptep)
flush |= spte_set_dirty(kvm, sptep);
return flush;
@@ -1280,12 +1280,12 @@
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
- unsigned long *rmapp;
+ struct kvm_rmap_head *rmap_head;
while (mask) {
- rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
- PT_PAGE_TABLE_LEVEL, slot);
- __rmap_write_protect(kvm, rmapp, false);
+ rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+ PT_PAGE_TABLE_LEVEL, slot);
+ __rmap_write_protect(kvm, rmap_head, false);
/* clear the first set bit */
mask &= mask - 1;
@@ -1305,12 +1305,12 @@
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
- unsigned long *rmapp;
+ struct kvm_rmap_head *rmap_head;
while (mask) {
- rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
- PT_PAGE_TABLE_LEVEL, slot);
- __rmap_clear_dirty(kvm, rmapp);
+ rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+ PT_PAGE_TABLE_LEVEL, slot);
+ __rmap_clear_dirty(kvm, rmap_head);
/* clear the first set bit */
mask &= mask - 1;
@@ -1342,27 +1342,27 @@
static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
struct kvm_memory_slot *slot;
- unsigned long *rmapp;
+ struct kvm_rmap_head *rmap_head;
int i;
bool write_protected = false;
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
- rmapp = __gfn_to_rmap(gfn, i, slot);
- write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true);
+ rmap_head = __gfn_to_rmap(gfn, i, slot);
+ write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true);
}
return write_protected;
}
-static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
struct rmap_iterator iter;
bool flush = false;
- while ((sptep = rmap_get_first(*rmapp, &iter))) {
+ while ((sptep = rmap_get_first(rmap_head, &iter))) {
BUG_ON(!(*sptep & PT_PRESENT_MASK));
rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
@@ -1373,14 +1373,14 @@
return flush;
}
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
unsigned long data)
{
- return kvm_zap_rmapp(kvm, rmapp);
+ return kvm_zap_rmapp(kvm, rmap_head);
}
-static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
unsigned long data)
{
@@ -1395,7 +1395,7 @@
new_pfn = pte_pfn(*ptep);
restart:
- for_each_rmap_spte(rmapp, &iter, sptep) {
+ for_each_rmap_spte(rmap_head, &iter, sptep) {
rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
sptep, *sptep, gfn, level);
@@ -1433,11 +1433,11 @@
/* output fields. */
gfn_t gfn;
- unsigned long *rmap;
+ struct kvm_rmap_head *rmap;
int level;
/* private field. */
- unsigned long *end_rmap;
+ struct kvm_rmap_head *end_rmap;
};
static void
@@ -1496,7 +1496,7 @@
unsigned long end,
unsigned long data,
int (*handler)(struct kvm *kvm,
- unsigned long *rmapp,
+ struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot,
gfn_t gfn,
int level,
@@ -1540,7 +1540,8 @@
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
unsigned long data,
- int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ int (*handler)(struct kvm *kvm,
+ struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot,
gfn_t gfn, int level,
unsigned long data))
@@ -1563,7 +1564,7 @@
kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
}
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
unsigned long data)
{
@@ -1573,18 +1574,19 @@
BUG_ON(!shadow_accessed_mask);
- for_each_rmap_spte(rmapp, &iter, sptep)
+ for_each_rmap_spte(rmap_head, &iter, sptep) {
if (*sptep & shadow_accessed_mask) {
young = 1;
clear_bit((ffs(shadow_accessed_mask) - 1),
(unsigned long *)sptep);
}
+ }
trace_kvm_age_page(gfn, level, slot, young);
return young;
}
-static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn,
int level, unsigned long data)
{
@@ -1600,11 +1602,12 @@
if (!shadow_accessed_mask)
goto out;
- for_each_rmap_spte(rmapp, &iter, sptep)
+ for_each_rmap_spte(rmap_head, &iter, sptep) {
if (*sptep & shadow_accessed_mask) {
young = 1;
break;
}
+ }
out:
return young;
}
@@ -1613,14 +1616,14 @@
static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
{
- unsigned long *rmapp;
+ struct kvm_rmap_head *rmap_head;
struct kvm_mmu_page *sp;
sp = page_header(__pa(spte));
- rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
+ rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
- kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
+ kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
@@ -1737,7 +1740,7 @@
* this feature. See the comments in kvm_zap_obsolete_pages().
*/
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
- sp->parent_ptes = 0;
+ sp->parent_ptes.val = 0;
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
kvm_mod_used_mmu_pages(vcpu->kvm, +1);
return sp;
@@ -2277,7 +2280,7 @@
u64 *sptep;
struct rmap_iterator iter;
- while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
+ while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
drop_parent_pte(sp, sptep);
}
@@ -4492,7 +4495,7 @@
}
/* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap);
+typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
/* The caller should hold mmu-lock before calling this function. */
static bool
@@ -4586,9 +4589,10 @@
spin_unlock(&kvm->mmu_lock);
}
-static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp)
+static bool slot_rmap_write_protect(struct kvm *kvm,
+ struct kvm_rmap_head *rmap_head)
{
- return __rmap_write_protect(kvm, rmapp, false);
+ return __rmap_write_protect(kvm, rmap_head, false);
}
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -4624,7 +4628,7 @@
}
static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
- unsigned long *rmapp)
+ struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -4633,7 +4637,7 @@
struct kvm_mmu_page *sp;
restart:
- for_each_rmap_spte(rmapp, &iter, sptep) {
+ for_each_rmap_spte(rmap_head, &iter, sptep) {
sp = page_header(__pa(sptep));
pfn = spte_to_pfn(*sptep);
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 03d518e..f7b0488 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -129,7 +129,7 @@
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
- unsigned long *rmapp;
+ struct kvm_rmap_head *rmap_head;
struct kvm_mmu_page *rev_sp;
struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
@@ -150,8 +150,8 @@
return;
}
- rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
- if (!*rmapp) {
+ rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
+ if (!rmap_head->val) {
if (!__ratelimit(&ratelimit_state))
return;
audit_printk(kvm, "no rmap for writable spte %llx\n",
@@ -192,7 +192,7 @@
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- unsigned long *rmapp;
+ struct kvm_rmap_head *rmap_head;
u64 *sptep;
struct rmap_iterator iter;
struct kvm_memslots *slots;
@@ -203,13 +203,14 @@
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, sp->gfn);
- rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);
+ rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);
- for_each_rmap_spte(rmapp, &iter, sptep)
+ for_each_rmap_spte(rmap_head, &iter, sptep) {
if (is_writable_pte(*sptep))
audit_printk(kvm, "shadow page has writable "
"mappings: gfn %llx role %x\n",
sp->gfn, sp->role.word);
+ }
}
static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)