KVM: MMU: Partial swapping of guest memory

This allows guest memory to be swapped.  Pages that are currently mapped
via shadow page tables are pinned in memory, but all other pages can be
swapped freely.

The patch makes gfn_to_page() elevate the page's reference count, and
introduces kvm_release_page(), which pairs with it and drops the
reference once the caller is done with the page.
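
As a reference for callers, a minimal sketch of the intended
get/release pattern follows (illustrative only: the helper name below
is made up, and the calls simply mirror the final paging_tmpl.h hunk):

  static void example_touch_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn)
  {
          struct page *page;
          void *va;

          /* gfn_to_page() takes a reference, pinning the page */
          page = gfn_to_page(vcpu->kvm, gfn);
          va = kmap_atomic(page, KM_USER0);
          /* ... access guest data through va ... */
          kunmap_atomic(va, KM_USER0);
          /* drop the reference; the page becomes swappable again */
          kvm_release_page(page);
  }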

Signed-off-by: Izik Eidus <izike@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 572e5b6..0f0266a 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -72,7 +72,7 @@
 			    struct kvm_vcpu *vcpu, gva_t addr,
 			    int write_fault, int user_fault, int fetch_fault)
 {
-	struct page *page;
+	struct page *page = NULL;
 	pt_element_t *table;
 	pt_element_t pte;
 	gfn_t table_gfn;
@@ -149,6 +149,7 @@
 
 		walker->inherited_ar &= pte;
 		--walker->level;
+		kvm_release_page(page);
 	}
 
 	if (write_fault && !is_dirty_pte(pte)) {
@@ -162,6 +163,7 @@
 		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
 	}
 
+	kvm_release_page(page);
 	walker->pte = pte;
 	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte);
 	return 1;
@@ -180,6 +182,8 @@
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
+	if (page)
+		kvm_release_page(page);
 	return 0;
 }
 
@@ -223,6 +227,8 @@
 	if (is_error_hpa(paddr)) {
 		set_shadow_pte(shadow_pte,
 			       shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
+		kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+					     >> PAGE_SHIFT));
 		return;
 	}
 
@@ -260,9 +266,20 @@
 	pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
 	set_shadow_pte(shadow_pte, spte);
 	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
-	if (!was_rmapped)
+	if (!was_rmapped) {
 		rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK)
 			 >> PAGE_SHIFT);
+		if (!is_rmap_pte(*shadow_pte)) {
+			struct page *page;
+
+			page = pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+					   >> PAGE_SHIFT);
+			kvm_release_page(page);
+		}
+	}
+	else
+		kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+				 >> PAGE_SHIFT));
 	if (!ptwrite || !*ptwrite)
 		vcpu->last_pte_updated = shadow_pte;
 }
@@ -486,19 +503,22 @@
 {
 	int i;
 	pt_element_t *gpt;
+	struct page *page;
 
 	if (sp->role.metaphysical || PTTYPE == 32) {
 		nonpaging_prefetch_page(vcpu, sp);
 		return;
 	}
 
-	gpt = kmap_atomic(gfn_to_page(vcpu->kvm, sp->gfn), KM_USER0);
+	page = gfn_to_page(vcpu->kvm, sp->gfn);
+	gpt = kmap_atomic(page, KM_USER0);
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 		if (is_present_pte(gpt[i]))
 			sp->spt[i] = shadow_trap_nonpresent_pte;
 		else
 			sp->spt[i] = shadow_notrap_nonpresent_pte;
 	kunmap_atomic(gpt, KM_USER0);
+	kvm_release_page(page);
 }
 
 #undef pt_element_t