KVM: PPC: Implement MMU notifiers for Book3S HV guests

This adds the infrastructure to enable us to page out pages underneath
a Book3S HV guest, on processors that support virtualized partition
memory, that is, POWER7.  Instead of pinning all the guest's pages,
we now look in the host userspace Linux page tables to find the
mapping for a given guest page.  Then, if the userspace Linux PTE
gets invalidated, kvm_unmap_hva() gets called for that address, and
we replace all the guest HPTEs that refer to that page with absent
HPTEs, i.e. ones with the valid bit clear and the HPTE_V_ABSENT bit
set, which will cause an HDSI when the guest tries to access them.
Finally, the page fault handler is extended to reinstantiate the
guest HPTE when the guest tries to access a page which has been paged
out.

Since we can't intercept the guest DSI and ISI interrupts on PPC970,
we still have to pin all the guest pages on PPC970.  We have a new flag,
kvm->arch.using_mmu_notifiers, that indicates whether we can page
guest pages out.  If it is not set, the MMU notifier callbacks do
nothing and everything operates as before.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 45aabb9..86c4191 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -326,19 +326,19 @@
 		break;
 	}
 	/*
-	 * We get this if the guest accesses a page which it thinks
-	 * it has mapped but which is not actually present, because
-	 * it is for an emulated I/O device.
-	 * Any other HDSI interrupt has been handled already.
+	 * We get these next two if the guest accesses a page which it thinks
+	 * it has mapped but which is not actually present, either because
+	 * it is for an emulated I/O device or because the corresponding
+	 * host page has been paged out.  Any other HDSI/HISI interrupts
+	 * have been handled already.
 	 */
 	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
 		r = kvmppc_book3s_hv_page_fault(run, vcpu,
 				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
 		break;
 	case BOOK3S_INTERRUPT_H_INST_STORAGE:
-		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
-					vcpu->arch.shregs.msr & 0x58000000);
-		r = RESUME_GUEST;
+		r = kvmppc_book3s_hv_page_fault(run, vcpu,
+				kvmppc_get_pc(vcpu), 0);
 		break;
 	/*
 	 * This occurs if the guest executes an illegal instruction.
@@ -867,6 +867,7 @@
 	flush_altivec_to_thread(current);
 	flush_vsx_to_thread(current);
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+	vcpu->arch.pgdir = current->mm->pgd;
 
 	do {
 		r = kvmppc_run_vcpu(run, vcpu);
@@ -1090,9 +1091,9 @@
 	unsigned long *phys;
 
 	/* Allocate a slot_phys array */
-	npages = mem->memory_size >> PAGE_SHIFT;
 	phys = kvm->arch.slot_phys[mem->slot];
-	if (!phys) {
+	if (!kvm->arch.using_mmu_notifiers && !phys) {
+		npages = mem->memory_size >> PAGE_SHIFT;
 		phys = vzalloc(npages * sizeof(unsigned long));
 		if (!phys)
 			return -ENOMEM;
@@ -1298,6 +1299,7 @@
 	}
 	kvm->arch.lpcr = lpcr;
 
+	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
 	spin_lock_init(&kvm->arch.slot_phys_lock);
 	return 0;
 }
@@ -1306,8 +1308,9 @@
 {
 	unsigned long i;
 
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		unpin_slot(kvm, i);
+	if (!kvm->arch.using_mmu_notifiers)
+		for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+			unpin_slot(kvm, i);
 
 	if (kvm->arch.rma) {
 		kvm_release_rma(kvm->arch.rma);