kvm: x86: nVMX: maintain internal copy of current VMCS KVM maintains L1's current VMCS in guest memory, at the guest physical page identified by the argument to VMPTRLD. This makes hairy time-of-check to time-of-use bugs possible,as VCPUs can be writing the the VMCS page in memory while KVM is emulating VMLAUNCH and VMRESUME. The spec documents that writing to the VMCS page while it is loaded is "undefined". Therefore it is reasonable to load the entire VMCS into an internal cache during VMPTRLD and ignore writes to the VMCS page -- the guest should be using VMREAD and VMWRITE to access the current VMCS. To adhere to the spec, KVM should flush the current VMCS during VMPTRLD, and the target VMCS during VMCLEAR (as given by the operand to VMCLEAR). Since this implementation of VMCS caching only maintains the the current VMCS, VMCLEAR will only do a flush if the operand to VMCLEAR is the current VMCS pointer. KVM will also flush during VMXOFF, which is not mandated by the spec, but also not in conflict with the spec. Signed-off-by: David Matlack <dmatlack@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

commit: 4f2777bc97974b0df9276ee9a85155a9e27a5282 [log] [tgz]
author: David Matlack <dmatlack@google.com> Wed Jul 13 17:16:37 2016 -0700
committer: Radim Krčmář <rkrcmar@redhat.com> Mon Aug 01 14:49:05 2016 +0200
tree: 44b8579722765e29f3a69ff09d2950165f94cfb4
parent: 601045bff74569374b3d5b34dbb9bad71adf3aa5 [diff] [blame]
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b61cdad..151d261 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c

@@ -405,6 +405,12 @@
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
+	/*
+	 * Cache of the guest's VMCS, existing outside of guest memory.
+	 * Loaded from guest memory during VMPTRLD. Flushed to guest
+	 * memory during VMXOFF, VMCLEAR, VMPTRLD.
+	 */
+	struct vmcs12 *cached_vmcs12;
 	struct vmcs *current_shadow_vmcs;
 	/*
 	 * Indicates if the shadow vmcs must be updated with the
@@ -858,7 +864,7 @@
 
 static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
 {
-	return to_vmx(vcpu)->nested.current_vmcs12;
+	return to_vmx(vcpu)->nested.cached_vmcs12;
 }
 
 static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
@@ -6987,10 +6993,16 @@
 		return 1;
 	}
 
+	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+	if (!vmx->nested.cached_vmcs12)
+		return -ENOMEM;
+
 	if (enable_shadow_vmcs) {
 		shadow_vmcs = alloc_vmcs();
-		if (!shadow_vmcs)
+		if (!shadow_vmcs) {
+			kfree(vmx->nested.cached_vmcs12);
 			return -ENOMEM;
+		}
 		/* mark vmcs as shadow */
 		shadow_vmcs->revision_id |= (1u << 31);
 		/* init shadow vmcs */
@@ -7061,6 +7073,11 @@
 		vmcs_write64(VMCS_LINK_POINTER, -1ull);
 	}
 	vmx->nested.posted_intr_nv = -1;
+
+	/* Flush VMCS12 to guest memory */
+	memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
+	       VMCS12_SIZE);
+
 	kunmap(vmx->nested.current_vmcs12_page);
 	nested_release_page(vmx->nested.current_vmcs12_page);
 	vmx->nested.current_vmptr = -1ull;
@@ -7081,6 +7098,7 @@
 	nested_release_vmcs12(vmx);
 	if (enable_shadow_vmcs)
 		free_vmcs(vmx->nested.current_shadow_vmcs);
+	kfree(vmx->nested.cached_vmcs12);
 	/* Unpin physical memory we referred to in current vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		nested_release_page(vmx->nested.apic_access_page);
@@ -7484,6 +7502,13 @@
 		vmx->nested.current_vmptr = vmptr;
 		vmx->nested.current_vmcs12 = new_vmcs12;
 		vmx->nested.current_vmcs12_page = page;
+		/*
+		 * Load VMCS12 from guest memory since it is not already
+		 * cached.
+		 */
+		memcpy(vmx->nested.cached_vmcs12,
+		       vmx->nested.current_vmcs12, VMCS12_SIZE);
+
 		if (enable_shadow_vmcs) {
 			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
 				      SECONDARY_EXEC_SHADOW_VMCS);
@@ -8456,7 +8481,7 @@
 	 * the next L2->L1 exit.
 	 */
 	if (!is_guest_mode(vcpu) ||
-	    !nested_cpu_has2(vmx->nested.current_vmcs12,
+	    !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
 			     SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
 		vmcs_write64(APIC_ACCESS_ADDR, hpa);
 }
commit	4f2777bc97974b0df9276ee9a85155a9e27a5282	[log] [tgz]
author	David Matlack <dmatlack@google.com>	Wed Jul 13 17:16:37 2016 -0700
committer	Radim Krčmář <rkrcmar@redhat.com>	Mon Aug 01 14:49:05 2016 +0200
tree	44b8579722765e29f3a69ff09d2950165f94cfb4
parent	601045bff74569374b3d5b34dbb9bad71adf3aa5 [diff] [blame]