blob: aebd008ccccb40d6964e58a368e804e424fd3801 [file] [log] [blame]
Avi Kivity6aa8b732006-12-10 02:21:36 -08001/*
2 * Kernel-based Virtual Machine driver for Linux
3 *
4 * This module enables machines with Intel VT-x extensions to run virtual
5 * machines without emulation or binary translation.
6 *
7 * Copyright (C) 2006 Qumranet, Inc.
Nicolas Kaiser9611c182010-10-06 14:23:22 +02008 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
Avi Kivity6aa8b732006-12-10 02:21:36 -08009 *
10 * Authors:
11 * Avi Kivity <avi@qumranet.com>
12 * Yaniv Kamay <yaniv@qumranet.com>
13 *
14 * This work is licensed under the terms of the GNU GPL, version 2. See
15 * the COPYING file in the top-level directory.
16 *
17 */
18
Eddie Dong85f455f2007-07-06 12:20:49 +030019#include "irq.h"
Zhang Xiantao1d737c82007-12-14 09:35:10 +080020#include "mmu.h"
Avi Kivity00b27a32011-11-23 16:30:32 +020021#include "cpuid.h"
Andrey Smetanind62caab2015-11-10 15:36:33 +030022#include "lapic.h"
Avi Kivitye4956062007-06-28 14:15:57 -040023
Avi Kivityedf88412007-12-16 11:02:48 +020024#include <linux/kvm_host.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080025#include <linux/module.h>
Ahmed S. Darwish9d8f5492007-02-19 14:37:46 +020026#include <linux/kernel.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080027#include <linux/mm.h>
28#include <linux/highmem.h>
Alexey Dobriyane8edc6e2007-05-21 01:22:52 +040029#include <linux/sched.h>
Avi Kivityc7addb92007-09-16 18:58:32 +020030#include <linux/moduleparam.h>
Josh Triplette9bda3b2012-03-20 23:33:51 -070031#include <linux/mod_devicetable.h>
Steven Rostedt (Red Hat)af658dc2015-04-29 14:36:05 -040032#include <linux/trace_events.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090033#include <linux/slab.h>
Shane Wangcafd6652010-04-29 12:09:01 -040034#include <linux/tboot.h>
Jan Kiszkaf4124502014-03-07 20:03:13 +010035#include <linux/hrtimer.h>
Josh Poimboeufc207aee2017-06-28 10:11:06 -050036#include <linux/frame.h>
Dan Williams085331d2018-01-31 17:47:03 -080037#include <linux/nospec.h>
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -030038#include "kvm_cache_regs.h"
Avi Kivity35920a32008-07-03 14:50:12 +030039#include "x86.h"
Avi Kivitye4956062007-06-28 14:15:57 -040040
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +020041#include <asm/asm.h>
Feng Wu28b835d2015-09-18 22:29:54 +080042#include <asm/cpu.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080043#include <asm/io.h>
Anthony Liguori3b3be0d2006-12-13 00:33:43 -080044#include <asm/desc.h>
Eduardo Habkost13673a92008-11-17 19:03:13 -020045#include <asm/vmx.h>
Eduardo Habkost6210e372008-11-17 19:03:16 -020046#include <asm/virtext.h>
Andi Kleena0861c02009-06-08 17:37:09 +080047#include <asm/mce.h>
Ingo Molnar952f07e2015-04-26 16:56:05 +020048#include <asm/fpu/internal.h>
Gleb Natapovd7cd9792011-10-05 14:01:23 +020049#include <asm/perf_event.h>
Paolo Bonzini81908bf2014-02-21 10:32:27 +010050#include <asm/debugreg.h>
Zhang Yanfei8f536b72012-12-06 23:43:34 +080051#include <asm/kexec.h>
Radim Krčmářdab20872015-02-09 22:44:07 +010052#include <asm/apic.h>
Feng Wuefc64402015-09-18 22:29:51 +080053#include <asm/irq_remapping.h>
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070054#include <asm/mmu_context.h>
Thomas Gleixner28a27752018-04-29 15:01:37 +020055#include <asm/spec-ctrl.h>
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010056#include <asm/mshyperv.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080057
Marcelo Tosatti229456f2009-06-17 09:22:14 -030058#include "trace.h"
Wei Huang25462f72015-06-19 15:45:05 +020059#include "pmu.h"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010060#include "vmx_evmcs.h"
Marcelo Tosatti229456f2009-06-17 09:22:14 -030061
Avi Kivity4ecac3f2008-05-13 13:23:38 +030062#define __ex(x) __kvm_handle_fault_on_reboot(x)
Avi Kivity5e520e62011-05-15 10:13:12 -040063#define __ex_clear(x, reg) \
Uros Bizjak44c2d662018-10-11 19:40:45 +020064 ____kvm_handle_fault_on_reboot(x, "xorl " reg " , " reg)
Avi Kivity4ecac3f2008-05-13 13:23:38 +030065
Avi Kivity6aa8b732006-12-10 02:21:36 -080066MODULE_AUTHOR("Qumranet");
67MODULE_LICENSE("GPL");
68
Josh Triplette9bda3b2012-03-20 23:33:51 -070069static const struct x86_cpu_id vmx_cpu_id[] = {
70 X86_FEATURE_MATCH(X86_FEATURE_VMX),
71 {}
72};
73MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
74
Rusty Russell476bc002012-01-13 09:32:18 +103075static bool __read_mostly enable_vpid = 1;
Avi Kivity736caef2009-03-23 17:39:48 +020076module_param_named(vpid, enable_vpid, bool, 0444);
Sheng Yang2384d2b2008-01-17 15:14:33 +080077
Paolo Bonzinid02fcf52017-11-06 13:31:13 +010078static bool __read_mostly enable_vnmi = 1;
79module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
80
Rusty Russell476bc002012-01-13 09:32:18 +103081static bool __read_mostly flexpriority_enabled = 1;
Avi Kivity736caef2009-03-23 17:39:48 +020082module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
Avi Kivity4c9fc8e2008-03-24 18:15:14 +020083
Rusty Russell476bc002012-01-13 09:32:18 +103084static bool __read_mostly enable_ept = 1;
Avi Kivity736caef2009-03-23 17:39:48 +020085module_param_named(ept, enable_ept, bool, S_IRUGO);
Sheng Yangd56f5462008-04-25 10:13:16 +080086
Rusty Russell476bc002012-01-13 09:32:18 +103087static bool __read_mostly enable_unrestricted_guest = 1;
Nitin A Kamble3a624e22009-06-08 11:34:16 -070088module_param_named(unrestricted_guest,
89 enable_unrestricted_guest, bool, S_IRUGO);
90
Xudong Hao83c3a332012-05-28 19:33:35 +080091static bool __read_mostly enable_ept_ad_bits = 1;
92module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
93
Avi Kivitya27685c2012-06-12 20:30:18 +030094static bool __read_mostly emulate_invalid_guest_state = true;
Avi Kivityc1f8bc02009-03-23 15:41:17 +020095module_param(emulate_invalid_guest_state, bool, S_IRUGO);
Mohammed Gamal04fa4d32008-08-17 16:39:48 +030096
Rusty Russell476bc002012-01-13 09:32:18 +103097static bool __read_mostly fasteoi = 1;
Kevin Tian58fbbf22011-08-30 13:56:17 +030098module_param(fasteoi, bool, S_IRUGO);
99
Yang Zhang5a717852013-04-11 19:25:16 +0800100static bool __read_mostly enable_apicv = 1;
Yang Zhang01e439b2013-04-11 19:25:12 +0800101module_param(enable_apicv, bool, S_IRUGO);
Yang Zhang83d4c282013-01-25 10:18:49 +0800102
Abel Gordonabc4fc52013-04-18 14:35:25 +0300103static bool __read_mostly enable_shadow_vmcs = 1;
104module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
Nadav Har'El801d3422011-05-25 23:02:23 +0300105/*
106 * If nested=1, nested virtualization is supported, i.e., guests may use
107 * VMX and be a hypervisor for its own guests. If nested=0, guests may not
108 * use VMX instructions.
109 */
Rusty Russell476bc002012-01-13 09:32:18 +1030110static bool __read_mostly nested = 0;
Nadav Har'El801d3422011-05-25 23:02:23 +0300111module_param(nested, bool, S_IRUGO);
112
Sean Christopherson52017602018-09-26 09:23:57 -0700113static bool __read_mostly nested_early_check = 0;
114module_param(nested_early_check, bool, S_IRUGO);
115
Wanpeng Li20300092014-12-02 19:14:59 +0800116static u64 __read_mostly host_xss;
117
Kai Huang843e4332015-01-28 10:54:28 +0800118static bool __read_mostly enable_pml = 1;
119module_param_named(pml, enable_pml, bool, S_IRUGO);
120
Paolo Bonzini904e14f2018-01-16 16:51:18 +0100121#define MSR_TYPE_R 1
122#define MSR_TYPE_W 2
123#define MSR_TYPE_RW 3
124
125#define MSR_BITMAP_MODE_X2APIC 1
126#define MSR_BITMAP_MODE_X2APIC_APICV 2
Paolo Bonzini904e14f2018-01-16 16:51:18 +0100127
Haozhong Zhang64903d62015-10-20 15:39:09 +0800128#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
129
Yunhong Jiang64672c92016-06-13 14:19:59 -0700130/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
131static int __read_mostly cpu_preemption_timer_multi;
132static bool __read_mostly enable_preemption_timer = 1;
133#ifdef CONFIG_X86_64
134module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
135#endif
136
Sean Christopherson3de63472018-07-13 08:42:30 -0700137#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
Sean Christopherson1706bd02018-03-05 12:04:38 -0800138#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
139#define KVM_VM_CR0_ALWAYS_ON \
140 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
141 X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
Avi Kivity4c386092009-12-07 12:26:18 +0200142#define KVM_CR4_GUEST_OWNED_BITS \
143 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
Yu Zhangfd8cb432017-08-24 20:27:56 +0800144 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
Avi Kivity4c386092009-12-07 12:26:18 +0200145
Sean Christopherson5dc1f042018-03-05 12:04:39 -0800146#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
Avi Kivitycdc0e242009-12-06 17:21:14 +0200147#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
148#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
149
Avi Kivity78ac8b42010-04-08 18:19:35 +0300150#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
151
Jan Kiszkaf4124502014-03-07 20:03:13 +0100152#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
153
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +0800154/*
Jan Dakinevich16c2aec2016-10-28 07:00:30 +0300155 * Hyper-V requires all of these, so mark them as supported even though
156 * they are just treated the same as all-context.
157 */
158#define VMX_VPID_EXTENT_SUPPORTED_MASK \
159 (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
160 VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
161 VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
162 VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
163
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +0800164/*
165 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
166 * ple_gap: upper bound on the amount of time between two successive
167 * executions of PAUSE in a loop. Also indicate if ple enabled.
Rik van Riel00c25bc2011-01-04 09:51:33 -0500168 * According to test, this time is usually smaller than 128 cycles.
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +0800169 * ple_window: upper bound on the amount of time a guest is allowed to execute
170 * in a PAUSE loop. Tests indicate that most spinlocks are held for
171 * less than 2^12 cycles
172 * Time is measured based on a counter that runs at the same rate as the TSC,
173 * refer SDM volume 3b section 21.6.13 & 22.1.3.
174 */
Babu Mogerc8e88712018-03-16 16:37:24 -0400175static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
Radim Krčmářb4a2d312014-08-21 18:08:08 +0200176
Babu Moger7fbc85a2018-03-16 16:37:22 -0400177static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
178module_param(ple_window, uint, 0444);
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +0800179
Radim Krčmářb4a2d312014-08-21 18:08:08 +0200180/* Default doubles per-vcpu window every exit. */
Babu Mogerc8e88712018-03-16 16:37:24 -0400181static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
Babu Moger7fbc85a2018-03-16 16:37:22 -0400182module_param(ple_window_grow, uint, 0444);
Radim Krčmářb4a2d312014-08-21 18:08:08 +0200183
184/* Default resets per-vcpu window every exit to ple_window. */
Babu Mogerc8e88712018-03-16 16:37:24 -0400185static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
Babu Moger7fbc85a2018-03-16 16:37:22 -0400186module_param(ple_window_shrink, uint, 0444);
Radim Krčmářb4a2d312014-08-21 18:08:08 +0200187
188/* Default is to compute the maximum so we can never overflow. */
Babu Moger7fbc85a2018-03-16 16:37:22 -0400189static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
190module_param(ple_window_max, uint, 0444);
Radim Krčmářb4a2d312014-08-21 18:08:08 +0200191
Avi Kivity83287ea422012-09-16 15:10:57 +0300192extern const ulong vmx_return;
Sean Christopherson52017602018-09-26 09:23:57 -0700193extern const ulong vmx_early_consistency_check_return;
Avi Kivity83287ea422012-09-16 15:10:57 +0300194
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200195static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
Nicolai Stange427362a2018-07-21 22:25:00 +0200196static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
Thomas Gleixnerdd4bfa72018-07-13 16:23:21 +0200197static DEFINE_MUTEX(vmx_l1d_flush_mutex);
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200198
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200199/* Storage for pre module init parameter parsing */
200static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200201
202static const struct {
203 const char *option;
Paolo Bonzini0027ff22018-08-22 16:43:39 +0200204 bool for_parse;
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200205} vmentry_l1d_param[] = {
Paolo Bonzini0027ff22018-08-22 16:43:39 +0200206 [VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
207 [VMENTER_L1D_FLUSH_NEVER] = {"never", true},
208 [VMENTER_L1D_FLUSH_COND] = {"cond", true},
209 [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
210 [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
211 [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200212};
213
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200214#define L1D_CACHE_ORDER 4
215static void *vmx_l1d_flush_pages;
216
217static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
218{
219 struct page *page;
Nicolai Stange288d1522018-07-18 19:07:38 +0200220 unsigned int i;
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200221
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200222 if (!enable_ept) {
223 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
224 return 0;
225 }
226
Yi Wangd806afa2018-08-16 13:42:39 +0800227 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
228 u64 msr;
Paolo Bonzini8e0b2b92018-08-05 16:07:46 +0200229
Yi Wangd806afa2018-08-16 13:42:39 +0800230 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
231 if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
232 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
233 return 0;
234 }
235 }
Paolo Bonzini8e0b2b92018-08-05 16:07:46 +0200236
Jiri Kosinad90a7a02018-07-13 16:23:25 +0200237 /* If set to auto use the default l1tf mitigation method */
238 if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
239 switch (l1tf_mitigation) {
240 case L1TF_MITIGATION_OFF:
241 l1tf = VMENTER_L1D_FLUSH_NEVER;
242 break;
243 case L1TF_MITIGATION_FLUSH_NOWARN:
244 case L1TF_MITIGATION_FLUSH:
245 case L1TF_MITIGATION_FLUSH_NOSMT:
246 l1tf = VMENTER_L1D_FLUSH_COND;
247 break;
248 case L1TF_MITIGATION_FULL:
249 case L1TF_MITIGATION_FULL_FORCE:
250 l1tf = VMENTER_L1D_FLUSH_ALWAYS;
251 break;
252 }
253 } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
254 l1tf = VMENTER_L1D_FLUSH_ALWAYS;
255 }
256
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200257 if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
258 !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
259 page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
260 if (!page)
261 return -ENOMEM;
262 vmx_l1d_flush_pages = page_address(page);
Nicolai Stange288d1522018-07-18 19:07:38 +0200263
264 /*
265 * Initialize each page with a different pattern in
266 * order to protect against KSM in the nested
267 * virtualization case.
268 */
269 for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
270 memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
271 PAGE_SIZE);
272 }
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200273 }
274
275 l1tf_vmx_mitigation = l1tf;
276
Thomas Gleixner895ae472018-07-13 16:23:22 +0200277 if (l1tf != VMENTER_L1D_FLUSH_NEVER)
278 static_branch_enable(&vmx_l1d_should_flush);
279 else
280 static_branch_disable(&vmx_l1d_should_flush);
Thomas Gleixner4c6523e2018-07-13 16:23:20 +0200281
Nicolai Stange427362a2018-07-21 22:25:00 +0200282 if (l1tf == VMENTER_L1D_FLUSH_COND)
283 static_branch_enable(&vmx_l1d_flush_cond);
Thomas Gleixner895ae472018-07-13 16:23:22 +0200284 else
Nicolai Stange427362a2018-07-21 22:25:00 +0200285 static_branch_disable(&vmx_l1d_flush_cond);
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200286 return 0;
287}
288
289static int vmentry_l1d_flush_parse(const char *s)
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200290{
291 unsigned int i;
292
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200293 if (s) {
294 for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
Paolo Bonzini0027ff22018-08-22 16:43:39 +0200295 if (vmentry_l1d_param[i].for_parse &&
296 sysfs_streq(s, vmentry_l1d_param[i].option))
297 return i;
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200298 }
299 }
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200300 return -EINVAL;
301}
302
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200303static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
304{
Thomas Gleixnerdd4bfa72018-07-13 16:23:21 +0200305 int l1tf, ret;
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200306
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200307 l1tf = vmentry_l1d_flush_parse(s);
308 if (l1tf < 0)
309 return l1tf;
310
Paolo Bonzini0027ff22018-08-22 16:43:39 +0200311 if (!boot_cpu_has(X86_BUG_L1TF))
312 return 0;
313
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200314 /*
315 * Has vmx_init() run already? If not then this is the pre init
316 * parameter parsing. In that case just store the value and let
317 * vmx_init() do the proper setup after enable_ept has been
318 * established.
319 */
320 if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
321 vmentry_l1d_flush_param = l1tf;
322 return 0;
323 }
324
Thomas Gleixnerdd4bfa72018-07-13 16:23:21 +0200325 mutex_lock(&vmx_l1d_flush_mutex);
326 ret = vmx_setup_l1d_flush(l1tf);
327 mutex_unlock(&vmx_l1d_flush_mutex);
328 return ret;
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200329}
330
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200331static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
332{
Paolo Bonzini0027ff22018-08-22 16:43:39 +0200333 if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
334 return sprintf(s, "???\n");
335
Thomas Gleixner7db92e12018-07-13 16:23:19 +0200336 return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200337}
338
339static const struct kernel_param_ops vmentry_l1d_flush_ops = {
340 .set = vmentry_l1d_flush_set,
341 .get = vmentry_l1d_flush_get,
342};
Thomas Gleixner895ae472018-07-13 16:23:22 +0200343module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +0200344
Tianyu Lan877ad952018-07-19 08:40:23 +0000345enum ept_pointers_status {
346 EPT_POINTERS_CHECK = 0,
347 EPT_POINTERS_MATCH = 1,
348 EPT_POINTERS_MISMATCH = 2
349};
350
Sean Christopherson40bbb9d2018-03-20 12:17:20 -0700351struct kvm_vmx {
352 struct kvm kvm;
353
354 unsigned int tss_addr;
355 bool ept_identity_pagetable_done;
356 gpa_t ept_identity_map_addr;
Tianyu Lan877ad952018-07-19 08:40:23 +0000357
358 enum ept_pointers_status ept_pointers_match;
359 spinlock_t ept_pointer_lock;
Sean Christopherson40bbb9d2018-03-20 12:17:20 -0700360};
361
Gleb Natapov8bf00a52011-10-05 14:01:22 +0200362#define NR_AUTOLOAD_MSRS 8
Avi Kivity61d2ef22010-04-28 16:40:38 +0300363
Liran Alon392b2f22018-06-23 02:35:01 +0300364struct vmcs_hdr {
365 u32 revision_id:31;
366 u32 shadow_vmcs:1;
367};
368
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400369struct vmcs {
Liran Alon392b2f22018-06-23 02:35:01 +0300370 struct vmcs_hdr hdr;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400371 u32 abort;
372 char data[0];
373};
374
Nadav Har'Eld462b812011-05-24 15:26:10 +0300375/*
Sean Christophersond7ee0392018-07-23 12:32:47 -0700376 * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT
377 * and whose values change infrequently, but are not constant. I.e. this is
378 * used as a write-through cache of the corresponding VMCS fields.
379 */
380struct vmcs_host_state {
381 unsigned long cr3; /* May not match real cr3 */
382 unsigned long cr4; /* May not match real cr4 */
Sean Christopherson5e079c72018-07-23 12:32:50 -0700383 unsigned long gs_base;
384 unsigned long fs_base;
Sean Christophersond7ee0392018-07-23 12:32:47 -0700385
386 u16 fs_sel, gs_sel, ldt_sel;
387#ifdef CONFIG_X86_64
388 u16 ds_sel, es_sel;
389#endif
390};
391
392/*
Nadav Har'Eld462b812011-05-24 15:26:10 +0300393 * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
394 * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
395 * loaded on this CPU (so we can clear them if the CPU goes down).
396 */
397struct loaded_vmcs {
398 struct vmcs *vmcs;
Jim Mattson355f4fb2016-10-28 08:29:39 -0700399 struct vmcs *shadow_vmcs;
Nadav Har'Eld462b812011-05-24 15:26:10 +0300400 int cpu;
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +0200401 bool launched;
402 bool nmi_known_unmasked;
Sean Christophersonf459a702018-08-27 15:21:11 -0700403 bool hv_timer_armed;
Paolo Bonzini8a1b4392017-11-06 13:31:12 +0100404 /* Support for vnmi-less CPUs */
405 int soft_vnmi_blocked;
406 ktime_t entry_time;
407 s64 vnmi_blocked_time;
Paolo Bonzini904e14f2018-01-16 16:51:18 +0100408 unsigned long *msr_bitmap;
Nadav Har'Eld462b812011-05-24 15:26:10 +0300409 struct list_head loaded_vmcss_on_cpu_link;
Sean Christophersond7ee0392018-07-23 12:32:47 -0700410 struct vmcs_host_state host_state;
Nadav Har'Eld462b812011-05-24 15:26:10 +0300411};
412
Avi Kivity26bb0982009-09-07 11:14:12 +0300413struct shared_msr_entry {
414 unsigned index;
415 u64 data;
Avi Kivityd5696722009-12-02 12:28:47 +0200416 u64 mask;
Avi Kivity26bb0982009-09-07 11:14:12 +0300417};
418
Nadav Har'Elec378ae2011-05-25 23:02:54 +0300419/*
Nadav Har'Ela9d30f32011-05-25 23:03:55 +0300420 * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a
421 * single nested guest (L2), hence the name vmcs12. Any VMX implementation has
422 * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is
423 * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
424 * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
425 * More than one of these structures may exist, if L1 runs multiple L2 guests.
Jim Mattsonde3a0022017-11-27 17:22:25 -0600426 * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
Nadav Har'Ela9d30f32011-05-25 23:03:55 +0300427 * underlying hardware which will be used to run L2.
428 * This structure is packed to ensure that its layout is identical across
429 * machines (necessary for live migration).
Jim Mattsonb348e792018-05-01 15:40:27 -0700430 *
431 * IMPORTANT: Changing the layout of existing fields in this structure
432 * will break save/restore compatibility with older kvm releases. When
433 * adding new fields, either use space in the reserved padding* arrays
434 * or add the new fields to the end of the structure.
Nadav Har'Ela9d30f32011-05-25 23:03:55 +0300435 */
Nadav Har'El22bd0352011-05-25 23:05:57 +0300436typedef u64 natural_width;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +0300437struct __packed vmcs12 {
438 /* According to the Intel spec, a VMCS region must start with the
439 * following two fields. Then follow implementation-specific data.
440 */
Liran Alon392b2f22018-06-23 02:35:01 +0300441 struct vmcs_hdr hdr;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +0300442 u32 abort;
Nadav Har'El22bd0352011-05-25 23:05:57 +0300443
Nadav Har'El27d6c862011-05-25 23:06:59 +0300444 u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
445 u32 padding[7]; /* room for future expansion */
446
Nadav Har'El22bd0352011-05-25 23:05:57 +0300447 u64 io_bitmap_a;
448 u64 io_bitmap_b;
449 u64 msr_bitmap;
450 u64 vm_exit_msr_store_addr;
451 u64 vm_exit_msr_load_addr;
452 u64 vm_entry_msr_load_addr;
453 u64 tsc_offset;
454 u64 virtual_apic_page_addr;
455 u64 apic_access_addr;
Wincy Van705699a2015-02-03 23:58:17 +0800456 u64 posted_intr_desc_addr;
Nadav Har'El22bd0352011-05-25 23:05:57 +0300457 u64 ept_pointer;
Wincy Van608406e2015-02-03 23:57:51 +0800458 u64 eoi_exit_bitmap0;
459 u64 eoi_exit_bitmap1;
460 u64 eoi_exit_bitmap2;
461 u64 eoi_exit_bitmap3;
Wanpeng Li81dc01f2014-12-04 19:11:07 +0800462 u64 xss_exit_bitmap;
Nadav Har'El22bd0352011-05-25 23:05:57 +0300463 u64 guest_physical_address;
464 u64 vmcs_link_pointer;
465 u64 guest_ia32_debugctl;
466 u64 guest_ia32_pat;
467 u64 guest_ia32_efer;
468 u64 guest_ia32_perf_global_ctrl;
469 u64 guest_pdptr0;
470 u64 guest_pdptr1;
471 u64 guest_pdptr2;
472 u64 guest_pdptr3;
Paolo Bonzini36be0b92014-02-24 12:30:04 +0100473 u64 guest_bndcfgs;
Nadav Har'El22bd0352011-05-25 23:05:57 +0300474 u64 host_ia32_pat;
475 u64 host_ia32_efer;
476 u64 host_ia32_perf_global_ctrl;
Jim Mattsonb348e792018-05-01 15:40:27 -0700477 u64 vmread_bitmap;
478 u64 vmwrite_bitmap;
479 u64 vm_function_control;
480 u64 eptp_list_address;
481 u64 pml_address;
482 u64 padding64[3]; /* room for future expansion */
Nadav Har'El22bd0352011-05-25 23:05:57 +0300483 /*
484 * To allow migration of L1 (complete with its L2 guests) between
485 * machines of different natural widths (32 or 64 bit), we cannot have
486 * unsigned long fields with no explict size. We use u64 (aliased
487 * natural_width) instead. Luckily, x86 is little-endian.
488 */
489 natural_width cr0_guest_host_mask;
490 natural_width cr4_guest_host_mask;
491 natural_width cr0_read_shadow;
492 natural_width cr4_read_shadow;
493 natural_width cr3_target_value0;
494 natural_width cr3_target_value1;
495 natural_width cr3_target_value2;
496 natural_width cr3_target_value3;
497 natural_width exit_qualification;
498 natural_width guest_linear_address;
499 natural_width guest_cr0;
500 natural_width guest_cr3;
501 natural_width guest_cr4;
502 natural_width guest_es_base;
503 natural_width guest_cs_base;
504 natural_width guest_ss_base;
505 natural_width guest_ds_base;
506 natural_width guest_fs_base;
507 natural_width guest_gs_base;
508 natural_width guest_ldtr_base;
509 natural_width guest_tr_base;
510 natural_width guest_gdtr_base;
511 natural_width guest_idtr_base;
512 natural_width guest_dr7;
513 natural_width guest_rsp;
514 natural_width guest_rip;
515 natural_width guest_rflags;
516 natural_width guest_pending_dbg_exceptions;
517 natural_width guest_sysenter_esp;
518 natural_width guest_sysenter_eip;
519 natural_width host_cr0;
520 natural_width host_cr3;
521 natural_width host_cr4;
522 natural_width host_fs_base;
523 natural_width host_gs_base;
524 natural_width host_tr_base;
525 natural_width host_gdtr_base;
526 natural_width host_idtr_base;
527 natural_width host_ia32_sysenter_esp;
528 natural_width host_ia32_sysenter_eip;
529 natural_width host_rsp;
530 natural_width host_rip;
531 natural_width paddingl[8]; /* room for future expansion */
532 u32 pin_based_vm_exec_control;
533 u32 cpu_based_vm_exec_control;
534 u32 exception_bitmap;
535 u32 page_fault_error_code_mask;
536 u32 page_fault_error_code_match;
537 u32 cr3_target_count;
538 u32 vm_exit_controls;
539 u32 vm_exit_msr_store_count;
540 u32 vm_exit_msr_load_count;
541 u32 vm_entry_controls;
542 u32 vm_entry_msr_load_count;
543 u32 vm_entry_intr_info_field;
544 u32 vm_entry_exception_error_code;
545 u32 vm_entry_instruction_len;
546 u32 tpr_threshold;
547 u32 secondary_vm_exec_control;
548 u32 vm_instruction_error;
549 u32 vm_exit_reason;
550 u32 vm_exit_intr_info;
551 u32 vm_exit_intr_error_code;
552 u32 idt_vectoring_info_field;
553 u32 idt_vectoring_error_code;
554 u32 vm_exit_instruction_len;
555 u32 vmx_instruction_info;
556 u32 guest_es_limit;
557 u32 guest_cs_limit;
558 u32 guest_ss_limit;
559 u32 guest_ds_limit;
560 u32 guest_fs_limit;
561 u32 guest_gs_limit;
562 u32 guest_ldtr_limit;
563 u32 guest_tr_limit;
564 u32 guest_gdtr_limit;
565 u32 guest_idtr_limit;
566 u32 guest_es_ar_bytes;
567 u32 guest_cs_ar_bytes;
568 u32 guest_ss_ar_bytes;
569 u32 guest_ds_ar_bytes;
570 u32 guest_fs_ar_bytes;
571 u32 guest_gs_ar_bytes;
572 u32 guest_ldtr_ar_bytes;
573 u32 guest_tr_ar_bytes;
574 u32 guest_interruptibility_info;
575 u32 guest_activity_state;
576 u32 guest_sysenter_cs;
577 u32 host_ia32_sysenter_cs;
Jan Kiszka0238ea92013-03-13 11:31:24 +0100578 u32 vmx_preemption_timer_value;
579 u32 padding32[7]; /* room for future expansion */
Nadav Har'El22bd0352011-05-25 23:05:57 +0300580 u16 virtual_processor_id;
Wincy Van705699a2015-02-03 23:58:17 +0800581 u16 posted_intr_nv;
Nadav Har'El22bd0352011-05-25 23:05:57 +0300582 u16 guest_es_selector;
583 u16 guest_cs_selector;
584 u16 guest_ss_selector;
585 u16 guest_ds_selector;
586 u16 guest_fs_selector;
587 u16 guest_gs_selector;
588 u16 guest_ldtr_selector;
589 u16 guest_tr_selector;
Wincy Van608406e2015-02-03 23:57:51 +0800590 u16 guest_intr_status;
Nadav Har'El22bd0352011-05-25 23:05:57 +0300591 u16 host_es_selector;
592 u16 host_cs_selector;
593 u16 host_ss_selector;
594 u16 host_ds_selector;
595 u16 host_fs_selector;
596 u16 host_gs_selector;
597 u16 host_tr_selector;
Jim Mattsonb348e792018-05-01 15:40:27 -0700598 u16 guest_pml_index;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +0300599};
600
601/*
Jim Mattson21ebf532018-05-01 15:40:28 -0700602 * For save/restore compatibility, the vmcs12 field offsets must not change.
603 */
604#define CHECK_OFFSET(field, loc) \
605 BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc), \
606 "Offset of " #field " in struct vmcs12 has changed.")
607
608static inline void vmx_check_vmcs12_offsets(void) {
Liran Alon392b2f22018-06-23 02:35:01 +0300609 CHECK_OFFSET(hdr, 0);
Jim Mattson21ebf532018-05-01 15:40:28 -0700610 CHECK_OFFSET(abort, 4);
611 CHECK_OFFSET(launch_state, 8);
612 CHECK_OFFSET(io_bitmap_a, 40);
613 CHECK_OFFSET(io_bitmap_b, 48);
614 CHECK_OFFSET(msr_bitmap, 56);
615 CHECK_OFFSET(vm_exit_msr_store_addr, 64);
616 CHECK_OFFSET(vm_exit_msr_load_addr, 72);
617 CHECK_OFFSET(vm_entry_msr_load_addr, 80);
618 CHECK_OFFSET(tsc_offset, 88);
619 CHECK_OFFSET(virtual_apic_page_addr, 96);
620 CHECK_OFFSET(apic_access_addr, 104);
621 CHECK_OFFSET(posted_intr_desc_addr, 112);
622 CHECK_OFFSET(ept_pointer, 120);
623 CHECK_OFFSET(eoi_exit_bitmap0, 128);
624 CHECK_OFFSET(eoi_exit_bitmap1, 136);
625 CHECK_OFFSET(eoi_exit_bitmap2, 144);
626 CHECK_OFFSET(eoi_exit_bitmap3, 152);
627 CHECK_OFFSET(xss_exit_bitmap, 160);
628 CHECK_OFFSET(guest_physical_address, 168);
629 CHECK_OFFSET(vmcs_link_pointer, 176);
630 CHECK_OFFSET(guest_ia32_debugctl, 184);
631 CHECK_OFFSET(guest_ia32_pat, 192);
632 CHECK_OFFSET(guest_ia32_efer, 200);
633 CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208);
634 CHECK_OFFSET(guest_pdptr0, 216);
635 CHECK_OFFSET(guest_pdptr1, 224);
636 CHECK_OFFSET(guest_pdptr2, 232);
637 CHECK_OFFSET(guest_pdptr3, 240);
638 CHECK_OFFSET(guest_bndcfgs, 248);
639 CHECK_OFFSET(host_ia32_pat, 256);
640 CHECK_OFFSET(host_ia32_efer, 264);
641 CHECK_OFFSET(host_ia32_perf_global_ctrl, 272);
642 CHECK_OFFSET(vmread_bitmap, 280);
643 CHECK_OFFSET(vmwrite_bitmap, 288);
644 CHECK_OFFSET(vm_function_control, 296);
645 CHECK_OFFSET(eptp_list_address, 304);
646 CHECK_OFFSET(pml_address, 312);
647 CHECK_OFFSET(cr0_guest_host_mask, 344);
648 CHECK_OFFSET(cr4_guest_host_mask, 352);
649 CHECK_OFFSET(cr0_read_shadow, 360);
650 CHECK_OFFSET(cr4_read_shadow, 368);
651 CHECK_OFFSET(cr3_target_value0, 376);
652 CHECK_OFFSET(cr3_target_value1, 384);
653 CHECK_OFFSET(cr3_target_value2, 392);
654 CHECK_OFFSET(cr3_target_value3, 400);
655 CHECK_OFFSET(exit_qualification, 408);
656 CHECK_OFFSET(guest_linear_address, 416);
657 CHECK_OFFSET(guest_cr0, 424);
658 CHECK_OFFSET(guest_cr3, 432);
659 CHECK_OFFSET(guest_cr4, 440);
660 CHECK_OFFSET(guest_es_base, 448);
661 CHECK_OFFSET(guest_cs_base, 456);
662 CHECK_OFFSET(guest_ss_base, 464);
663 CHECK_OFFSET(guest_ds_base, 472);
664 CHECK_OFFSET(guest_fs_base, 480);
665 CHECK_OFFSET(guest_gs_base, 488);
666 CHECK_OFFSET(guest_ldtr_base, 496);
667 CHECK_OFFSET(guest_tr_base, 504);
668 CHECK_OFFSET(guest_gdtr_base, 512);
669 CHECK_OFFSET(guest_idtr_base, 520);
670 CHECK_OFFSET(guest_dr7, 528);
671 CHECK_OFFSET(guest_rsp, 536);
672 CHECK_OFFSET(guest_rip, 544);
673 CHECK_OFFSET(guest_rflags, 552);
674 CHECK_OFFSET(guest_pending_dbg_exceptions, 560);
675 CHECK_OFFSET(guest_sysenter_esp, 568);
676 CHECK_OFFSET(guest_sysenter_eip, 576);
677 CHECK_OFFSET(host_cr0, 584);
678 CHECK_OFFSET(host_cr3, 592);
679 CHECK_OFFSET(host_cr4, 600);
680 CHECK_OFFSET(host_fs_base, 608);
681 CHECK_OFFSET(host_gs_base, 616);
682 CHECK_OFFSET(host_tr_base, 624);
683 CHECK_OFFSET(host_gdtr_base, 632);
684 CHECK_OFFSET(host_idtr_base, 640);
685 CHECK_OFFSET(host_ia32_sysenter_esp, 648);
686 CHECK_OFFSET(host_ia32_sysenter_eip, 656);
687 CHECK_OFFSET(host_rsp, 664);
688 CHECK_OFFSET(host_rip, 672);
689 CHECK_OFFSET(pin_based_vm_exec_control, 744);
690 CHECK_OFFSET(cpu_based_vm_exec_control, 748);
691 CHECK_OFFSET(exception_bitmap, 752);
692 CHECK_OFFSET(page_fault_error_code_mask, 756);
693 CHECK_OFFSET(page_fault_error_code_match, 760);
694 CHECK_OFFSET(cr3_target_count, 764);
695 CHECK_OFFSET(vm_exit_controls, 768);
696 CHECK_OFFSET(vm_exit_msr_store_count, 772);
697 CHECK_OFFSET(vm_exit_msr_load_count, 776);
698 CHECK_OFFSET(vm_entry_controls, 780);
699 CHECK_OFFSET(vm_entry_msr_load_count, 784);
700 CHECK_OFFSET(vm_entry_intr_info_field, 788);
701 CHECK_OFFSET(vm_entry_exception_error_code, 792);
702 CHECK_OFFSET(vm_entry_instruction_len, 796);
703 CHECK_OFFSET(tpr_threshold, 800);
704 CHECK_OFFSET(secondary_vm_exec_control, 804);
705 CHECK_OFFSET(vm_instruction_error, 808);
706 CHECK_OFFSET(vm_exit_reason, 812);
707 CHECK_OFFSET(vm_exit_intr_info, 816);
708 CHECK_OFFSET(vm_exit_intr_error_code, 820);
709 CHECK_OFFSET(idt_vectoring_info_field, 824);
710 CHECK_OFFSET(idt_vectoring_error_code, 828);
711 CHECK_OFFSET(vm_exit_instruction_len, 832);
712 CHECK_OFFSET(vmx_instruction_info, 836);
713 CHECK_OFFSET(guest_es_limit, 840);
714 CHECK_OFFSET(guest_cs_limit, 844);
715 CHECK_OFFSET(guest_ss_limit, 848);
716 CHECK_OFFSET(guest_ds_limit, 852);
717 CHECK_OFFSET(guest_fs_limit, 856);
718 CHECK_OFFSET(guest_gs_limit, 860);
719 CHECK_OFFSET(guest_ldtr_limit, 864);
720 CHECK_OFFSET(guest_tr_limit, 868);
721 CHECK_OFFSET(guest_gdtr_limit, 872);
722 CHECK_OFFSET(guest_idtr_limit, 876);
723 CHECK_OFFSET(guest_es_ar_bytes, 880);
724 CHECK_OFFSET(guest_cs_ar_bytes, 884);
725 CHECK_OFFSET(guest_ss_ar_bytes, 888);
726 CHECK_OFFSET(guest_ds_ar_bytes, 892);
727 CHECK_OFFSET(guest_fs_ar_bytes, 896);
728 CHECK_OFFSET(guest_gs_ar_bytes, 900);
729 CHECK_OFFSET(guest_ldtr_ar_bytes, 904);
730 CHECK_OFFSET(guest_tr_ar_bytes, 908);
731 CHECK_OFFSET(guest_interruptibility_info, 912);
732 CHECK_OFFSET(guest_activity_state, 916);
733 CHECK_OFFSET(guest_sysenter_cs, 920);
734 CHECK_OFFSET(host_ia32_sysenter_cs, 924);
735 CHECK_OFFSET(vmx_preemption_timer_value, 928);
736 CHECK_OFFSET(virtual_processor_id, 960);
737 CHECK_OFFSET(posted_intr_nv, 962);
738 CHECK_OFFSET(guest_es_selector, 964);
739 CHECK_OFFSET(guest_cs_selector, 966);
740 CHECK_OFFSET(guest_ss_selector, 968);
741 CHECK_OFFSET(guest_ds_selector, 970);
742 CHECK_OFFSET(guest_fs_selector, 972);
743 CHECK_OFFSET(guest_gs_selector, 974);
744 CHECK_OFFSET(guest_ldtr_selector, 976);
745 CHECK_OFFSET(guest_tr_selector, 978);
746 CHECK_OFFSET(guest_intr_status, 980);
747 CHECK_OFFSET(host_es_selector, 982);
748 CHECK_OFFSET(host_cs_selector, 984);
749 CHECK_OFFSET(host_ss_selector, 986);
750 CHECK_OFFSET(host_ds_selector, 988);
751 CHECK_OFFSET(host_fs_selector, 990);
752 CHECK_OFFSET(host_gs_selector, 992);
753 CHECK_OFFSET(host_tr_selector, 994);
754 CHECK_OFFSET(guest_pml_index, 996);
755}
756
757/*
Nadav Har'Ela9d30f32011-05-25 23:03:55 +0300758 * VMCS12_REVISION is an arbitrary id that should be changed if the content or
759 * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
760 * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
Jim Mattsonb348e792018-05-01 15:40:27 -0700761 *
762 * IMPORTANT: Changing this value will break save/restore compatibility with
763 * older kvm releases.
Nadav Har'Ela9d30f32011-05-25 23:03:55 +0300764 */
765#define VMCS12_REVISION 0x11e57ed0
766
767/*
768 * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
769 * and any VMCS region. Although only sizeof(struct vmcs12) are used by the
770 * current implementation, 4K are reserved to avoid future complications.
771 */
772#define VMCS12_SIZE 0x1000
773
774/*
Jim Mattson5b157062017-12-22 12:11:12 -0800775 * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
776 * supported VMCS12 field encoding.
777 */
778#define VMCS12_MAX_FIELD_INDEX 0x17
779
Paolo Bonzini6677f3d2018-02-26 13:40:08 +0100780struct nested_vmx_msrs {
781 /*
782 * We only store the "true" versions of the VMX capability MSRs. We
783 * generate the "non-true" versions by setting the must-be-1 bits
784 * according to the SDM.
785 */
786 u32 procbased_ctls_low;
787 u32 procbased_ctls_high;
788 u32 secondary_ctls_low;
789 u32 secondary_ctls_high;
790 u32 pinbased_ctls_low;
791 u32 pinbased_ctls_high;
792 u32 exit_ctls_low;
793 u32 exit_ctls_high;
794 u32 entry_ctls_low;
795 u32 entry_ctls_high;
796 u32 misc_low;
797 u32 misc_high;
798 u32 ept_caps;
799 u32 vpid_caps;
800 u64 basic;
801 u64 cr0_fixed0;
802 u64 cr0_fixed1;
803 u64 cr4_fixed0;
804 u64 cr4_fixed1;
805 u64 vmcs_enum;
806 u64 vmfunc_controls;
807};
808
Jim Mattson5b157062017-12-22 12:11:12 -0800809/*
Nadav Har'Elec378ae2011-05-25 23:02:54 +0300810 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
811 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
812 */
813struct nested_vmx {
814 /* Has the level1 guest done vmxon? */
815 bool vmxon;
Bandan Das3573e222014-05-06 02:19:16 -0400816 gpa_t vmxon_ptr;
Bandan Dasc5f983f2017-05-05 15:25:14 -0400817 bool pml_full;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +0300818
819 /* The guest-physical address of the current VMCS L1 keeps for L2 */
820 gpa_t current_vmptr;
David Matlack4f2777b2016-07-13 17:16:37 -0700821 /*
822 * Cache of the guest's VMCS, existing outside of guest memory.
823 * Loaded from guest memory during VMPTRLD. Flushed to guest
David Matlack8ca44e82017-08-01 14:00:39 -0700824 * memory during VMCLEAR and VMPTRLD.
David Matlack4f2777b2016-07-13 17:16:37 -0700825 */
826 struct vmcs12 *cached_vmcs12;
Abel Gordon012f83c2013-04-18 14:39:25 +0300827 /*
Liran Alon61ada742018-06-23 02:35:08 +0300828 * Cache of the guest's shadow VMCS, existing outside of guest
829 * memory. Loaded from guest memory during VM entry. Flushed
830 * to guest memory during VM exit.
831 */
832 struct vmcs12 *cached_shadow_vmcs12;
833 /*
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +0200834 * Indicates if the shadow vmcs or enlightened vmcs must be updated
835 * with the data held by struct vmcs12.
Abel Gordon012f83c2013-04-18 14:39:25 +0300836 */
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +0200837 bool need_vmcs12_sync;
Paolo Bonzini74a497f2017-12-20 13:55:39 +0100838 bool dirty_vmcs12;
Nadav Har'Elff2f6fe2011-05-25 23:05:27 +0300839
Sean Christopherson9d6105b22018-09-26 09:23:51 -0700840 /*
841 * vmcs02 has been initialized, i.e. state that is constant for
842 * vmcs02 has been written to the backing VMCS. Initialization
843 * is delayed until L1 actually attempts to run a nested VM.
844 */
845 bool vmcs02_initialized;
846
Jim Mattson8d860bb2018-05-09 16:56:05 -0400847 bool change_vmcs01_virtual_apic_mode;
848
Vitaly Kuznetsov57b119d2018-10-16 18:50:01 +0200849 /*
850 * Enlightened VMCS has been enabled. It does not mean that L1 has to
851 * use it. However, VMX features available to L1 will be limited based
852 * on what the enlightened VMCS supports.
853 */
854 bool enlightened_vmcs_enabled;
855
Nadav Har'El644d7112011-05-25 23:12:35 +0300856 /* L2 must run next, and mustn't decide to exit to L1. */
857 bool nested_run_pending;
Jim Mattsonde3a0022017-11-27 17:22:25 -0600858
859 struct loaded_vmcs vmcs02;
860
Nadav Har'Elfe3ef052011-05-25 23:10:02 +0300861 /*
Jim Mattsonde3a0022017-11-27 17:22:25 -0600862 * Guest pages referred to in the vmcs02 with host-physical
863 * pointers, so we must keep them pinned while L2 runs.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +0300864 */
865 struct page *apic_access_page;
Wanpeng Lia7c0b072014-08-21 19:46:50 +0800866 struct page *virtual_apic_page;
Wincy Van705699a2015-02-03 23:58:17 +0800867 struct page *pi_desc_page;
868 struct pi_desc *pi_desc;
869 bool pi_pending;
870 u16 posted_intr_nv;
Jan Kiszkaf4124502014-03-07 20:03:13 +0100871
872 struct hrtimer preemption_timer;
873 bool preemption_timer_expired;
Jan Kiszka2996fca2014-06-16 13:59:43 +0200874
875 /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
876 u64 vmcs01_debugctl;
Liran Alon62cf9bd812018-09-14 03:25:54 +0300877 u64 vmcs01_guest_bndcfgs;
Wincy Vanb9c237b2015-02-03 23:56:30 +0800878
Wanpeng Li5c614b32015-10-13 09:18:36 -0700879 u16 vpid02;
880 u16 last_vpid;
881
Paolo Bonzini6677f3d2018-02-26 13:40:08 +0100882 struct nested_vmx_msrs msrs;
Ladi Prosek72e9cbd2017-10-11 16:54:43 +0200883
884 /* SMM related state */
885 struct {
886 /* in VMX operation on SMM entry? */
887 bool vmxon;
888 /* in guest mode on SMM entry? */
889 bool guest_mode;
890 } smm;
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +0200891
892 struct hv_enlightened_vmcs *hv_evmcs;
Nadav Har'Elec378ae2011-05-25 23:02:54 +0300893};
894
Yang Zhang01e439b2013-04-11 19:25:12 +0800895#define POSTED_INTR_ON 0
Feng Wuebbfc762015-09-18 22:29:46 +0800896#define POSTED_INTR_SN 1
897
Yang Zhang01e439b2013-04-11 19:25:12 +0800898/* Posted-Interrupt Descriptor */
899struct pi_desc {
900 u32 pir[8]; /* Posted interrupt requested */
Feng Wu6ef15222015-09-18 22:29:45 +0800901 union {
902 struct {
903 /* bit 256 - Outstanding Notification */
904 u16 on : 1,
905 /* bit 257 - Suppress Notification */
906 sn : 1,
907 /* bit 271:258 - Reserved */
908 rsvd_1 : 14;
909 /* bit 279:272 - Notification Vector */
910 u8 nv;
911 /* bit 287:280 - Reserved */
912 u8 rsvd_2;
913 /* bit 319:288 - Notification Destination */
914 u32 ndst;
915 };
916 u64 control;
917 };
918 u32 rsvd[6];
Yang Zhang01e439b2013-04-11 19:25:12 +0800919} __aligned(64);
920
Yang Zhanga20ed542013-04-11 19:25:15 +0800921static bool pi_test_and_set_on(struct pi_desc *pi_desc)
922{
923 return test_and_set_bit(POSTED_INTR_ON,
924 (unsigned long *)&pi_desc->control);
925}
926
927static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
928{
929 return test_and_clear_bit(POSTED_INTR_ON,
930 (unsigned long *)&pi_desc->control);
931}
932
933static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
934{
935 return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
936}
937
Feng Wuebbfc762015-09-18 22:29:46 +0800938static inline void pi_clear_sn(struct pi_desc *pi_desc)
939{
940 return clear_bit(POSTED_INTR_SN,
941 (unsigned long *)&pi_desc->control);
942}
943
944static inline void pi_set_sn(struct pi_desc *pi_desc)
945{
946 return set_bit(POSTED_INTR_SN,
947 (unsigned long *)&pi_desc->control);
948}
949
Paolo Bonziniad361092016-09-20 16:15:05 +0200950static inline void pi_clear_on(struct pi_desc *pi_desc)
951{
952 clear_bit(POSTED_INTR_ON,
953 (unsigned long *)&pi_desc->control);
954}
955
Feng Wuebbfc762015-09-18 22:29:46 +0800956static inline int pi_test_on(struct pi_desc *pi_desc)
957{
958 return test_bit(POSTED_INTR_ON,
959 (unsigned long *)&pi_desc->control);
960}
961
962static inline int pi_test_sn(struct pi_desc *pi_desc)
963{
964 return test_bit(POSTED_INTR_SN,
965 (unsigned long *)&pi_desc->control);
966}
967
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -0400968struct vmx_msrs {
969 unsigned int nr;
970 struct vmx_msr_entry val[NR_AUTOLOAD_MSRS];
971};
972
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400973struct vcpu_vmx {
Rusty Russellfb3f0f52007-07-27 17:16:56 +1000974 struct kvm_vcpu vcpu;
Avi Kivity313dbd492008-07-17 18:04:30 +0300975 unsigned long host_rsp;
Avi Kivity29bd8a72007-09-10 17:27:03 +0300976 u8 fail;
Paolo Bonzini904e14f2018-01-16 16:51:18 +0100977 u8 msr_bitmap_mode;
Avi Kivity51aa01d2010-07-20 14:31:20 +0300978 u32 exit_intr_info;
Avi Kivity1155f762007-11-22 11:30:47 +0200979 u32 idt_vectoring_info;
Avi Kivity6de12732011-03-07 12:51:22 +0200980 ulong rflags;
Avi Kivity26bb0982009-09-07 11:14:12 +0300981 struct shared_msr_entry *guest_msrs;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400982 int nmsrs;
983 int save_nmsrs;
Yang Zhanga547c6d2013-04-11 19:25:10 +0800984 unsigned long host_idt_base;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400985#ifdef CONFIG_X86_64
Avi Kivity44ea2b12009-09-06 15:55:37 +0300986 u64 msr_host_kernel_gs_base;
987 u64 msr_guest_kernel_gs_base;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400988#endif
Ashok Raj15d45072018-02-01 22:59:43 +0100989
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +0100990 u64 arch_capabilities;
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +0100991 u64 spec_ctrl;
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +0100992
Gleb Natapov2961e8762013-11-25 15:37:13 +0200993 u32 vm_entry_controls_shadow;
994 u32 vm_exit_controls_shadow;
Paolo Bonzini80154d72017-08-24 13:55:35 +0200995 u32 secondary_exec_control;
996
Nadav Har'Eld462b812011-05-24 15:26:10 +0300997 /*
998 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
999 * non-nested (L1) guest, it always points to vmcs01. For a nested
Sean Christophersonbd9966d2018-07-23 12:32:42 -07001000 * guest (L2), it points to a different VMCS. loaded_cpu_state points
1001 * to the VMCS whose state is loaded into the CPU registers that only
1002 * need to be switched when transitioning to/from the kernel; a NULL
1003 * value indicates that host state is loaded.
Nadav Har'Eld462b812011-05-24 15:26:10 +03001004 */
1005 struct loaded_vmcs vmcs01;
1006 struct loaded_vmcs *loaded_vmcs;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07001007 struct loaded_vmcs *loaded_cpu_state;
Nadav Har'Eld462b812011-05-24 15:26:10 +03001008 bool __launched; /* temporary, used in vmx_vcpu_run */
Avi Kivity61d2ef22010-04-28 16:40:38 +03001009 struct msr_autoload {
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04001010 struct vmx_msrs guest;
1011 struct vmx_msrs host;
Avi Kivity61d2ef22010-04-28 16:40:38 +03001012 } msr_autoload;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07001013
Avi Kivity9c8cba32007-11-22 11:42:59 +02001014 struct {
Avi Kivity7ffd92c2009-06-09 14:10:45 +03001015 int vm86_active;
Avi Kivity78ac8b42010-04-08 18:19:35 +03001016 ulong save_rflags;
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03001017 struct kvm_segment segs[8];
1018 } rmode;
1019 struct {
1020 u32 bitmask; /* 4 bits per segment (1 bit per field) */
Avi Kivity7ffd92c2009-06-09 14:10:45 +03001021 struct kvm_save_segment {
1022 u16 selector;
1023 unsigned long base;
1024 u32 limit;
1025 u32 ar;
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03001026 } seg[8];
Avi Kivity2fb92db2011-04-27 19:42:18 +03001027 } segment_cache;
Sheng Yang2384d2b2008-01-17 15:14:33 +08001028 int vpid;
Mohammed Gamal04fa4d32008-08-17 16:39:48 +03001029 bool emulation_required;
Jan Kiszka3b86cd92008-09-26 09:30:57 +02001030
Andi Kleena0861c02009-06-08 17:37:09 +08001031 u32 exit_reason;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08001032
Yang Zhang01e439b2013-04-11 19:25:12 +08001033 /* Posted interrupt descriptor */
1034 struct pi_desc pi_desc;
1035
Nadav Har'Elec378ae2011-05-25 23:02:54 +03001036 /* Support for a guest hypervisor (nested VMX) */
1037 struct nested_vmx nested;
Radim Krčmářa7653ec2014-08-21 18:08:07 +02001038
1039 /* Dynamic PLE window. */
1040 int ple_window;
1041 bool ple_window_dirty;
Kai Huang843e4332015-01-28 10:54:28 +08001042
Sean Christophersond264ee02018-08-27 15:21:12 -07001043 bool req_immediate_exit;
1044
Kai Huang843e4332015-01-28 10:54:28 +08001045 /* Support for PML */
1046#define PML_ENTITY_NUM 512
1047 struct page *pml_pg;
Owen Hofmann2680d6d2016-03-01 13:36:13 -08001048
Yunhong Jiang64672c92016-06-13 14:19:59 -07001049 /* apic deadline value in host tsc */
1050 u64 hv_deadline_tsc;
1051
Owen Hofmann2680d6d2016-03-01 13:36:13 -08001052 u64 current_tsc_ratio;
Xiao Guangrong1be0e612016-03-22 16:51:18 +08001053
Xiao Guangrong1be0e612016-03-22 16:51:18 +08001054 u32 host_pkru;
Haozhong Zhang3b840802016-06-22 14:59:54 +08001055
Wanpeng Li74c55932017-11-29 01:31:20 -08001056 unsigned long host_debugctlmsr;
1057
Haozhong Zhang37e4c992016-06-22 14:59:55 +08001058 /*
1059 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
1060 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
1061 * in msr_ia32_feature_control_valid_bits.
1062 */
Haozhong Zhang3b840802016-06-22 14:59:54 +08001063 u64 msr_ia32_feature_control;
Haozhong Zhang37e4c992016-06-22 14:59:55 +08001064 u64 msr_ia32_feature_control_valid_bits;
Tianyu Lan877ad952018-07-19 08:40:23 +00001065 u64 ept_pointer;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001066};
1067
Avi Kivity2fb92db2011-04-27 19:42:18 +03001068enum segment_cache_field {
1069 SEG_FIELD_SEL = 0,
1070 SEG_FIELD_BASE = 1,
1071 SEG_FIELD_LIMIT = 2,
1072 SEG_FIELD_AR = 3,
1073
1074 SEG_FIELD_NR = 4
1075};
1076
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07001077static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
1078{
1079 return container_of(kvm, struct kvm_vmx, kvm);
1080}
1081
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001082static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
1083{
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001084 return container_of(vcpu, struct vcpu_vmx, vcpu);
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001085}
1086
Feng Wuefc64402015-09-18 22:29:51 +08001087static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
1088{
1089 return &(to_vmx(vcpu)->pi_desc);
1090}
1091
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001092#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
Nadav Har'El22bd0352011-05-25 23:05:57 +03001093#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001094#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name)
1095#define FIELD64(number, name) \
1096 FIELD(number, name), \
1097 [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
Nadav Har'El22bd0352011-05-25 23:05:57 +03001098
Abel Gordon4607c2d2013-04-18 14:35:55 +03001099
Paolo Bonzini44900ba2017-12-13 12:58:02 +01001100static u16 shadow_read_only_fields[] = {
Paolo Bonzinic9e9dea2017-12-20 13:16:29 +01001101#define SHADOW_FIELD_RO(x) x,
1102#include "vmx_shadow_fields.h"
Abel Gordon4607c2d2013-04-18 14:35:55 +03001103};
Bandan Dasfe2b2012014-04-21 15:20:14 -04001104static int max_shadow_read_only_fields =
Abel Gordon4607c2d2013-04-18 14:35:55 +03001105 ARRAY_SIZE(shadow_read_only_fields);
1106
Paolo Bonzini44900ba2017-12-13 12:58:02 +01001107static u16 shadow_read_write_fields[] = {
Paolo Bonzinic9e9dea2017-12-20 13:16:29 +01001108#define SHADOW_FIELD_RW(x) x,
1109#include "vmx_shadow_fields.h"
Abel Gordon4607c2d2013-04-18 14:35:55 +03001110};
Bandan Dasfe2b2012014-04-21 15:20:14 -04001111static int max_shadow_read_write_fields =
Abel Gordon4607c2d2013-04-18 14:35:55 +03001112 ARRAY_SIZE(shadow_read_write_fields);
1113
Mathias Krause772e0312012-08-30 01:30:19 +02001114static const unsigned short vmcs_field_to_offset_table[] = {
Nadav Har'El22bd0352011-05-25 23:05:57 +03001115 FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
Wincy Van705699a2015-02-03 23:58:17 +08001116 FIELD(POSTED_INTR_NV, posted_intr_nv),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001117 FIELD(GUEST_ES_SELECTOR, guest_es_selector),
1118 FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
1119 FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
1120 FIELD(GUEST_DS_SELECTOR, guest_ds_selector),
1121 FIELD(GUEST_FS_SELECTOR, guest_fs_selector),
1122 FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
1123 FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
1124 FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
Wincy Van608406e2015-02-03 23:57:51 +08001125 FIELD(GUEST_INTR_STATUS, guest_intr_status),
Bandan Dasc5f983f2017-05-05 15:25:14 -04001126 FIELD(GUEST_PML_INDEX, guest_pml_index),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001127 FIELD(HOST_ES_SELECTOR, host_es_selector),
1128 FIELD(HOST_CS_SELECTOR, host_cs_selector),
1129 FIELD(HOST_SS_SELECTOR, host_ss_selector),
1130 FIELD(HOST_DS_SELECTOR, host_ds_selector),
1131 FIELD(HOST_FS_SELECTOR, host_fs_selector),
1132 FIELD(HOST_GS_SELECTOR, host_gs_selector),
1133 FIELD(HOST_TR_SELECTOR, host_tr_selector),
1134 FIELD64(IO_BITMAP_A, io_bitmap_a),
1135 FIELD64(IO_BITMAP_B, io_bitmap_b),
1136 FIELD64(MSR_BITMAP, msr_bitmap),
1137 FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
1138 FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
1139 FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
Jim Mattsonb348e792018-05-01 15:40:27 -07001140 FIELD64(PML_ADDRESS, pml_address),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001141 FIELD64(TSC_OFFSET, tsc_offset),
1142 FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
1143 FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
Wincy Van705699a2015-02-03 23:58:17 +08001144 FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
Bandan Das27c42a12017-08-03 15:54:42 -04001145 FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001146 FIELD64(EPT_POINTER, ept_pointer),
Wincy Van608406e2015-02-03 23:57:51 +08001147 FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
1148 FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
1149 FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
1150 FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
Bandan Das41ab9372017-08-03 15:54:43 -04001151 FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
Jim Mattsonb348e792018-05-01 15:40:27 -07001152 FIELD64(VMREAD_BITMAP, vmread_bitmap),
1153 FIELD64(VMWRITE_BITMAP, vmwrite_bitmap),
Wanpeng Li81dc01f2014-12-04 19:11:07 +08001154 FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001155 FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
1156 FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
1157 FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
1158 FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
1159 FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
1160 FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
1161 FIELD64(GUEST_PDPTR0, guest_pdptr0),
1162 FIELD64(GUEST_PDPTR1, guest_pdptr1),
1163 FIELD64(GUEST_PDPTR2, guest_pdptr2),
1164 FIELD64(GUEST_PDPTR3, guest_pdptr3),
Paolo Bonzini36be0b92014-02-24 12:30:04 +01001165 FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001166 FIELD64(HOST_IA32_PAT, host_ia32_pat),
1167 FIELD64(HOST_IA32_EFER, host_ia32_efer),
1168 FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
1169 FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control),
1170 FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control),
1171 FIELD(EXCEPTION_BITMAP, exception_bitmap),
1172 FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask),
1173 FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match),
1174 FIELD(CR3_TARGET_COUNT, cr3_target_count),
1175 FIELD(VM_EXIT_CONTROLS, vm_exit_controls),
1176 FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count),
1177 FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count),
1178 FIELD(VM_ENTRY_CONTROLS, vm_entry_controls),
1179 FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count),
1180 FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field),
1181 FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code),
1182 FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len),
1183 FIELD(TPR_THRESHOLD, tpr_threshold),
1184 FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control),
1185 FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error),
1186 FIELD(VM_EXIT_REASON, vm_exit_reason),
1187 FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info),
1188 FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code),
1189 FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field),
1190 FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code),
1191 FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len),
1192 FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info),
1193 FIELD(GUEST_ES_LIMIT, guest_es_limit),
1194 FIELD(GUEST_CS_LIMIT, guest_cs_limit),
1195 FIELD(GUEST_SS_LIMIT, guest_ss_limit),
1196 FIELD(GUEST_DS_LIMIT, guest_ds_limit),
1197 FIELD(GUEST_FS_LIMIT, guest_fs_limit),
1198 FIELD(GUEST_GS_LIMIT, guest_gs_limit),
1199 FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit),
1200 FIELD(GUEST_TR_LIMIT, guest_tr_limit),
1201 FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit),
1202 FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit),
1203 FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes),
1204 FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes),
1205 FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes),
1206 FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes),
1207 FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes),
1208 FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes),
1209 FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes),
1210 FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes),
1211 FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info),
1212 FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
1213 FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
1214 FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
Jan Kiszka0238ea92013-03-13 11:31:24 +01001215 FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001216 FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
1217 FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
1218 FIELD(CR0_READ_SHADOW, cr0_read_shadow),
1219 FIELD(CR4_READ_SHADOW, cr4_read_shadow),
1220 FIELD(CR3_TARGET_VALUE0, cr3_target_value0),
1221 FIELD(CR3_TARGET_VALUE1, cr3_target_value1),
1222 FIELD(CR3_TARGET_VALUE2, cr3_target_value2),
1223 FIELD(CR3_TARGET_VALUE3, cr3_target_value3),
1224 FIELD(EXIT_QUALIFICATION, exit_qualification),
1225 FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address),
1226 FIELD(GUEST_CR0, guest_cr0),
1227 FIELD(GUEST_CR3, guest_cr3),
1228 FIELD(GUEST_CR4, guest_cr4),
1229 FIELD(GUEST_ES_BASE, guest_es_base),
1230 FIELD(GUEST_CS_BASE, guest_cs_base),
1231 FIELD(GUEST_SS_BASE, guest_ss_base),
1232 FIELD(GUEST_DS_BASE, guest_ds_base),
1233 FIELD(GUEST_FS_BASE, guest_fs_base),
1234 FIELD(GUEST_GS_BASE, guest_gs_base),
1235 FIELD(GUEST_LDTR_BASE, guest_ldtr_base),
1236 FIELD(GUEST_TR_BASE, guest_tr_base),
1237 FIELD(GUEST_GDTR_BASE, guest_gdtr_base),
1238 FIELD(GUEST_IDTR_BASE, guest_idtr_base),
1239 FIELD(GUEST_DR7, guest_dr7),
1240 FIELD(GUEST_RSP, guest_rsp),
1241 FIELD(GUEST_RIP, guest_rip),
1242 FIELD(GUEST_RFLAGS, guest_rflags),
1243 FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
1244 FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
1245 FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
1246 FIELD(HOST_CR0, host_cr0),
1247 FIELD(HOST_CR3, host_cr3),
1248 FIELD(HOST_CR4, host_cr4),
1249 FIELD(HOST_FS_BASE, host_fs_base),
1250 FIELD(HOST_GS_BASE, host_gs_base),
1251 FIELD(HOST_TR_BASE, host_tr_base),
1252 FIELD(HOST_GDTR_BASE, host_gdtr_base),
1253 FIELD(HOST_IDTR_BASE, host_idtr_base),
1254 FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp),
1255 FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
1256 FIELD(HOST_RSP, host_rsp),
1257 FIELD(HOST_RIP, host_rip),
1258};
Nadav Har'El22bd0352011-05-25 23:05:57 +03001259
1260static inline short vmcs_field_to_offset(unsigned long field)
1261{
Dan Williams085331d2018-01-31 17:47:03 -08001262 const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
1263 unsigned short offset;
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001264 unsigned index;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01001265
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001266 if (field >> 15)
Andrew Honig75f139a2018-01-10 10:12:03 -08001267 return -ENOENT;
1268
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001269 index = ROL16(field, 6);
Linus Torvalds15303ba2018-02-10 13:16:35 -08001270 if (index >= size)
Andrew Honig75f139a2018-01-10 10:12:03 -08001271 return -ENOENT;
1272
Linus Torvalds15303ba2018-02-10 13:16:35 -08001273 index = array_index_nospec(index, size);
1274 offset = vmcs_field_to_offset_table[index];
Dan Williams085331d2018-01-31 17:47:03 -08001275 if (offset == 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01001276 return -ENOENT;
Dan Williams085331d2018-01-31 17:47:03 -08001277 return offset;
Nadav Har'El22bd0352011-05-25 23:05:57 +03001278}
1279
Nadav Har'Ela9d30f32011-05-25 23:03:55 +03001280static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
1281{
David Matlack4f2777b2016-07-13 17:16:37 -07001282 return to_vmx(vcpu)->nested.cached_vmcs12;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +03001283}
1284
Liran Alon61ada742018-06-23 02:35:08 +03001285static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
1286{
1287 return to_vmx(vcpu)->nested.cached_shadow_vmcs12;
1288}
1289
Peter Feiner995f00a2017-06-30 17:26:32 -07001290static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03001291static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
Peter Feiner995f00a2017-06-30 17:26:32 -07001292static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
Wanpeng Lif53cd632014-12-02 19:14:58 +08001293static bool vmx_xsaves_supported(void);
Orit Wassermanb246dd52012-05-31 14:49:22 +03001294static void vmx_set_segment(struct kvm_vcpu *vcpu,
1295 struct kvm_segment *var, int seg);
1296static void vmx_get_segment(struct kvm_vcpu *vcpu,
1297 struct kvm_segment *var, int seg);
Gleb Natapovd99e4152012-12-20 16:57:45 +02001298static bool guest_state_valid(struct kvm_vcpu *vcpu);
1299static u32 vmx_segment_access_rights(struct kvm_segment *var);
Abel Gordon16f5b902013-04-18 14:38:25 +03001300static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
Paolo Bonzinib96fb432017-07-27 12:29:32 +02001301static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
1302static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
1303static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
1304 u16 error_code);
Paolo Bonzini904e14f2018-01-16 16:51:18 +01001305static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
Ashok Raj15d45072018-02-01 22:59:43 +01001306static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
1307 u32 msr, int type);
Avi Kivity75880a02007-06-20 11:20:04 +03001308
Avi Kivity6aa8b732006-12-10 02:21:36 -08001309static DEFINE_PER_CPU(struct vmcs *, vmxarea);
1310static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
Nadav Har'Eld462b812011-05-24 15:26:10 +03001311/*
1312 * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
1313 * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
1314 */
1315static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001316
Feng Wubf9f6ac2015-09-18 22:29:55 +08001317/*
1318 * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
1319 * can find which vCPU should be waken up.
1320 */
1321static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
1322static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
1323
Radim Krčmář23611332016-09-29 22:41:33 +02001324enum {
Radim Krčmář23611332016-09-29 22:41:33 +02001325 VMX_VMREAD_BITMAP,
1326 VMX_VMWRITE_BITMAP,
1327 VMX_BITMAP_NR
1328};
1329
1330static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
1331
Radim Krčmář23611332016-09-29 22:41:33 +02001332#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
1333#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
He, Qingfdef3ad2007-04-30 09:45:24 +03001334
Avi Kivity110312c2010-12-21 12:54:20 +02001335static bool cpu_has_load_ia32_efer;
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001336static bool cpu_has_load_perf_global_ctrl;
Avi Kivity110312c2010-12-21 12:54:20 +02001337
Sheng Yang2384d2b2008-01-17 15:14:33 +08001338static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
1339static DEFINE_SPINLOCK(vmx_vpid_lock);
1340
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001341static struct vmcs_config {
Avi Kivity6aa8b732006-12-10 02:21:36 -08001342 int size;
1343 int order;
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03001344 u32 basic_cap;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001345 u32 revision_id;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001346 u32 pin_based_exec_ctrl;
1347 u32 cpu_based_exec_ctrl;
Sheng Yangf78e0e22007-10-29 09:40:42 +08001348 u32 cpu_based_2nd_exec_ctrl;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001349 u32 vmexit_ctrl;
1350 u32 vmentry_ctrl;
Paolo Bonzini13893092018-02-26 13:40:09 +01001351 struct nested_vmx_msrs nested;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001352} vmcs_config;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001353
Hannes Ederefff9e52008-11-28 17:02:06 +01001354static struct vmx_capability {
Sheng Yangd56f5462008-04-25 10:13:16 +08001355 u32 ept;
1356 u32 vpid;
1357} vmx_capability;
1358
Avi Kivity6aa8b732006-12-10 02:21:36 -08001359#define VMX_SEGMENT_FIELD(seg) \
1360 [VCPU_SREG_##seg] = { \
1361 .selector = GUEST_##seg##_SELECTOR, \
1362 .base = GUEST_##seg##_BASE, \
1363 .limit = GUEST_##seg##_LIMIT, \
1364 .ar_bytes = GUEST_##seg##_AR_BYTES, \
1365 }
1366
Mathias Krause772e0312012-08-30 01:30:19 +02001367static const struct kvm_vmx_segment_field {
Avi Kivity6aa8b732006-12-10 02:21:36 -08001368 unsigned selector;
1369 unsigned base;
1370 unsigned limit;
1371 unsigned ar_bytes;
1372} kvm_vmx_segment_fields[] = {
1373 VMX_SEGMENT_FIELD(CS),
1374 VMX_SEGMENT_FIELD(DS),
1375 VMX_SEGMENT_FIELD(ES),
1376 VMX_SEGMENT_FIELD(FS),
1377 VMX_SEGMENT_FIELD(GS),
1378 VMX_SEGMENT_FIELD(SS),
1379 VMX_SEGMENT_FIELD(TR),
1380 VMX_SEGMENT_FIELD(LDTR),
1381};
1382
Avi Kivity26bb0982009-09-07 11:14:12 +03001383static u64 host_efer;
1384
Avi Kivity6de4f3a2009-05-31 22:58:47 +03001385static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
1386
Avi Kivity4d56c8a2007-04-19 14:28:44 +03001387/*
Brian Gerst8c065852010-07-17 09:03:26 -04001388 * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
Avi Kivity4d56c8a2007-04-19 14:28:44 +03001389 * away by decrementing the array size.
1390 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08001391static const u32 vmx_msr_index[] = {
Avi Kivity05b3e0c2006-12-13 00:33:45 -08001392#ifdef CONFIG_X86_64
Avi Kivity44ea2b12009-09-06 15:55:37 +03001393 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
Avi Kivity6aa8b732006-12-10 02:21:36 -08001394#endif
Brian Gerst8c065852010-07-17 09:03:26 -04001395 MSR_EFER, MSR_TSC_AUX, MSR_STAR,
Avi Kivity6aa8b732006-12-10 02:21:36 -08001396};
Avi Kivity6aa8b732006-12-10 02:21:36 -08001397
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001398DEFINE_STATIC_KEY_FALSE(enable_evmcs);
1399
1400#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
1401
1402#define KVM_EVMCS_VERSION 1
1403
Vitaly Kuznetsov5d7a6442018-10-16 18:50:00 +02001404/*
1405 * Enlightened VMCSv1 doesn't support these:
1406 *
1407 * POSTED_INTR_NV = 0x00000002,
1408 * GUEST_INTR_STATUS = 0x00000810,
1409 * APIC_ACCESS_ADDR = 0x00002014,
1410 * POSTED_INTR_DESC_ADDR = 0x00002016,
1411 * EOI_EXIT_BITMAP0 = 0x0000201c,
1412 * EOI_EXIT_BITMAP1 = 0x0000201e,
1413 * EOI_EXIT_BITMAP2 = 0x00002020,
1414 * EOI_EXIT_BITMAP3 = 0x00002022,
1415 * GUEST_PML_INDEX = 0x00000812,
1416 * PML_ADDRESS = 0x0000200e,
1417 * VM_FUNCTION_CONTROL = 0x00002018,
1418 * EPTP_LIST_ADDRESS = 0x00002024,
1419 * VMREAD_BITMAP = 0x00002026,
1420 * VMWRITE_BITMAP = 0x00002028,
1421 *
1422 * TSC_MULTIPLIER = 0x00002032,
1423 * PLE_GAP = 0x00004020,
1424 * PLE_WINDOW = 0x00004022,
1425 * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
1426 * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
1427 * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
1428 *
1429 * Currently unsupported in KVM:
1430 * GUEST_IA32_RTIT_CTL = 0x00002814,
1431 */
1432#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
1433 PIN_BASED_VMX_PREEMPTION_TIMER)
1434#define EVMCS1_UNSUPPORTED_2NDEXEC \
1435 (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
1436 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
1437 SECONDARY_EXEC_APIC_REGISTER_VIRT | \
1438 SECONDARY_EXEC_ENABLE_PML | \
1439 SECONDARY_EXEC_ENABLE_VMFUNC | \
1440 SECONDARY_EXEC_SHADOW_VMCS | \
1441 SECONDARY_EXEC_TSC_SCALING | \
1442 SECONDARY_EXEC_PAUSE_LOOP_EXITING)
1443#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
1444#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
1445#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
1446
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001447#if IS_ENABLED(CONFIG_HYPERV)
1448static bool __read_mostly enlightened_vmcs = true;
1449module_param(enlightened_vmcs, bool, 0444);
1450
1451static inline void evmcs_write64(unsigned long field, u64 value)
1452{
1453 u16 clean_field;
1454 int offset = get_evmcs_offset(field, &clean_field);
1455
1456 if (offset < 0)
1457 return;
1458
1459 *(u64 *)((char *)current_evmcs + offset) = value;
1460
1461 current_evmcs->hv_clean_fields &= ~clean_field;
1462}
1463
1464static inline void evmcs_write32(unsigned long field, u32 value)
1465{
1466 u16 clean_field;
1467 int offset = get_evmcs_offset(field, &clean_field);
1468
1469 if (offset < 0)
1470 return;
1471
1472 *(u32 *)((char *)current_evmcs + offset) = value;
1473 current_evmcs->hv_clean_fields &= ~clean_field;
1474}
1475
1476static inline void evmcs_write16(unsigned long field, u16 value)
1477{
1478 u16 clean_field;
1479 int offset = get_evmcs_offset(field, &clean_field);
1480
1481 if (offset < 0)
1482 return;
1483
1484 *(u16 *)((char *)current_evmcs + offset) = value;
1485 current_evmcs->hv_clean_fields &= ~clean_field;
1486}
1487
1488static inline u64 evmcs_read64(unsigned long field)
1489{
1490 int offset = get_evmcs_offset(field, NULL);
1491
1492 if (offset < 0)
1493 return 0;
1494
1495 return *(u64 *)((char *)current_evmcs + offset);
1496}
1497
1498static inline u32 evmcs_read32(unsigned long field)
1499{
1500 int offset = get_evmcs_offset(field, NULL);
1501
1502 if (offset < 0)
1503 return 0;
1504
1505 return *(u32 *)((char *)current_evmcs + offset);
1506}
1507
1508static inline u16 evmcs_read16(unsigned long field)
1509{
1510 int offset = get_evmcs_offset(field, NULL);
1511
1512 if (offset < 0)
1513 return 0;
1514
1515 return *(u16 *)((char *)current_evmcs + offset);
1516}
1517
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02001518static inline void evmcs_touch_msr_bitmap(void)
1519{
1520 if (unlikely(!current_evmcs))
1521 return;
1522
1523 if (current_evmcs->hv_enlightenments_control.msr_bitmap)
1524 current_evmcs->hv_clean_fields &=
1525 ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
1526}
1527
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001528static void evmcs_load(u64 phys_addr)
1529{
1530 struct hv_vp_assist_page *vp_ap =
1531 hv_get_vp_assist_page(smp_processor_id());
1532
1533 vp_ap->current_nested_vmcs = phys_addr;
1534 vp_ap->enlighten_vmentry = 1;
1535}
1536
1537static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
1538{
Vitaly Kuznetsov5d7a6442018-10-16 18:50:00 +02001539 vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
1540 vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001541
Vitaly Kuznetsov5d7a6442018-10-16 18:50:00 +02001542 vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
1543 vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001544
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001545}
Tianyu Lan877ad952018-07-19 08:40:23 +00001546
1547/* check_ept_pointer() should be under protection of ept_pointer_lock. */
1548static void check_ept_pointer_match(struct kvm *kvm)
1549{
1550 struct kvm_vcpu *vcpu;
1551 u64 tmp_eptp = INVALID_PAGE;
1552 int i;
1553
1554 kvm_for_each_vcpu(i, vcpu, kvm) {
1555 if (!VALID_PAGE(tmp_eptp)) {
1556 tmp_eptp = to_vmx(vcpu)->ept_pointer;
1557 } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
1558 to_kvm_vmx(kvm)->ept_pointers_match
1559 = EPT_POINTERS_MISMATCH;
1560 return;
1561 }
1562 }
1563
1564 to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
1565}
1566
1567static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
1568{
Lan Tianyua5c214d2018-10-13 22:54:05 +08001569 struct kvm_vcpu *vcpu;
1570 int ret = -ENOTSUPP, i;
Tianyu Lan877ad952018-07-19 08:40:23 +00001571
1572 spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1573
1574 if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
1575 check_ept_pointer_match(kvm);
1576
1577 if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
Lan Tianyua5c214d2018-10-13 22:54:05 +08001578 kvm_for_each_vcpu(i, vcpu, kvm)
1579 ret |= hyperv_flush_guest_mapping(
1580 to_vmx(kvm_get_vcpu(kvm, i))->ept_pointer);
1581 } else {
1582 ret = hyperv_flush_guest_mapping(
1583 to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
Tianyu Lan877ad952018-07-19 08:40:23 +00001584 }
1585
Tianyu Lan877ad952018-07-19 08:40:23 +00001586 spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1587 return ret;
1588}
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001589#else /* !IS_ENABLED(CONFIG_HYPERV) */
1590static inline void evmcs_write64(unsigned long field, u64 value) {}
1591static inline void evmcs_write32(unsigned long field, u32 value) {}
1592static inline void evmcs_write16(unsigned long field, u16 value) {}
1593static inline u64 evmcs_read64(unsigned long field) { return 0; }
1594static inline u32 evmcs_read32(unsigned long field) { return 0; }
1595static inline u16 evmcs_read16(unsigned long field) { return 0; }
1596static inline void evmcs_load(u64 phys_addr) {}
1597static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02001598static inline void evmcs_touch_msr_bitmap(void) {}
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001599#endif /* IS_ENABLED(CONFIG_HYPERV) */
1600
Vitaly Kuznetsov57b119d2018-10-16 18:50:01 +02001601static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
1602 uint16_t *vmcs_version)
1603{
1604 struct vcpu_vmx *vmx = to_vmx(vcpu);
1605
1606 /* We don't support disabling the feature for simplicity. */
1607 if (vmx->nested.enlightened_vmcs_enabled)
1608 return 0;
1609
1610 vmx->nested.enlightened_vmcs_enabled = true;
1611
1612 /*
1613 * vmcs_version represents the range of supported Enlightened VMCS
1614 * versions: lower 8 bits is the minimal version, higher 8 bits is the
1615 * maximum supported version. KVM supports versions from 1 to
1616 * KVM_EVMCS_VERSION.
1617 */
1618 *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
1619
1620 vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
1621 vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
1622 vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
1623 vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
1624 vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
1625
1626 return 0;
1627}
1628
Jan Kiszka5bb16012016-02-09 20:14:21 +01001629static inline bool is_exception_n(u32 intr_info, u8 vector)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001630{
1631 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
1632 INTR_INFO_VALID_MASK)) ==
Jan Kiszka5bb16012016-02-09 20:14:21 +01001633 (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
1634}
1635
Jan Kiszka6f054852016-02-09 20:15:18 +01001636static inline bool is_debug(u32 intr_info)
1637{
1638 return is_exception_n(intr_info, DB_VECTOR);
1639}
1640
1641static inline bool is_breakpoint(u32 intr_info)
1642{
1643 return is_exception_n(intr_info, BP_VECTOR);
1644}
1645
Jan Kiszka5bb16012016-02-09 20:14:21 +01001646static inline bool is_page_fault(u32 intr_info)
1647{
1648 return is_exception_n(intr_info, PF_VECTOR);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001649}
1650
Gui Jianfeng31299942010-03-15 17:29:09 +08001651static inline bool is_invalid_opcode(u32 intr_info)
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05001652{
Jan Kiszka5bb16012016-02-09 20:14:21 +01001653 return is_exception_n(intr_info, UD_VECTOR);
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05001654}
1655
Liran Alon9e869482018-03-12 13:12:51 +02001656static inline bool is_gp_fault(u32 intr_info)
1657{
1658 return is_exception_n(intr_info, GP_VECTOR);
1659}
1660
Gui Jianfeng31299942010-03-15 17:29:09 +08001661static inline bool is_machine_check(u32 intr_info)
Andi Kleena0861c02009-06-08 17:37:09 +08001662{
1663 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
1664 INTR_INFO_VALID_MASK)) ==
1665 (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
1666}
1667
Linus Torvalds32d43cd2018-03-20 12:16:59 -07001668/* Undocumented: icebp/int1 */
1669static inline bool is_icebp(u32 intr_info)
1670{
1671 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
1672 == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
1673}
1674
Gui Jianfeng31299942010-03-15 17:29:09 +08001675static inline bool cpu_has_vmx_msr_bitmap(void)
Sheng Yang25c5f222008-03-28 13:18:56 +08001676{
Sheng Yang04547152009-04-01 15:52:31 +08001677 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
Sheng Yang25c5f222008-03-28 13:18:56 +08001678}
1679
Gui Jianfeng31299942010-03-15 17:29:09 +08001680static inline bool cpu_has_vmx_tpr_shadow(void)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001681{
Sheng Yang04547152009-04-01 15:52:31 +08001682 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001683}
1684
Paolo Bonzini35754c92015-07-29 12:05:37 +02001685static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001686{
Paolo Bonzini35754c92015-07-29 12:05:37 +02001687 return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu);
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001688}
1689
Gui Jianfeng31299942010-03-15 17:29:09 +08001690static inline bool cpu_has_secondary_exec_ctrls(void)
Sheng Yangf78e0e22007-10-29 09:40:42 +08001691{
Sheng Yang04547152009-04-01 15:52:31 +08001692 return vmcs_config.cpu_based_exec_ctrl &
1693 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Sheng Yangf78e0e22007-10-29 09:40:42 +08001694}
1695
Avi Kivity774ead32007-12-26 13:57:04 +02001696static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
Sheng Yangf78e0e22007-10-29 09:40:42 +08001697{
Sheng Yang04547152009-04-01 15:52:31 +08001698 return vmcs_config.cpu_based_2nd_exec_ctrl &
1699 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
1700}
1701
Yang Zhang8d146952013-01-25 10:18:50 +08001702static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
1703{
1704 return vmcs_config.cpu_based_2nd_exec_ctrl &
1705 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
1706}
1707
Yang Zhang83d4c282013-01-25 10:18:49 +08001708static inline bool cpu_has_vmx_apic_register_virt(void)
1709{
1710 return vmcs_config.cpu_based_2nd_exec_ctrl &
1711 SECONDARY_EXEC_APIC_REGISTER_VIRT;
1712}
1713
Yang Zhangc7c9c562013-01-25 10:18:51 +08001714static inline bool cpu_has_vmx_virtual_intr_delivery(void)
1715{
1716 return vmcs_config.cpu_based_2nd_exec_ctrl &
1717 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
1718}
1719
Sean Christopherson0b665d32018-08-14 09:33:34 -07001720static inline bool cpu_has_vmx_encls_vmexit(void)
1721{
1722 return vmcs_config.cpu_based_2nd_exec_ctrl &
1723 SECONDARY_EXEC_ENCLS_EXITING;
1724}
1725
Yunhong Jiang64672c92016-06-13 14:19:59 -07001726/*
1727 * Comment's format: document - errata name - stepping - processor name.
1728 * Refer from
1729 * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
1730 */
1731static u32 vmx_preemption_cpu_tfms[] = {
1732/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
17330x000206E6,
1734/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
1735/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
1736/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
17370x00020652,
1738/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
17390x00020655,
1740/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
1741/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
1742/*
1743 * 320767.pdf - AAP86 - B1 -
1744 * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
1745 */
17460x000106E5,
1747/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
17480x000106A0,
1749/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
17500x000106A1,
1751/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
17520x000106A4,
1753 /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
1754 /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
1755 /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
17560x000106A5,
1757};
1758
1759static inline bool cpu_has_broken_vmx_preemption_timer(void)
1760{
1761 u32 eax = cpuid_eax(0x00000001), i;
1762
1763 /* Clear the reserved bits */
1764 eax &= ~(0x3U << 14 | 0xfU << 28);
Wei Yongjun03f6a222016-07-04 15:13:07 +00001765 for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
Yunhong Jiang64672c92016-06-13 14:19:59 -07001766 if (eax == vmx_preemption_cpu_tfms[i])
1767 return true;
1768
1769 return false;
1770}
1771
1772static inline bool cpu_has_vmx_preemption_timer(void)
1773{
Yunhong Jiang64672c92016-06-13 14:19:59 -07001774 return vmcs_config.pin_based_exec_ctrl &
1775 PIN_BASED_VMX_PREEMPTION_TIMER;
1776}
1777
Yang Zhang01e439b2013-04-11 19:25:12 +08001778static inline bool cpu_has_vmx_posted_intr(void)
1779{
Paolo Bonzinid6a858d2015-09-28 11:58:14 +02001780 return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
1781 vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
Yang Zhang01e439b2013-04-11 19:25:12 +08001782}
1783
1784static inline bool cpu_has_vmx_apicv(void)
1785{
1786 return cpu_has_vmx_apic_register_virt() &&
1787 cpu_has_vmx_virtual_intr_delivery() &&
1788 cpu_has_vmx_posted_intr();
1789}
1790
Sheng Yang04547152009-04-01 15:52:31 +08001791static inline bool cpu_has_vmx_flexpriority(void)
1792{
1793 return cpu_has_vmx_tpr_shadow() &&
1794 cpu_has_vmx_virtualize_apic_accesses();
Sheng Yangf78e0e22007-10-29 09:40:42 +08001795}
1796
Marcelo Tosattie7997942009-06-11 12:07:40 -03001797static inline bool cpu_has_vmx_ept_execute_only(void)
1798{
Gui Jianfeng31299942010-03-15 17:29:09 +08001799 return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
Marcelo Tosattie7997942009-06-11 12:07:40 -03001800}
1801
Marcelo Tosattie7997942009-06-11 12:07:40 -03001802static inline bool cpu_has_vmx_ept_2m_page(void)
1803{
Gui Jianfeng31299942010-03-15 17:29:09 +08001804 return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
Marcelo Tosattie7997942009-06-11 12:07:40 -03001805}
1806
Sheng Yang878403b2010-01-05 19:02:29 +08001807static inline bool cpu_has_vmx_ept_1g_page(void)
1808{
Gui Jianfeng31299942010-03-15 17:29:09 +08001809 return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
Sheng Yang878403b2010-01-05 19:02:29 +08001810}
1811
Sheng Yang4bc9b982010-06-02 14:05:24 +08001812static inline bool cpu_has_vmx_ept_4levels(void)
1813{
1814 return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
1815}
1816
David Hildenbrand42aa53b2017-08-10 23:15:29 +02001817static inline bool cpu_has_vmx_ept_mt_wb(void)
1818{
1819 return vmx_capability.ept & VMX_EPTP_WB_BIT;
1820}
1821
Yu Zhang855feb62017-08-24 20:27:55 +08001822static inline bool cpu_has_vmx_ept_5levels(void)
1823{
1824 return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT;
1825}
1826
Xudong Hao83c3a332012-05-28 19:33:35 +08001827static inline bool cpu_has_vmx_ept_ad_bits(void)
1828{
1829 return vmx_capability.ept & VMX_EPT_AD_BIT;
1830}
1831
Gui Jianfeng31299942010-03-15 17:29:09 +08001832static inline bool cpu_has_vmx_invept_context(void)
Sheng Yangd56f5462008-04-25 10:13:16 +08001833{
Gui Jianfeng31299942010-03-15 17:29:09 +08001834 return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
Sheng Yangd56f5462008-04-25 10:13:16 +08001835}
1836
Gui Jianfeng31299942010-03-15 17:29:09 +08001837static inline bool cpu_has_vmx_invept_global(void)
Sheng Yangd56f5462008-04-25 10:13:16 +08001838{
Gui Jianfeng31299942010-03-15 17:29:09 +08001839 return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
Sheng Yangd56f5462008-04-25 10:13:16 +08001840}
1841
Liran Aloncd9a4912018-05-22 17:16:15 +03001842static inline bool cpu_has_vmx_invvpid_individual_addr(void)
1843{
1844 return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT;
1845}
1846
Gui Jianfeng518c8ae2010-06-04 08:51:39 +08001847static inline bool cpu_has_vmx_invvpid_single(void)
1848{
1849 return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
1850}
1851
Gui Jianfengb9d762f2010-06-07 10:32:29 +08001852static inline bool cpu_has_vmx_invvpid_global(void)
1853{
1854 return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
1855}
1856
Wanpeng Li08d839c2017-03-23 05:30:08 -07001857static inline bool cpu_has_vmx_invvpid(void)
1858{
1859 return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
1860}
1861
Gui Jianfeng31299942010-03-15 17:29:09 +08001862static inline bool cpu_has_vmx_ept(void)
Sheng Yangd56f5462008-04-25 10:13:16 +08001863{
Sheng Yang04547152009-04-01 15:52:31 +08001864 return vmcs_config.cpu_based_2nd_exec_ctrl &
1865 SECONDARY_EXEC_ENABLE_EPT;
Sheng Yangd56f5462008-04-25 10:13:16 +08001866}
1867
Gui Jianfeng31299942010-03-15 17:29:09 +08001868static inline bool cpu_has_vmx_unrestricted_guest(void)
Nitin A Kamble3a624e22009-06-08 11:34:16 -07001869{
1870 return vmcs_config.cpu_based_2nd_exec_ctrl &
1871 SECONDARY_EXEC_UNRESTRICTED_GUEST;
1872}
1873
Gui Jianfeng31299942010-03-15 17:29:09 +08001874static inline bool cpu_has_vmx_ple(void)
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08001875{
1876 return vmcs_config.cpu_based_2nd_exec_ctrl &
1877 SECONDARY_EXEC_PAUSE_LOOP_EXITING;
1878}
1879
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03001880static inline bool cpu_has_vmx_basic_inout(void)
1881{
1882 return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
1883}
1884
Paolo Bonzini35754c92015-07-29 12:05:37 +02001885static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
Sheng Yangf78e0e22007-10-29 09:40:42 +08001886{
Paolo Bonzini35754c92015-07-29 12:05:37 +02001887 return flexpriority_enabled && lapic_in_kernel(vcpu);
Sheng Yangf78e0e22007-10-29 09:40:42 +08001888}
1889
Gui Jianfeng31299942010-03-15 17:29:09 +08001890static inline bool cpu_has_vmx_vpid(void)
Sheng Yang2384d2b2008-01-17 15:14:33 +08001891{
Sheng Yang04547152009-04-01 15:52:31 +08001892 return vmcs_config.cpu_based_2nd_exec_ctrl &
1893 SECONDARY_EXEC_ENABLE_VPID;
Sheng Yang2384d2b2008-01-17 15:14:33 +08001894}
1895
Gui Jianfeng31299942010-03-15 17:29:09 +08001896static inline bool cpu_has_vmx_rdtscp(void)
Sheng Yang4e47c7a2009-12-18 16:48:47 +08001897{
1898 return vmcs_config.cpu_based_2nd_exec_ctrl &
1899 SECONDARY_EXEC_RDTSCP;
1900}
1901
Mao, Junjiead756a12012-07-02 01:18:48 +00001902static inline bool cpu_has_vmx_invpcid(void)
1903{
1904 return vmcs_config.cpu_based_2nd_exec_ctrl &
1905 SECONDARY_EXEC_ENABLE_INVPCID;
1906}
1907
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01001908static inline bool cpu_has_virtual_nmis(void)
1909{
1910 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
1911}
1912
Sheng Yangf5f48ee2010-06-30 12:25:15 +08001913static inline bool cpu_has_vmx_wbinvd_exit(void)
1914{
1915 return vmcs_config.cpu_based_2nd_exec_ctrl &
1916 SECONDARY_EXEC_WBINVD_EXITING;
1917}
1918
Abel Gordonabc4fc52013-04-18 14:35:25 +03001919static inline bool cpu_has_vmx_shadow_vmcs(void)
1920{
1921 u64 vmx_msr;
1922 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
1923 /* check if the cpu supports writing r/o exit information fields */
1924 if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
1925 return false;
1926
1927 return vmcs_config.cpu_based_2nd_exec_ctrl &
1928 SECONDARY_EXEC_SHADOW_VMCS;
1929}
1930
Kai Huang843e4332015-01-28 10:54:28 +08001931static inline bool cpu_has_vmx_pml(void)
1932{
1933 return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
1934}
1935
Haozhong Zhang64903d62015-10-20 15:39:09 +08001936static inline bool cpu_has_vmx_tsc_scaling(void)
1937{
1938 return vmcs_config.cpu_based_2nd_exec_ctrl &
1939 SECONDARY_EXEC_TSC_SCALING;
1940}
1941
Bandan Das2a499e42017-08-03 15:54:41 -04001942static inline bool cpu_has_vmx_vmfunc(void)
1943{
1944 return vmcs_config.cpu_based_2nd_exec_ctrl &
1945 SECONDARY_EXEC_ENABLE_VMFUNC;
1946}
1947
Sean Christopherson64f7a112018-04-30 10:01:06 -07001948static bool vmx_umip_emulated(void)
1949{
1950 return vmcs_config.cpu_based_2nd_exec_ctrl &
1951 SECONDARY_EXEC_DESC;
1952}
1953
Sheng Yang04547152009-04-01 15:52:31 +08001954static inline bool report_flexpriority(void)
1955{
1956 return flexpriority_enabled;
1957}
1958
Jim Mattsonc7c2c702017-05-05 11:28:09 -07001959static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
1960{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01001961 return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
Jim Mattsonc7c2c702017-05-05 11:28:09 -07001962}
1963
Jim Mattsonf4160e42018-05-29 09:11:33 -07001964/*
1965 * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE
1966 * to modify any valid field of the VMCS, or are the VM-exit
1967 * information fields read-only?
1968 */
1969static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
1970{
1971 return to_vmx(vcpu)->nested.msrs.misc_low &
1972 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
1973}
1974
Marc Orr04473782018-06-20 17:21:29 -07001975static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
1976{
1977 return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
1978}
1979
1980static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
1981{
1982 return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
1983 CPU_BASED_MONITOR_TRAP_FLAG;
1984}
1985
Liran Alonfa97d7d2018-07-18 14:07:59 +02001986static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu)
1987{
1988 return to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
1989 SECONDARY_EXEC_SHADOW_VMCS;
1990}
1991
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03001992static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
1993{
1994 return vmcs12->cpu_based_vm_exec_control & bit;
1995}
1996
1997static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
1998{
1999 return (vmcs12->cpu_based_vm_exec_control &
2000 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
2001 (vmcs12->secondary_vm_exec_control & bit);
2002}
2003
Jan Kiszkaf4124502014-03-07 20:03:13 +01002004static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
2005{
2006 return vmcs12->pin_based_vm_exec_control &
2007 PIN_BASED_VMX_PREEMPTION_TIMER;
2008}
2009
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -05002010static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12)
2011{
2012 return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING;
2013}
2014
2015static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
2016{
2017 return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
2018}
2019
Nadav Har'El155a97a2013-08-05 11:07:16 +03002020static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
2021{
2022 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
2023}
2024
Wanpeng Li81dc01f2014-12-04 19:11:07 +08002025static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
2026{
Paolo Bonzini3db13482017-08-24 14:48:03 +02002027 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Wanpeng Li81dc01f2014-12-04 19:11:07 +08002028}
2029
Bandan Dasc5f983f2017-05-05 15:25:14 -04002030static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
2031{
2032 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
2033}
2034
Wincy Vanf2b93282015-02-03 23:56:03 +08002035static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
2036{
2037 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
2038}
2039
Wanpeng Li5c614b32015-10-13 09:18:36 -07002040static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
2041{
2042 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
2043}
2044
Wincy Van82f0dd42015-02-03 23:57:18 +08002045static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
2046{
2047 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
2048}
2049
Wincy Van608406e2015-02-03 23:57:51 +08002050static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
2051{
2052 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
2053}
2054
Wincy Van705699a2015-02-03 23:58:17 +08002055static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
2056{
2057 return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
2058}
2059
Bandan Das27c42a12017-08-03 15:54:42 -04002060static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
2061{
2062 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
2063}
2064
Bandan Das41ab9372017-08-03 15:54:43 -04002065static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
2066{
2067 return nested_cpu_has_vmfunc(vmcs12) &&
2068 (vmcs12->vm_function_control &
2069 VMX_VMFUNC_EPTP_SWITCHING);
2070}
2071
Liran Alonf792d272018-06-23 02:35:05 +03002072static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12)
2073{
2074 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS);
2075}
2076
Jim Mattsonef85b672016-12-12 11:01:37 -08002077static inline bool is_nmi(u32 intr_info)
Nadav Har'El644d7112011-05-25 23:12:35 +03002078{
2079 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
Jim Mattsonef85b672016-12-12 11:01:37 -08002080 == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
Nadav Har'El644d7112011-05-25 23:12:35 +03002081}
2082
Jan Kiszka533558b2014-01-04 18:47:20 +01002083static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
2084 u32 exit_intr_info,
2085 unsigned long exit_qualification);
Nadav Har'El7c177932011-05-25 23:12:04 +03002086
Rusty Russell8b9cf982007-07-30 16:31:43 +10002087static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
Avi Kivity7725f0b2006-12-13 00:34:01 -08002088{
2089 int i;
2090
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002091 for (i = 0; i < vmx->nmsrs; ++i)
Avi Kivity26bb0982009-09-07 11:14:12 +03002092 if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
Eddie Donga75beee2007-05-17 18:55:15 +03002093 return i;
2094 return -1;
2095}
2096
Uros Bizjak5ebb2722018-10-11 19:40:43 +02002097static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
Sheng Yang2384d2b2008-01-17 15:14:33 +08002098{
2099 struct {
2100 u64 vpid : 16;
2101 u64 rsvd : 48;
2102 u64 gva;
2103 } operand = { vpid, 0, gva };
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002104 bool error;
Sheng Yang2384d2b2008-01-17 15:14:33 +08002105
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002106 asm volatile (__ex("invvpid %2, %1") CC_SET(na)
2107 : CC_OUT(na) (error) : "r"(ext), "m"(operand));
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002108 BUG_ON(error);
Sheng Yang2384d2b2008-01-17 15:14:33 +08002109}
2110
Uros Bizjak5ebb2722018-10-11 19:40:43 +02002111static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
Sheng Yang14394422008-04-28 12:24:45 +08002112{
2113 struct {
2114 u64 eptp, gpa;
2115 } operand = {eptp, gpa};
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002116 bool error;
Sheng Yang14394422008-04-28 12:24:45 +08002117
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002118 asm volatile (__ex("invept %2, %1") CC_SET(na)
2119 : CC_OUT(na) (error) : "r"(ext), "m"(operand));
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002120 BUG_ON(error);
Sheng Yang14394422008-04-28 12:24:45 +08002121}
2122
Avi Kivity26bb0982009-09-07 11:14:12 +03002123static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
Eddie Donga75beee2007-05-17 18:55:15 +03002124{
2125 int i;
2126
Rusty Russell8b9cf982007-07-30 16:31:43 +10002127 i = __find_msr_index(vmx, msr);
Eddie Donga75beee2007-05-17 18:55:15 +03002128 if (i >= 0)
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002129 return &vmx->guest_msrs[i];
Al Viro8b6d44c2007-02-09 16:38:40 +00002130 return NULL;
Avi Kivity7725f0b2006-12-13 00:34:01 -08002131}
2132
Avi Kivity6aa8b732006-12-10 02:21:36 -08002133static void vmcs_clear(struct vmcs *vmcs)
2134{
2135 u64 phys_addr = __pa(vmcs);
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002136 bool error;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002137
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002138 asm volatile (__ex("vmclear %1") CC_SET(na)
2139 : CC_OUT(na) (error) : "m"(phys_addr));
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002140 if (unlikely(error))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002141 printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
2142 vmcs, phys_addr);
2143}
2144
Nadav Har'Eld462b812011-05-24 15:26:10 +03002145static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
2146{
2147 vmcs_clear(loaded_vmcs->vmcs);
Jim Mattson355f4fb2016-10-28 08:29:39 -07002148 if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
2149 vmcs_clear(loaded_vmcs->shadow_vmcs);
Nadav Har'Eld462b812011-05-24 15:26:10 +03002150 loaded_vmcs->cpu = -1;
2151 loaded_vmcs->launched = 0;
2152}
2153
Dongxiao Xu7725b892010-05-11 18:29:38 +08002154static void vmcs_load(struct vmcs *vmcs)
2155{
2156 u64 phys_addr = __pa(vmcs);
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002157 bool error;
Dongxiao Xu7725b892010-05-11 18:29:38 +08002158
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002159 if (static_branch_unlikely(&enable_evmcs))
2160 return evmcs_load(phys_addr);
2161
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002162 asm volatile (__ex("vmptrld %1") CC_SET(na)
2163 : CC_OUT(na) (error) : "m"(phys_addr));
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002164 if (unlikely(error))
Nadav Har'El2844d842011-05-25 23:16:40 +03002165 printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
Dongxiao Xu7725b892010-05-11 18:29:38 +08002166 vmcs, phys_addr);
2167}
2168
Dave Young2965faa2015-09-09 15:38:55 -07002169#ifdef CONFIG_KEXEC_CORE
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002170/*
2171 * This bitmap is used to indicate whether the vmclear
2172 * operation is enabled on all cpus. All disabled by
2173 * default.
2174 */
2175static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
2176
2177static inline void crash_enable_local_vmclear(int cpu)
2178{
2179 cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
2180}
2181
2182static inline void crash_disable_local_vmclear(int cpu)
2183{
2184 cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
2185}
2186
2187static inline int crash_local_vmclear_enabled(int cpu)
2188{
2189 return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
2190}
2191
2192static void crash_vmclear_local_loaded_vmcss(void)
2193{
2194 int cpu = raw_smp_processor_id();
2195 struct loaded_vmcs *v;
2196
2197 if (!crash_local_vmclear_enabled(cpu))
2198 return;
2199
2200 list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
2201 loaded_vmcss_on_cpu_link)
2202 vmcs_clear(v->vmcs);
2203}
2204#else
2205static inline void crash_enable_local_vmclear(int cpu) { }
2206static inline void crash_disable_local_vmclear(int cpu) { }
Dave Young2965faa2015-09-09 15:38:55 -07002207#endif /* CONFIG_KEXEC_CORE */
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002208
Nadav Har'Eld462b812011-05-24 15:26:10 +03002209static void __loaded_vmcs_clear(void *arg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002210{
Nadav Har'Eld462b812011-05-24 15:26:10 +03002211 struct loaded_vmcs *loaded_vmcs = arg;
Ingo Molnard3b2c332007-01-05 16:36:23 -08002212 int cpu = raw_smp_processor_id();
Avi Kivity6aa8b732006-12-10 02:21:36 -08002213
Nadav Har'Eld462b812011-05-24 15:26:10 +03002214 if (loaded_vmcs->cpu != cpu)
2215 return; /* vcpu migration can race with cpu offline */
2216 if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002217 per_cpu(current_vmcs, cpu) = NULL;
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002218 crash_disable_local_vmclear(cpu);
Nadav Har'Eld462b812011-05-24 15:26:10 +03002219 list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
Xiao Guangrong5a560f82012-11-28 20:54:14 +08002220
2221 /*
2222 * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
2223 * is before setting loaded_vmcs->vcpu to -1 which is done in
2224 * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
2225 * then adds the vmcs into percpu list before it is deleted.
2226 */
2227 smp_wmb();
2228
Nadav Har'Eld462b812011-05-24 15:26:10 +03002229 loaded_vmcs_init(loaded_vmcs);
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002230 crash_enable_local_vmclear(cpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002231}
2232
Nadav Har'Eld462b812011-05-24 15:26:10 +03002233static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
Avi Kivity8d0be2b2007-02-12 00:54:46 -08002234{
Xiao Guangronge6c7d322012-11-28 20:53:15 +08002235 int cpu = loaded_vmcs->cpu;
2236
2237 if (cpu != -1)
2238 smp_call_function_single(cpu,
2239 __loaded_vmcs_clear, loaded_vmcs, 1);
Avi Kivity8d0be2b2007-02-12 00:54:46 -08002240}
2241
Junaid Shahidfaff8752018-06-29 13:10:05 -07002242static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
2243{
2244 if (vpid == 0)
2245 return true;
2246
2247 if (cpu_has_vmx_invvpid_individual_addr()) {
2248 __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr);
2249 return true;
2250 }
2251
2252 return false;
2253}
2254
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002255static inline void vpid_sync_vcpu_single(int vpid)
Sheng Yang2384d2b2008-01-17 15:14:33 +08002256{
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002257 if (vpid == 0)
Sheng Yang2384d2b2008-01-17 15:14:33 +08002258 return;
2259
Gui Jianfeng518c8ae2010-06-04 08:51:39 +08002260 if (cpu_has_vmx_invvpid_single())
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002261 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
Sheng Yang2384d2b2008-01-17 15:14:33 +08002262}
2263
Gui Jianfengb9d762f2010-06-07 10:32:29 +08002264static inline void vpid_sync_vcpu_global(void)
2265{
2266 if (cpu_has_vmx_invvpid_global())
2267 __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
2268}
2269
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002270static inline void vpid_sync_context(int vpid)
Gui Jianfengb9d762f2010-06-07 10:32:29 +08002271{
2272 if (cpu_has_vmx_invvpid_single())
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002273 vpid_sync_vcpu_single(vpid);
Gui Jianfengb9d762f2010-06-07 10:32:29 +08002274 else
2275 vpid_sync_vcpu_global();
2276}
2277
Sheng Yang14394422008-04-28 12:24:45 +08002278static inline void ept_sync_global(void)
2279{
David Hildenbrandf5f51582017-08-24 20:51:30 +02002280 __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
Sheng Yang14394422008-04-28 12:24:45 +08002281}
2282
2283static inline void ept_sync_context(u64 eptp)
2284{
David Hildenbrand0e1252d2017-08-24 20:51:28 +02002285 if (cpu_has_vmx_invept_context())
2286 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
2287 else
2288 ept_sync_global();
Sheng Yang14394422008-04-28 12:24:45 +08002289}
2290
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002291static __always_inline void vmcs_check16(unsigned long field)
2292{
2293 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
2294 "16-bit accessor invalid for 64-bit field");
2295 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
2296 "16-bit accessor invalid for 64-bit high field");
2297 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
2298 "16-bit accessor invalid for 32-bit high field");
2299 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
2300 "16-bit accessor invalid for natural width field");
2301}
2302
2303static __always_inline void vmcs_check32(unsigned long field)
2304{
2305 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
2306 "32-bit accessor invalid for 16-bit field");
2307 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
2308 "32-bit accessor invalid for natural width field");
2309}
2310
2311static __always_inline void vmcs_check64(unsigned long field)
2312{
2313 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
2314 "64-bit accessor invalid for 16-bit field");
2315 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
2316 "64-bit accessor invalid for 64-bit high field");
2317 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
2318 "64-bit accessor invalid for 32-bit field");
2319 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
2320 "64-bit accessor invalid for natural width field");
2321}
2322
2323static __always_inline void vmcs_checkl(unsigned long field)
2324{
2325 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
2326 "Natural width accessor invalid for 16-bit field");
2327 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
2328 "Natural width accessor invalid for 64-bit field");
2329 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
2330 "Natural width accessor invalid for 64-bit high field");
2331 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
2332 "Natural width accessor invalid for 32-bit field");
2333}
2334
2335static __always_inline unsigned long __vmcs_readl(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002336{
Avi Kivity5e520e62011-05-15 10:13:12 -04002337 unsigned long value;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002338
Uros Bizjak44c2d662018-10-11 19:40:45 +02002339 asm volatile (__ex_clear("vmread %1, %0", "%k0")
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002340 : "=r"(value) : "r"(field));
Avi Kivity6aa8b732006-12-10 02:21:36 -08002341 return value;
2342}
2343
Avi Kivity96304212011-05-15 10:13:13 -04002344static __always_inline u16 vmcs_read16(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002345{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002346 vmcs_check16(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002347 if (static_branch_unlikely(&enable_evmcs))
2348 return evmcs_read16(field);
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002349 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002350}
2351
Avi Kivity96304212011-05-15 10:13:13 -04002352static __always_inline u32 vmcs_read32(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002353{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002354 vmcs_check32(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002355 if (static_branch_unlikely(&enable_evmcs))
2356 return evmcs_read32(field);
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002357 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002358}
2359
Avi Kivity96304212011-05-15 10:13:13 -04002360static __always_inline u64 vmcs_read64(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002361{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002362 vmcs_check64(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002363 if (static_branch_unlikely(&enable_evmcs))
2364 return evmcs_read64(field);
Avi Kivity05b3e0c2006-12-13 00:33:45 -08002365#ifdef CONFIG_X86_64
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002366 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002367#else
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002368 return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002369#endif
2370}
2371
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002372static __always_inline unsigned long vmcs_readl(unsigned long field)
2373{
2374 vmcs_checkl(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002375 if (static_branch_unlikely(&enable_evmcs))
2376 return evmcs_read64(field);
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002377 return __vmcs_readl(field);
2378}
2379
Avi Kivitye52de1b2007-01-05 16:36:56 -08002380static noinline void vmwrite_error(unsigned long field, unsigned long value)
2381{
2382 printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
2383 field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
2384 dump_stack();
2385}
2386
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002387static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002388{
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002389 bool error;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002390
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002391 asm volatile (__ex("vmwrite %2, %1") CC_SET(na)
2392 : CC_OUT(na) (error) : "r"(field), "rm"(value));
Avi Kivitye52de1b2007-01-05 16:36:56 -08002393 if (unlikely(error))
2394 vmwrite_error(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002395}
2396
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002397static __always_inline void vmcs_write16(unsigned long field, u16 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002398{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002399 vmcs_check16(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002400 if (static_branch_unlikely(&enable_evmcs))
2401 return evmcs_write16(field, value);
2402
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002403 __vmcs_writel(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002404}
2405
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002406static __always_inline void vmcs_write32(unsigned long field, u32 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002407{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002408 vmcs_check32(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002409 if (static_branch_unlikely(&enable_evmcs))
2410 return evmcs_write32(field, value);
2411
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002412 __vmcs_writel(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002413}
2414
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002415static __always_inline void vmcs_write64(unsigned long field, u64 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002416{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002417 vmcs_check64(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002418 if (static_branch_unlikely(&enable_evmcs))
2419 return evmcs_write64(field, value);
2420
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002421 __vmcs_writel(field, value);
Avi Kivity7682f2d2008-05-12 19:25:43 +03002422#ifndef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08002423 asm volatile ("");
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002424 __vmcs_writel(field+1, value >> 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002425#endif
2426}
2427
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002428static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002429{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002430 vmcs_checkl(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002431 if (static_branch_unlikely(&enable_evmcs))
2432 return evmcs_write64(field, value);
2433
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002434 __vmcs_writel(field, value);
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002435}
2436
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002437static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002438{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002439 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
2440 "vmcs_clear_bits does not support 64-bit fields");
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002441 if (static_branch_unlikely(&enable_evmcs))
2442 return evmcs_write32(field, evmcs_read32(field) & ~mask);
2443
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002444 __vmcs_writel(field, __vmcs_readl(field) & ~mask);
2445}
2446
2447static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
2448{
2449 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
2450 "vmcs_set_bits does not support 64-bit fields");
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002451 if (static_branch_unlikely(&enable_evmcs))
2452 return evmcs_write32(field, evmcs_read32(field) | mask);
2453
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002454 __vmcs_writel(field, __vmcs_readl(field) | mask);
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002455}
2456
Paolo Bonzini8391ce42016-07-07 14:58:33 +02002457static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
2458{
2459 vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
2460}
2461
Gleb Natapov2961e8762013-11-25 15:37:13 +02002462static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
2463{
2464 vmcs_write32(VM_ENTRY_CONTROLS, val);
2465 vmx->vm_entry_controls_shadow = val;
2466}
2467
2468static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
2469{
2470 if (vmx->vm_entry_controls_shadow != val)
2471 vm_entry_controls_init(vmx, val);
2472}
2473
2474static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
2475{
2476 return vmx->vm_entry_controls_shadow;
2477}
2478
2479
2480static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
2481{
2482 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
2483}
2484
2485static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
2486{
2487 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
2488}
2489
Paolo Bonzini8391ce42016-07-07 14:58:33 +02002490static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
2491{
2492 vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
2493}
2494
Gleb Natapov2961e8762013-11-25 15:37:13 +02002495static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
2496{
2497 vmcs_write32(VM_EXIT_CONTROLS, val);
2498 vmx->vm_exit_controls_shadow = val;
2499}
2500
2501static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
2502{
2503 if (vmx->vm_exit_controls_shadow != val)
2504 vm_exit_controls_init(vmx, val);
2505}
2506
2507static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
2508{
2509 return vmx->vm_exit_controls_shadow;
2510}
2511
2512
2513static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
2514{
2515 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
2516}
2517
2518static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
2519{
2520 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
2521}
2522
Avi Kivity2fb92db2011-04-27 19:42:18 +03002523static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
2524{
2525 vmx->segment_cache.bitmask = 0;
2526}
2527
2528static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
2529 unsigned field)
2530{
2531 bool ret;
2532 u32 mask = 1 << (seg * SEG_FIELD_NR + field);
2533
2534 if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
2535 vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
2536 vmx->segment_cache.bitmask = 0;
2537 }
2538 ret = vmx->segment_cache.bitmask & mask;
2539 vmx->segment_cache.bitmask |= mask;
2540 return ret;
2541}
2542
2543static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
2544{
2545 u16 *p = &vmx->segment_cache.seg[seg].selector;
2546
2547 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
2548 *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
2549 return *p;
2550}
2551
2552static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
2553{
2554 ulong *p = &vmx->segment_cache.seg[seg].base;
2555
2556 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
2557 *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
2558 return *p;
2559}
2560
2561static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
2562{
2563 u32 *p = &vmx->segment_cache.seg[seg].limit;
2564
2565 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
2566 *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
2567 return *p;
2568}
2569
2570static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
2571{
2572 u32 *p = &vmx->segment_cache.seg[seg].ar;
2573
2574 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
2575 *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
2576 return *p;
2577}
2578
Avi Kivityabd3f2d2007-05-02 17:57:40 +03002579static void update_exception_bitmap(struct kvm_vcpu *vcpu)
2580{
2581 u32 eb;
2582
Jan Kiszkafd7373c2010-01-20 18:20:20 +01002583 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08002584 (1u << DB_VECTOR) | (1u << AC_VECTOR);
Liran Alon9e869482018-03-12 13:12:51 +02002585 /*
2586 * Guest access to VMware backdoor ports could legitimately
2587 * trigger #GP because of TSS I/O permission bitmap.
2588 * We intercept those #GP and allow access to them anyway
2589 * as VMware does.
2590 */
2591 if (enable_vmware_backdoor)
2592 eb |= (1u << GP_VECTOR);
Jan Kiszkafd7373c2010-01-20 18:20:20 +01002593 if ((vcpu->guest_debug &
2594 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
2595 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
2596 eb |= 1u << BP_VECTOR;
Avi Kivity7ffd92c2009-06-09 14:10:45 +03002597 if (to_vmx(vcpu)->rmode.vm86_active)
Avi Kivityabd3f2d2007-05-02 17:57:40 +03002598 eb = ~0;
Avi Kivity089d0342009-03-23 18:26:32 +02002599 if (enable_ept)
Sheng Yang14394422008-04-28 12:24:45 +08002600 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
Nadav Har'El36cf24e2011-05-25 23:15:08 +03002601
2602 /* When we are running a nested L2 guest and L1 specified for it a
2603 * certain exception bitmap, we must trap the same exceptions and pass
2604 * them to L1. When running L2, we will only handle the exceptions
2605 * specified above if L1 did not want them.
2606 */
2607 if (is_guest_mode(vcpu))
2608 eb |= get_vmcs12(vcpu)->exception_bitmap;
2609
Avi Kivityabd3f2d2007-05-02 17:57:40 +03002610 vmcs_write32(EXCEPTION_BITMAP, eb);
2611}
2612
Ashok Raj15d45072018-02-01 22:59:43 +01002613/*
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01002614 * Check if MSR is intercepted for currently loaded MSR bitmap.
2615 */
2616static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
2617{
2618 unsigned long *msr_bitmap;
2619 int f = sizeof(unsigned long);
2620
2621 if (!cpu_has_vmx_msr_bitmap())
2622 return true;
2623
2624 msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
2625
2626 if (msr <= 0x1fff) {
2627 return !!test_bit(msr, msr_bitmap + 0x800 / f);
2628 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2629 msr &= 0x1fff;
2630 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
2631 }
2632
2633 return true;
2634}
2635
2636/*
Ashok Raj15d45072018-02-01 22:59:43 +01002637 * Check if MSR is intercepted for L01 MSR bitmap.
2638 */
2639static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
2640{
2641 unsigned long *msr_bitmap;
2642 int f = sizeof(unsigned long);
2643
2644 if (!cpu_has_vmx_msr_bitmap())
2645 return true;
2646
2647 msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
2648
2649 if (msr <= 0x1fff) {
2650 return !!test_bit(msr, msr_bitmap + 0x800 / f);
2651 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2652 msr &= 0x1fff;
2653 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
2654 }
2655
2656 return true;
2657}
2658
Gleb Natapov2961e8762013-11-25 15:37:13 +02002659static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
2660 unsigned long entry, unsigned long exit)
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002661{
Gleb Natapov2961e8762013-11-25 15:37:13 +02002662 vm_entry_controls_clearbit(vmx, entry);
2663 vm_exit_controls_clearbit(vmx, exit);
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002664}
2665
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002666static int find_msr(struct vmx_msrs *m, unsigned int msr)
2667{
2668 unsigned int i;
2669
2670 for (i = 0; i < m->nr; ++i) {
2671 if (m->val[i].index == msr)
2672 return i;
2673 }
2674 return -ENOENT;
2675}
2676
Avi Kivity61d2ef22010-04-28 16:40:38 +03002677static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
2678{
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002679 int i;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002680 struct msr_autoload *m = &vmx->msr_autoload;
2681
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002682 switch (msr) {
2683 case MSR_EFER:
2684 if (cpu_has_load_ia32_efer) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002685 clear_atomic_switch_msr_special(vmx,
2686 VM_ENTRY_LOAD_IA32_EFER,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002687 VM_EXIT_LOAD_IA32_EFER);
2688 return;
2689 }
2690 break;
2691 case MSR_CORE_PERF_GLOBAL_CTRL:
2692 if (cpu_has_load_perf_global_ctrl) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002693 clear_atomic_switch_msr_special(vmx,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002694 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
2695 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
2696 return;
2697 }
2698 break;
Avi Kivity110312c2010-12-21 12:54:20 +02002699 }
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002700 i = find_msr(&m->guest, msr);
2701 if (i < 0)
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002702 goto skip_guest;
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002703 --m->guest.nr;
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002704 m->guest.val[i] = m->guest.val[m->guest.nr];
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002705 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
Avi Kivity110312c2010-12-21 12:54:20 +02002706
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002707skip_guest:
2708 i = find_msr(&m->host, msr);
2709 if (i < 0)
Avi Kivity61d2ef22010-04-28 16:40:38 +03002710 return;
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002711
2712 --m->host.nr;
2713 m->host.val[i] = m->host.val[m->host.nr];
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002714 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
Avi Kivity61d2ef22010-04-28 16:40:38 +03002715}
2716
Gleb Natapov2961e8762013-11-25 15:37:13 +02002717static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
2718 unsigned long entry, unsigned long exit,
2719 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
2720 u64 guest_val, u64 host_val)
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002721{
2722 vmcs_write64(guest_val_vmcs, guest_val);
Sean Christopherson5a5e8a12018-09-26 09:23:56 -07002723 if (host_val_vmcs != HOST_IA32_EFER)
2724 vmcs_write64(host_val_vmcs, host_val);
Gleb Natapov2961e8762013-11-25 15:37:13 +02002725 vm_entry_controls_setbit(vmx, entry);
2726 vm_exit_controls_setbit(vmx, exit);
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002727}
2728
Avi Kivity61d2ef22010-04-28 16:40:38 +03002729static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002730 u64 guest_val, u64 host_val, bool entry_only)
Avi Kivity61d2ef22010-04-28 16:40:38 +03002731{
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002732 int i, j = 0;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002733 struct msr_autoload *m = &vmx->msr_autoload;
2734
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002735 switch (msr) {
2736 case MSR_EFER:
2737 if (cpu_has_load_ia32_efer) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002738 add_atomic_switch_msr_special(vmx,
2739 VM_ENTRY_LOAD_IA32_EFER,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002740 VM_EXIT_LOAD_IA32_EFER,
2741 GUEST_IA32_EFER,
2742 HOST_IA32_EFER,
2743 guest_val, host_val);
2744 return;
2745 }
2746 break;
2747 case MSR_CORE_PERF_GLOBAL_CTRL:
2748 if (cpu_has_load_perf_global_ctrl) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002749 add_atomic_switch_msr_special(vmx,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002750 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
2751 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
2752 GUEST_IA32_PERF_GLOBAL_CTRL,
2753 HOST_IA32_PERF_GLOBAL_CTRL,
2754 guest_val, host_val);
2755 return;
2756 }
2757 break;
Radim Krčmář7099e2e2016-03-04 15:08:42 +01002758 case MSR_IA32_PEBS_ENABLE:
2759 /* PEBS needs a quiescent period after being disabled (to write
2760 * a record). Disabling PEBS through VMX MSR swapping doesn't
2761 * provide that period, so a CPU could write host's record into
2762 * guest's memory.
2763 */
2764 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
Avi Kivity110312c2010-12-21 12:54:20 +02002765 }
2766
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002767 i = find_msr(&m->guest, msr);
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002768 if (!entry_only)
2769 j = find_msr(&m->host, msr);
Avi Kivity61d2ef22010-04-28 16:40:38 +03002770
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002771 if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) {
Michael S. Tsirkin60266202013-10-31 00:34:56 +02002772 printk_once(KERN_WARNING "Not enough msr switch entries. "
Gleb Natapove7fc6f93b2011-10-05 14:01:24 +02002773 "Can't add msr %x\n", msr);
2774 return;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002775 }
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002776 if (i < 0) {
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002777 i = m->guest.nr++;
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002778 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002779 }
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002780 m->guest.val[i].index = msr;
2781 m->guest.val[i].value = guest_val;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002782
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002783 if (entry_only)
2784 return;
2785
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002786 if (j < 0) {
2787 j = m->host.nr++;
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002788 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
Avi Kivity61d2ef22010-04-28 16:40:38 +03002789 }
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002790 m->host.val[j].index = msr;
2791 m->host.val[j].value = host_val;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002792}
2793
Avi Kivity92c0d902009-10-29 11:00:16 +02002794static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
Eddie Dong2cc51562007-05-21 07:28:09 +03002795{
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002796 u64 guest_efer = vmx->vcpu.arch.efer;
2797 u64 ignore_bits = 0;
Eddie Dong2cc51562007-05-21 07:28:09 +03002798
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002799 if (!enable_ept) {
2800 /*
2801 * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
2802 * host CPUID is more efficient than testing guest CPUID
2803 * or CR4. Host SMEP is anyway a requirement for guest SMEP.
2804 */
2805 if (boot_cpu_has(X86_FEATURE_SMEP))
2806 guest_efer |= EFER_NX;
2807 else if (!(guest_efer & EFER_NX))
2808 ignore_bits |= EFER_NX;
2809 }
Roel Kluin3a34a882009-08-04 02:08:45 -07002810
Avi Kivity51c6cf62007-08-29 03:48:05 +03002811 /*
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002812 * LMA and LME handled by hardware; SCE meaningless outside long mode.
Avi Kivity51c6cf62007-08-29 03:48:05 +03002813 */
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002814 ignore_bits |= EFER_SCE;
Avi Kivity51c6cf62007-08-29 03:48:05 +03002815#ifdef CONFIG_X86_64
2816 ignore_bits |= EFER_LMA | EFER_LME;
2817 /* SCE is meaningful only in long mode on Intel */
2818 if (guest_efer & EFER_LMA)
2819 ignore_bits &= ~(u64)EFER_SCE;
2820#endif
Avi Kivity84ad33e2010-04-28 16:42:29 +03002821
Andy Lutomirskif6577a5f2014-11-07 18:25:18 -08002822 /*
2823 * On EPT, we can't emulate NX, so we must switch EFER atomically.
2824 * On CPUs that support "load IA32_EFER", always switch EFER
2825 * atomically, since it's faster than switching it manually.
2826 */
2827 if (cpu_has_load_ia32_efer ||
2828 (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
Avi Kivity84ad33e2010-04-28 16:42:29 +03002829 if (!(guest_efer & EFER_LMA))
2830 guest_efer &= ~EFER_LME;
Andy Lutomirski54b98bf2014-11-10 11:19:15 -08002831 if (guest_efer != host_efer)
2832 add_atomic_switch_msr(vmx, MSR_EFER,
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002833 guest_efer, host_efer, false);
Sean Christopherson02343cf2018-09-26 09:23:43 -07002834 else
2835 clear_atomic_switch_msr(vmx, MSR_EFER);
Avi Kivity84ad33e2010-04-28 16:42:29 +03002836 return false;
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002837 } else {
Sean Christopherson02343cf2018-09-26 09:23:43 -07002838 clear_atomic_switch_msr(vmx, MSR_EFER);
2839
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002840 guest_efer &= ~ignore_bits;
2841 guest_efer |= host_efer & ignore_bits;
Avi Kivity84ad33e2010-04-28 16:42:29 +03002842
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002843 vmx->guest_msrs[efer_offset].data = guest_efer;
2844 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
2845
2846 return true;
2847 }
Avi Kivity51c6cf62007-08-29 03:48:05 +03002848}
2849
Andy Lutomirskie28baea2017-02-20 08:56:11 -08002850#ifdef CONFIG_X86_32
2851/*
2852 * On 32-bit kernels, VM exits still load the FS and GS bases from the
2853 * VMCS rather than the segment table. KVM uses this helper to figure
2854 * out the current bases to poke them into the VMCS before entry.
2855 */
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002856static unsigned long segment_base(u16 selector)
2857{
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002858 struct desc_struct *table;
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002859 unsigned long v;
2860
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002861 if (!(selector & ~SEGMENT_RPL_MASK))
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002862 return 0;
2863
Thomas Garnier45fc8752017-03-14 10:05:08 -07002864 table = get_current_gdt_ro();
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002865
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002866 if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002867 u16 ldt_selector = kvm_read_ldt();
2868
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002869 if (!(ldt_selector & ~SEGMENT_RPL_MASK))
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002870 return 0;
2871
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002872 table = (struct desc_struct *)segment_base(ldt_selector);
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002873 }
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002874 v = get_desc_base(&table[selector >> 3]);
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002875 return v;
2876}
Andy Lutomirskie28baea2017-02-20 08:56:11 -08002877#endif
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002878
Sean Christopherson6d6095b2018-07-23 12:32:44 -07002879static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
Avi Kivity33ed6322007-05-02 16:54:03 +03002880{
Avi Kivity04d2cc72007-09-10 18:10:54 +03002881 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christophersond7ee0392018-07-23 12:32:47 -07002882 struct vmcs_host_state *host_state;
Arnd Bergmann51e8a8c2018-04-04 12:44:14 +02002883#ifdef CONFIG_X86_64
Vitaly Kuznetsov35060ed2018-03-13 18:48:05 +01002884 int cpu = raw_smp_processor_id();
Arnd Bergmann51e8a8c2018-04-04 12:44:14 +02002885#endif
Sean Christophersone368b872018-07-23 12:32:41 -07002886 unsigned long fs_base, gs_base;
2887 u16 fs_sel, gs_sel;
Avi Kivity26bb0982009-09-07 11:14:12 +03002888 int i;
Avi Kivity04d2cc72007-09-10 18:10:54 +03002889
Sean Christophersond264ee02018-08-27 15:21:12 -07002890 vmx->req_immediate_exit = false;
2891
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002892 if (vmx->loaded_cpu_state)
Avi Kivity33ed6322007-05-02 16:54:03 +03002893 return;
2894
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002895 vmx->loaded_cpu_state = vmx->loaded_vmcs;
Sean Christophersond7ee0392018-07-23 12:32:47 -07002896 host_state = &vmx->loaded_cpu_state->host_state;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002897
Avi Kivity33ed6322007-05-02 16:54:03 +03002898 /*
2899 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
2900 * allow segment selectors with cpl > 0 or ti == 1.
2901 */
Sean Christophersond7ee0392018-07-23 12:32:47 -07002902 host_state->ldt_sel = kvm_read_ldt();
Vitaly Kuznetsov42b933b2018-03-13 18:48:04 +01002903
2904#ifdef CONFIG_X86_64
Sean Christophersond7ee0392018-07-23 12:32:47 -07002905 savesegment(ds, host_state->ds_sel);
2906 savesegment(es, host_state->es_sel);
Sean Christophersone368b872018-07-23 12:32:41 -07002907
2908 gs_base = cpu_kernelmode_gs_base(cpu);
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002909 if (likely(is_64bit_mm(current->mm))) {
2910 save_fsgs_for_kvm();
Sean Christophersone368b872018-07-23 12:32:41 -07002911 fs_sel = current->thread.fsindex;
2912 gs_sel = current->thread.gsindex;
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002913 fs_base = current->thread.fsbase;
Sean Christophersone368b872018-07-23 12:32:41 -07002914 vmx->msr_host_kernel_gs_base = current->thread.gsbase;
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002915 } else {
Sean Christophersone368b872018-07-23 12:32:41 -07002916 savesegment(fs, fs_sel);
2917 savesegment(gs, gs_sel);
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002918 fs_base = read_msr(MSR_FS_BASE);
Sean Christophersone368b872018-07-23 12:32:41 -07002919 vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
Avi Kivity33ed6322007-05-02 16:54:03 +03002920 }
2921
Paolo Bonzini4679b612018-09-24 17:23:01 +02002922 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
Avi Kivity33ed6322007-05-02 16:54:03 +03002923#else
Sean Christophersone368b872018-07-23 12:32:41 -07002924 savesegment(fs, fs_sel);
2925 savesegment(gs, gs_sel);
2926 fs_base = segment_base(fs_sel);
2927 gs_base = segment_base(gs_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002928#endif
Sean Christophersone368b872018-07-23 12:32:41 -07002929
Sean Christopherson8f21a0b2018-07-23 12:32:49 -07002930 if (unlikely(fs_sel != host_state->fs_sel)) {
2931 if (!(fs_sel & 7))
2932 vmcs_write16(HOST_FS_SELECTOR, fs_sel);
2933 else
2934 vmcs_write16(HOST_FS_SELECTOR, 0);
2935 host_state->fs_sel = fs_sel;
2936 }
2937 if (unlikely(gs_sel != host_state->gs_sel)) {
2938 if (!(gs_sel & 7))
2939 vmcs_write16(HOST_GS_SELECTOR, gs_sel);
2940 else
2941 vmcs_write16(HOST_GS_SELECTOR, 0);
2942 host_state->gs_sel = gs_sel;
2943 }
Sean Christopherson5e079c72018-07-23 12:32:50 -07002944 if (unlikely(fs_base != host_state->fs_base)) {
2945 vmcs_writel(HOST_FS_BASE, fs_base);
2946 host_state->fs_base = fs_base;
2947 }
2948 if (unlikely(gs_base != host_state->gs_base)) {
2949 vmcs_writel(HOST_GS_BASE, gs_base);
2950 host_state->gs_base = gs_base;
2951 }
Avi Kivity33ed6322007-05-02 16:54:03 +03002952
Avi Kivity26bb0982009-09-07 11:14:12 +03002953 for (i = 0; i < vmx->save_nmsrs; ++i)
2954 kvm_set_shared_msr(vmx->guest_msrs[i].index,
Avi Kivityd5696722009-12-02 12:28:47 +02002955 vmx->guest_msrs[i].data,
2956 vmx->guest_msrs[i].mask);
Avi Kivity33ed6322007-05-02 16:54:03 +03002957}
2958
Sean Christopherson6d6095b2018-07-23 12:32:44 -07002959static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
Avi Kivity33ed6322007-05-02 16:54:03 +03002960{
Sean Christophersond7ee0392018-07-23 12:32:47 -07002961 struct vmcs_host_state *host_state;
2962
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002963 if (!vmx->loaded_cpu_state)
Avi Kivity33ed6322007-05-02 16:54:03 +03002964 return;
2965
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002966 WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
Sean Christophersond7ee0392018-07-23 12:32:47 -07002967 host_state = &vmx->loaded_cpu_state->host_state;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002968
Avi Kivitye1beb1d2007-11-18 13:50:24 +02002969 ++vmx->vcpu.stat.host_state_reload;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002970 vmx->loaded_cpu_state = NULL;
2971
Avi Kivityc8770e72010-11-11 12:37:26 +02002972#ifdef CONFIG_X86_64
Paolo Bonzini4679b612018-09-24 17:23:01 +02002973 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
Avi Kivityc8770e72010-11-11 12:37:26 +02002974#endif
Sean Christophersond7ee0392018-07-23 12:32:47 -07002975 if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
2976 kvm_load_ldt(host_state->ldt_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002977#ifdef CONFIG_X86_64
Sean Christophersond7ee0392018-07-23 12:32:47 -07002978 load_gs_index(host_state->gs_sel);
Avi Kivity9581d442010-10-19 16:46:55 +02002979#else
Sean Christophersond7ee0392018-07-23 12:32:47 -07002980 loadsegment(gs, host_state->gs_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002981#endif
Avi Kivity33ed6322007-05-02 16:54:03 +03002982 }
Sean Christophersond7ee0392018-07-23 12:32:47 -07002983 if (host_state->fs_sel & 7)
2984 loadsegment(fs, host_state->fs_sel);
Avi Kivityb2da15a2012-05-13 19:53:24 +03002985#ifdef CONFIG_X86_64
Sean Christophersond7ee0392018-07-23 12:32:47 -07002986 if (unlikely(host_state->ds_sel | host_state->es_sel)) {
2987 loadsegment(ds, host_state->ds_sel);
2988 loadsegment(es, host_state->es_sel);
Avi Kivityb2da15a2012-05-13 19:53:24 +03002989 }
Avi Kivityb2da15a2012-05-13 19:53:24 +03002990#endif
Andy Lutomirskib7ffc442017-02-20 08:56:14 -08002991 invalidate_tss_limit();
Avi Kivity44ea2b12009-09-06 15:55:37 +03002992#ifdef CONFIG_X86_64
Avi Kivityc8770e72010-11-11 12:37:26 +02002993 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
Avi Kivity44ea2b12009-09-06 15:55:37 +03002994#endif
Thomas Garnier45fc8752017-03-14 10:05:08 -07002995 load_fixmap_gdt(raw_smp_processor_id());
Avi Kivity33ed6322007-05-02 16:54:03 +03002996}
2997
Sean Christopherson678e3152018-07-23 12:32:43 -07002998#ifdef CONFIG_X86_64
2999static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
Avi Kivitya9b21b62008-06-24 11:48:49 +03003000{
Paolo Bonzini4679b612018-09-24 17:23:01 +02003001 preempt_disable();
3002 if (vmx->loaded_cpu_state)
3003 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
3004 preempt_enable();
Sean Christopherson678e3152018-07-23 12:32:43 -07003005 return vmx->msr_guest_kernel_gs_base;
Avi Kivitya9b21b62008-06-24 11:48:49 +03003006}
3007
Sean Christopherson678e3152018-07-23 12:32:43 -07003008static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
3009{
Paolo Bonzini4679b612018-09-24 17:23:01 +02003010 preempt_disable();
3011 if (vmx->loaded_cpu_state)
3012 wrmsrl(MSR_KERNEL_GS_BASE, data);
3013 preempt_enable();
Sean Christopherson678e3152018-07-23 12:32:43 -07003014 vmx->msr_guest_kernel_gs_base = data;
3015}
3016#endif
3017
Feng Wu28b835d2015-09-18 22:29:54 +08003018static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
3019{
3020 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
3021 struct pi_desc old, new;
3022 unsigned int dest;
3023
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02003024 /*
3025 * In case of hot-plug or hot-unplug, we may have to undo
3026 * vmx_vcpu_pi_put even if there is no assigned device. And we
3027 * always keep PI.NDST up to date for simplicity: it makes the
3028 * code easier, and CPU migration is not a fast path.
3029 */
3030 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
Feng Wu28b835d2015-09-18 22:29:54 +08003031 return;
3032
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02003033 /*
3034 * First handle the simple case where no cmpxchg is necessary; just
3035 * allow posting non-urgent interrupts.
3036 *
3037 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
3038 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
3039 * expects the VCPU to be on the blocked_vcpu_list that matches
3040 * PI.NDST.
3041 */
3042 if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
3043 vcpu->cpu == cpu) {
3044 pi_clear_sn(pi_desc);
3045 return;
3046 }
3047
3048 /* The full case. */
Feng Wu28b835d2015-09-18 22:29:54 +08003049 do {
3050 old.control = new.control = pi_desc->control;
3051
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02003052 dest = cpu_physical_id(cpu);
Feng Wu28b835d2015-09-18 22:29:54 +08003053
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02003054 if (x2apic_enabled())
3055 new.ndst = dest;
3056 else
3057 new.ndst = (dest << 8) & 0xFF00;
Feng Wu28b835d2015-09-18 22:29:54 +08003058
Feng Wu28b835d2015-09-18 22:29:54 +08003059 new.sn = 0;
Paolo Bonzinic0a16662017-09-28 17:58:41 +02003060 } while (cmpxchg64(&pi_desc->control, old.control,
3061 new.control) != old.control);
Feng Wu28b835d2015-09-18 22:29:54 +08003062}
Xiao Guangrong1be0e612016-03-22 16:51:18 +08003063
Peter Feinerc95ba922016-08-17 09:36:47 -07003064static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
3065{
3066 vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
3067 vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
3068}
3069
Avi Kivity6aa8b732006-12-10 02:21:36 -08003070/*
3071 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
3072 * vcpu mutex is already taken.
3073 */
Avi Kivity15ad7142007-07-11 18:17:21 +03003074static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003075{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04003076 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003077 bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003078
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003079 if (!already_loaded) {
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01003080 loaded_vmcs_clear(vmx->loaded_vmcs);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08003081 local_irq_disable();
Zhang Yanfei8f536b72012-12-06 23:43:34 +08003082 crash_disable_local_vmclear(cpu);
Xiao Guangrong5a560f82012-11-28 20:54:14 +08003083
3084 /*
3085 * Read loaded_vmcs->cpu should be before fetching
3086 * loaded_vmcs->loaded_vmcss_on_cpu_link.
3087 * See the comments in __loaded_vmcs_clear().
3088 */
3089 smp_rmb();
3090
Nadav Har'Eld462b812011-05-24 15:26:10 +03003091 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
3092 &per_cpu(loaded_vmcss_on_cpu, cpu));
Zhang Yanfei8f536b72012-12-06 23:43:34 +08003093 crash_enable_local_vmclear(cpu);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08003094 local_irq_enable();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003095 }
3096
3097 if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
3098 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
3099 vmcs_load(vmx->loaded_vmcs->vmcs);
Ashok Raj15d45072018-02-01 22:59:43 +01003100 indirect_branch_prediction_barrier();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003101 }
3102
3103 if (!already_loaded) {
Andy Lutomirski59c58ceb2017-03-22 14:32:33 -07003104 void *gdt = get_current_gdt_ro();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003105 unsigned long sysenter_esp;
3106
3107 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08003108
Avi Kivity6aa8b732006-12-10 02:21:36 -08003109 /*
3110 * Linux uses per-cpu TSS and GDT, so set these when switching
Andy Lutomirskie0c23062017-02-20 08:56:10 -08003111 * processors. See 22.2.4.
Avi Kivity6aa8b732006-12-10 02:21:36 -08003112 */
Andy Lutomirskie0c23062017-02-20 08:56:10 -08003113 vmcs_writel(HOST_TR_BASE,
Andy Lutomirski72f5e082017-12-04 15:07:20 +01003114 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
Andy Lutomirski59c58ceb2017-03-22 14:32:33 -07003115 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08003116
Andy Lutomirskib7ffc442017-02-20 08:56:14 -08003117 /*
3118 * VM exits change the host TR limit to 0x67 after a VM
3119 * exit. This is okay, since 0x67 covers everything except
3120 * the IO bitmap and have have code to handle the IO bitmap
3121 * being lost after a VM exit.
3122 */
3123 BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
3124
Avi Kivity6aa8b732006-12-10 02:21:36 -08003125 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
3126 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
Haozhong Zhangff2c3a12015-10-20 15:39:10 +08003127
Nadav Har'Eld462b812011-05-24 15:26:10 +03003128 vmx->loaded_vmcs->cpu = cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003129 }
Feng Wu28b835d2015-09-18 22:29:54 +08003130
Owen Hofmann2680d6d2016-03-01 13:36:13 -08003131 /* Setup TSC multiplier */
3132 if (kvm_has_tsc_control &&
Peter Feinerc95ba922016-08-17 09:36:47 -07003133 vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
3134 decache_tsc_multiplier(vmx);
Owen Hofmann2680d6d2016-03-01 13:36:13 -08003135
Feng Wu28b835d2015-09-18 22:29:54 +08003136 vmx_vcpu_pi_load(vcpu, cpu);
Xiao Guangrong1be0e612016-03-22 16:51:18 +08003137 vmx->host_pkru = read_pkru();
Wanpeng Li74c55932017-11-29 01:31:20 -08003138 vmx->host_debugctlmsr = get_debugctlmsr();
Feng Wu28b835d2015-09-18 22:29:54 +08003139}
3140
3141static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
3142{
3143 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
3144
3145 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +08003146 !irq_remapping_cap(IRQ_POSTING_CAP) ||
3147 !kvm_vcpu_apicv_active(vcpu))
Feng Wu28b835d2015-09-18 22:29:54 +08003148 return;
3149
3150 /* Set SN when the vCPU is preempted */
3151 if (vcpu->preempted)
3152 pi_set_sn(pi_desc);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003153}
3154
3155static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
3156{
Feng Wu28b835d2015-09-18 22:29:54 +08003157 vmx_vcpu_pi_put(vcpu);
3158
Sean Christopherson6d6095b2018-07-23 12:32:44 -07003159 vmx_prepare_switch_to_host(to_vmx(vcpu));
Avi Kivity6aa8b732006-12-10 02:21:36 -08003160}
3161
Wanpeng Lif244dee2017-07-20 01:11:54 -07003162static bool emulation_required(struct kvm_vcpu *vcpu)
3163{
3164 return emulate_invalid_guest_state && !guest_state_valid(vcpu);
3165}
3166
Avi Kivityedcafe32009-12-30 18:07:40 +02003167static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
3168
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03003169/*
3170 * Return the cr0 value that a nested guest would read. This is a combination
3171 * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
3172 * its hypervisor (cr0_read_shadow).
3173 */
3174static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
3175{
3176 return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) |
3177 (fields->cr0_read_shadow & fields->cr0_guest_host_mask);
3178}
3179static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
3180{
3181 return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) |
3182 (fields->cr4_read_shadow & fields->cr4_guest_host_mask);
3183}
3184
Avi Kivity6aa8b732006-12-10 02:21:36 -08003185static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
3186{
Avi Kivity78ac8b42010-04-08 18:19:35 +03003187 unsigned long rflags, save_rflags;
Avi Kivity345dcaa2009-08-12 15:29:37 +03003188
Avi Kivity6de12732011-03-07 12:51:22 +02003189 if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
3190 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
3191 rflags = vmcs_readl(GUEST_RFLAGS);
3192 if (to_vmx(vcpu)->rmode.vm86_active) {
3193 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
3194 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
3195 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
3196 }
3197 to_vmx(vcpu)->rflags = rflags;
Avi Kivity78ac8b42010-04-08 18:19:35 +03003198 }
Avi Kivity6de12732011-03-07 12:51:22 +02003199 return to_vmx(vcpu)->rflags;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003200}
3201
3202static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
3203{
Wanpeng Lif244dee2017-07-20 01:11:54 -07003204 unsigned long old_rflags = vmx_get_rflags(vcpu);
3205
Avi Kivity6de12732011-03-07 12:51:22 +02003206 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
3207 to_vmx(vcpu)->rflags = rflags;
Avi Kivity78ac8b42010-04-08 18:19:35 +03003208 if (to_vmx(vcpu)->rmode.vm86_active) {
3209 to_vmx(vcpu)->rmode.save_rflags = rflags;
Glauber de Oliveira Costa053de042008-01-30 13:31:27 +01003210 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
Avi Kivity78ac8b42010-04-08 18:19:35 +03003211 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08003212 vmcs_writel(GUEST_RFLAGS, rflags);
Wanpeng Lif244dee2017-07-20 01:11:54 -07003213
3214 if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
3215 to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003216}
3217
Paolo Bonzini37ccdcb2014-05-20 14:29:47 +02003218static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
Glauber Costa2809f5d2009-05-12 16:21:05 -04003219{
3220 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3221 int ret = 0;
3222
3223 if (interruptibility & GUEST_INTR_STATE_STI)
Jan Kiszka48005f62010-02-19 19:38:07 +01003224 ret |= KVM_X86_SHADOW_INT_STI;
Glauber Costa2809f5d2009-05-12 16:21:05 -04003225 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
Jan Kiszka48005f62010-02-19 19:38:07 +01003226 ret |= KVM_X86_SHADOW_INT_MOV_SS;
Glauber Costa2809f5d2009-05-12 16:21:05 -04003227
Paolo Bonzini37ccdcb2014-05-20 14:29:47 +02003228 return ret;
Glauber Costa2809f5d2009-05-12 16:21:05 -04003229}
3230
3231static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
3232{
3233 u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3234 u32 interruptibility = interruptibility_old;
3235
3236 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
3237
Jan Kiszka48005f62010-02-19 19:38:07 +01003238 if (mask & KVM_X86_SHADOW_INT_MOV_SS)
Glauber Costa2809f5d2009-05-12 16:21:05 -04003239 interruptibility |= GUEST_INTR_STATE_MOV_SS;
Jan Kiszka48005f62010-02-19 19:38:07 +01003240 else if (mask & KVM_X86_SHADOW_INT_STI)
Glauber Costa2809f5d2009-05-12 16:21:05 -04003241 interruptibility |= GUEST_INTR_STATE_STI;
3242
3243 if ((interruptibility != interruptibility_old))
3244 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
3245}
3246
Avi Kivity6aa8b732006-12-10 02:21:36 -08003247static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
3248{
3249 unsigned long rip;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003250
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003251 rip = kvm_rip_read(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003252 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003253 kvm_rip_write(vcpu, rip);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003254
Glauber Costa2809f5d2009-05-12 16:21:05 -04003255 /* skipping an emulated instruction also counts */
3256 vmx_set_interrupt_shadow(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003257}
3258
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003259static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
3260 unsigned long exit_qual)
3261{
3262 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3263 unsigned int nr = vcpu->arch.exception.nr;
3264 u32 intr_info = nr | INTR_INFO_VALID_MASK;
3265
3266 if (vcpu->arch.exception.has_error_code) {
3267 vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
3268 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3269 }
3270
3271 if (kvm_exception_is_soft(nr))
3272 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3273 else
3274 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3275
3276 if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
3277 vmx_get_nmi_mask(vcpu))
3278 intr_info |= INTR_INFO_UNBLOCK_NMI;
3279
3280 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
3281}
3282
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003283/*
3284 * KVM wants to inject page-faults which it got to the guest. This function
3285 * checks whether in a nested guest, we need to inject them to L1 or L2.
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003286 */
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003287static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003288{
3289 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Wanpeng Liadfe20f2017-07-13 18:30:41 -07003290 unsigned int nr = vcpu->arch.exception.nr;
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003291
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003292 if (nr == PF_VECTOR) {
3293 if (vcpu->arch.exception.nested_apf) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003294 *exit_qual = vcpu->arch.apf.nested_apf_token;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003295 return 1;
3296 }
3297 /*
3298 * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
3299 * The fix is to add the ancillary datum (CR2 or DR6) to structs
3300 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
3301 * can be written only when inject_pending_event runs. This should be
3302 * conditional on a new capability---if the capability is disabled,
3303 * kvm_multiple_exception would write the ancillary information to
3304 * CR2 or DR6, for backwards ABI-compatibility.
3305 */
3306 if (nested_vmx_is_page_fault_vmexit(vmcs12,
3307 vcpu->arch.exception.error_code)) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003308 *exit_qual = vcpu->arch.cr2;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003309 return 1;
3310 }
3311 } else {
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003312 if (vmcs12->exception_bitmap & (1u << nr)) {
Jim Mattsoncfb634f2018-09-21 10:36:17 -07003313 if (nr == DB_VECTOR) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003314 *exit_qual = vcpu->arch.dr6;
Jim Mattsoncfb634f2018-09-21 10:36:17 -07003315 *exit_qual &= ~(DR6_FIXED_1 | DR6_BT);
3316 *exit_qual ^= DR6_RTM;
3317 } else {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003318 *exit_qual = 0;
Jim Mattsoncfb634f2018-09-21 10:36:17 -07003319 }
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003320 return 1;
3321 }
Wanpeng Liadfe20f2017-07-13 18:30:41 -07003322 }
3323
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003324 return 0;
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003325}
3326
Wanpeng Licaa057a2018-03-12 04:53:03 -07003327static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
3328{
3329 /*
3330 * Ensure that we clear the HLT state in the VMCS. We don't need to
3331 * explicitly skip the instruction because if the HLT state is set,
3332 * then the instruction is already executing and RIP has already been
3333 * advanced.
3334 */
3335 if (kvm_hlt_in_guest(vcpu->kvm) &&
3336 vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
3337 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
3338}
3339
Wanpeng Licfcd20e2017-07-13 18:30:39 -07003340static void vmx_queue_exception(struct kvm_vcpu *vcpu)
Avi Kivity298101d2007-11-25 13:41:11 +02003341{
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003342 struct vcpu_vmx *vmx = to_vmx(vcpu);
Wanpeng Licfcd20e2017-07-13 18:30:39 -07003343 unsigned nr = vcpu->arch.exception.nr;
3344 bool has_error_code = vcpu->arch.exception.has_error_code;
Wanpeng Licfcd20e2017-07-13 18:30:39 -07003345 u32 error_code = vcpu->arch.exception.error_code;
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003346 u32 intr_info = nr | INTR_INFO_VALID_MASK;
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003347
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003348 if (has_error_code) {
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003349 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003350 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3351 }
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003352
Avi Kivity7ffd92c2009-06-09 14:10:45 +03003353 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05003354 int inc_eip = 0;
3355 if (kvm_exception_is_soft(nr))
3356 inc_eip = vcpu->arch.event_exit_inst_len;
3357 if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02003358 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003359 return;
3360 }
3361
Sean Christophersonadd5ff72018-03-23 09:34:00 -07003362 WARN_ON_ONCE(vmx->emulation_required);
3363
Gleb Natapov66fd3f72009-05-11 13:35:50 +03003364 if (kvm_exception_is_soft(nr)) {
3365 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
3366 vmx->vcpu.arch.event_exit_inst_len);
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003367 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3368 } else
3369 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3370
3371 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
Wanpeng Licaa057a2018-03-12 04:53:03 -07003372
3373 vmx_clear_hlt(vcpu);
Avi Kivity298101d2007-11-25 13:41:11 +02003374}
3375
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003376static bool vmx_rdtscp_supported(void)
3377{
3378 return cpu_has_vmx_rdtscp();
3379}
3380
Mao, Junjiead756a12012-07-02 01:18:48 +00003381static bool vmx_invpcid_supported(void)
3382{
Junaid Shahideb4b2482018-06-27 14:59:14 -07003383 return cpu_has_vmx_invpcid();
Mao, Junjiead756a12012-07-02 01:18:48 +00003384}
3385
Avi Kivity6aa8b732006-12-10 02:21:36 -08003386/*
Eddie Donga75beee2007-05-17 18:55:15 +03003387 * Swap MSR entry in host/guest MSR entry array.
3388 */
Rusty Russell8b9cf982007-07-30 16:31:43 +10003389static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
Eddie Donga75beee2007-05-17 18:55:15 +03003390{
Avi Kivity26bb0982009-09-07 11:14:12 +03003391 struct shared_msr_entry tmp;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04003392
3393 tmp = vmx->guest_msrs[to];
3394 vmx->guest_msrs[to] = vmx->guest_msrs[from];
3395 vmx->guest_msrs[from] = tmp;
Eddie Donga75beee2007-05-17 18:55:15 +03003396}
3397
3398/*
Avi Kivitye38aea32007-04-19 13:22:48 +03003399 * Set up the vmcs to automatically save and restore system
3400 * msrs. Don't touch the 64-bit msrs if the guest is in legacy
3401 * mode, as fiddling with msrs is very expensive.
3402 */
Rusty Russell8b9cf982007-07-30 16:31:43 +10003403static void setup_msrs(struct vcpu_vmx *vmx)
Avi Kivitye38aea32007-04-19 13:22:48 +03003404{
Avi Kivity26bb0982009-09-07 11:14:12 +03003405 int save_nmsrs, index;
Avi Kivitye38aea32007-04-19 13:22:48 +03003406
Eddie Donga75beee2007-05-17 18:55:15 +03003407 save_nmsrs = 0;
Avi Kivity4d56c8a2007-04-19 14:28:44 +03003408#ifdef CONFIG_X86_64
Rusty Russell8b9cf982007-07-30 16:31:43 +10003409 if (is_long_mode(&vmx->vcpu)) {
Rusty Russell8b9cf982007-07-30 16:31:43 +10003410 index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
Eddie Donga75beee2007-05-17 18:55:15 +03003411 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10003412 move_msr_up(vmx, index, save_nmsrs++);
3413 index = __find_msr_index(vmx, MSR_LSTAR);
Eddie Donga75beee2007-05-17 18:55:15 +03003414 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10003415 move_msr_up(vmx, index, save_nmsrs++);
3416 index = __find_msr_index(vmx, MSR_CSTAR);
Eddie Donga75beee2007-05-17 18:55:15 +03003417 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10003418 move_msr_up(vmx, index, save_nmsrs++);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003419 index = __find_msr_index(vmx, MSR_TSC_AUX);
Radim Krčmářd6321d42017-08-05 00:12:49 +02003420 if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003421 move_msr_up(vmx, index, save_nmsrs++);
Eddie Donga75beee2007-05-17 18:55:15 +03003422 /*
Brian Gerst8c065852010-07-17 09:03:26 -04003423 * MSR_STAR is only needed on long mode guests, and only
Eddie Donga75beee2007-05-17 18:55:15 +03003424 * if efer.sce is enabled.
3425 */
Brian Gerst8c065852010-07-17 09:03:26 -04003426 index = __find_msr_index(vmx, MSR_STAR);
Avi Kivityf6801df2010-01-21 15:31:50 +02003427 if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
Rusty Russell8b9cf982007-07-30 16:31:43 +10003428 move_msr_up(vmx, index, save_nmsrs++);
Avi Kivity4d56c8a2007-04-19 14:28:44 +03003429 }
Eddie Donga75beee2007-05-17 18:55:15 +03003430#endif
Avi Kivity92c0d902009-10-29 11:00:16 +02003431 index = __find_msr_index(vmx, MSR_EFER);
3432 if (index >= 0 && update_transition_efer(vmx, index))
Avi Kivity26bb0982009-09-07 11:14:12 +03003433 move_msr_up(vmx, index, save_nmsrs++);
Avi Kivity4d56c8a2007-04-19 14:28:44 +03003434
Avi Kivity26bb0982009-09-07 11:14:12 +03003435 vmx->save_nmsrs = save_nmsrs;
Avi Kivity58972972009-02-24 22:26:47 +02003436
Yang Zhang8d146952013-01-25 10:18:50 +08003437 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +01003438 vmx_update_msr_bitmap(&vmx->vcpu);
Avi Kivitye38aea32007-04-19 13:22:48 +03003439}
3440
KarimAllah Ahmede79f2452018-04-14 05:10:52 +02003441static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003442{
KarimAllah Ahmede79f2452018-04-14 05:10:52 +02003443 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003444
KarimAllah Ahmede79f2452018-04-14 05:10:52 +02003445 if (is_guest_mode(vcpu) &&
3446 (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
3447 return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
3448
3449 return vcpu->arch.tsc_offset;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003450}
3451
3452/*
Zachary Amsden99e3e302010-08-19 22:07:17 -10003453 * writes 'offset' into guest's timestamp counter offset register
Avi Kivity6aa8b732006-12-10 02:21:36 -08003454 */
Zachary Amsden99e3e302010-08-19 22:07:17 -10003455static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003456{
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003457 if (is_guest_mode(vcpu)) {
Nadav Har'El79918252011-05-25 23:15:39 +03003458 /*
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003459 * We're here if L1 chose not to trap WRMSR to TSC. According
3460 * to the spec, this should set L1's TSC; The offset that L1
3461 * set for L2 remains unchanged, and still needs to be added
3462 * to the newly set TSC to get L2's TSC.
Nadav Har'El79918252011-05-25 23:15:39 +03003463 */
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003464 struct vmcs12 *vmcs12;
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003465 /* recalculate vmcs02.TSC_OFFSET: */
3466 vmcs12 = get_vmcs12(vcpu);
3467 vmcs_write64(TSC_OFFSET, offset +
3468 (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
3469 vmcs12->tsc_offset : 0));
3470 } else {
Yoshihiro YUNOMAE489223e2013-06-12 16:43:44 +09003471 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
3472 vmcs_read64(TSC_OFFSET), offset);
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003473 vmcs_write64(TSC_OFFSET, offset);
3474 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08003475}
3476
Nadav Har'El801d3422011-05-25 23:02:23 +03003477/*
3478 * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
3479 * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
3480 * all guests if the "nested" module option is off, and can also be disabled
3481 * for a single guest by disabling its VMX cpuid bit.
3482 */
3483static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
3484{
Radim Krčmářd6321d42017-08-05 00:12:49 +02003485 return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
Nadav Har'El801d3422011-05-25 23:02:23 +03003486}
3487
Avi Kivity6aa8b732006-12-10 02:21:36 -08003488/*
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003489 * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
3490 * returned for the various VMX controls MSRs when nested VMX is enabled.
3491 * The same values should also be used to verify that vmcs12 control fields are
3492 * valid during nested entry from L1 to L2.
3493 * Each of these control msrs has a low and high 32-bit half: A low bit is on
3494 * if the corresponding bit in the (32-bit) control field *must* be on, and a
3495 * bit in the high half is on if the corresponding bit in the control field
3496 * may be on. See also vmx_control_verify().
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003497 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003498static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003499{
Paolo Bonzini13893092018-02-26 13:40:09 +01003500 if (!nested) {
3501 memset(msrs, 0, sizeof(*msrs));
3502 return;
3503 }
3504
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003505 /*
3506 * Note that as a general rule, the high half of the MSRs (bits in
3507 * the control fields which may be 1) should be initialized by the
3508 * intersection of the underlying hardware's MSR (i.e., features which
3509 * can be supported) and the list of features we want to expose -
3510 * because they are known to be properly supported in our code.
3511 * Also, usually, the low half of the MSRs (bits which must be 1) can
3512 * be set to 0, meaning that L1 may turn off any of these bits. The
3513 * reason is that if one of these bits is necessary, it will appear
3514 * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
3515 * fields of vmcs01 and vmcs02, will turn these bits off - and
Paolo Bonzini7313c692017-07-27 10:31:25 +02003516 * nested_vmx_exit_reflected() will not pass related exits to L1.
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003517 * These rules have exceptions below.
3518 */
3519
3520 /* pin-based controls */
Jan Kiszkaeabeaac2013-03-13 11:30:50 +01003521 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003522 msrs->pinbased_ctls_low,
3523 msrs->pinbased_ctls_high);
3524 msrs->pinbased_ctls_low |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003525 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003526 msrs->pinbased_ctls_high &=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003527 PIN_BASED_EXT_INTR_MASK |
3528 PIN_BASED_NMI_EXITING |
Paolo Bonzini13893092018-02-26 13:40:09 +01003529 PIN_BASED_VIRTUAL_NMIS |
3530 (apicv ? PIN_BASED_POSTED_INTR : 0);
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003531 msrs->pinbased_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003532 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszka0238ea92013-03-13 11:31:24 +01003533 PIN_BASED_VMX_PREEMPTION_TIMER;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003534
Jan Kiszka3dbcd8d2014-06-16 13:59:40 +02003535 /* exit controls */
Arthur Chunqi Lic0dfee52013-08-06 18:41:45 +08003536 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003537 msrs->exit_ctls_low,
3538 msrs->exit_ctls_high);
3539 msrs->exit_ctls_low =
Wincy Vanb9c237b2015-02-03 23:56:30 +08003540 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
Bandan Dase0ba1a62014-04-19 18:17:46 -04003541
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003542 msrs->exit_ctls_high &=
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003543#ifdef CONFIG_X86_64
Arthur Chunqi Lic0dfee52013-08-06 18:41:45 +08003544 VM_EXIT_HOST_ADDR_SPACE_SIZE |
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003545#endif
Jan Kiszkaf4124502014-03-07 20:03:13 +01003546 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003547 msrs->exit_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003548 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszkaf4124502014-03-07 20:03:13 +01003549 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
Bandan Dase0ba1a62014-04-19 18:17:46 -04003550 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
3551
Jan Kiszka2996fca2014-06-16 13:59:43 +02003552 /* We support free control of debug control saving. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003553 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
Jan Kiszka2996fca2014-06-16 13:59:43 +02003554
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003555 /* entry controls */
3556 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003557 msrs->entry_ctls_low,
3558 msrs->entry_ctls_high);
3559 msrs->entry_ctls_low =
Wincy Vanb9c237b2015-02-03 23:56:30 +08003560 VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003561 msrs->entry_ctls_high &=
Jan Kiszka57435342013-08-06 10:39:56 +02003562#ifdef CONFIG_X86_64
3563 VM_ENTRY_IA32E_MODE |
3564#endif
3565 VM_ENTRY_LOAD_IA32_PAT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003566 msrs->entry_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003567 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
Jan Kiszka57435342013-08-06 10:39:56 +02003568
Jan Kiszka2996fca2014-06-16 13:59:43 +02003569 /* We support free control of debug control loading. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003570 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
Jan Kiszka2996fca2014-06-16 13:59:43 +02003571
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003572 /* cpu-based controls */
3573 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003574 msrs->procbased_ctls_low,
3575 msrs->procbased_ctls_high);
3576 msrs->procbased_ctls_low =
Wincy Vanb9c237b2015-02-03 23:56:30 +08003577 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003578 msrs->procbased_ctls_high &=
Jan Kiszkaa294c9b2013-10-23 17:43:09 +01003579 CPU_BASED_VIRTUAL_INTR_PENDING |
3580 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003581 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
3582 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
3583 CPU_BASED_CR3_STORE_EXITING |
3584#ifdef CONFIG_X86_64
3585 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
3586#endif
3587 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03003588 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
3589 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
3590 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
3591 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003592 /*
3593 * We can allow some features even when not supported by the
3594 * hardware. For example, L1 can specify an MSR bitmap - and we
3595 * can use it to avoid exits to L1 - even when L0 runs L2
3596 * without MSR bitmaps.
3597 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003598 msrs->procbased_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003599 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszka560b7ee2014-06-16 13:59:42 +02003600 CPU_BASED_USE_MSR_BITMAPS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003601
Jan Kiszka3dcdf3ec2014-06-16 13:59:41 +02003602 /* We support free control of CR3 access interception. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003603 msrs->procbased_ctls_low &=
Jan Kiszka3dcdf3ec2014-06-16 13:59:41 +02003604 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
3605
Paolo Bonzini80154d72017-08-24 13:55:35 +02003606 /*
3607 * secondary cpu-based controls. Do not include those that
3608 * depend on CPUID bits, they are added later by vmx_cpuid_update.
3609 */
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003610 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003611 msrs->secondary_ctls_low,
3612 msrs->secondary_ctls_high);
3613 msrs->secondary_ctls_low = 0;
3614 msrs->secondary_ctls_high &=
Paolo Bonzini1b073042016-10-25 16:06:30 +02003615 SECONDARY_EXEC_DESC |
Wincy Vanf2b93282015-02-03 23:56:03 +08003616 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Wincy Van82f0dd42015-02-03 23:57:18 +08003617 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Wincy Van608406e2015-02-03 23:57:51 +08003618 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Paolo Bonzini3db13482017-08-24 14:48:03 +02003619 SECONDARY_EXEC_WBINVD_EXITING;
Paolo Bonzini2cf7ea92018-10-03 10:34:00 +02003620
Liran Alon32c7acf2018-06-23 02:35:11 +03003621 /*
3622 * We can emulate "VMCS shadowing," even if the hardware
3623 * doesn't support it.
3624 */
3625 msrs->secondary_ctls_high |=
3626 SECONDARY_EXEC_SHADOW_VMCS;
Jan Kiszkac18911a2013-03-13 16:06:41 +01003627
Nadav Har'Elafa61f72013-08-07 14:59:22 +02003628 if (enable_ept) {
3629 /* nested EPT: emulate EPT also to L1 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003630 msrs->secondary_ctls_high |=
Radim Krčmář0790ec12015-03-17 14:02:32 +01003631 SECONDARY_EXEC_ENABLE_EPT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003632 msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
Paolo Bonzini7db74262017-03-08 10:49:19 +01003633 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
Bandan Das02120c42016-07-12 18:18:52 -04003634 if (cpu_has_vmx_ept_execute_only())
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003635 msrs->ept_caps |=
Bandan Das02120c42016-07-12 18:18:52 -04003636 VMX_EPT_EXECUTE_ONLY_BIT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003637 msrs->ept_caps &= vmx_capability.ept;
3638 msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
Paolo Bonzini7db74262017-03-08 10:49:19 +01003639 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
3640 VMX_EPT_1GB_PAGE_BIT;
Bandan Das03efce62017-05-05 15:25:15 -04003641 if (enable_ept_ad_bits) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003642 msrs->secondary_ctls_high |=
Bandan Das03efce62017-05-05 15:25:15 -04003643 SECONDARY_EXEC_ENABLE_PML;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003644 msrs->ept_caps |= VMX_EPT_AD_BIT;
Bandan Das03efce62017-05-05 15:25:15 -04003645 }
David Hildenbrand1c13bff2017-08-24 20:51:33 +02003646 }
Nadav Har'Elafa61f72013-08-07 14:59:22 +02003647
Bandan Das27c42a12017-08-03 15:54:42 -04003648 if (cpu_has_vmx_vmfunc()) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003649 msrs->secondary_ctls_high |=
Bandan Das27c42a12017-08-03 15:54:42 -04003650 SECONDARY_EXEC_ENABLE_VMFUNC;
Bandan Das41ab9372017-08-03 15:54:43 -04003651 /*
3652 * Advertise EPTP switching unconditionally
3653 * since we emulate it
3654 */
Wanpeng Li575b3a22017-10-19 07:00:34 +08003655 if (enable_ept)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003656 msrs->vmfunc_controls =
Wanpeng Li575b3a22017-10-19 07:00:34 +08003657 VMX_VMFUNC_EPTP_SWITCHING;
Bandan Das27c42a12017-08-03 15:54:42 -04003658 }
3659
Paolo Bonzinief697a72016-03-18 16:58:38 +01003660 /*
3661 * Old versions of KVM use the single-context version without
3662 * checking for support, so declare that it is supported even
3663 * though it is treated as global context. The alternative is
3664 * not failing the single-context invvpid, and it is worse.
3665 */
Wanpeng Li63cb6d52017-03-20 21:18:53 -07003666 if (enable_vpid) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003667 msrs->secondary_ctls_high |=
Wanpeng Li63cb6d52017-03-20 21:18:53 -07003668 SECONDARY_EXEC_ENABLE_VPID;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003669 msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
Jan Dakinevichbcdde302016-10-28 07:00:30 +03003670 VMX_VPID_EXTENT_SUPPORTED_MASK;
David Hildenbrand1c13bff2017-08-24 20:51:33 +02003671 }
Wanpeng Li99b83ac2015-10-13 09:12:21 -07003672
Radim Krčmář0790ec12015-03-17 14:02:32 +01003673 if (enable_unrestricted_guest)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003674 msrs->secondary_ctls_high |=
Radim Krčmář0790ec12015-03-17 14:02:32 +01003675 SECONDARY_EXEC_UNRESTRICTED_GUEST;
3676
Paolo Bonzini2cf7ea92018-10-03 10:34:00 +02003677 if (flexpriority_enabled)
3678 msrs->secondary_ctls_high |=
3679 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
3680
Jan Kiszkac18911a2013-03-13 16:06:41 +01003681 /* miscellaneous data */
Wincy Vanb9c237b2015-02-03 23:56:30 +08003682 rdmsr(MSR_IA32_VMX_MISC,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003683 msrs->misc_low,
3684 msrs->misc_high);
3685 msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
3686 msrs->misc_low |=
Jim Mattsonf4160e42018-05-29 09:11:33 -07003687 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
Wincy Vanb9c237b2015-02-03 23:56:30 +08003688 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
Jan Kiszkaf4124502014-03-07 20:03:13 +01003689 VMX_MISC_ACTIVITY_HLT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003690 msrs->misc_high = 0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003691
3692 /*
3693 * This MSR reports some information about VMX support. We
3694 * should return information about the VMX we emulate for the
3695 * guest, and the VMCS structure we give it - not about the
3696 * VMX support of the underlying hardware.
3697 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003698 msrs->basic =
David Matlack62cc6b9d2016-11-29 18:14:07 -08003699 VMCS12_REVISION |
3700 VMX_BASIC_TRUE_CTLS |
3701 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
3702 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
3703
3704 if (cpu_has_vmx_basic_inout())
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003705 msrs->basic |= VMX_BASIC_INOUT;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003706
3707 /*
David Matlack8322ebb2016-11-29 18:14:09 -08003708 * These MSRs specify bits which the guest must keep fixed on
David Matlack62cc6b9d2016-11-29 18:14:07 -08003709 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
3710 * We picked the standard core2 setting.
3711 */
3712#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
3713#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003714 msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
3715 msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
David Matlack8322ebb2016-11-29 18:14:09 -08003716
3717 /* These MSRs specify bits which the guest must keep fixed off. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003718 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
3719 rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
David Matlack62cc6b9d2016-11-29 18:14:07 -08003720
3721 /* highest index: VMX_PREEMPTION_TIMER_VALUE */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003722 msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003723}
3724
David Matlack38991522016-11-29 18:14:08 -08003725/*
3726 * if fixed0[i] == 1: val[i] must be 1
3727 * if fixed1[i] == 0: val[i] must be 0
3728 */
3729static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
3730{
3731 return ((val & fixed1) | fixed0) == val;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003732}
3733
3734static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
3735{
David Matlack38991522016-11-29 18:14:08 -08003736 return fixed_bits_valid(control, low, high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003737}
3738
3739static inline u64 vmx_control_msr(u32 low, u32 high)
3740{
3741 return low | ((u64)high << 32);
3742}
3743
David Matlack62cc6b9d2016-11-29 18:14:07 -08003744static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
3745{
3746 superset &= mask;
3747 subset &= mask;
3748
3749 return (superset | subset) == superset;
3750}
3751
3752static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
3753{
3754 const u64 feature_and_reserved =
3755 /* feature (except bit 48; see below) */
3756 BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
3757 /* reserved */
3758 BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003759 u64 vmx_basic = vmx->nested.msrs.basic;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003760
3761 if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
3762 return -EINVAL;
3763
3764 /*
3765 * KVM does not emulate a version of VMX that constrains physical
3766 * addresses of VMX structures (e.g. VMCS) to 32-bits.
3767 */
3768 if (data & BIT_ULL(48))
3769 return -EINVAL;
3770
3771 if (vmx_basic_vmcs_revision_id(vmx_basic) !=
3772 vmx_basic_vmcs_revision_id(data))
3773 return -EINVAL;
3774
3775 if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
3776 return -EINVAL;
3777
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003778 vmx->nested.msrs.basic = data;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003779 return 0;
3780}
3781
3782static int
3783vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
3784{
3785 u64 supported;
3786 u32 *lowp, *highp;
3787
3788 switch (msr_index) {
3789 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003790 lowp = &vmx->nested.msrs.pinbased_ctls_low;
3791 highp = &vmx->nested.msrs.pinbased_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003792 break;
3793 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003794 lowp = &vmx->nested.msrs.procbased_ctls_low;
3795 highp = &vmx->nested.msrs.procbased_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003796 break;
3797 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003798 lowp = &vmx->nested.msrs.exit_ctls_low;
3799 highp = &vmx->nested.msrs.exit_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003800 break;
3801 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003802 lowp = &vmx->nested.msrs.entry_ctls_low;
3803 highp = &vmx->nested.msrs.entry_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003804 break;
3805 case MSR_IA32_VMX_PROCBASED_CTLS2:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003806 lowp = &vmx->nested.msrs.secondary_ctls_low;
3807 highp = &vmx->nested.msrs.secondary_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003808 break;
3809 default:
3810 BUG();
3811 }
3812
3813 supported = vmx_control_msr(*lowp, *highp);
3814
3815 /* Check must-be-1 bits are still 1. */
3816 if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
3817 return -EINVAL;
3818
3819 /* Check must-be-0 bits are still 0. */
3820 if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
3821 return -EINVAL;
3822
3823 *lowp = data;
3824 *highp = data >> 32;
3825 return 0;
3826}
3827
3828static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
3829{
3830 const u64 feature_and_reserved_bits =
3831 /* feature */
3832 BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
3833 BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
3834 /* reserved */
3835 GENMASK_ULL(13, 9) | BIT_ULL(31);
3836 u64 vmx_misc;
3837
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003838 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
3839 vmx->nested.msrs.misc_high);
David Matlack62cc6b9d2016-11-29 18:14:07 -08003840
3841 if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
3842 return -EINVAL;
3843
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003844 if ((vmx->nested.msrs.pinbased_ctls_high &
David Matlack62cc6b9d2016-11-29 18:14:07 -08003845 PIN_BASED_VMX_PREEMPTION_TIMER) &&
3846 vmx_misc_preemption_timer_rate(data) !=
3847 vmx_misc_preemption_timer_rate(vmx_misc))
3848 return -EINVAL;
3849
3850 if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
3851 return -EINVAL;
3852
3853 if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
3854 return -EINVAL;
3855
3856 if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
3857 return -EINVAL;
3858
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003859 vmx->nested.msrs.misc_low = data;
3860 vmx->nested.msrs.misc_high = data >> 32;
Jim Mattsonf4160e42018-05-29 09:11:33 -07003861
3862 /*
3863 * If L1 has read-only VM-exit information fields, use the
3864 * less permissive vmx_vmwrite_bitmap to specify write
3865 * permissions for the shadow VMCS.
3866 */
3867 if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
3868 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
3869
David Matlack62cc6b9d2016-11-29 18:14:07 -08003870 return 0;
3871}
3872
3873static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
3874{
3875 u64 vmx_ept_vpid_cap;
3876
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003877 vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
3878 vmx->nested.msrs.vpid_caps);
David Matlack62cc6b9d2016-11-29 18:14:07 -08003879
3880 /* Every bit is either reserved or a feature bit. */
3881 if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
3882 return -EINVAL;
3883
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003884 vmx->nested.msrs.ept_caps = data;
3885 vmx->nested.msrs.vpid_caps = data >> 32;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003886 return 0;
3887}
3888
3889static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
3890{
3891 u64 *msr;
3892
3893 switch (msr_index) {
3894 case MSR_IA32_VMX_CR0_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003895 msr = &vmx->nested.msrs.cr0_fixed0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003896 break;
3897 case MSR_IA32_VMX_CR4_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003898 msr = &vmx->nested.msrs.cr4_fixed0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003899 break;
3900 default:
3901 BUG();
3902 }
3903
3904 /*
3905 * 1 bits (which indicates bits which "must-be-1" during VMX operation)
3906 * must be 1 in the restored value.
3907 */
3908 if (!is_bitwise_subset(data, *msr, -1ULL))
3909 return -EINVAL;
3910
3911 *msr = data;
3912 return 0;
3913}
3914
3915/*
3916 * Called when userspace is restoring VMX MSRs.
3917 *
3918 * Returns 0 on success, non-0 otherwise.
3919 */
3920static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
3921{
3922 struct vcpu_vmx *vmx = to_vmx(vcpu);
3923
Jim Mattsona943ac52018-05-29 09:11:32 -07003924 /*
3925 * Don't allow changes to the VMX capability MSRs while the vCPU
3926 * is in VMX operation.
3927 */
3928 if (vmx->nested.vmxon)
3929 return -EBUSY;
3930
David Matlack62cc6b9d2016-11-29 18:14:07 -08003931 switch (msr_index) {
3932 case MSR_IA32_VMX_BASIC:
3933 return vmx_restore_vmx_basic(vmx, data);
3934 case MSR_IA32_VMX_PINBASED_CTLS:
3935 case MSR_IA32_VMX_PROCBASED_CTLS:
3936 case MSR_IA32_VMX_EXIT_CTLS:
3937 case MSR_IA32_VMX_ENTRY_CTLS:
3938 /*
3939 * The "non-true" VMX capability MSRs are generated from the
3940 * "true" MSRs, so we do not support restoring them directly.
3941 *
3942 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
3943 * should restore the "true" MSRs with the must-be-1 bits
3944 * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
3945 * DEFAULT SETTINGS".
3946 */
3947 return -EINVAL;
3948 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3949 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3950 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
3951 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
3952 case MSR_IA32_VMX_PROCBASED_CTLS2:
3953 return vmx_restore_control_msr(vmx, msr_index, data);
3954 case MSR_IA32_VMX_MISC:
3955 return vmx_restore_vmx_misc(vmx, data);
3956 case MSR_IA32_VMX_CR0_FIXED0:
3957 case MSR_IA32_VMX_CR4_FIXED0:
3958 return vmx_restore_fixed0_msr(vmx, msr_index, data);
3959 case MSR_IA32_VMX_CR0_FIXED1:
3960 case MSR_IA32_VMX_CR4_FIXED1:
3961 /*
3962 * These MSRs are generated based on the vCPU's CPUID, so we
3963 * do not support restoring them directly.
3964 */
3965 return -EINVAL;
3966 case MSR_IA32_VMX_EPT_VPID_CAP:
3967 return vmx_restore_vmx_ept_vpid_cap(vmx, data);
3968 case MSR_IA32_VMX_VMCS_ENUM:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003969 vmx->nested.msrs.vmcs_enum = data;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003970 return 0;
3971 default:
3972 /*
3973 * The rest of the VMX capability MSRs do not support restore.
3974 */
3975 return -EINVAL;
3976 }
3977}
3978
Jan Kiszkacae50132014-01-04 18:47:22 +01003979/* Returns 0 on success, non-0 otherwise. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003980static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003981{
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003982 switch (msr_index) {
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003983 case MSR_IA32_VMX_BASIC:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003984 *pdata = msrs->basic;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003985 break;
3986 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3987 case MSR_IA32_VMX_PINBASED_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003988 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003989 msrs->pinbased_ctls_low,
3990 msrs->pinbased_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003991 if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
3992 *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003993 break;
3994 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3995 case MSR_IA32_VMX_PROCBASED_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003996 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003997 msrs->procbased_ctls_low,
3998 msrs->procbased_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003999 if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
4000 *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004001 break;
4002 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
4003 case MSR_IA32_VMX_EXIT_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08004004 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004005 msrs->exit_ctls_low,
4006 msrs->exit_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08004007 if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
4008 *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004009 break;
4010 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
4011 case MSR_IA32_VMX_ENTRY_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08004012 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004013 msrs->entry_ctls_low,
4014 msrs->entry_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08004015 if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
4016 *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004017 break;
4018 case MSR_IA32_VMX_MISC:
Wincy Vanb9c237b2015-02-03 23:56:30 +08004019 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004020 msrs->misc_low,
4021 msrs->misc_high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004022 break;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004023 case MSR_IA32_VMX_CR0_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004024 *pdata = msrs->cr0_fixed0;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004025 break;
4026 case MSR_IA32_VMX_CR0_FIXED1:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004027 *pdata = msrs->cr0_fixed1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004028 break;
4029 case MSR_IA32_VMX_CR4_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004030 *pdata = msrs->cr4_fixed0;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004031 break;
4032 case MSR_IA32_VMX_CR4_FIXED1:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004033 *pdata = msrs->cr4_fixed1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004034 break;
4035 case MSR_IA32_VMX_VMCS_ENUM:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004036 *pdata = msrs->vmcs_enum;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004037 break;
4038 case MSR_IA32_VMX_PROCBASED_CTLS2:
Wincy Vanb9c237b2015-02-03 23:56:30 +08004039 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004040 msrs->secondary_ctls_low,
4041 msrs->secondary_ctls_high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004042 break;
4043 case MSR_IA32_VMX_EPT_VPID_CAP:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004044 *pdata = msrs->ept_caps |
4045 ((u64)msrs->vpid_caps << 32);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004046 break;
Bandan Das27c42a12017-08-03 15:54:42 -04004047 case MSR_IA32_VMX_VMFUNC:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004048 *pdata = msrs->vmfunc_controls;
Bandan Das27c42a12017-08-03 15:54:42 -04004049 break;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004050 default:
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004051 return 1;
Nadav Har'Elb3897a42013-07-08 19:12:35 +08004052 }
4053
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004054 return 0;
4055}
4056
Haozhong Zhang37e4c992016-06-22 14:59:55 +08004057static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
4058 uint64_t val)
4059{
4060 uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
4061
4062 return !(val & ~valid_bits);
4063}
4064
Tom Lendacky801e4592018-02-21 13:39:51 -06004065static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
4066{
Paolo Bonzini13893092018-02-26 13:40:09 +01004067 switch (msr->index) {
4068 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
4069 if (!nested)
4070 return 1;
4071 return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
4072 default:
4073 return 1;
4074 }
4075
4076 return 0;
Tom Lendacky801e4592018-02-21 13:39:51 -06004077}
4078
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004079/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08004080 * Reads an msr value (of 'msr_index') into 'pdata'.
4081 * Returns 0 on success, non-0 otherwise.
4082 * Assumes vcpu_load() was already called.
4083 */
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004084static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004085{
Borislav Petkova6cb0992017-12-20 12:50:28 +01004086 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03004087 struct shared_msr_entry *msr;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004088
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004089 switch (msr_info->index) {
Avi Kivity05b3e0c2006-12-13 00:33:45 -08004090#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08004091 case MSR_FS_BASE:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004092 msr_info->data = vmcs_readl(GUEST_FS_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004093 break;
4094 case MSR_GS_BASE:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004095 msr_info->data = vmcs_readl(GUEST_GS_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004096 break;
Avi Kivity44ea2b12009-09-06 15:55:37 +03004097 case MSR_KERNEL_GS_BASE:
Sean Christopherson678e3152018-07-23 12:32:43 -07004098 msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
Avi Kivity44ea2b12009-09-06 15:55:37 +03004099 break;
Avi Kivity26bb0982009-09-07 11:14:12 +03004100#endif
Avi Kivity6aa8b732006-12-10 02:21:36 -08004101 case MSR_EFER:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004102 return kvm_get_msr_common(vcpu, msr_info);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004103 case MSR_IA32_SPEC_CTRL:
4104 if (!msr_info->host_initiated &&
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004105 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
4106 return 1;
4107
4108 msr_info->data = to_vmx(vcpu)->spec_ctrl;
4109 break;
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +01004110 case MSR_IA32_ARCH_CAPABILITIES:
4111 if (!msr_info->host_initiated &&
4112 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
4113 return 1;
4114 msr_info->data = to_vmx(vcpu)->arch_capabilities;
4115 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004116 case MSR_IA32_SYSENTER_CS:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004117 msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004118 break;
4119 case MSR_IA32_SYSENTER_EIP:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004120 msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004121 break;
4122 case MSR_IA32_SYSENTER_ESP:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004123 msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004124 break;
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00004125 case MSR_IA32_BNDCFGS:
Haozhong Zhang691bd432017-07-04 10:27:41 +08004126 if (!kvm_mpx_supported() ||
Radim Krčmářd6321d42017-08-05 00:12:49 +02004127 (!msr_info->host_initiated &&
4128 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
Paolo Bonzini93c4adc2014-03-05 23:19:52 +01004129 return 1;
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004130 msr_info->data = vmcs_read64(GUEST_BNDCFGS);
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00004131 break;
Ashok Rajc45dcc72016-06-22 14:59:56 +08004132 case MSR_IA32_MCG_EXT_CTL:
4133 if (!msr_info->host_initiated &&
Borislav Petkova6cb0992017-12-20 12:50:28 +01004134 !(vmx->msr_ia32_feature_control &
Ashok Rajc45dcc72016-06-22 14:59:56 +08004135 FEATURE_CONTROL_LMCE))
Jan Kiszkacae50132014-01-04 18:47:22 +01004136 return 1;
Ashok Rajc45dcc72016-06-22 14:59:56 +08004137 msr_info->data = vcpu->arch.mcg_ext_ctl;
4138 break;
Jan Kiszkacae50132014-01-04 18:47:22 +01004139 case MSR_IA32_FEATURE_CONTROL:
Borislav Petkova6cb0992017-12-20 12:50:28 +01004140 msr_info->data = vmx->msr_ia32_feature_control;
Jan Kiszkacae50132014-01-04 18:47:22 +01004141 break;
4142 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
4143 if (!nested_vmx_allowed(vcpu))
4144 return 1;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004145 return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
4146 &msr_info->data);
Wanpeng Li20300092014-12-02 19:14:59 +08004147 case MSR_IA32_XSS:
4148 if (!vmx_xsaves_supported())
4149 return 1;
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004150 msr_info->data = vcpu->arch.ia32_xss;
Wanpeng Li20300092014-12-02 19:14:59 +08004151 break;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004152 case MSR_TSC_AUX:
Radim Krčmářd6321d42017-08-05 00:12:49 +02004153 if (!msr_info->host_initiated &&
4154 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004155 return 1;
4156 /* Otherwise falls through */
Avi Kivity6aa8b732006-12-10 02:21:36 -08004157 default:
Borislav Petkova6cb0992017-12-20 12:50:28 +01004158 msr = find_msr_entry(vmx, msr_info->index);
Avi Kivity3bab1f52006-12-29 16:49:48 -08004159 if (msr) {
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004160 msr_info->data = msr->data;
Avi Kivity3bab1f52006-12-29 16:49:48 -08004161 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004162 }
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004163 return kvm_get_msr_common(vcpu, msr_info);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004164 }
4165
Avi Kivity6aa8b732006-12-10 02:21:36 -08004166 return 0;
4167}
4168
Jan Kiszkacae50132014-01-04 18:47:22 +01004169static void vmx_leave_nested(struct kvm_vcpu *vcpu);
4170
Avi Kivity6aa8b732006-12-10 02:21:36 -08004171/*
4172 * Writes msr value into into the appropriate "register".
4173 * Returns 0 on success, non-0 otherwise.
4174 * Assumes vcpu_load() was already called.
4175 */
Will Auld8fe8ab42012-11-29 12:42:12 -08004176static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004177{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04004178 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03004179 struct shared_msr_entry *msr;
Eddie Dong2cc51562007-05-21 07:28:09 +03004180 int ret = 0;
Will Auld8fe8ab42012-11-29 12:42:12 -08004181 u32 msr_index = msr_info->index;
4182 u64 data = msr_info->data;
Eddie Dong2cc51562007-05-21 07:28:09 +03004183
Avi Kivity6aa8b732006-12-10 02:21:36 -08004184 switch (msr_index) {
Avi Kivity3bab1f52006-12-29 16:49:48 -08004185 case MSR_EFER:
Will Auld8fe8ab42012-11-29 12:42:12 -08004186 ret = kvm_set_msr_common(vcpu, msr_info);
Eddie Dong2cc51562007-05-21 07:28:09 +03004187 break;
Avi Kivity16175a72009-03-23 22:13:44 +02004188#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08004189 case MSR_FS_BASE:
Avi Kivity2fb92db2011-04-27 19:42:18 +03004190 vmx_segment_cache_clear(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004191 vmcs_writel(GUEST_FS_BASE, data);
4192 break;
4193 case MSR_GS_BASE:
Avi Kivity2fb92db2011-04-27 19:42:18 +03004194 vmx_segment_cache_clear(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004195 vmcs_writel(GUEST_GS_BASE, data);
4196 break;
Avi Kivity44ea2b12009-09-06 15:55:37 +03004197 case MSR_KERNEL_GS_BASE:
Sean Christopherson678e3152018-07-23 12:32:43 -07004198 vmx_write_guest_kernel_gs_base(vmx, data);
Avi Kivity44ea2b12009-09-06 15:55:37 +03004199 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004200#endif
4201 case MSR_IA32_SYSENTER_CS:
4202 vmcs_write32(GUEST_SYSENTER_CS, data);
4203 break;
4204 case MSR_IA32_SYSENTER_EIP:
Avi Kivityf5b42c32007-03-06 12:05:53 +02004205 vmcs_writel(GUEST_SYSENTER_EIP, data);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004206 break;
4207 case MSR_IA32_SYSENTER_ESP:
Avi Kivityf5b42c32007-03-06 12:05:53 +02004208 vmcs_writel(GUEST_SYSENTER_ESP, data);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004209 break;
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00004210 case MSR_IA32_BNDCFGS:
Haozhong Zhang691bd432017-07-04 10:27:41 +08004211 if (!kvm_mpx_supported() ||
Radim Krčmářd6321d42017-08-05 00:12:49 +02004212 (!msr_info->host_initiated &&
4213 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
Paolo Bonzini93c4adc2014-03-05 23:19:52 +01004214 return 1;
Yu Zhangfd8cb432017-08-24 20:27:56 +08004215 if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
Jim Mattson45316622017-05-23 11:52:54 -07004216 (data & MSR_IA32_BNDCFGS_RSVD))
Avi Kivity6aa8b732006-12-10 02:21:36 -08004217 return 1;
Sheng Yang468d4722008-10-09 16:01:55 +08004218 vmcs_write64(GUEST_BNDCFGS, data);
4219 break;
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004220 case MSR_IA32_SPEC_CTRL:
4221 if (!msr_info->host_initiated &&
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004222 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
4223 return 1;
4224
4225 /* The STIBP bit doesn't fault even if it's not advertised */
Konrad Rzeszutek Wilk9f65fb22018-05-09 21:41:38 +02004226 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004227 return 1;
4228
4229 vmx->spec_ctrl = data;
4230
4231 if (!data)
4232 break;
4233
4234 /*
4235 * For non-nested:
4236 * When it's written (to non-zero) for the first time, pass
4237 * it through.
4238 *
4239 * For nested:
4240 * The handling of the MSR bitmap for L2 guests is done in
4241 * nested_vmx_merge_msr_bitmap. We should not touch the
4242 * vmcs02.msr_bitmap here since it gets completely overwritten
4243 * in the merging. We update the vmcs01 here for L1 as well
4244 * since it will end up touching the MSR anyway now.
4245 */
4246 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
4247 MSR_IA32_SPEC_CTRL,
4248 MSR_TYPE_RW);
4249 break;
Ashok Raj15d45072018-02-01 22:59:43 +01004250 case MSR_IA32_PRED_CMD:
4251 if (!msr_info->host_initiated &&
Ashok Raj15d45072018-02-01 22:59:43 +01004252 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
4253 return 1;
4254
4255 if (data & ~PRED_CMD_IBPB)
4256 return 1;
4257
4258 if (!data)
4259 break;
4260
4261 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
4262
4263 /*
4264 * For non-nested:
4265 * When it's written (to non-zero) for the first time, pass
4266 * it through.
4267 *
4268 * For nested:
4269 * The handling of the MSR bitmap for L2 guests is done in
4270 * nested_vmx_merge_msr_bitmap. We should not touch the
4271 * vmcs02.msr_bitmap here since it gets completely overwritten
4272 * in the merging.
4273 */
4274 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
4275 MSR_TYPE_W);
4276 break;
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +01004277 case MSR_IA32_ARCH_CAPABILITIES:
4278 if (!msr_info->host_initiated)
4279 return 1;
4280 vmx->arch_capabilities = data;
4281 break;
Sheng Yang468d4722008-10-09 16:01:55 +08004282 case MSR_IA32_CR_PAT:
4283 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
Nadav Amit45666542014-09-18 22:39:44 +03004284 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
4285 return 1;
Sheng Yang468d4722008-10-09 16:01:55 +08004286 vmcs_write64(GUEST_IA32_PAT, data);
4287 vcpu->arch.pat = data;
4288 break;
4289 }
Will Auld8fe8ab42012-11-29 12:42:12 -08004290 ret = kvm_set_msr_common(vcpu, msr_info);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004291 break;
Will Auldba904632012-11-29 12:42:50 -08004292 case MSR_IA32_TSC_ADJUST:
4293 ret = kvm_set_msr_common(vcpu, msr_info);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004294 break;
Ashok Rajc45dcc72016-06-22 14:59:56 +08004295 case MSR_IA32_MCG_EXT_CTL:
4296 if ((!msr_info->host_initiated &&
4297 !(to_vmx(vcpu)->msr_ia32_feature_control &
4298 FEATURE_CONTROL_LMCE)) ||
4299 (data & ~MCG_EXT_CTL_LMCE_EN))
4300 return 1;
4301 vcpu->arch.mcg_ext_ctl = data;
4302 break;
Jan Kiszkacae50132014-01-04 18:47:22 +01004303 case MSR_IA32_FEATURE_CONTROL:
Haozhong Zhang37e4c992016-06-22 14:59:55 +08004304 if (!vmx_feature_control_msr_valid(vcpu, data) ||
Haozhong Zhang3b840802016-06-22 14:59:54 +08004305 (to_vmx(vcpu)->msr_ia32_feature_control &
Jan Kiszkacae50132014-01-04 18:47:22 +01004306 FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
4307 return 1;
Haozhong Zhang3b840802016-06-22 14:59:54 +08004308 vmx->msr_ia32_feature_control = data;
Jan Kiszkacae50132014-01-04 18:47:22 +01004309 if (msr_info->host_initiated && data == 0)
4310 vmx_leave_nested(vcpu);
4311 break;
4312 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
David Matlack62cc6b9d2016-11-29 18:14:07 -08004313 if (!msr_info->host_initiated)
4314 return 1; /* they are read-only */
4315 if (!nested_vmx_allowed(vcpu))
4316 return 1;
4317 return vmx_set_vmx_msr(vcpu, msr_index, data);
Wanpeng Li20300092014-12-02 19:14:59 +08004318 case MSR_IA32_XSS:
4319 if (!vmx_xsaves_supported())
4320 return 1;
4321 /*
4322 * The only supported bit as of Skylake is bit 8, but
4323 * it is not supported on KVM.
4324 */
4325 if (data != 0)
4326 return 1;
4327 vcpu->arch.ia32_xss = data;
4328 if (vcpu->arch.ia32_xss != host_xss)
4329 add_atomic_switch_msr(vmx, MSR_IA32_XSS,
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04004330 vcpu->arch.ia32_xss, host_xss, false);
Wanpeng Li20300092014-12-02 19:14:59 +08004331 else
4332 clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
4333 break;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004334 case MSR_TSC_AUX:
Radim Krčmářd6321d42017-08-05 00:12:49 +02004335 if (!msr_info->host_initiated &&
4336 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004337 return 1;
4338 /* Check reserved bit, higher 32 bits should be zero */
4339 if ((data >> 32) != 0)
4340 return 1;
4341 /* Otherwise falls through */
Avi Kivity6aa8b732006-12-10 02:21:36 -08004342 default:
Rusty Russell8b9cf982007-07-30 16:31:43 +10004343 msr = find_msr_entry(vmx, msr_index);
Avi Kivity3bab1f52006-12-29 16:49:48 -08004344 if (msr) {
Andy Honig8b3c3102014-08-27 11:16:44 -07004345 u64 old_msr_data = msr->data;
Avi Kivity3bab1f52006-12-29 16:49:48 -08004346 msr->data = data;
Avi Kivity2225fd52012-04-18 15:03:04 +03004347 if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
4348 preempt_disable();
Andy Honig8b3c3102014-08-27 11:16:44 -07004349 ret = kvm_set_shared_msr(msr->index, msr->data,
4350 msr->mask);
Avi Kivity2225fd52012-04-18 15:03:04 +03004351 preempt_enable();
Andy Honig8b3c3102014-08-27 11:16:44 -07004352 if (ret)
4353 msr->data = old_msr_data;
Avi Kivity2225fd52012-04-18 15:03:04 +03004354 }
Avi Kivity3bab1f52006-12-29 16:49:48 -08004355 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004356 }
Will Auld8fe8ab42012-11-29 12:42:12 -08004357 ret = kvm_set_msr_common(vcpu, msr_info);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004358 }
4359
Eddie Dong2cc51562007-05-21 07:28:09 +03004360 return ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004361}
4362
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03004363static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004364{
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03004365 __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
4366 switch (reg) {
4367 case VCPU_REGS_RSP:
4368 vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
4369 break;
4370 case VCPU_REGS_RIP:
4371 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
4372 break;
Avi Kivity6de4f3a2009-05-31 22:58:47 +03004373 case VCPU_EXREG_PDPTR:
4374 if (enable_ept)
4375 ept_save_pdptrs(vcpu);
4376 break;
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03004377 default:
4378 break;
4379 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08004380}
4381
Avi Kivity6aa8b732006-12-10 02:21:36 -08004382static __init int cpu_has_kvm_support(void)
4383{
Eduardo Habkost6210e372008-11-17 19:03:16 -02004384 return cpu_has_vmx();
Avi Kivity6aa8b732006-12-10 02:21:36 -08004385}
4386
4387static __init int vmx_disabled_by_bios(void)
4388{
4389 u64 msr;
4390
4391 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
Shane Wangcafd6652010-04-29 12:09:01 -04004392 if (msr & FEATURE_CONTROL_LOCKED) {
Joseph Cihula23f3e992011-02-08 11:45:56 -08004393 /* launched w/ TXT and VMX disabled */
Shane Wangcafd6652010-04-29 12:09:01 -04004394 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
4395 && tboot_enabled())
4396 return 1;
Joseph Cihula23f3e992011-02-08 11:45:56 -08004397 /* launched w/o TXT and VMX only enabled w/ TXT */
Shane Wangcafd6652010-04-29 12:09:01 -04004398 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
Joseph Cihula23f3e992011-02-08 11:45:56 -08004399 && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
Shane Wangf9335af2010-11-17 11:40:17 +08004400 && !tboot_enabled()) {
4401 printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
Joseph Cihula23f3e992011-02-08 11:45:56 -08004402 "activate TXT before enabling KVM\n");
Shane Wangcafd6652010-04-29 12:09:01 -04004403 return 1;
Shane Wangf9335af2010-11-17 11:40:17 +08004404 }
Joseph Cihula23f3e992011-02-08 11:45:56 -08004405 /* launched w/o TXT and VMX disabled */
4406 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
4407 && !tboot_enabled())
4408 return 1;
Shane Wangcafd6652010-04-29 12:09:01 -04004409 }
4410
4411 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004412}
4413
Dongxiao Xu7725b892010-05-11 18:29:38 +08004414static void kvm_cpu_vmxon(u64 addr)
4415{
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004416 cr4_set_bits(X86_CR4_VMXE);
Alexander Shishkin1c5ac212016-03-29 17:43:10 +03004417 intel_pt_handle_vmx(1);
4418
Uros Bizjak4b1e5472018-10-11 19:40:44 +02004419 asm volatile ("vmxon %0" : : "m"(addr));
Dongxiao Xu7725b892010-05-11 18:29:38 +08004420}
4421
Radim Krčmář13a34e02014-08-28 15:13:03 +02004422static int hardware_enable(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004423{
4424 int cpu = raw_smp_processor_id();
4425 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
Shane Wangcafd6652010-04-29 12:09:01 -04004426 u64 old, test_bits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004427
Andy Lutomirski1e02ce42014-10-24 15:58:08 -07004428 if (cr4_read_shadow() & X86_CR4_VMXE)
Alexander Graf10474ae2009-09-15 11:37:46 +02004429 return -EBUSY;
4430
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01004431 /*
4432 * This can happen if we hot-added a CPU but failed to allocate
4433 * VP assist page for it.
4434 */
4435 if (static_branch_unlikely(&enable_evmcs) &&
4436 !hv_get_vp_assist_page(cpu))
4437 return -EFAULT;
4438
Nadav Har'Eld462b812011-05-24 15:26:10 +03004439 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
Feng Wubf9f6ac2015-09-18 22:29:55 +08004440 INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
4441 spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
Zhang Yanfei8f536b72012-12-06 23:43:34 +08004442
4443 /*
4444 * Now we can enable the vmclear operation in kdump
4445 * since the loaded_vmcss_on_cpu list on this cpu
4446 * has been initialized.
4447 *
4448 * Though the cpu is not in VMX operation now, there
4449 * is no problem to enable the vmclear operation
4450 * for the loaded_vmcss_on_cpu list is empty!
4451 */
4452 crash_enable_local_vmclear(cpu);
4453
Avi Kivity6aa8b732006-12-10 02:21:36 -08004454 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
Shane Wangcafd6652010-04-29 12:09:01 -04004455
4456 test_bits = FEATURE_CONTROL_LOCKED;
4457 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
4458 if (tboot_enabled())
4459 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
4460
4461 if ((old & test_bits) != test_bits) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08004462 /* enable and lock */
Shane Wangcafd6652010-04-29 12:09:01 -04004463 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
4464 }
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004465 kvm_cpu_vmxon(phys_addr);
David Hildenbrandfdf288b2017-08-24 20:51:29 +02004466 if (enable_ept)
4467 ept_sync_global();
Alexander Graf10474ae2009-09-15 11:37:46 +02004468
4469 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004470}
4471
Nadav Har'Eld462b812011-05-24 15:26:10 +03004472static void vmclear_local_loaded_vmcss(void)
Avi Kivity543e4242008-05-13 16:22:47 +03004473{
4474 int cpu = raw_smp_processor_id();
Nadav Har'Eld462b812011-05-24 15:26:10 +03004475 struct loaded_vmcs *v, *n;
Avi Kivity543e4242008-05-13 16:22:47 +03004476
Nadav Har'Eld462b812011-05-24 15:26:10 +03004477 list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
4478 loaded_vmcss_on_cpu_link)
4479 __loaded_vmcs_clear(v);
Avi Kivity543e4242008-05-13 16:22:47 +03004480}
4481
Eduardo Habkost710ff4a2008-11-17 19:03:18 -02004482
4483/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
4484 * tricks.
4485 */
4486static void kvm_cpu_vmxoff(void)
4487{
Uros Bizjak4b1e5472018-10-11 19:40:44 +02004488 asm volatile (__ex("vmxoff"));
Alexander Shishkin1c5ac212016-03-29 17:43:10 +03004489
4490 intel_pt_handle_vmx(0);
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004491 cr4_clear_bits(X86_CR4_VMXE);
Eduardo Habkost710ff4a2008-11-17 19:03:18 -02004492}
4493
Radim Krčmář13a34e02014-08-28 15:13:03 +02004494static void hardware_disable(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004495{
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004496 vmclear_local_loaded_vmcss();
4497 kvm_cpu_vmxoff();
Avi Kivity6aa8b732006-12-10 02:21:36 -08004498}
4499
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004500static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
Mike Dayd77c26f2007-10-08 09:02:08 -04004501 u32 msr, u32 *result)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004502{
4503 u32 vmx_msr_low, vmx_msr_high;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004504 u32 ctl = ctl_min | ctl_opt;
4505
4506 rdmsr(msr, vmx_msr_low, vmx_msr_high);
4507
4508 ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
4509 ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
4510
4511 /* Ensure minimum (required) set of control bits are supported. */
4512 if (ctl_min & ~ctl)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004513 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004514
4515 *result = ctl;
4516 return 0;
4517}
4518
Avi Kivity110312c2010-12-21 12:54:20 +02004519static __init bool allow_1_setting(u32 msr, u32 ctl)
4520{
4521 u32 vmx_msr_low, vmx_msr_high;
4522
4523 rdmsr(msr, vmx_msr_low, vmx_msr_high);
4524 return vmx_msr_high & ctl;
4525}
4526
Yang, Sheng002c7f72007-07-31 14:23:01 +03004527static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004528{
4529 u32 vmx_msr_low, vmx_msr_high;
Sheng Yangd56f5462008-04-25 10:13:16 +08004530 u32 min, opt, min2, opt2;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004531 u32 _pin_based_exec_control = 0;
4532 u32 _cpu_based_exec_control = 0;
Sheng Yangf78e0e22007-10-29 09:40:42 +08004533 u32 _cpu_based_2nd_exec_control = 0;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004534 u32 _vmexit_control = 0;
4535 u32 _vmentry_control = 0;
4536
Paolo Bonzini13893092018-02-26 13:40:09 +01004537 memset(vmcs_conf, 0, sizeof(*vmcs_conf));
Raghavendra K T10166742012-02-07 23:19:20 +05304538 min = CPU_BASED_HLT_EXITING |
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004539#ifdef CONFIG_X86_64
4540 CPU_BASED_CR8_LOAD_EXITING |
4541 CPU_BASED_CR8_STORE_EXITING |
4542#endif
Sheng Yangd56f5462008-04-25 10:13:16 +08004543 CPU_BASED_CR3_LOAD_EXITING |
4544 CPU_BASED_CR3_STORE_EXITING |
Quan Xu8eb73e22017-12-12 16:44:21 +08004545 CPU_BASED_UNCOND_IO_EXITING |
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004546 CPU_BASED_MOV_DR_EXITING |
Marcelo Tosattia7052892008-09-23 13:18:35 -03004547 CPU_BASED_USE_TSC_OFFSETING |
Wanpeng Li4d5422c2018-03-12 04:53:02 -07004548 CPU_BASED_MWAIT_EXITING |
4549 CPU_BASED_MONITOR_EXITING |
Avi Kivityfee84b02011-11-10 14:57:25 +02004550 CPU_BASED_INVLPG_EXITING |
4551 CPU_BASED_RDPMC_EXITING;
Anthony Liguori443381a2010-12-06 10:53:38 -06004552
Sheng Yangf78e0e22007-10-29 09:40:42 +08004553 opt = CPU_BASED_TPR_SHADOW |
Sheng Yang25c5f222008-03-28 13:18:56 +08004554 CPU_BASED_USE_MSR_BITMAPS |
Sheng Yangf78e0e22007-10-29 09:40:42 +08004555 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004556 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
4557 &_cpu_based_exec_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004558 return -EIO;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08004559#ifdef CONFIG_X86_64
4560 if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
4561 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
4562 ~CPU_BASED_CR8_STORE_EXITING;
4563#endif
Sheng Yangf78e0e22007-10-29 09:40:42 +08004564 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
Sheng Yangd56f5462008-04-25 10:13:16 +08004565 min2 = 0;
4566 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Yang Zhang8d146952013-01-25 10:18:50 +08004567 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Sheng Yang2384d2b2008-01-17 15:14:33 +08004568 SECONDARY_EXEC_WBINVD_EXITING |
Sheng Yangd56f5462008-04-25 10:13:16 +08004569 SECONDARY_EXEC_ENABLE_VPID |
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004570 SECONDARY_EXEC_ENABLE_EPT |
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08004571 SECONDARY_EXEC_UNRESTRICTED_GUEST |
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004572 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
Paolo Bonzini0367f202016-07-12 10:44:55 +02004573 SECONDARY_EXEC_DESC |
Mao, Junjiead756a12012-07-02 01:18:48 +00004574 SECONDARY_EXEC_RDTSCP |
Yang Zhang83d4c282013-01-25 10:18:49 +08004575 SECONDARY_EXEC_ENABLE_INVPCID |
Yang Zhangc7c9c562013-01-25 10:18:51 +08004576 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Abel Gordonabc4fc52013-04-18 14:35:25 +03004577 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Wanpeng Li20300092014-12-02 19:14:59 +08004578 SECONDARY_EXEC_SHADOW_VMCS |
Kai Huang843e4332015-01-28 10:54:28 +08004579 SECONDARY_EXEC_XSAVES |
David Hildenbrand736fdf72017-08-24 20:51:37 +02004580 SECONDARY_EXEC_RDSEED_EXITING |
4581 SECONDARY_EXEC_RDRAND_EXITING |
Xiao Guangrong8b3e34e2015-09-09 14:05:51 +08004582 SECONDARY_EXEC_ENABLE_PML |
Bandan Das2a499e42017-08-03 15:54:41 -04004583 SECONDARY_EXEC_TSC_SCALING |
Sean Christopherson0b665d32018-08-14 09:33:34 -07004584 SECONDARY_EXEC_ENABLE_VMFUNC |
4585 SECONDARY_EXEC_ENCLS_EXITING;
Sheng Yangd56f5462008-04-25 10:13:16 +08004586 if (adjust_vmx_controls(min2, opt2,
4587 MSR_IA32_VMX_PROCBASED_CTLS2,
Sheng Yangf78e0e22007-10-29 09:40:42 +08004588 &_cpu_based_2nd_exec_control) < 0)
4589 return -EIO;
4590 }
4591#ifndef CONFIG_X86_64
4592 if (!(_cpu_based_2nd_exec_control &
4593 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
4594 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
4595#endif
Yang Zhang83d4c282013-01-25 10:18:49 +08004596
4597 if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
4598 _cpu_based_2nd_exec_control &= ~(
Yang Zhang8d146952013-01-25 10:18:50 +08004599 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Yang Zhangc7c9c562013-01-25 10:18:51 +08004600 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
4601 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
Yang Zhang83d4c282013-01-25 10:18:49 +08004602
Wanpeng Li61f1dd92017-10-18 16:02:19 -07004603 rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
4604 &vmx_capability.ept, &vmx_capability.vpid);
4605
Sheng Yangd56f5462008-04-25 10:13:16 +08004606 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
Marcelo Tosattia7052892008-09-23 13:18:35 -03004607 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
4608 enabled */
Gleb Natapov5fff7d22009-08-27 18:41:30 +03004609 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
4610 CPU_BASED_CR3_STORE_EXITING |
4611 CPU_BASED_INVLPG_EXITING);
Wanpeng Li61f1dd92017-10-18 16:02:19 -07004612 } else if (vmx_capability.ept) {
4613 vmx_capability.ept = 0;
4614 pr_warn_once("EPT CAP should not exist if not support "
4615 "1-setting enable EPT VM-execution control\n");
4616 }
4617 if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
4618 vmx_capability.vpid) {
4619 vmx_capability.vpid = 0;
4620 pr_warn_once("VPID CAP should not exist if not support "
4621 "1-setting enable VPID VM-execution control\n");
Sheng Yangd56f5462008-04-25 10:13:16 +08004622 }
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004623
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02004624 min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004625#ifdef CONFIG_X86_64
4626 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
4627#endif
Yang Zhanga547c6d2013-04-11 19:25:10 +08004628 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02004629 VM_EXIT_CLEAR_BNDCFGS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004630 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
4631 &_vmexit_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004632 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004633
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01004634 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
4635 opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
4636 PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08004637 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
4638 &_pin_based_exec_control) < 0)
4639 return -EIO;
4640
Paolo Bonzini1c17c3e2016-07-08 11:53:38 +02004641 if (cpu_has_broken_vmx_preemption_timer())
4642 _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08004643 if (!(_cpu_based_2nd_exec_control &
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02004644 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
Yang Zhang01e439b2013-04-11 19:25:12 +08004645 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
4646
Paolo Bonzinic845f9c2014-02-21 10:55:44 +01004647 min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
Liu, Jinsongda8999d2014-02-24 10:55:46 +00004648 opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004649 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
4650 &_vmentry_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004651 return -EIO;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004652
Nguyen Anh Quynhc68876f2006-12-29 16:49:54 -08004653 rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004654
4655 /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
4656 if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004657 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004658
4659#ifdef CONFIG_X86_64
4660 /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
4661 if (vmx_msr_high & (1u<<16))
Yang, Sheng002c7f72007-07-31 14:23:01 +03004662 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004663#endif
4664
4665 /* Require Write-Back (WB) memory type for VMCS accesses. */
4666 if (((vmx_msr_high >> 18) & 15) != 6)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004667 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004668
Yang, Sheng002c7f72007-07-31 14:23:01 +03004669 vmcs_conf->size = vmx_msr_high & 0x1fff;
Paolo Bonzini16cb0252016-09-05 15:57:00 +02004670 vmcs_conf->order = get_order(vmcs_conf->size);
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03004671 vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01004672
Liran Alon2307af12018-06-29 22:59:04 +03004673 vmcs_conf->revision_id = vmx_msr_low;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004674
Yang, Sheng002c7f72007-07-31 14:23:01 +03004675 vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
4676 vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
Sheng Yangf78e0e22007-10-29 09:40:42 +08004677 vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
Yang, Sheng002c7f72007-07-31 14:23:01 +03004678 vmcs_conf->vmexit_ctrl = _vmexit_control;
4679 vmcs_conf->vmentry_ctrl = _vmentry_control;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004680
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01004681 if (static_branch_unlikely(&enable_evmcs))
4682 evmcs_sanitize_exec_ctrls(vmcs_conf);
4683
Avi Kivity110312c2010-12-21 12:54:20 +02004684 cpu_has_load_ia32_efer =
4685 allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
4686 VM_ENTRY_LOAD_IA32_EFER)
4687 && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
4688 VM_EXIT_LOAD_IA32_EFER);
4689
Gleb Natapov8bf00a52011-10-05 14:01:22 +02004690 cpu_has_load_perf_global_ctrl =
4691 allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
4692 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
4693 && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
4694 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
4695
4696 /*
4697 * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL
Andrea Gelminibb3541f2016-05-21 14:14:44 +02004698 * but due to errata below it can't be used. Workaround is to use
Gleb Natapov8bf00a52011-10-05 14:01:22 +02004699 * msr load mechanism to switch IA32_PERF_GLOBAL_CTRL.
4700 *
4701 * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32]
4702 *
4703 * AAK155 (model 26)
4704 * AAP115 (model 30)
4705 * AAT100 (model 37)
4706 * BC86,AAY89,BD102 (model 44)
4707 * BA97 (model 46)
4708 *
4709 */
4710 if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) {
4711 switch (boot_cpu_data.x86_model) {
4712 case 26:
4713 case 30:
4714 case 37:
4715 case 44:
4716 case 46:
4717 cpu_has_load_perf_global_ctrl = false;
4718 printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
4719 "does not work properly. Using workaround\n");
4720 break;
4721 default:
4722 break;
4723 }
4724 }
4725
Borislav Petkov782511b2016-04-04 22:25:03 +02004726 if (boot_cpu_has(X86_FEATURE_XSAVES))
Wanpeng Li20300092014-12-02 19:14:59 +08004727 rdmsrl(MSR_IA32_XSS, host_xss);
4728
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004729 return 0;
Nguyen Anh Quynhc68876f2006-12-29 16:49:54 -08004730}
Avi Kivity6aa8b732006-12-10 02:21:36 -08004731
Liran Alon491a6032018-06-23 02:35:12 +03004732static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004733{
4734 int node = cpu_to_node(cpu);
4735 struct page *pages;
4736 struct vmcs *vmcs;
4737
Vlastimil Babka96db8002015-09-08 15:03:50 -07004738 pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004739 if (!pages)
4740 return NULL;
4741 vmcs = page_address(pages);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004742 memset(vmcs, 0, vmcs_config.size);
Liran Alon2307af12018-06-29 22:59:04 +03004743
4744 /* KVM supports Enlightened VMCS v1 only */
4745 if (static_branch_unlikely(&enable_evmcs))
Liran Alon392b2f22018-06-23 02:35:01 +03004746 vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
Liran Alon2307af12018-06-29 22:59:04 +03004747 else
Liran Alon392b2f22018-06-23 02:35:01 +03004748 vmcs->hdr.revision_id = vmcs_config.revision_id;
Liran Alon2307af12018-06-29 22:59:04 +03004749
Liran Alon491a6032018-06-23 02:35:12 +03004750 if (shadow)
4751 vmcs->hdr.shadow_vmcs = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004752 return vmcs;
4753}
4754
Avi Kivity6aa8b732006-12-10 02:21:36 -08004755static void free_vmcs(struct vmcs *vmcs)
4756{
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004757 free_pages((unsigned long)vmcs, vmcs_config.order);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004758}
4759
Nadav Har'Eld462b812011-05-24 15:26:10 +03004760/*
4761 * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
4762 */
4763static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
4764{
4765 if (!loaded_vmcs->vmcs)
4766 return;
4767 loaded_vmcs_clear(loaded_vmcs);
4768 free_vmcs(loaded_vmcs->vmcs);
4769 loaded_vmcs->vmcs = NULL;
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004770 if (loaded_vmcs->msr_bitmap)
4771 free_page((unsigned long)loaded_vmcs->msr_bitmap);
Jim Mattson355f4fb2016-10-28 08:29:39 -07004772 WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
Nadav Har'Eld462b812011-05-24 15:26:10 +03004773}
4774
Liran Alon491a6032018-06-23 02:35:12 +03004775static struct vmcs *alloc_vmcs(bool shadow)
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004776{
Liran Alon491a6032018-06-23 02:35:12 +03004777 return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004778}
4779
4780static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
4781{
Liran Alon491a6032018-06-23 02:35:12 +03004782 loaded_vmcs->vmcs = alloc_vmcs(false);
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004783 if (!loaded_vmcs->vmcs)
4784 return -ENOMEM;
4785
4786 loaded_vmcs->shadow_vmcs = NULL;
4787 loaded_vmcs_init(loaded_vmcs);
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004788
4789 if (cpu_has_vmx_msr_bitmap()) {
4790 loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
4791 if (!loaded_vmcs->msr_bitmap)
4792 goto out_vmcs;
4793 memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02004794
Arnd Bergmann1f008e12018-05-25 17:36:17 +02004795 if (IS_ENABLED(CONFIG_HYPERV) &&
4796 static_branch_unlikely(&enable_evmcs) &&
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02004797 (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
4798 struct hv_enlightened_vmcs *evmcs =
4799 (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
4800
4801 evmcs->hv_enlightenments_control.msr_bitmap = 1;
4802 }
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004803 }
Sean Christophersond7ee0392018-07-23 12:32:47 -07004804
4805 memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
4806
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004807 return 0;
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004808
4809out_vmcs:
4810 free_loaded_vmcs(loaded_vmcs);
4811 return -ENOMEM;
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004812}
4813
Sam Ravnborg39959582007-06-01 00:47:13 -07004814static void free_kvm_area(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004815{
4816 int cpu;
4817
Zachary Amsden3230bb42009-09-29 11:38:37 -10004818 for_each_possible_cpu(cpu) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08004819 free_vmcs(per_cpu(vmxarea, cpu));
Zachary Amsden3230bb42009-09-29 11:38:37 -10004820 per_cpu(vmxarea, cpu) = NULL;
4821 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08004822}
4823
Jim Mattsond37f4262017-12-22 12:12:16 -08004824enum vmcs_field_width {
4825 VMCS_FIELD_WIDTH_U16 = 0,
4826 VMCS_FIELD_WIDTH_U64 = 1,
4827 VMCS_FIELD_WIDTH_U32 = 2,
4828 VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
Jim Mattson85fd5142017-07-07 12:51:41 -07004829};
4830
Jim Mattsond37f4262017-12-22 12:12:16 -08004831static inline int vmcs_field_width(unsigned long field)
Jim Mattson85fd5142017-07-07 12:51:41 -07004832{
4833 if (0x1 & field) /* the *_HIGH fields are all 32 bit */
Jim Mattsond37f4262017-12-22 12:12:16 -08004834 return VMCS_FIELD_WIDTH_U32;
Jim Mattson85fd5142017-07-07 12:51:41 -07004835 return (field >> 13) & 0x3 ;
4836}
4837
4838static inline int vmcs_field_readonly(unsigned long field)
4839{
4840 return (((field >> 10) & 0x3) == 1);
4841}
4842
Bandan Dasfe2b2012014-04-21 15:20:14 -04004843static void init_vmcs_shadow_fields(void)
4844{
4845 int i, j;
4846
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004847 for (i = j = 0; i < max_shadow_read_only_fields; i++) {
4848 u16 field = shadow_read_only_fields[i];
Jim Mattsond37f4262017-12-22 12:12:16 -08004849 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004850 (i + 1 == max_shadow_read_only_fields ||
4851 shadow_read_only_fields[i + 1] != field + 1))
4852 pr_err("Missing field from shadow_read_only_field %x\n",
4853 field + 1);
4854
4855 clear_bit(field, vmx_vmread_bitmap);
4856#ifdef CONFIG_X86_64
4857 if (field & 1)
4858 continue;
4859#endif
4860 if (j < i)
4861 shadow_read_only_fields[j] = field;
4862 j++;
4863 }
4864 max_shadow_read_only_fields = j;
Bandan Dasfe2b2012014-04-21 15:20:14 -04004865
4866 for (i = j = 0; i < max_shadow_read_write_fields; i++) {
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004867 u16 field = shadow_read_write_fields[i];
Jim Mattsond37f4262017-12-22 12:12:16 -08004868 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004869 (i + 1 == max_shadow_read_write_fields ||
4870 shadow_read_write_fields[i + 1] != field + 1))
4871 pr_err("Missing field from shadow_read_write_field %x\n",
4872 field + 1);
4873
Paolo Bonzinic5d167b2017-12-13 11:05:19 +01004874 /*
4875 * PML and the preemption timer can be emulated, but the
4876 * processor cannot vmwrite to fields that don't exist
4877 * on bare metal.
4878 */
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004879 switch (field) {
Paolo Bonzinic5d167b2017-12-13 11:05:19 +01004880 case GUEST_PML_INDEX:
4881 if (!cpu_has_vmx_pml())
4882 continue;
4883 break;
4884 case VMX_PREEMPTION_TIMER_VALUE:
4885 if (!cpu_has_vmx_preemption_timer())
4886 continue;
4887 break;
4888 case GUEST_INTR_STATUS:
4889 if (!cpu_has_vmx_apicv())
Bandan Dasfe2b2012014-04-21 15:20:14 -04004890 continue;
4891 break;
4892 default:
4893 break;
4894 }
4895
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004896 clear_bit(field, vmx_vmwrite_bitmap);
4897 clear_bit(field, vmx_vmread_bitmap);
4898#ifdef CONFIG_X86_64
4899 if (field & 1)
4900 continue;
4901#endif
Bandan Dasfe2b2012014-04-21 15:20:14 -04004902 if (j < i)
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004903 shadow_read_write_fields[j] = field;
Bandan Dasfe2b2012014-04-21 15:20:14 -04004904 j++;
4905 }
4906 max_shadow_read_write_fields = j;
Bandan Dasfe2b2012014-04-21 15:20:14 -04004907}
4908
Avi Kivity6aa8b732006-12-10 02:21:36 -08004909static __init int alloc_kvm_area(void)
4910{
4911 int cpu;
4912
Zachary Amsden3230bb42009-09-29 11:38:37 -10004913 for_each_possible_cpu(cpu) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08004914 struct vmcs *vmcs;
4915
Liran Alon491a6032018-06-23 02:35:12 +03004916 vmcs = alloc_vmcs_cpu(false, cpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004917 if (!vmcs) {
4918 free_kvm_area();
4919 return -ENOMEM;
4920 }
4921
Liran Alon2307af12018-06-29 22:59:04 +03004922 /*
4923 * When eVMCS is enabled, alloc_vmcs_cpu() sets
4924 * vmcs->revision_id to KVM_EVMCS_VERSION instead of
4925 * revision_id reported by MSR_IA32_VMX_BASIC.
4926 *
4927 * However, even though not explictly documented by
4928 * TLFS, VMXArea passed as VMXON argument should
4929 * still be marked with revision_id reported by
4930 * physical CPU.
4931 */
4932 if (static_branch_unlikely(&enable_evmcs))
Liran Alon392b2f22018-06-23 02:35:01 +03004933 vmcs->hdr.revision_id = vmcs_config.revision_id;
Liran Alon2307af12018-06-29 22:59:04 +03004934
Avi Kivity6aa8b732006-12-10 02:21:36 -08004935 per_cpu(vmxarea, cpu) = vmcs;
4936 }
4937 return 0;
4938}
4939
Gleb Natapov91b0aa22013-01-21 15:36:47 +02004940static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
Gleb Natapovd99e4152012-12-20 16:57:45 +02004941 struct kvm_segment *save)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004942{
Gleb Natapovd99e4152012-12-20 16:57:45 +02004943 if (!emulate_invalid_guest_state) {
4944 /*
4945 * CS and SS RPL should be equal during guest entry according
4946 * to VMX spec, but in reality it is not always so. Since vcpu
4947 * is in the middle of the transition from real mode to
4948 * protected mode it is safe to assume that RPL 0 is a good
4949 * default value.
4950 */
4951 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
Nadav Amitb32a9912015-03-29 16:33:04 +03004952 save->selector &= ~SEGMENT_RPL_MASK;
4953 save->dpl = save->selector & SEGMENT_RPL_MASK;
Gleb Natapovd99e4152012-12-20 16:57:45 +02004954 save->s = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004955 }
Gleb Natapovd99e4152012-12-20 16:57:45 +02004956 vmx_set_segment(vcpu, save, seg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004957}
4958
4959static void enter_pmode(struct kvm_vcpu *vcpu)
4960{
4961 unsigned long flags;
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03004962 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004963
Gleb Natapovd99e4152012-12-20 16:57:45 +02004964 /*
4965 * Update real mode segment cache. It may be not up-to-date if sement
4966 * register was written while vcpu was in a guest mode.
4967 */
4968 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
4969 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
4970 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
4971 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
4972 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
4973 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
4974
Avi Kivity7ffd92c2009-06-09 14:10:45 +03004975 vmx->rmode.vm86_active = 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004976
Avi Kivity2fb92db2011-04-27 19:42:18 +03004977 vmx_segment_cache_clear(vmx);
4978
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004979 vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004980
4981 flags = vmcs_readl(GUEST_RFLAGS);
Avi Kivity78ac8b42010-04-08 18:19:35 +03004982 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
4983 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004984 vmcs_writel(GUEST_RFLAGS, flags);
4985
Rusty Russell66aee912007-07-17 23:34:16 +10004986 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
4987 (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
Avi Kivity6aa8b732006-12-10 02:21:36 -08004988
4989 update_exception_bitmap(vcpu);
4990
Gleb Natapov91b0aa22013-01-21 15:36:47 +02004991 fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
4992 fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
4993 fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
4994 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
4995 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
4996 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004997}
4998
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004999static void fix_rmode_seg(int seg, struct kvm_segment *save)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005000{
Mathias Krause772e0312012-08-30 01:30:19 +02005001 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Gleb Natapovd99e4152012-12-20 16:57:45 +02005002 struct kvm_segment var = *save;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005003
Gleb Natapovd99e4152012-12-20 16:57:45 +02005004 var.dpl = 0x3;
5005 if (seg == VCPU_SREG_CS)
5006 var.type = 0x3;
5007
5008 if (!emulate_invalid_guest_state) {
5009 var.selector = var.base >> 4;
5010 var.base = var.base & 0xffff0;
5011 var.limit = 0xffff;
5012 var.g = 0;
5013 var.db = 0;
5014 var.present = 1;
5015 var.s = 1;
5016 var.l = 0;
5017 var.unusable = 0;
5018 var.type = 0x3;
5019 var.avl = 0;
5020 if (save->base & 0xf)
5021 printk_once(KERN_WARNING "kvm: segment base is not "
5022 "paragraph aligned when entering "
5023 "protected mode (seg=%d)", seg);
5024 }
5025
5026 vmcs_write16(sf->selector, var.selector);
Chao Peng96794e42017-02-21 03:50:01 -05005027 vmcs_writel(sf->base, var.base);
Gleb Natapovd99e4152012-12-20 16:57:45 +02005028 vmcs_write32(sf->limit, var.limit);
5029 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
Avi Kivity6aa8b732006-12-10 02:21:36 -08005030}
5031
5032static void enter_rmode(struct kvm_vcpu *vcpu)
5033{
5034 unsigned long flags;
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03005035 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005036 struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005037
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005038 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
5039 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
5040 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
5041 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
5042 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
Gleb Natapovc6ad11532012-12-12 19:10:51 +02005043 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
5044 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005045
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005046 vmx->rmode.vm86_active = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005047
Gleb Natapov776e58e2011-03-13 12:34:27 +02005048 /*
5049 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
Jan Kiszka4918c6c2013-03-15 08:38:56 +01005050 * vcpu. Warn the user that an update is overdue.
Gleb Natapov776e58e2011-03-13 12:34:27 +02005051 */
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005052 if (!kvm_vmx->tss_addr)
Gleb Natapov776e58e2011-03-13 12:34:27 +02005053 printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
5054 "called before entering vcpu\n");
Gleb Natapov776e58e2011-03-13 12:34:27 +02005055
Avi Kivity2fb92db2011-04-27 19:42:18 +03005056 vmx_segment_cache_clear(vmx);
5057
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005058 vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005059 vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005060 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
5061
5062 flags = vmcs_readl(GUEST_RFLAGS);
Avi Kivity78ac8b42010-04-08 18:19:35 +03005063 vmx->rmode.save_rflags = flags;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005064
Glauber de Oliveira Costa053de042008-01-30 13:31:27 +01005065 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005066
5067 vmcs_writel(GUEST_RFLAGS, flags);
Rusty Russell66aee912007-07-17 23:34:16 +10005068 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005069 update_exception_bitmap(vcpu);
5070
Gleb Natapovd99e4152012-12-20 16:57:45 +02005071 fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
5072 fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
5073 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
5074 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
5075 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
5076 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03005077
Eddie Dong8668a3c2007-10-10 14:26:45 +08005078 kvm_mmu_reset_context(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005079}
5080
Amit Shah401d10d2009-02-20 22:53:37 +05305081static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
5082{
5083 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03005084 struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
5085
5086 if (!msr)
5087 return;
Amit Shah401d10d2009-02-20 22:53:37 +05305088
Avi Kivityf6801df2010-01-21 15:31:50 +02005089 vcpu->arch.efer = efer;
Amit Shah401d10d2009-02-20 22:53:37 +05305090 if (efer & EFER_LMA) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02005091 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Amit Shah401d10d2009-02-20 22:53:37 +05305092 msr->data = efer;
5093 } else {
Gleb Natapov2961e8762013-11-25 15:37:13 +02005094 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Amit Shah401d10d2009-02-20 22:53:37 +05305095
5096 msr->data = efer & ~EFER_LME;
5097 }
5098 setup_msrs(vmx);
5099}
5100
Avi Kivity05b3e0c2006-12-13 00:33:45 -08005101#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08005102
5103static void enter_lmode(struct kvm_vcpu *vcpu)
5104{
5105 u32 guest_tr_ar;
5106
Avi Kivity2fb92db2011-04-27 19:42:18 +03005107 vmx_segment_cache_clear(to_vmx(vcpu));
5108
Avi Kivity6aa8b732006-12-10 02:21:36 -08005109 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005110 if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
Jan Kiszkabd801582011-09-12 11:26:22 +02005111 pr_debug_ratelimited("%s: tss fixup for long mode. \n",
5112 __func__);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005113 vmcs_write32(GUEST_TR_AR_BYTES,
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005114 (guest_tr_ar & ~VMX_AR_TYPE_MASK)
5115 | VMX_AR_TYPE_BUSY_64_TSS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005116 }
Avi Kivityda38f432010-07-06 11:30:49 +03005117 vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005118}
5119
5120static void exit_lmode(struct kvm_vcpu *vcpu)
5121{
Gleb Natapov2961e8762013-11-25 15:37:13 +02005122 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Avi Kivityda38f432010-07-06 11:30:49 +03005123 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005124}
5125
5126#endif
5127
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08005128static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
5129 bool invalidate_gpa)
Sheng Yang2384d2b2008-01-17 15:14:33 +08005130{
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08005131 if (enable_ept && (invalidate_gpa || !enable_vpid)) {
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +02005132 if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
Xiao Guangrongdd180b32010-07-03 16:02:42 +08005133 return;
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +02005134 ept_sync_context(construct_eptp(vcpu,
5135 vcpu->arch.mmu->root_hpa));
Jim Mattsonf0b98c02017-03-15 07:56:11 -07005136 } else {
5137 vpid_sync_context(vpid);
Xiao Guangrongdd180b32010-07-03 16:02:42 +08005138 }
Sheng Yang2384d2b2008-01-17 15:14:33 +08005139}
5140
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08005141static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
Wanpeng Lidd5f5342015-09-23 18:26:57 +08005142{
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08005143 __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
Wanpeng Lidd5f5342015-09-23 18:26:57 +08005144}
5145
Junaid Shahidfaff8752018-06-29 13:10:05 -07005146static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
5147{
5148 int vpid = to_vmx(vcpu)->vpid;
5149
5150 if (!vpid_sync_vcpu_addr(vpid, addr))
5151 vpid_sync_context(vpid);
5152
5153 /*
5154 * If VPIDs are not supported or enabled, then the above is a no-op.
5155 * But we don't really need a TLB flush in that case anyway, because
5156 * each VM entry/exit includes an implicit flush when VPID is 0.
5157 */
5158}
5159
Avi Kivitye8467fd2009-12-29 18:43:06 +02005160static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
5161{
5162 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
5163
5164 vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
5165 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
5166}
5167
Avi Kivityaff48ba2010-12-05 18:56:11 +02005168static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
5169{
Sean Christophersonb4d18512018-03-05 12:04:40 -08005170 if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
Avi Kivityaff48ba2010-12-05 18:56:11 +02005171 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
5172 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
5173}
5174
Anthony Liguori25c4c272007-04-27 09:29:21 +03005175static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
Avi Kivity399badf2007-01-05 16:36:38 -08005176{
Avi Kivityfc78f512009-12-07 12:16:48 +02005177 ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
5178
5179 vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
5180 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
Avi Kivity399badf2007-01-05 16:36:38 -08005181}
5182
Sheng Yang14394422008-04-28 12:24:45 +08005183static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
5184{
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005185 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
5186
Avi Kivity6de4f3a2009-05-31 22:58:47 +03005187 if (!test_bit(VCPU_EXREG_PDPTR,
5188 (unsigned long *)&vcpu->arch.regs_dirty))
5189 return;
5190
Sheng Yang14394422008-04-28 12:24:45 +08005191 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005192 vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
5193 vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
5194 vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
5195 vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
Sheng Yang14394422008-04-28 12:24:45 +08005196 }
5197}
5198
Avi Kivity8f5d5492009-05-31 18:41:29 +03005199static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
5200{
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005201 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
5202
Avi Kivity8f5d5492009-05-31 18:41:29 +03005203 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005204 mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
5205 mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
5206 mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
5207 mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
Avi Kivity8f5d5492009-05-31 18:41:29 +03005208 }
Avi Kivity6de4f3a2009-05-31 22:58:47 +03005209
5210 __set_bit(VCPU_EXREG_PDPTR,
5211 (unsigned long *)&vcpu->arch.regs_avail);
5212 __set_bit(VCPU_EXREG_PDPTR,
5213 (unsigned long *)&vcpu->arch.regs_dirty);
Avi Kivity8f5d5492009-05-31 18:41:29 +03005214}
5215
David Matlack38991522016-11-29 18:14:08 -08005216static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
5217{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005218 u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
5219 u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
David Matlack38991522016-11-29 18:14:08 -08005220 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5221
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005222 if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
David Matlack38991522016-11-29 18:14:08 -08005223 SECONDARY_EXEC_UNRESTRICTED_GUEST &&
5224 nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
5225 fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
5226
5227 return fixed_bits_valid(val, fixed0, fixed1);
5228}
5229
5230static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
5231{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005232 u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
5233 u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
David Matlack38991522016-11-29 18:14:08 -08005234
5235 return fixed_bits_valid(val, fixed0, fixed1);
5236}
5237
5238static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
5239{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005240 u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
5241 u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
David Matlack38991522016-11-29 18:14:08 -08005242
5243 return fixed_bits_valid(val, fixed0, fixed1);
5244}
5245
5246/* No difference in the restrictions on guest and host CR4 in VMX operation. */
5247#define nested_guest_cr4_valid nested_cr4_valid
5248#define nested_host_cr4_valid nested_cr4_valid
5249
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005250static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
Sheng Yang14394422008-04-28 12:24:45 +08005251
5252static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
5253 unsigned long cr0,
5254 struct kvm_vcpu *vcpu)
5255{
Marcelo Tosatti5233dd52011-06-06 14:27:47 -03005256 if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
5257 vmx_decache_cr3(vcpu);
Sheng Yang14394422008-04-28 12:24:45 +08005258 if (!(cr0 & X86_CR0_PG)) {
5259 /* From paging/starting to nonpaging */
5260 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
Sheng Yang65267ea2008-06-18 14:43:38 +08005261 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
Sheng Yang14394422008-04-28 12:24:45 +08005262 (CPU_BASED_CR3_LOAD_EXITING |
5263 CPU_BASED_CR3_STORE_EXITING));
5264 vcpu->arch.cr0 = cr0;
Avi Kivityfc78f512009-12-07 12:16:48 +02005265 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
Sheng Yang14394422008-04-28 12:24:45 +08005266 } else if (!is_paging(vcpu)) {
5267 /* From nonpaging to paging */
5268 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
Sheng Yang65267ea2008-06-18 14:43:38 +08005269 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
Sheng Yang14394422008-04-28 12:24:45 +08005270 ~(CPU_BASED_CR3_LOAD_EXITING |
5271 CPU_BASED_CR3_STORE_EXITING));
5272 vcpu->arch.cr0 = cr0;
Avi Kivityfc78f512009-12-07 12:16:48 +02005273 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
Sheng Yang14394422008-04-28 12:24:45 +08005274 }
Sheng Yang95eb84a2009-08-19 09:52:18 +08005275
5276 if (!(cr0 & X86_CR0_WP))
5277 *hw_cr0 &= ~X86_CR0_WP;
Sheng Yang14394422008-04-28 12:24:45 +08005278}
5279
Avi Kivity6aa8b732006-12-10 02:21:36 -08005280static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
5281{
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005282 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005283 unsigned long hw_cr0;
5284
Sean Christopherson3de63472018-07-13 08:42:30 -07005285 hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005286 if (enable_unrestricted_guest)
Gleb Natapov50378782013-02-04 16:00:28 +02005287 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
Gleb Natapov218e7632013-01-21 15:36:45 +02005288 else {
Gleb Natapov50378782013-02-04 16:00:28 +02005289 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
Sheng Yang14394422008-04-28 12:24:45 +08005290
Gleb Natapov218e7632013-01-21 15:36:45 +02005291 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
5292 enter_pmode(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005293
Gleb Natapov218e7632013-01-21 15:36:45 +02005294 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
5295 enter_rmode(vcpu);
5296 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08005297
Avi Kivity05b3e0c2006-12-13 00:33:45 -08005298#ifdef CONFIG_X86_64
Avi Kivityf6801df2010-01-21 15:31:50 +02005299 if (vcpu->arch.efer & EFER_LME) {
Rusty Russell707d92fa2007-07-17 23:19:08 +10005300 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
Avi Kivity6aa8b732006-12-10 02:21:36 -08005301 enter_lmode(vcpu);
Rusty Russell707d92fa2007-07-17 23:19:08 +10005302 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
Avi Kivity6aa8b732006-12-10 02:21:36 -08005303 exit_lmode(vcpu);
5304 }
5305#endif
5306
Sean Christophersonb4d18512018-03-05 12:04:40 -08005307 if (enable_ept && !enable_unrestricted_guest)
Sheng Yang14394422008-04-28 12:24:45 +08005308 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
5309
Avi Kivity6aa8b732006-12-10 02:21:36 -08005310 vmcs_writel(CR0_READ_SHADOW, cr0);
Sheng Yang14394422008-04-28 12:24:45 +08005311 vmcs_writel(GUEST_CR0, hw_cr0);
Zhang Xiantaoad312c72007-12-13 23:50:52 +08005312 vcpu->arch.cr0 = cr0;
Gleb Natapov14168782013-01-21 15:36:49 +02005313
5314 /* depends on vcpu->arch.cr0 to be set to a new value */
5315 vmx->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005316}
5317
Yu Zhang855feb62017-08-24 20:27:55 +08005318static int get_ept_level(struct kvm_vcpu *vcpu)
5319{
5320 if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
5321 return 5;
5322 return 4;
5323}
5324
Peter Feiner995f00a2017-06-30 17:26:32 -07005325static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
Sheng Yang14394422008-04-28 12:24:45 +08005326{
Yu Zhang855feb62017-08-24 20:27:55 +08005327 u64 eptp = VMX_EPTP_MT_WB;
Sheng Yang14394422008-04-28 12:24:45 +08005328
Yu Zhang855feb62017-08-24 20:27:55 +08005329 eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
Sheng Yang14394422008-04-28 12:24:45 +08005330
Peter Feiner995f00a2017-06-30 17:26:32 -07005331 if (enable_ept_ad_bits &&
5332 (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
David Hildenbrandbb97a012017-08-10 23:15:28 +02005333 eptp |= VMX_EPTP_AD_ENABLE_BIT;
Sheng Yang14394422008-04-28 12:24:45 +08005334 eptp |= (root_hpa & PAGE_MASK);
5335
5336 return eptp;
5337}
5338
Avi Kivity6aa8b732006-12-10 02:21:36 -08005339static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
5340{
Tianyu Lan877ad952018-07-19 08:40:23 +00005341 struct kvm *kvm = vcpu->kvm;
Sheng Yang14394422008-04-28 12:24:45 +08005342 unsigned long guest_cr3;
5343 u64 eptp;
5344
5345 guest_cr3 = cr3;
Avi Kivity089d0342009-03-23 18:26:32 +02005346 if (enable_ept) {
Peter Feiner995f00a2017-06-30 17:26:32 -07005347 eptp = construct_eptp(vcpu, cr3);
Sheng Yang14394422008-04-28 12:24:45 +08005348 vmcs_write64(EPT_POINTER, eptp);
Tianyu Lan877ad952018-07-19 08:40:23 +00005349
5350 if (kvm_x86_ops->tlb_remote_flush) {
5351 spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
5352 to_vmx(vcpu)->ept_pointer = eptp;
5353 to_kvm_vmx(kvm)->ept_pointers_match
5354 = EPT_POINTERS_CHECK;
5355 spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
5356 }
5357
Sean Christophersone90008d2018-03-05 12:04:37 -08005358 if (enable_unrestricted_guest || is_paging(vcpu) ||
5359 is_guest_mode(vcpu))
Jan Kiszka59ab5a82013-08-08 16:26:29 +02005360 guest_cr3 = kvm_read_cr3(vcpu);
5361 else
Tianyu Lan877ad952018-07-19 08:40:23 +00005362 guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
Marcelo Tosatti7c93be442009-10-26 16:48:33 -02005363 ept_load_pdptrs(vcpu);
Sheng Yang14394422008-04-28 12:24:45 +08005364 }
5365
Sheng Yang14394422008-04-28 12:24:45 +08005366 vmcs_writel(GUEST_CR3, guest_cr3);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005367}
5368
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005369static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005370{
Ben Serebrin085e68e2015-04-16 11:58:05 -07005371 /*
5372 * Pass through host's Machine Check Enable value to hw_cr4, which
5373 * is in force while we are in guest mode. Do not let guests control
5374 * this bit, even if host CR4.MCE == 0.
5375 */
Sean Christopherson5dc1f042018-03-05 12:04:39 -08005376 unsigned long hw_cr4;
5377
5378 hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
5379 if (enable_unrestricted_guest)
5380 hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
5381 else if (to_vmx(vcpu)->rmode.vm86_active)
5382 hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
5383 else
5384 hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
Sheng Yang14394422008-04-28 12:24:45 +08005385
Sean Christopherson64f7a112018-04-30 10:01:06 -07005386 if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
5387 if (cr4 & X86_CR4_UMIP) {
5388 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
Paolo Bonzini0367f202016-07-12 10:44:55 +02005389 SECONDARY_EXEC_DESC);
Sean Christopherson64f7a112018-04-30 10:01:06 -07005390 hw_cr4 &= ~X86_CR4_UMIP;
5391 } else if (!is_guest_mode(vcpu) ||
5392 !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
5393 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
5394 SECONDARY_EXEC_DESC);
5395 }
Paolo Bonzini0367f202016-07-12 10:44:55 +02005396
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005397 if (cr4 & X86_CR4_VMXE) {
5398 /*
5399 * To use VMXON (and later other VMX instructions), a guest
5400 * must first be able to turn on cr4.VMXE (see handle_vmon()).
5401 * So basically the check on whether to allow nested VMX
Paolo Bonzini5bea5122018-09-18 15:19:17 +02005402 * is here. We operate under the default treatment of SMM,
5403 * so VMX cannot be enabled under SMM.
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005404 */
Paolo Bonzini5bea5122018-09-18 15:19:17 +02005405 if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005406 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01005407 }
David Matlack38991522016-11-29 18:14:08 -08005408
5409 if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005410 return 1;
5411
Zhang Xiantaoad312c72007-12-13 23:50:52 +08005412 vcpu->arch.cr4 = cr4;
Sheng Yang14394422008-04-28 12:24:45 +08005413
Sean Christopherson5dc1f042018-03-05 12:04:39 -08005414 if (!enable_unrestricted_guest) {
5415 if (enable_ept) {
5416 if (!is_paging(vcpu)) {
5417 hw_cr4 &= ~X86_CR4_PAE;
5418 hw_cr4 |= X86_CR4_PSE;
5419 } else if (!(cr4 & X86_CR4_PAE)) {
5420 hw_cr4 &= ~X86_CR4_PAE;
5421 }
5422 }
5423
Radim Krčmář656ec4a2015-11-02 22:20:00 +01005424 /*
Huaitong Handdba2622016-03-22 16:51:15 +08005425 * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
5426 * hardware. To emulate this behavior, SMEP/SMAP/PKU needs
5427 * to be manually disabled when guest switches to non-paging
5428 * mode.
5429 *
5430 * If !enable_unrestricted_guest, the CPU is always running
5431 * with CR0.PG=1 and CR4 needs to be modified.
5432 * If enable_unrestricted_guest, the CPU automatically
5433 * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
Radim Krčmář656ec4a2015-11-02 22:20:00 +01005434 */
Sean Christopherson5dc1f042018-03-05 12:04:39 -08005435 if (!is_paging(vcpu))
5436 hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
5437 }
Radim Krčmář656ec4a2015-11-02 22:20:00 +01005438
Sheng Yang14394422008-04-28 12:24:45 +08005439 vmcs_writel(CR4_READ_SHADOW, cr4);
5440 vmcs_writel(GUEST_CR4, hw_cr4);
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005441 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005442}
5443
Avi Kivity6aa8b732006-12-10 02:21:36 -08005444static void vmx_get_segment(struct kvm_vcpu *vcpu,
5445 struct kvm_segment *var, int seg)
5446{
Avi Kivitya9179492011-01-03 14:28:52 +02005447 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005448 u32 ar;
5449
Gleb Natapovc6ad11532012-12-12 19:10:51 +02005450 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005451 *var = vmx->rmode.segs[seg];
Avi Kivitya9179492011-01-03 14:28:52 +02005452 if (seg == VCPU_SREG_TR
Avi Kivity2fb92db2011-04-27 19:42:18 +03005453 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005454 return;
Avi Kivity1390a282012-08-21 17:07:08 +03005455 var->base = vmx_read_guest_seg_base(vmx, seg);
5456 var->selector = vmx_read_guest_seg_selector(vmx, seg);
5457 return;
Avi Kivitya9179492011-01-03 14:28:52 +02005458 }
Avi Kivity2fb92db2011-04-27 19:42:18 +03005459 var->base = vmx_read_guest_seg_base(vmx, seg);
5460 var->limit = vmx_read_guest_seg_limit(vmx, seg);
5461 var->selector = vmx_read_guest_seg_selector(vmx, seg);
5462 ar = vmx_read_guest_seg_ar(vmx, seg);
Gleb Natapov03617c12013-06-28 13:17:18 +03005463 var->unusable = (ar >> 16) & 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005464 var->type = ar & 15;
5465 var->s = (ar >> 4) & 1;
5466 var->dpl = (ar >> 5) & 3;
Gleb Natapov03617c12013-06-28 13:17:18 +03005467 /*
5468 * Some userspaces do not preserve unusable property. Since usable
5469 * segment has to be present according to VMX spec we can use present
5470 * property to amend userspace bug by making unusable segment always
5471 * nonpresent. vmx_segment_access_rights() already marks nonpresent
5472 * segment as unusable.
5473 */
5474 var->present = !var->unusable;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005475 var->avl = (ar >> 12) & 1;
5476 var->l = (ar >> 13) & 1;
5477 var->db = (ar >> 14) & 1;
5478 var->g = (ar >> 15) & 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005479}
5480
Avi Kivitya9179492011-01-03 14:28:52 +02005481static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
5482{
Avi Kivitya9179492011-01-03 14:28:52 +02005483 struct kvm_segment s;
5484
5485 if (to_vmx(vcpu)->rmode.vm86_active) {
5486 vmx_get_segment(vcpu, &s, seg);
5487 return s.base;
5488 }
Avi Kivity2fb92db2011-04-27 19:42:18 +03005489 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
Avi Kivitya9179492011-01-03 14:28:52 +02005490}
5491
Marcelo Tosattib09408d2013-01-07 19:27:06 -02005492static int vmx_get_cpl(struct kvm_vcpu *vcpu)
Izik Eidus2e4d2652008-03-24 19:38:34 +02005493{
Marcelo Tosattib09408d2013-01-07 19:27:06 -02005494 struct vcpu_vmx *vmx = to_vmx(vcpu);
5495
Paolo Bonziniae9fedc2014-05-14 09:39:49 +02005496 if (unlikely(vmx->rmode.vm86_active))
Izik Eidus2e4d2652008-03-24 19:38:34 +02005497 return 0;
Paolo Bonziniae9fedc2014-05-14 09:39:49 +02005498 else {
5499 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005500 return VMX_AR_DPL(ar);
Avi Kivity69c73022011-03-07 15:26:44 +02005501 }
Avi Kivity69c73022011-03-07 15:26:44 +02005502}
5503
Avi Kivity653e3102007-05-07 10:55:37 +03005504static u32 vmx_segment_access_rights(struct kvm_segment *var)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005505{
Avi Kivity6aa8b732006-12-10 02:21:36 -08005506 u32 ar;
5507
Avi Kivityf0495f92012-06-07 17:06:10 +03005508 if (var->unusable || !var->present)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005509 ar = 1 << 16;
5510 else {
5511 ar = var->type & 15;
5512 ar |= (var->s & 1) << 4;
5513 ar |= (var->dpl & 3) << 5;
5514 ar |= (var->present & 1) << 7;
5515 ar |= (var->avl & 1) << 12;
5516 ar |= (var->l & 1) << 13;
5517 ar |= (var->db & 1) << 14;
5518 ar |= (var->g & 1) << 15;
5519 }
Avi Kivity653e3102007-05-07 10:55:37 +03005520
5521 return ar;
5522}
5523
5524static void vmx_set_segment(struct kvm_vcpu *vcpu,
5525 struct kvm_segment *var, int seg)
5526{
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005527 struct vcpu_vmx *vmx = to_vmx(vcpu);
Mathias Krause772e0312012-08-30 01:30:19 +02005528 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Avi Kivity653e3102007-05-07 10:55:37 +03005529
Avi Kivity2fb92db2011-04-27 19:42:18 +03005530 vmx_segment_cache_clear(vmx);
5531
Gleb Natapov1ecd50a2012-12-12 19:10:54 +02005532 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
5533 vmx->rmode.segs[seg] = *var;
5534 if (seg == VCPU_SREG_TR)
5535 vmcs_write16(sf->selector, var->selector);
5536 else if (var->s)
5537 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
Gleb Natapovd99e4152012-12-20 16:57:45 +02005538 goto out;
Avi Kivity653e3102007-05-07 10:55:37 +03005539 }
Gleb Natapov1ecd50a2012-12-12 19:10:54 +02005540
Avi Kivity653e3102007-05-07 10:55:37 +03005541 vmcs_writel(sf->base, var->base);
5542 vmcs_write32(sf->limit, var->limit);
5543 vmcs_write16(sf->selector, var->selector);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005544
5545 /*
5546 * Fix the "Accessed" bit in AR field of segment registers for older
5547 * qemu binaries.
5548 * IA32 arch specifies that at the time of processor reset the
5549 * "Accessed" bit in the AR field of segment registers is 1. And qemu
Guo Chao0fa06072012-06-28 15:16:19 +08005550 * is setting it to 0 in the userland code. This causes invalid guest
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005551 * state vmexit when "unrestricted guest" mode is turned on.
5552 * Fix for this setup issue in cpu_reset is being pushed in the qemu
5553 * tree. Newer qemu binaries with that qemu fix would not need this
5554 * kvm hack.
5555 */
5556 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
Gleb Natapovf924d662012-12-12 19:10:55 +02005557 var->type |= 0x1; /* Accessed */
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005558
Gleb Natapovf924d662012-12-12 19:10:55 +02005559 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
Gleb Natapovd99e4152012-12-20 16:57:45 +02005560
5561out:
Paolo Bonzini98eb2f82014-03-27 09:51:52 +01005562 vmx->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005563}
5564
Avi Kivity6aa8b732006-12-10 02:21:36 -08005565static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
5566{
Avi Kivity2fb92db2011-04-27 19:42:18 +03005567 u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005568
5569 *db = (ar >> 14) & 1;
5570 *l = (ar >> 13) & 1;
5571}
5572
Gleb Natapov89a27f42010-02-16 10:51:48 +02005573static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005574{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005575 dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
5576 dt->address = vmcs_readl(GUEST_IDTR_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005577}
5578
Gleb Natapov89a27f42010-02-16 10:51:48 +02005579static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005580{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005581 vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
5582 vmcs_writel(GUEST_IDTR_BASE, dt->address);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005583}
5584
Gleb Natapov89a27f42010-02-16 10:51:48 +02005585static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005586{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005587 dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
5588 dt->address = vmcs_readl(GUEST_GDTR_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005589}
5590
Gleb Natapov89a27f42010-02-16 10:51:48 +02005591static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005592{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005593 vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
5594 vmcs_writel(GUEST_GDTR_BASE, dt->address);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005595}
5596
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005597static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
5598{
5599 struct kvm_segment var;
5600 u32 ar;
5601
5602 vmx_get_segment(vcpu, &var, seg);
Gleb Natapov07f42f52012-12-12 19:10:49 +02005603 var.dpl = 0x3;
Gleb Natapov0647f4a2012-12-12 19:10:50 +02005604 if (seg == VCPU_SREG_CS)
5605 var.type = 0x3;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005606 ar = vmx_segment_access_rights(&var);
5607
5608 if (var.base != (var.selector << 4))
5609 return false;
Gleb Natapov89efbed2012-12-20 16:57:44 +02005610 if (var.limit != 0xffff)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005611 return false;
Gleb Natapov07f42f52012-12-12 19:10:49 +02005612 if (ar != 0xf3)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005613 return false;
5614
5615 return true;
5616}
5617
5618static bool code_segment_valid(struct kvm_vcpu *vcpu)
5619{
5620 struct kvm_segment cs;
5621 unsigned int cs_rpl;
5622
5623 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
Nadav Amitb32a9912015-03-29 16:33:04 +03005624 cs_rpl = cs.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005625
Avi Kivity1872a3f2009-01-04 23:26:52 +02005626 if (cs.unusable)
5627 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005628 if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005629 return false;
5630 if (!cs.s)
5631 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005632 if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005633 if (cs.dpl > cs_rpl)
5634 return false;
Avi Kivity1872a3f2009-01-04 23:26:52 +02005635 } else {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005636 if (cs.dpl != cs_rpl)
5637 return false;
5638 }
5639 if (!cs.present)
5640 return false;
5641
5642 /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
5643 return true;
5644}
5645
5646static bool stack_segment_valid(struct kvm_vcpu *vcpu)
5647{
5648 struct kvm_segment ss;
5649 unsigned int ss_rpl;
5650
5651 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
Nadav Amitb32a9912015-03-29 16:33:04 +03005652 ss_rpl = ss.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005653
Avi Kivity1872a3f2009-01-04 23:26:52 +02005654 if (ss.unusable)
5655 return true;
5656 if (ss.type != 3 && ss.type != 7)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005657 return false;
5658 if (!ss.s)
5659 return false;
5660 if (ss.dpl != ss_rpl) /* DPL != RPL */
5661 return false;
5662 if (!ss.present)
5663 return false;
5664
5665 return true;
5666}
5667
5668static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
5669{
5670 struct kvm_segment var;
5671 unsigned int rpl;
5672
5673 vmx_get_segment(vcpu, &var, seg);
Nadav Amitb32a9912015-03-29 16:33:04 +03005674 rpl = var.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005675
Avi Kivity1872a3f2009-01-04 23:26:52 +02005676 if (var.unusable)
5677 return true;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005678 if (!var.s)
5679 return false;
5680 if (!var.present)
5681 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005682 if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005683 if (var.dpl < rpl) /* DPL < RPL */
5684 return false;
5685 }
5686
5687 /* TODO: Add other members to kvm_segment_field to allow checking for other access
5688 * rights flags
5689 */
5690 return true;
5691}
5692
5693static bool tr_valid(struct kvm_vcpu *vcpu)
5694{
5695 struct kvm_segment tr;
5696
5697 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
5698
Avi Kivity1872a3f2009-01-04 23:26:52 +02005699 if (tr.unusable)
5700 return false;
Nadav Amitb32a9912015-03-29 16:33:04 +03005701 if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005702 return false;
Avi Kivity1872a3f2009-01-04 23:26:52 +02005703 if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005704 return false;
5705 if (!tr.present)
5706 return false;
5707
5708 return true;
5709}
5710
5711static bool ldtr_valid(struct kvm_vcpu *vcpu)
5712{
5713 struct kvm_segment ldtr;
5714
5715 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
5716
Avi Kivity1872a3f2009-01-04 23:26:52 +02005717 if (ldtr.unusable)
5718 return true;
Nadav Amitb32a9912015-03-29 16:33:04 +03005719 if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005720 return false;
5721 if (ldtr.type != 2)
5722 return false;
5723 if (!ldtr.present)
5724 return false;
5725
5726 return true;
5727}
5728
5729static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
5730{
5731 struct kvm_segment cs, ss;
5732
5733 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
5734 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
5735
Nadav Amitb32a9912015-03-29 16:33:04 +03005736 return ((cs.selector & SEGMENT_RPL_MASK) ==
5737 (ss.selector & SEGMENT_RPL_MASK));
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005738}
5739
5740/*
5741 * Check if guest state is valid. Returns true if valid, false if
5742 * not.
5743 * We assume that registers are always usable
5744 */
5745static bool guest_state_valid(struct kvm_vcpu *vcpu)
5746{
Gleb Natapovc5e97c82013-01-21 15:36:43 +02005747 if (enable_unrestricted_guest)
5748 return true;
5749
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005750 /* real mode guest state checks */
Gleb Natapovf13882d2013-04-14 16:07:37 +03005751 if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005752 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
5753 return false;
5754 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
5755 return false;
5756 if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
5757 return false;
5758 if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
5759 return false;
5760 if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
5761 return false;
5762 if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
5763 return false;
5764 } else {
5765 /* protected mode guest state checks */
5766 if (!cs_ss_rpl_check(vcpu))
5767 return false;
5768 if (!code_segment_valid(vcpu))
5769 return false;
5770 if (!stack_segment_valid(vcpu))
5771 return false;
5772 if (!data_segment_valid(vcpu, VCPU_SREG_DS))
5773 return false;
5774 if (!data_segment_valid(vcpu, VCPU_SREG_ES))
5775 return false;
5776 if (!data_segment_valid(vcpu, VCPU_SREG_FS))
5777 return false;
5778 if (!data_segment_valid(vcpu, VCPU_SREG_GS))
5779 return false;
5780 if (!tr_valid(vcpu))
5781 return false;
5782 if (!ldtr_valid(vcpu))
5783 return false;
5784 }
5785 /* TODO:
5786 * - Add checks on RIP
5787 * - Add checks on RFLAGS
5788 */
5789
5790 return true;
5791}
5792
Jim Mattson5fa99cb2017-07-06 16:33:07 -07005793static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
5794{
5795 return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
5796}
5797
Mike Dayd77c26f2007-10-08 09:02:08 -04005798static int init_rmode_tss(struct kvm *kvm)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005799{
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005800 gfn_t fn;
Izik Eidus195aefd2007-10-01 22:14:18 +02005801 u16 data = 0;
Paolo Bonzini1f755a82014-09-16 13:37:40 +02005802 int idx, r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005803
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005804 idx = srcu_read_lock(&kvm->srcu);
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005805 fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
Izik Eidus195aefd2007-10-01 22:14:18 +02005806 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
5807 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005808 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005809 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
Sheng Yang464d17c2008-08-13 14:10:33 +08005810 r = kvm_write_guest_page(kvm, fn++, &data,
5811 TSS_IOPB_BASE_OFFSET, sizeof(u16));
Izik Eidus195aefd2007-10-01 22:14:18 +02005812 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005813 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005814 r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
5815 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005816 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005817 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
5818 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005819 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005820 data = ~0;
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005821 r = kvm_write_guest_page(kvm, fn, &data,
5822 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
5823 sizeof(u8));
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005824out:
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005825 srcu_read_unlock(&kvm->srcu, idx);
Paolo Bonzini1f755a82014-09-16 13:37:40 +02005826 return r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005827}
5828
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005829static int init_rmode_identity_map(struct kvm *kvm)
5830{
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005831 struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
Tang Chenf51770e2014-09-16 18:41:59 +08005832 int i, idx, r = 0;
Dan Williamsba049e92016-01-15 16:56:11 -08005833 kvm_pfn_t identity_map_pfn;
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005834 u32 tmp;
5835
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005836 /* Protect kvm_vmx->ept_identity_pagetable_done. */
Tang Chena255d472014-09-16 18:41:58 +08005837 mutex_lock(&kvm->slots_lock);
5838
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005839 if (likely(kvm_vmx->ept_identity_pagetable_done))
Tang Chena255d472014-09-16 18:41:58 +08005840 goto out2;
Tang Chena255d472014-09-16 18:41:58 +08005841
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005842 if (!kvm_vmx->ept_identity_map_addr)
5843 kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
5844 identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
Tang Chena255d472014-09-16 18:41:58 +08005845
David Hildenbrandd8a6e362017-08-24 20:51:34 +02005846 r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005847 kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
Tang Chenf51770e2014-09-16 18:41:59 +08005848 if (r < 0)
Tang Chena255d472014-09-16 18:41:58 +08005849 goto out2;
5850
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005851 idx = srcu_read_lock(&kvm->srcu);
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005852 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
5853 if (r < 0)
5854 goto out;
5855 /* Set up identity-mapping pagetable for EPT in real mode */
5856 for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
5857 tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
5858 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
5859 r = kvm_write_guest_page(kvm, identity_map_pfn,
5860 &tmp, i * sizeof(tmp), sizeof(tmp));
5861 if (r < 0)
5862 goto out;
5863 }
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005864 kvm_vmx->ept_identity_pagetable_done = true;
Tang Chenf51770e2014-09-16 18:41:59 +08005865
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005866out:
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005867 srcu_read_unlock(&kvm->srcu, idx);
Tang Chena255d472014-09-16 18:41:58 +08005868
5869out2:
5870 mutex_unlock(&kvm->slots_lock);
Tang Chenf51770e2014-09-16 18:41:59 +08005871 return r;
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005872}
5873
Avi Kivity6aa8b732006-12-10 02:21:36 -08005874static void seg_setup(int seg)
5875{
Mathias Krause772e0312012-08-30 01:30:19 +02005876 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005877 unsigned int ar;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005878
5879 vmcs_write16(sf->selector, 0);
5880 vmcs_writel(sf->base, 0);
5881 vmcs_write32(sf->limit, 0xffff);
Gleb Natapovd54d07b2012-12-20 16:57:46 +02005882 ar = 0x93;
5883 if (seg == VCPU_SREG_CS)
5884 ar |= 0x08; /* code segment */
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005885
5886 vmcs_write32(sf->ar_bytes, ar);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005887}
5888
Sheng Yangf78e0e22007-10-29 09:40:42 +08005889static int alloc_apic_access_page(struct kvm *kvm)
5890{
Xiao Guangrong44841412012-09-07 14:14:20 +08005891 struct page *page;
Sheng Yangf78e0e22007-10-29 09:40:42 +08005892 int r = 0;
5893
Marcelo Tosatti79fac952009-12-23 14:35:26 -02005894 mutex_lock(&kvm->slots_lock);
Tang Chenc24ae0d2014-09-24 15:57:58 +08005895 if (kvm->arch.apic_access_page_done)
Sheng Yangf78e0e22007-10-29 09:40:42 +08005896 goto out;
Paolo Bonzini1d8007b2015-10-12 13:38:32 +02005897 r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
5898 APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
Sheng Yangf78e0e22007-10-29 09:40:42 +08005899 if (r)
5900 goto out;
Izik Eidus72dc67a2008-02-10 18:04:15 +02005901
Tang Chen73a6d942014-09-11 13:38:00 +08005902 page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
Xiao Guangrong44841412012-09-07 14:14:20 +08005903 if (is_error_page(page)) {
5904 r = -EFAULT;
5905 goto out;
5906 }
5907
Tang Chenc24ae0d2014-09-24 15:57:58 +08005908 /*
5909 * Do not pin the page in memory, so that memory hot-unplug
5910 * is able to migrate it.
5911 */
5912 put_page(page);
5913 kvm->arch.apic_access_page_done = true;
Sheng Yangf78e0e22007-10-29 09:40:42 +08005914out:
Marcelo Tosatti79fac952009-12-23 14:35:26 -02005915 mutex_unlock(&kvm->slots_lock);
Sheng Yangf78e0e22007-10-29 09:40:42 +08005916 return r;
5917}
5918
Wanpeng Li991e7a02015-09-16 17:30:05 +08005919static int allocate_vpid(void)
Sheng Yang2384d2b2008-01-17 15:14:33 +08005920{
5921 int vpid;
5922
Avi Kivity919818a2009-03-23 18:01:29 +02005923 if (!enable_vpid)
Wanpeng Li991e7a02015-09-16 17:30:05 +08005924 return 0;
Sheng Yang2384d2b2008-01-17 15:14:33 +08005925 spin_lock(&vmx_vpid_lock);
5926 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005927 if (vpid < VMX_NR_VPIDS)
Sheng Yang2384d2b2008-01-17 15:14:33 +08005928 __set_bit(vpid, vmx_vpid_bitmap);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005929 else
5930 vpid = 0;
Sheng Yang2384d2b2008-01-17 15:14:33 +08005931 spin_unlock(&vmx_vpid_lock);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005932 return vpid;
Sheng Yang2384d2b2008-01-17 15:14:33 +08005933}
5934
Wanpeng Li991e7a02015-09-16 17:30:05 +08005935static void free_vpid(int vpid)
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08005936{
Wanpeng Li991e7a02015-09-16 17:30:05 +08005937 if (!enable_vpid || vpid == 0)
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08005938 return;
5939 spin_lock(&vmx_vpid_lock);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005940 __clear_bit(vpid, vmx_vpid_bitmap);
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08005941 spin_unlock(&vmx_vpid_lock);
5942}
5943
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005944static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
5945 u32 msr, int type)
Sheng Yang25c5f222008-03-28 13:18:56 +08005946{
Avi Kivity3e7c73e2009-02-24 21:46:19 +02005947 int f = sizeof(unsigned long);
Sheng Yang25c5f222008-03-28 13:18:56 +08005948
5949 if (!cpu_has_vmx_msr_bitmap())
5950 return;
5951
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02005952 if (static_branch_unlikely(&enable_evmcs))
5953 evmcs_touch_msr_bitmap();
5954
Sheng Yang25c5f222008-03-28 13:18:56 +08005955 /*
5956 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
5957 * have the write-low and read-high bitmap offsets the wrong way round.
5958 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
5959 */
Sheng Yang25c5f222008-03-28 13:18:56 +08005960 if (msr <= 0x1fff) {
Yang Zhang8d146952013-01-25 10:18:50 +08005961 if (type & MSR_TYPE_R)
5962 /* read-low */
5963 __clear_bit(msr, msr_bitmap + 0x000 / f);
5964
5965 if (type & MSR_TYPE_W)
5966 /* write-low */
5967 __clear_bit(msr, msr_bitmap + 0x800 / f);
5968
Sheng Yang25c5f222008-03-28 13:18:56 +08005969 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
5970 msr &= 0x1fff;
Yang Zhang8d146952013-01-25 10:18:50 +08005971 if (type & MSR_TYPE_R)
5972 /* read-high */
5973 __clear_bit(msr, msr_bitmap + 0x400 / f);
5974
5975 if (type & MSR_TYPE_W)
5976 /* write-high */
5977 __clear_bit(msr, msr_bitmap + 0xc00 / f);
5978
5979 }
5980}
5981
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005982static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
5983 u32 msr, int type)
5984{
5985 int f = sizeof(unsigned long);
5986
5987 if (!cpu_has_vmx_msr_bitmap())
5988 return;
5989
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02005990 if (static_branch_unlikely(&enable_evmcs))
5991 evmcs_touch_msr_bitmap();
5992
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005993 /*
5994 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
5995 * have the write-low and read-high bitmap offsets the wrong way round.
5996 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
5997 */
5998 if (msr <= 0x1fff) {
5999 if (type & MSR_TYPE_R)
6000 /* read-low */
6001 __set_bit(msr, msr_bitmap + 0x000 / f);
6002
6003 if (type & MSR_TYPE_W)
6004 /* write-low */
6005 __set_bit(msr, msr_bitmap + 0x800 / f);
6006
6007 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
6008 msr &= 0x1fff;
6009 if (type & MSR_TYPE_R)
6010 /* read-high */
6011 __set_bit(msr, msr_bitmap + 0x400 / f);
6012
6013 if (type & MSR_TYPE_W)
6014 /* write-high */
6015 __set_bit(msr, msr_bitmap + 0xc00 / f);
6016
6017 }
6018}
6019
6020static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
6021 u32 msr, int type, bool value)
6022{
6023 if (value)
6024 vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
6025 else
6026 vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
6027}
6028
Wincy Vanf2b93282015-02-03 23:56:03 +08006029/*
6030 * If a msr is allowed by L0, we should check whether it is allowed by L1.
6031 * The corresponding bit will be cleared unless both of L0 and L1 allow it.
6032 */
6033static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
6034 unsigned long *msr_bitmap_nested,
6035 u32 msr, int type)
6036{
6037 int f = sizeof(unsigned long);
6038
Wincy Vanf2b93282015-02-03 23:56:03 +08006039 /*
6040 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
6041 * have the write-low and read-high bitmap offsets the wrong way round.
6042 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
6043 */
6044 if (msr <= 0x1fff) {
6045 if (type & MSR_TYPE_R &&
6046 !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
6047 /* read-low */
6048 __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
6049
6050 if (type & MSR_TYPE_W &&
6051 !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
6052 /* write-low */
6053 __clear_bit(msr, msr_bitmap_nested + 0x800 / f);
6054
6055 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
6056 msr &= 0x1fff;
6057 if (type & MSR_TYPE_R &&
6058 !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
6059 /* read-high */
6060 __clear_bit(msr, msr_bitmap_nested + 0x400 / f);
6061
6062 if (type & MSR_TYPE_W &&
6063 !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
6064 /* write-high */
6065 __clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
6066
6067 }
6068}
6069
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006070static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
Avi Kivity58972972009-02-24 22:26:47 +02006071{
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006072 u8 mode = 0;
6073
6074 if (cpu_has_secondary_exec_ctrls() &&
6075 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
6076 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
6077 mode |= MSR_BITMAP_MODE_X2APIC;
6078 if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
6079 mode |= MSR_BITMAP_MODE_X2APIC_APICV;
6080 }
6081
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006082 return mode;
Yang Zhang8d146952013-01-25 10:18:50 +08006083}
6084
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006085#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
6086
6087static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
6088 u8 mode)
Yang Zhang8d146952013-01-25 10:18:50 +08006089{
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006090 int msr;
6091
6092 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
6093 unsigned word = msr / BITS_PER_LONG;
6094 msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
6095 msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
Wanpeng Lif6e90f92016-09-22 07:43:25 +08006096 }
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006097
6098 if (mode & MSR_BITMAP_MODE_X2APIC) {
6099 /*
6100 * TPR reads and writes can be virtualized even if virtual interrupt
6101 * delivery is not in use.
6102 */
6103 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
6104 if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
6105 vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
6106 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
6107 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
6108 }
6109 }
6110}
6111
6112static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
6113{
6114 struct vcpu_vmx *vmx = to_vmx(vcpu);
6115 unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
6116 u8 mode = vmx_msr_bitmap_mode(vcpu);
6117 u8 changed = mode ^ vmx->msr_bitmap_mode;
6118
6119 if (!changed)
6120 return;
6121
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006122 if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
6123 vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
6124
6125 vmx->msr_bitmap_mode = mode;
Avi Kivity58972972009-02-24 22:26:47 +02006126}
6127
Suravee Suthikulpanitb2a05fe2017-09-12 10:42:41 -05006128static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
Paolo Bonzinid50ab6c2015-07-29 11:49:59 +02006129{
Andrey Smetanind62caab2015-11-10 15:36:33 +03006130 return enable_apicv;
Paolo Bonzinid50ab6c2015-07-29 11:49:59 +02006131}
6132
David Matlackc9f04402017-08-01 14:00:40 -07006133static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
6134{
6135 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6136 gfn_t gfn;
6137
6138 /*
6139 * Don't need to mark the APIC access page dirty; it is never
6140 * written to by the CPU during APIC virtualization.
6141 */
6142
6143 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
6144 gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
6145 kvm_vcpu_mark_page_dirty(vcpu, gfn);
6146 }
6147
6148 if (nested_cpu_has_posted_intr(vmcs12)) {
6149 gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
6150 kvm_vcpu_mark_page_dirty(vcpu, gfn);
6151 }
6152}
6153
6154
David Hildenbrand6342c502017-01-25 11:58:58 +01006155static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
Wincy Van705699a2015-02-03 23:58:17 +08006156{
6157 struct vcpu_vmx *vmx = to_vmx(vcpu);
6158 int max_irr;
6159 void *vapic_page;
6160 u16 status;
6161
David Matlackc9f04402017-08-01 14:00:40 -07006162 if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
6163 return;
Wincy Van705699a2015-02-03 23:58:17 +08006164
David Matlackc9f04402017-08-01 14:00:40 -07006165 vmx->nested.pi_pending = false;
6166 if (!pi_test_and_clear_on(vmx->nested.pi_desc))
6167 return;
Wincy Van705699a2015-02-03 23:58:17 +08006168
David Matlackc9f04402017-08-01 14:00:40 -07006169 max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
6170 if (max_irr != 256) {
Wincy Van705699a2015-02-03 23:58:17 +08006171 vapic_page = kmap(vmx->nested.virtual_apic_page);
Liran Alone7387b02017-12-24 18:12:54 +02006172 __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
6173 vapic_page, &max_irr);
Wincy Van705699a2015-02-03 23:58:17 +08006174 kunmap(vmx->nested.virtual_apic_page);
6175
6176 status = vmcs_read16(GUEST_INTR_STATUS);
6177 if ((u8)max_irr > ((u8)status & 0xff)) {
6178 status &= ~0xff;
6179 status |= (u8)max_irr;
6180 vmcs_write16(GUEST_INTR_STATUS, status);
6181 }
6182 }
David Matlackc9f04402017-08-01 14:00:40 -07006183
6184 nested_mark_vmcs12_pages_dirty(vcpu);
Wincy Van705699a2015-02-03 23:58:17 +08006185}
6186
Paolo Bonzini7e712682018-10-03 13:44:26 +02006187static u8 vmx_get_rvi(void)
6188{
6189 return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
6190}
6191
Liran Alone6c67d82018-09-04 10:56:52 +03006192static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
6193{
6194 struct vcpu_vmx *vmx = to_vmx(vcpu);
6195 void *vapic_page;
6196 u32 vppr;
6197 int rvi;
6198
6199 if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
6200 !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
6201 WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
6202 return false;
6203
Paolo Bonzini7e712682018-10-03 13:44:26 +02006204 rvi = vmx_get_rvi();
Liran Alone6c67d82018-09-04 10:56:52 +03006205
6206 vapic_page = kmap(vmx->nested.virtual_apic_page);
6207 vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
6208 kunmap(vmx->nested.virtual_apic_page);
6209
6210 return ((rvi & 0xf0) > (vppr & 0xf0));
6211}
6212
Wincy Van06a55242017-04-28 13:13:59 +08006213static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
6214 bool nested)
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01006215{
6216#ifdef CONFIG_SMP
Wincy Van06a55242017-04-28 13:13:59 +08006217 int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
6218
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01006219 if (vcpu->mode == IN_GUEST_MODE) {
Feng Wu28b835d2015-09-18 22:29:54 +08006220 /*
Haozhong Zhang5753743f2017-09-18 09:56:50 +08006221 * The vector of interrupt to be delivered to vcpu had
6222 * been set in PIR before this function.
Feng Wu28b835d2015-09-18 22:29:54 +08006223 *
Haozhong Zhang5753743f2017-09-18 09:56:50 +08006224 * Following cases will be reached in this block, and
6225 * we always send a notification event in all cases as
6226 * explained below.
6227 *
6228 * Case 1: vcpu keeps in non-root mode. Sending a
6229 * notification event posts the interrupt to vcpu.
6230 *
6231 * Case 2: vcpu exits to root mode and is still
6232 * runnable. PIR will be synced to vIRR before the
6233 * next vcpu entry. Sending a notification event in
6234 * this case has no effect, as vcpu is not in root
6235 * mode.
6236 *
6237 * Case 3: vcpu exits to root mode and is blocked.
6238 * vcpu_block() has already synced PIR to vIRR and
6239 * never blocks vcpu if vIRR is not cleared. Therefore,
6240 * a blocked vcpu here does not wait for any requested
6241 * interrupts in PIR, and sending a notification event
6242 * which has no effect is safe here.
Feng Wu28b835d2015-09-18 22:29:54 +08006243 */
Feng Wu28b835d2015-09-18 22:29:54 +08006244
Wincy Van06a55242017-04-28 13:13:59 +08006245 apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01006246 return true;
6247 }
6248#endif
6249 return false;
6250}
6251
Wincy Van705699a2015-02-03 23:58:17 +08006252static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
6253 int vector)
6254{
6255 struct vcpu_vmx *vmx = to_vmx(vcpu);
6256
6257 if (is_guest_mode(vcpu) &&
6258 vector == vmx->nested.posted_intr_nv) {
Wincy Van705699a2015-02-03 23:58:17 +08006259 /*
6260 * If a posted intr is not recognized by hardware,
6261 * we will accomplish it in the next vmentry.
6262 */
6263 vmx->nested.pi_pending = true;
6264 kvm_make_request(KVM_REQ_EVENT, vcpu);
Liran Alon6b697712017-11-09 20:27:20 +02006265 /* the PIR and ON have been set by L1. */
6266 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
6267 kvm_vcpu_kick(vcpu);
Wincy Van705699a2015-02-03 23:58:17 +08006268 return 0;
6269 }
6270 return -1;
6271}
Avi Kivity6aa8b732006-12-10 02:21:36 -08006272/*
Yang Zhanga20ed542013-04-11 19:25:15 +08006273 * Send interrupt to vcpu via posted interrupt way.
6274 * 1. If target vcpu is running(non-root mode), send posted interrupt
6275 * notification to vcpu and hardware will sync PIR to vIRR atomically.
6276 * 2. If target vcpu isn't running(root mode), kick it to pick up the
6277 * interrupt from PIR in next vmentry.
6278 */
6279static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
6280{
6281 struct vcpu_vmx *vmx = to_vmx(vcpu);
6282 int r;
6283
Wincy Van705699a2015-02-03 23:58:17 +08006284 r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
6285 if (!r)
6286 return;
6287
Yang Zhanga20ed542013-04-11 19:25:15 +08006288 if (pi_test_and_set_pir(vector, &vmx->pi_desc))
6289 return;
6290
Paolo Bonzinib95234c2016-12-19 13:57:33 +01006291 /* If a previous notification has sent the IPI, nothing to do. */
6292 if (pi_test_and_set_on(&vmx->pi_desc))
6293 return;
6294
Wincy Van06a55242017-04-28 13:13:59 +08006295 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
Yang Zhanga20ed542013-04-11 19:25:15 +08006296 kvm_vcpu_kick(vcpu);
6297}
6298
Avi Kivity6aa8b732006-12-10 02:21:36 -08006299/*
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006300 * Set up the vmcs's constant host-state fields, i.e., host-state fields that
6301 * will not change in the lifetime of the guest.
6302 * Note that host-state that does change is set elsewhere. E.g., host-state
6303 * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
6304 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08006305static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006306{
6307 u32 low32, high32;
6308 unsigned long tmpl;
6309 struct desc_ptr dt;
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07006310 unsigned long cr0, cr3, cr4;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006311
Andy Lutomirski04ac88a2016-10-31 15:18:45 -07006312 cr0 = read_cr0();
6313 WARN_ON(cr0 & X86_CR0_TS);
6314 vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07006315
6316 /*
6317 * Save the most likely value for this task's CR3 in the VMCS.
6318 * We can't use __get_current_cr3_fast() because we're not atomic.
6319 */
Andy Lutomirski6c690ee2017-06-12 10:26:14 -07006320 cr3 = __read_cr3();
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07006321 vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
Sean Christophersond7ee0392018-07-23 12:32:47 -07006322 vmx->loaded_vmcs->host_state.cr3 = cr3;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006323
Andy Lutomirskid974baa2014-10-08 09:02:13 -07006324 /* Save the most likely value for this task's CR4 in the VMCS. */
Andy Lutomirski1e02ce42014-10-24 15:58:08 -07006325 cr4 = cr4_read_shadow();
Andy Lutomirskid974baa2014-10-08 09:02:13 -07006326 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
Sean Christophersond7ee0392018-07-23 12:32:47 -07006327 vmx->loaded_vmcs->host_state.cr4 = cr4;
Andy Lutomirskid974baa2014-10-08 09:02:13 -07006328
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006329 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
Avi Kivityb2da15a2012-05-13 19:53:24 +03006330#ifdef CONFIG_X86_64
6331 /*
6332 * Load null selectors, so we can avoid reloading them in
Sean Christopherson6d6095b2018-07-23 12:32:44 -07006333 * vmx_prepare_switch_to_host(), in case userspace uses
6334 * the null selectors too (the expected case).
Avi Kivityb2da15a2012-05-13 19:53:24 +03006335 */
6336 vmcs_write16(HOST_DS_SELECTOR, 0);
6337 vmcs_write16(HOST_ES_SELECTOR, 0);
6338#else
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006339 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
6340 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
Avi Kivityb2da15a2012-05-13 19:53:24 +03006341#endif
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006342 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
6343 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
6344
Juergen Gross87930012017-09-04 12:25:27 +02006345 store_idt(&dt);
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006346 vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08006347 vmx->host_idt_base = dt.address;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006348
Avi Kivity83287ea422012-09-16 15:10:57 +03006349 vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006350
6351 rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
6352 vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
6353 rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
6354 vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
6355
6356 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
6357 rdmsr(MSR_IA32_CR_PAT, low32, high32);
6358 vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
6359 }
Sean Christopherson5a5e8a12018-09-26 09:23:56 -07006360
6361 if (cpu_has_load_ia32_efer)
6362 vmcs_write64(HOST_IA32_EFER, host_efer);
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006363}
6364
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006365static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
6366{
6367 vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
6368 if (enable_ept)
6369 vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03006370 if (is_guest_mode(&vmx->vcpu))
6371 vmx->vcpu.arch.cr4_guest_owned_bits &=
6372 ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006373 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
6374}
6375
Yang Zhang01e439b2013-04-11 19:25:12 +08006376static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
6377{
6378 u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
6379
Andrey Smetanind62caab2015-11-10 15:36:33 +03006380 if (!kvm_vcpu_apicv_active(&vmx->vcpu))
Yang Zhang01e439b2013-04-11 19:25:12 +08006381 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006382
6383 if (!enable_vnmi)
6384 pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
6385
Yunhong Jiang64672c92016-06-13 14:19:59 -07006386 /* Enable the preemption timer dynamically */
6387 pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08006388 return pin_based_exec_ctrl;
6389}
6390
Andrey Smetanind62caab2015-11-10 15:36:33 +03006391static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
6392{
6393 struct vcpu_vmx *vmx = to_vmx(vcpu);
6394
6395 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
Roman Kagan3ce424e2016-05-18 17:48:20 +03006396 if (cpu_has_secondary_exec_ctrls()) {
6397 if (kvm_vcpu_apicv_active(vcpu))
6398 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
6399 SECONDARY_EXEC_APIC_REGISTER_VIRT |
6400 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
6401 else
6402 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
6403 SECONDARY_EXEC_APIC_REGISTER_VIRT |
6404 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
6405 }
6406
6407 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006408 vmx_update_msr_bitmap(vcpu);
Andrey Smetanind62caab2015-11-10 15:36:33 +03006409}
6410
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006411static u32 vmx_exec_control(struct vcpu_vmx *vmx)
6412{
6413 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
Paolo Bonzinid16c2932014-02-21 10:36:37 +01006414
6415 if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
6416 exec_control &= ~CPU_BASED_MOV_DR_EXITING;
6417
Paolo Bonzini35754c92015-07-29 12:05:37 +02006418 if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006419 exec_control &= ~CPU_BASED_TPR_SHADOW;
6420#ifdef CONFIG_X86_64
6421 exec_control |= CPU_BASED_CR8_STORE_EXITING |
6422 CPU_BASED_CR8_LOAD_EXITING;
6423#endif
6424 }
6425 if (!enable_ept)
6426 exec_control |= CPU_BASED_CR3_STORE_EXITING |
6427 CPU_BASED_CR3_LOAD_EXITING |
6428 CPU_BASED_INVLPG_EXITING;
Wanpeng Li4d5422c2018-03-12 04:53:02 -07006429 if (kvm_mwait_in_guest(vmx->vcpu.kvm))
6430 exec_control &= ~(CPU_BASED_MWAIT_EXITING |
6431 CPU_BASED_MONITOR_EXITING);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006432 if (kvm_hlt_in_guest(vmx->vcpu.kvm))
6433 exec_control &= ~CPU_BASED_HLT_EXITING;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006434 return exec_control;
6435}
6436
Jim Mattson45ec3682017-08-23 16:32:04 -07006437static bool vmx_rdrand_supported(void)
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006438{
Jim Mattson45ec3682017-08-23 16:32:04 -07006439 return vmcs_config.cpu_based_2nd_exec_ctrl &
David Hildenbrand736fdf72017-08-24 20:51:37 +02006440 SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006441}
6442
Jim Mattson75f4fc82017-08-23 16:32:03 -07006443static bool vmx_rdseed_supported(void)
6444{
6445 return vmcs_config.cpu_based_2nd_exec_ctrl &
David Hildenbrand736fdf72017-08-24 20:51:37 +02006446 SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006447}
6448
Paolo Bonzini80154d72017-08-24 13:55:35 +02006449static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006450{
Paolo Bonzini80154d72017-08-24 13:55:35 +02006451 struct kvm_vcpu *vcpu = &vmx->vcpu;
6452
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006453 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
Paolo Bonzini0367f202016-07-12 10:44:55 +02006454
Paolo Bonzini80154d72017-08-24 13:55:35 +02006455 if (!cpu_need_virtualize_apic_accesses(vcpu))
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006456 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6457 if (vmx->vpid == 0)
6458 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
6459 if (!enable_ept) {
6460 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
6461 enable_unrestricted_guest = 0;
6462 }
6463 if (!enable_unrestricted_guest)
6464 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
Wanpeng Lib31c1142018-03-12 04:53:04 -07006465 if (kvm_pause_in_guest(vmx->vcpu.kvm))
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006466 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
Paolo Bonzini80154d72017-08-24 13:55:35 +02006467 if (!kvm_vcpu_apicv_active(vcpu))
Yang Zhangc7c9c562013-01-25 10:18:51 +08006468 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
6469 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
Yang Zhang8d146952013-01-25 10:18:50 +08006470 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
Paolo Bonzini0367f202016-07-12 10:44:55 +02006471
6472 /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
6473 * in vmx_set_cr4. */
6474 exec_control &= ~SECONDARY_EXEC_DESC;
6475
Abel Gordonabc4fc52013-04-18 14:35:25 +03006476 /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
6477 (handle_vmptrld).
6478 We can NOT enable shadow_vmcs here because we don't have yet
6479 a current VMCS12
6480 */
6481 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
Kai Huanga3eaa862015-11-04 13:46:05 +08006482
6483 if (!enable_pml)
6484 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
Kai Huang843e4332015-01-28 10:54:28 +08006485
Paolo Bonzini3db13482017-08-24 14:48:03 +02006486 if (vmx_xsaves_supported()) {
6487 /* Exposing XSAVES only when XSAVE is exposed */
6488 bool xsaves_enabled =
6489 guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
6490 guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
6491
6492 if (!xsaves_enabled)
6493 exec_control &= ~SECONDARY_EXEC_XSAVES;
6494
6495 if (nested) {
6496 if (xsaves_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006497 vmx->nested.msrs.secondary_ctls_high |=
Paolo Bonzini3db13482017-08-24 14:48:03 +02006498 SECONDARY_EXEC_XSAVES;
6499 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006500 vmx->nested.msrs.secondary_ctls_high &=
Paolo Bonzini3db13482017-08-24 14:48:03 +02006501 ~SECONDARY_EXEC_XSAVES;
6502 }
6503 }
6504
Paolo Bonzini80154d72017-08-24 13:55:35 +02006505 if (vmx_rdtscp_supported()) {
6506 bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
6507 if (!rdtscp_enabled)
6508 exec_control &= ~SECONDARY_EXEC_RDTSCP;
6509
6510 if (nested) {
6511 if (rdtscp_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006512 vmx->nested.msrs.secondary_ctls_high |=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006513 SECONDARY_EXEC_RDTSCP;
6514 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006515 vmx->nested.msrs.secondary_ctls_high &=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006516 ~SECONDARY_EXEC_RDTSCP;
6517 }
6518 }
6519
6520 if (vmx_invpcid_supported()) {
6521 /* Exposing INVPCID only when PCID is exposed */
6522 bool invpcid_enabled =
6523 guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
6524 guest_cpuid_has(vcpu, X86_FEATURE_PCID);
6525
6526 if (!invpcid_enabled) {
6527 exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
6528 guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
6529 }
6530
6531 if (nested) {
6532 if (invpcid_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006533 vmx->nested.msrs.secondary_ctls_high |=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006534 SECONDARY_EXEC_ENABLE_INVPCID;
6535 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006536 vmx->nested.msrs.secondary_ctls_high &=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006537 ~SECONDARY_EXEC_ENABLE_INVPCID;
6538 }
6539 }
6540
Jim Mattson45ec3682017-08-23 16:32:04 -07006541 if (vmx_rdrand_supported()) {
6542 bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
6543 if (rdrand_enabled)
David Hildenbrand736fdf72017-08-24 20:51:37 +02006544 exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006545
6546 if (nested) {
6547 if (rdrand_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006548 vmx->nested.msrs.secondary_ctls_high |=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006549 SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006550 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006551 vmx->nested.msrs.secondary_ctls_high &=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006552 ~SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006553 }
6554 }
6555
Jim Mattson75f4fc82017-08-23 16:32:03 -07006556 if (vmx_rdseed_supported()) {
6557 bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
6558 if (rdseed_enabled)
David Hildenbrand736fdf72017-08-24 20:51:37 +02006559 exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006560
6561 if (nested) {
6562 if (rdseed_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006563 vmx->nested.msrs.secondary_ctls_high |=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006564 SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006565 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006566 vmx->nested.msrs.secondary_ctls_high &=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006567 ~SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006568 }
6569 }
6570
Paolo Bonzini80154d72017-08-24 13:55:35 +02006571 vmx->secondary_exec_control = exec_control;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006572}
6573
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006574static void ept_set_mmio_spte_mask(void)
6575{
6576 /*
6577 * EPT Misconfigurations can be generated if the value of bits 2:0
6578 * of an EPT paging-structure entry is 110b (write/execute).
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006579 */
Peter Feinerdcdca5f2017-06-30 17:26:30 -07006580 kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
6581 VMX_EPT_MISCONFIG_WX_VALUE);
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006582}
6583
Wanpeng Lif53cd632014-12-02 19:14:58 +08006584#define VMX_XSS_EXIT_BITMAP 0
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006585/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08006586 * Sets up the vmcs for emulated real mode.
6587 */
David Hildenbrand12d79912017-08-24 20:51:26 +02006588static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006589{
Avi Kivity6aa8b732006-12-10 02:21:36 -08006590 int i;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006591
Abel Gordon4607c2d2013-04-18 14:35:55 +03006592 if (enable_shadow_vmcs) {
Jim Mattsonf4160e42018-05-29 09:11:33 -07006593 /*
6594 * At vCPU creation, "VMWRITE to any supported field
6595 * in the VMCS" is supported, so use the more
6596 * permissive vmx_vmread_bitmap to specify both read
6597 * and write permissions for the shadow VMCS.
6598 */
Abel Gordon4607c2d2013-04-18 14:35:55 +03006599 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
Jim Mattsonf4160e42018-05-29 09:11:33 -07006600 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap));
Abel Gordon4607c2d2013-04-18 14:35:55 +03006601 }
Sheng Yang25c5f222008-03-28 13:18:56 +08006602 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006603 vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
Sheng Yang25c5f222008-03-28 13:18:56 +08006604
Avi Kivity6aa8b732006-12-10 02:21:36 -08006605 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
6606
Avi Kivity6aa8b732006-12-10 02:21:36 -08006607 /* Control */
Yang Zhang01e439b2013-04-11 19:25:12 +08006608 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
Yunhong Jiang64672c92016-06-13 14:19:59 -07006609 vmx->hv_deadline_tsc = -1;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08006610
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006611 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
Avi Kivity6aa8b732006-12-10 02:21:36 -08006612
Dan Williamsdfa169b2016-06-02 11:17:24 -07006613 if (cpu_has_secondary_exec_ctrls()) {
Paolo Bonzini80154d72017-08-24 13:55:35 +02006614 vmx_compute_secondary_exec_control(vmx);
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006615 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
Paolo Bonzini80154d72017-08-24 13:55:35 +02006616 vmx->secondary_exec_control);
Dan Williamsdfa169b2016-06-02 11:17:24 -07006617 }
Sheng Yangf78e0e22007-10-29 09:40:42 +08006618
Andrey Smetanind62caab2015-11-10 15:36:33 +03006619 if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
Yang Zhangc7c9c562013-01-25 10:18:51 +08006620 vmcs_write64(EOI_EXIT_BITMAP0, 0);
6621 vmcs_write64(EOI_EXIT_BITMAP1, 0);
6622 vmcs_write64(EOI_EXIT_BITMAP2, 0);
6623 vmcs_write64(EOI_EXIT_BITMAP3, 0);
6624
6625 vmcs_write16(GUEST_INTR_STATUS, 0);
Yang Zhang01e439b2013-04-11 19:25:12 +08006626
Li RongQing0bcf2612015-12-03 13:29:34 +08006627 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
Yang Zhang01e439b2013-04-11 19:25:12 +08006628 vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
Yang Zhangc7c9c562013-01-25 10:18:51 +08006629 }
6630
Wanpeng Lib31c1142018-03-12 04:53:04 -07006631 if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006632 vmcs_write32(PLE_GAP, ple_gap);
Radim Krčmářa7653ec2014-08-21 18:08:07 +02006633 vmx->ple_window = ple_window;
6634 vmx->ple_window_dirty = true;
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006635 }
6636
Xiao Guangrongc3707952011-07-12 03:28:04 +08006637 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
6638 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006639 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
6640
Avi Kivity9581d442010-10-19 16:46:55 +02006641 vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
6642 vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08006643 vmx_set_constant_host_state(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006644 vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
6645 vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08006646
Bandan Das2a499e42017-08-03 15:54:41 -04006647 if (cpu_has_vmx_vmfunc())
6648 vmcs_write64(VM_FUNCTION_CONTROL, 0);
6649
Eddie Dong2cc51562007-05-21 07:28:09 +03006650 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
6651 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04006652 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
Eddie Dong2cc51562007-05-21 07:28:09 +03006653 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04006654 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
Avi Kivity6aa8b732006-12-10 02:21:36 -08006655
Radim Krčmář74545702015-04-27 15:11:25 +02006656 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
6657 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
Sheng Yang468d4722008-10-09 16:01:55 +08006658
Paolo Bonzini03916db2014-07-24 14:21:57 +02006659 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08006660 u32 index = vmx_msr_index[i];
6661 u32 data_low, data_high;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04006662 int j = vmx->nmsrs;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006663
6664 if (rdmsr_safe(index, &data_low, &data_high) < 0)
6665 continue;
Avi Kivity432bd6c2007-01-31 23:48:13 -08006666 if (wrmsr_safe(index, data_low, data_high) < 0)
6667 continue;
Avi Kivity26bb0982009-09-07 11:14:12 +03006668 vmx->guest_msrs[j].index = i;
6669 vmx->guest_msrs[j].data = 0;
Avi Kivityd5696722009-12-02 12:28:47 +02006670 vmx->guest_msrs[j].mask = -1ull;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04006671 ++vmx->nmsrs;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006672 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08006673
Paolo Bonzini5b76a3c2018-08-05 16:07:47 +02006674 vmx->arch_capabilities = kvm_get_arch_capabilities();
Gleb Natapov2961e8762013-11-25 15:37:13 +02006675
6676 vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006677
6678 /* 22.2.1, 20.8.1 */
Gleb Natapov2961e8762013-11-25 15:37:13 +02006679 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03006680
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08006681 vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
6682 vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
6683
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006684 set_cr4_guest_host_mask(vmx);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006685
Wanpeng Lif53cd632014-12-02 19:14:58 +08006686 if (vmx_xsaves_supported())
6687 vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
6688
Peter Feiner4e595162016-07-07 14:49:58 -07006689 if (enable_pml) {
Peter Feiner4e595162016-07-07 14:49:58 -07006690 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
6691 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
6692 }
Sean Christopherson0b665d32018-08-14 09:33:34 -07006693
6694 if (cpu_has_vmx_encls_vmexit())
6695 vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006696}
6697
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006698static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006699{
6700 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jan Kiszka58cb6282014-01-24 16:48:44 +01006701 struct msr_data apic_base_msr;
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006702 u64 cr0;
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006703
Avi Kivity7ffd92c2009-06-09 14:10:45 +03006704 vmx->rmode.vm86_active = 0;
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01006705 vmx->spec_ctrl = 0;
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006706
Wanpeng Li518e7b92018-02-28 14:03:31 +08006707 vcpu->arch.microcode_version = 0x100000000ULL;
Zhang Xiantaoad312c72007-12-13 23:50:52 +08006708 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006709 kvm_set_cr8(vcpu, 0);
6710
6711 if (!init_event) {
6712 apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
6713 MSR_IA32_APICBASE_ENABLE;
6714 if (kvm_vcpu_is_reset_bsp(vcpu))
6715 apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
6716 apic_base_msr.host_initiated = true;
6717 kvm_set_apic_base(vcpu, &apic_base_msr);
6718 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006719
Avi Kivity2fb92db2011-04-27 19:42:18 +03006720 vmx_segment_cache_clear(vmx);
6721
Avi Kivity5706be02008-08-20 15:07:31 +03006722 seg_setup(VCPU_SREG_CS);
Jan Kiszka66450a22013-03-13 12:42:34 +01006723 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
Paolo Bonzinif3531052015-12-03 15:49:56 +01006724 vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006725
6726 seg_setup(VCPU_SREG_DS);
6727 seg_setup(VCPU_SREG_ES);
6728 seg_setup(VCPU_SREG_FS);
6729 seg_setup(VCPU_SREG_GS);
6730 seg_setup(VCPU_SREG_SS);
6731
6732 vmcs_write16(GUEST_TR_SELECTOR, 0);
6733 vmcs_writel(GUEST_TR_BASE, 0);
6734 vmcs_write32(GUEST_TR_LIMIT, 0xffff);
6735 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
6736
6737 vmcs_write16(GUEST_LDTR_SELECTOR, 0);
6738 vmcs_writel(GUEST_LDTR_BASE, 0);
6739 vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
6740 vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
6741
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006742 if (!init_event) {
6743 vmcs_write32(GUEST_SYSENTER_CS, 0);
6744 vmcs_writel(GUEST_SYSENTER_ESP, 0);
6745 vmcs_writel(GUEST_SYSENTER_EIP, 0);
6746 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
6747 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006748
Wanpeng Lic37c2872017-11-20 14:52:21 -08006749 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
Jan Kiszka66450a22013-03-13 12:42:34 +01006750 kvm_rip_write(vcpu, 0xfff0);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006751
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006752 vmcs_writel(GUEST_GDTR_BASE, 0);
6753 vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
6754
6755 vmcs_writel(GUEST_IDTR_BASE, 0);
6756 vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
6757
Anthony Liguori443381a2010-12-06 10:53:38 -06006758 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006759 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
Paolo Bonzinif3531052015-12-03 15:49:56 +01006760 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
Wanpeng Lia554d202017-10-11 05:10:19 -07006761 if (kvm_mpx_supported())
6762 vmcs_write64(GUEST_BNDCFGS, 0);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006763
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006764 setup_msrs(vmx);
6765
Avi Kivity6aa8b732006-12-10 02:21:36 -08006766 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
6767
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006768 if (cpu_has_vmx_tpr_shadow() && !init_event) {
Sheng Yangf78e0e22007-10-29 09:40:42 +08006769 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
Paolo Bonzini35754c92015-07-29 12:05:37 +02006770 if (cpu_need_tpr_shadow(vcpu))
Sheng Yangf78e0e22007-10-29 09:40:42 +08006771 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006772 __pa(vcpu->arch.apic->regs));
Sheng Yangf78e0e22007-10-29 09:40:42 +08006773 vmcs_write32(TPR_THRESHOLD, 0);
6774 }
6775
Paolo Bonzinia73896c2014-11-02 07:54:30 +01006776 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006777
Sheng Yang2384d2b2008-01-17 15:14:33 +08006778 if (vmx->vpid != 0)
6779 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
6780
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006781 cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006782 vmx->vcpu.arch.cr0 = cr0;
Bruce Rogersf2463242016-04-28 14:49:21 -06006783 vmx_set_cr0(vcpu, cr0); /* enter rmode */
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006784 vmx_set_cr4(vcpu, 0);
Paolo Bonzini56908912015-10-19 11:30:19 +02006785 vmx_set_efer(vcpu, 0);
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08006786
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006787 update_exception_bitmap(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006788
Wanpeng Lidd5f5342015-09-23 18:26:57 +08006789 vpid_sync_context(vmx->vpid);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006790 if (init_event)
6791 vmx_clear_hlt(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006792}
6793
Nadav Har'Elb6f12502011-05-25 23:13:06 +03006794/*
6795 * In nested virtualization, check if L1 asked to exit on external interrupts.
6796 * For most existing hypervisors, this will always return true.
6797 */
6798static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
6799{
6800 return get_vmcs12(vcpu)->pin_based_vm_exec_control &
6801 PIN_BASED_EXT_INTR_MASK;
6802}
6803
Bandan Das77b0f5d2014-04-19 18:17:45 -04006804/*
6805 * In nested virtualization, check if L1 has set
6806 * VM_EXIT_ACK_INTR_ON_EXIT
6807 */
6808static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
6809{
6810 return get_vmcs12(vcpu)->vm_exit_controls &
6811 VM_EXIT_ACK_INTR_ON_EXIT;
6812}
6813
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02006814static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
6815{
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -05006816 return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02006817}
6818
Jan Kiszkac9a79532014-03-07 20:03:15 +01006819static void enable_irq_window(struct kvm_vcpu *vcpu)
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006820{
Paolo Bonzini47c01522016-12-19 11:44:07 +01006821 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
6822 CPU_BASED_VIRTUAL_INTR_PENDING);
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006823}
6824
Jan Kiszkac9a79532014-03-07 20:03:15 +01006825static void enable_nmi_window(struct kvm_vcpu *vcpu)
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006826{
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006827 if (!enable_vnmi ||
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006828 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
Jan Kiszkac9a79532014-03-07 20:03:15 +01006829 enable_irq_window(vcpu);
6830 return;
6831 }
Jan Kiszka03b28f82013-04-29 16:46:42 +02006832
Paolo Bonzini47c01522016-12-19 11:44:07 +01006833 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
6834 CPU_BASED_VIRTUAL_NMI_PENDING);
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006835}
6836
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006837static void vmx_inject_irq(struct kvm_vcpu *vcpu)
Eddie Dong85f455f2007-07-06 12:20:49 +03006838{
Avi Kivity9c8cba32007-11-22 11:42:59 +02006839 struct vcpu_vmx *vmx = to_vmx(vcpu);
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006840 uint32_t intr;
6841 int irq = vcpu->arch.interrupt.nr;
Avi Kivity9c8cba32007-11-22 11:42:59 +02006842
Marcelo Tosatti229456f2009-06-17 09:22:14 -03006843 trace_kvm_inj_virq(irq);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04006844
Avi Kivityfa89a812008-09-01 15:57:51 +03006845 ++vcpu->stat.irq_injections;
Avi Kivity7ffd92c2009-06-09 14:10:45 +03006846 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05006847 int inc_eip = 0;
6848 if (vcpu->arch.interrupt.soft)
6849 inc_eip = vcpu->arch.event_exit_inst_len;
6850 if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02006851 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Eddie Dong85f455f2007-07-06 12:20:49 +03006852 return;
6853 }
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006854 intr = irq | INTR_INFO_VALID_MASK;
6855 if (vcpu->arch.interrupt.soft) {
6856 intr |= INTR_TYPE_SOFT_INTR;
6857 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
6858 vmx->vcpu.arch.event_exit_inst_len);
6859 } else
6860 intr |= INTR_TYPE_EXT_INTR;
6861 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006862
6863 vmx_clear_hlt(vcpu);
Eddie Dong85f455f2007-07-06 12:20:49 +03006864}
6865
Sheng Yangf08864b2008-05-15 18:23:25 +08006866static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
6867{
Jan Kiszka66a5a342008-09-26 09:30:51 +02006868 struct vcpu_vmx *vmx = to_vmx(vcpu);
6869
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006870 if (!enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006871 /*
6872 * Tracking the NMI-blocked state in software is built upon
6873 * finding the next open IRQ window. This, in turn, depends on
6874 * well-behaving guests: They have to keep IRQs disabled at
6875 * least as long as the NMI handler runs. Otherwise we may
6876 * cause NMI nesting, maybe breaking the guest. But as this is
6877 * highly unlikely, we can live with the residual risk.
6878 */
6879 vmx->loaded_vmcs->soft_vnmi_blocked = 1;
6880 vmx->loaded_vmcs->vnmi_blocked_time = 0;
6881 }
6882
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006883 ++vcpu->stat.nmi_injections;
6884 vmx->loaded_vmcs->nmi_known_unmasked = false;
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006885
Avi Kivity7ffd92c2009-06-09 14:10:45 +03006886 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05006887 if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02006888 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Jan Kiszka66a5a342008-09-26 09:30:51 +02006889 return;
6890 }
Wanpeng Lic5a6d5f2016-09-22 17:55:54 +08006891
Sheng Yangf08864b2008-05-15 18:23:25 +08006892 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
6893 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006894
6895 vmx_clear_hlt(vcpu);
Sheng Yangf08864b2008-05-15 18:23:25 +08006896}
6897
Jan Kiszka3cfc3092009-11-12 01:04:25 +01006898static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
6899{
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006900 struct vcpu_vmx *vmx = to_vmx(vcpu);
6901 bool masked;
6902
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006903 if (!enable_vnmi)
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006904 return vmx->loaded_vmcs->soft_vnmi_blocked;
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006905 if (vmx->loaded_vmcs->nmi_known_unmasked)
Avi Kivity9d58b932011-03-07 16:52:07 +02006906 return false;
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006907 masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
6908 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
6909 return masked;
Jan Kiszka3cfc3092009-11-12 01:04:25 +01006910}
6911
6912static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
6913{
6914 struct vcpu_vmx *vmx = to_vmx(vcpu);
6915
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006916 if (!enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006917 if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
6918 vmx->loaded_vmcs->soft_vnmi_blocked = masked;
6919 vmx->loaded_vmcs->vnmi_blocked_time = 0;
6920 }
6921 } else {
6922 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
6923 if (masked)
6924 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6925 GUEST_INTR_STATE_NMI);
6926 else
6927 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
6928 GUEST_INTR_STATE_NMI);
6929 }
Jan Kiszka3cfc3092009-11-12 01:04:25 +01006930}
6931
Jan Kiszka2505dc92013-04-14 12:12:47 +02006932static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
6933{
Jan Kiszkab6b8a142014-03-07 20:03:12 +01006934 if (to_vmx(vcpu)->nested.nested_run_pending)
6935 return 0;
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02006936
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006937 if (!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006938 to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
6939 return 0;
6940
Jan Kiszka2505dc92013-04-14 12:12:47 +02006941 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
6942 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
6943 | GUEST_INTR_STATE_NMI));
6944}
6945
Gleb Natapov78646122009-03-23 12:12:11 +02006946static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
6947{
Jan Kiszkab6b8a142014-03-07 20:03:12 +01006948 return (!to_vmx(vcpu)->nested.nested_run_pending &&
6949 vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
Gleb Natapovc4282df2009-04-21 17:45:07 +03006950 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
6951 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
Gleb Natapov78646122009-03-23 12:12:11 +02006952}
6953
Izik Eiduscbc94022007-10-25 00:29:55 +02006954static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
6955{
6956 int ret;
Izik Eiduscbc94022007-10-25 00:29:55 +02006957
Sean Christophersonf7eaeb02018-03-05 12:04:36 -08006958 if (enable_unrestricted_guest)
6959 return 0;
6960
Paolo Bonzini1d8007b2015-10-12 13:38:32 +02006961 ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
6962 PAGE_SIZE * 3);
Izik Eiduscbc94022007-10-25 00:29:55 +02006963 if (ret)
6964 return ret;
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07006965 to_kvm_vmx(kvm)->tss_addr = addr;
Paolo Bonzini1f755a82014-09-16 13:37:40 +02006966 return init_rmode_tss(kvm);
Izik Eiduscbc94022007-10-25 00:29:55 +02006967}
6968
Sean Christopherson2ac52ab2018-03-20 12:17:19 -07006969static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
6970{
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07006971 to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
Sean Christopherson2ac52ab2018-03-20 12:17:19 -07006972 return 0;
6973}
6974
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006975static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006976{
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006977 switch (vec) {
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006978 case BP_VECTOR:
Jan Kiszkac573cd22010-02-23 17:47:53 +01006979 /*
6980 * Update instruction length as we may reinject the exception
6981 * from user space while in guest debugging mode.
6982 */
6983 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
6984 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006985 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006986 return false;
6987 /* fall through */
6988 case DB_VECTOR:
6989 if (vcpu->guest_debug &
6990 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
6991 return false;
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006992 /* fall through */
6993 case DE_VECTOR:
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006994 case OF_VECTOR:
6995 case BR_VECTOR:
6996 case UD_VECTOR:
6997 case DF_VECTOR:
6998 case SS_VECTOR:
6999 case GP_VECTOR:
7000 case MF_VECTOR:
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007001 return true;
7002 break;
Jan Kiszka77ab6db2008-07-14 12:28:51 +02007003 }
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007004 return false;
7005}
7006
7007static int handle_rmode_exception(struct kvm_vcpu *vcpu,
7008 int vec, u32 err_code)
7009{
7010 /*
7011 * Instruction with address size override prefix opcode 0x67
7012 * Cause the #SS fault with 0 error code in VM86 mode.
7013 */
7014 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007015 if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007016 if (vcpu->arch.halt_request) {
7017 vcpu->arch.halt_request = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -06007018 return kvm_vcpu_halt(vcpu);
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007019 }
7020 return 1;
7021 }
7022 return 0;
7023 }
7024
7025 /*
7026 * Forward all other exceptions that are valid in real mode.
7027 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
7028 * the required debugging infrastructure rework.
7029 */
7030 kvm_queue_exception(vcpu, vec);
7031 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007032}
7033
Andi Kleena0861c02009-06-08 17:37:09 +08007034/*
7035 * Trigger machine check on the host. We assume all the MSRs are already set up
7036 * by the CPU and that we still run on the same CPU as the MCE occurred on.
7037 * We pass a fake environment to the machine check handler because we want
7038 * the guest to be always treated like user space, no matter what context
7039 * it used internally.
7040 */
7041static void kvm_machine_check(void)
7042{
7043#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
7044 struct pt_regs regs = {
7045 .cs = 3, /* Fake ring 3 no matter what the guest ran on */
7046 .flags = X86_EFLAGS_IF,
7047 };
7048
7049 do_machine_check(&regs, 0);
7050#endif
7051}
7052
Avi Kivity851ba692009-08-24 11:10:17 +03007053static int handle_machine_check(struct kvm_vcpu *vcpu)
Andi Kleena0861c02009-06-08 17:37:09 +08007054{
7055 /* already handled by vcpu_run */
7056 return 1;
7057}
7058
Avi Kivity851ba692009-08-24 11:10:17 +03007059static int handle_exception(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007060{
Avi Kivity1155f762007-11-22 11:30:47 +02007061 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity851ba692009-08-24 11:10:17 +03007062 struct kvm_run *kvm_run = vcpu->run;
Jan Kiszkad0bfb942008-12-15 13:52:10 +01007063 u32 intr_info, ex_no, error_code;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007064 unsigned long cr2, rip, dr6;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007065 u32 vect_info;
7066 enum emulation_result er;
7067
Avi Kivity1155f762007-11-22 11:30:47 +02007068 vect_info = vmx->idt_vectoring_info;
Avi Kivity88786472011-03-07 17:39:45 +02007069 intr_info = vmx->exit_intr_info;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007070
Andi Kleena0861c02009-06-08 17:37:09 +08007071 if (is_machine_check(intr_info))
Avi Kivity851ba692009-08-24 11:10:17 +03007072 return handle_machine_check(vcpu);
Andi Kleena0861c02009-06-08 17:37:09 +08007073
Jim Mattsonef85b672016-12-12 11:01:37 -08007074 if (is_nmi(intr_info))
Avi Kivity1b6269d2007-10-09 12:12:19 +02007075 return 1; /* already handled by vmx_vcpu_run() */
Anthony Liguori2ab455c2007-04-27 09:29:49 +03007076
Wanpeng Li082d06e2018-04-03 16:28:48 -07007077 if (is_invalid_opcode(intr_info))
7078 return handle_ud(vcpu);
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05007079
Avi Kivity6aa8b732006-12-10 02:21:36 -08007080 error_code = 0;
Ryan Harper2e113842008-02-11 10:26:38 -06007081 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007082 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08007083
Liran Alon9e869482018-03-12 13:12:51 +02007084 if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
7085 WARN_ON_ONCE(!enable_vmware_backdoor);
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007086 er = kvm_emulate_instruction(vcpu,
Liran Alon9e869482018-03-12 13:12:51 +02007087 EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
7088 if (er == EMULATE_USER_EXIT)
7089 return 0;
7090 else if (er != EMULATE_DONE)
7091 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
7092 return 1;
7093 }
7094
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08007095 /*
7096 * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
7097 * MMIO, it is better to report an internal error.
7098 * See the comments in vmx_handle_exit.
7099 */
7100 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
7101 !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
7102 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7103 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
Radim Krčmář80f0e952015-04-02 21:11:05 +02007104 vcpu->run->internal.ndata = 3;
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08007105 vcpu->run->internal.data[0] = vect_info;
7106 vcpu->run->internal.data[1] = intr_info;
Radim Krčmář80f0e952015-04-02 21:11:05 +02007107 vcpu->run->internal.data[2] = error_code;
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08007108 return 0;
7109 }
7110
Avi Kivity6aa8b732006-12-10 02:21:36 -08007111 if (is_page_fault(intr_info)) {
7112 cr2 = vmcs_readl(EXIT_QUALIFICATION);
Wanpeng Li1261bfa2017-07-13 18:30:40 -07007113 /* EPT won't cause page fault directly */
7114 WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
Paolo Bonzinid0006532017-08-11 18:36:43 +02007115 return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007116 }
7117
Jan Kiszkad0bfb942008-12-15 13:52:10 +01007118 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007119
7120 if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
7121 return handle_rmode_exception(vcpu, ex_no, error_code);
7122
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007123 switch (ex_no) {
Eric Northup54a20552015-11-03 18:03:53 +01007124 case AC_VECTOR:
7125 kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
7126 return 1;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007127 case DB_VECTOR:
7128 dr6 = vmcs_readl(EXIT_QUALIFICATION);
7129 if (!(vcpu->guest_debug &
7130 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
Jan Kiszka8246bf52014-01-04 18:47:17 +01007131 vcpu->arch.dr6 &= ~15;
Nadav Amit6f43ed02014-07-15 17:37:46 +03007132 vcpu->arch.dr6 |= dr6 | DR6_RTM;
Linus Torvalds32d43cd2018-03-20 12:16:59 -07007133 if (is_icebp(intr_info))
Huw Daviesfd2a4452014-04-16 10:02:51 +01007134 skip_emulated_instruction(vcpu);
7135
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007136 kvm_queue_exception(vcpu, DB_VECTOR);
7137 return 1;
7138 }
7139 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
7140 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
7141 /* fall through */
7142 case BP_VECTOR:
Jan Kiszkac573cd22010-02-23 17:47:53 +01007143 /*
7144 * Update instruction length as we may reinject #BP from
7145 * user space while in guest debugging mode. Reading it for
7146 * #DB as well causes no harm, it is not used in that case.
7147 */
7148 vmx->vcpu.arch.event_exit_inst_len =
7149 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007150 kvm_run->exit_reason = KVM_EXIT_DEBUG;
Avi Kivity0a434bb2011-04-28 15:59:33 +03007151 rip = kvm_rip_read(vcpu);
Jan Kiszkad0bfb942008-12-15 13:52:10 +01007152 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
7153 kvm_run->debug.arch.exception = ex_no;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007154 break;
7155 default:
Jan Kiszkad0bfb942008-12-15 13:52:10 +01007156 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
7157 kvm_run->ex.exception = ex_no;
7158 kvm_run->ex.error_code = error_code;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007159 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007160 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08007161 return 0;
7162}
7163
Avi Kivity851ba692009-08-24 11:10:17 +03007164static int handle_external_interrupt(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007165{
Avi Kivity1165f5f2007-04-19 17:27:43 +03007166 ++vcpu->stat.irq_exits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007167 return 1;
7168}
7169
Avi Kivity851ba692009-08-24 11:10:17 +03007170static int handle_triple_fault(struct kvm_vcpu *vcpu)
Avi Kivity988ad742007-02-12 00:54:36 -08007171{
Avi Kivity851ba692009-08-24 11:10:17 +03007172 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
Wanpeng Libbeac282017-08-09 22:33:12 -07007173 vcpu->mmio_needed = 0;
Avi Kivity988ad742007-02-12 00:54:36 -08007174 return 0;
7175}
Avi Kivity6aa8b732006-12-10 02:21:36 -08007176
Avi Kivity851ba692009-08-24 11:10:17 +03007177static int handle_io(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007178{
He, Qingbfdaab02007-09-12 14:18:28 +08007179 unsigned long exit_qualification;
Sean Christophersondca7f122018-03-08 08:57:27 -08007180 int size, in, string;
Avi Kivity039576c2007-03-20 12:46:50 +02007181 unsigned port;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007182
He, Qingbfdaab02007-09-12 14:18:28 +08007183 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Avi Kivity039576c2007-03-20 12:46:50 +02007184 string = (exit_qualification & 16) != 0;
Laurent Viviere70669a2007-08-05 10:36:40 +03007185
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02007186 ++vcpu->stat.io_exits;
7187
Sean Christopherson432baf62018-03-08 08:57:26 -08007188 if (string)
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007189 return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02007190
7191 port = exit_qualification >> 16;
7192 size = (exit_qualification & 7) + 1;
Sean Christopherson432baf62018-03-08 08:57:26 -08007193 in = (exit_qualification & 8) != 0;
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02007194
Sean Christophersondca7f122018-03-08 08:57:27 -08007195 return kvm_fast_pio(vcpu, size, port, in);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007196}
7197
Ingo Molnar102d8322007-02-19 14:37:47 +02007198static void
7199vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
7200{
7201 /*
7202 * Patch in the VMCALL instruction:
7203 */
7204 hypercall[0] = 0x0f;
7205 hypercall[1] = 0x01;
7206 hypercall[2] = 0xc1;
Ingo Molnar102d8322007-02-19 14:37:47 +02007207}
7208
Guo Chao0fa06072012-06-28 15:16:19 +08007209/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007210static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
7211{
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007212 if (is_guest_mode(vcpu)) {
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007213 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7214 unsigned long orig_val = val;
7215
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007216 /*
7217 * We get here when L2 changed cr0 in a way that did not change
7218 * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007219 * but did change L0 shadowed bits. So we first calculate the
7220 * effective cr0 value that L1 would like to write into the
7221 * hardware. It consists of the L2-owned bits from the new
7222 * value combined with the L1-owned bits from L1's guest_cr0.
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007223 */
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007224 val = (val & ~vmcs12->cr0_guest_host_mask) |
7225 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
7226
David Matlack38991522016-11-29 18:14:08 -08007227 if (!nested_guest_cr0_valid(vcpu, val))
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007228 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007229
7230 if (kvm_set_cr0(vcpu, val))
7231 return 1;
7232 vmcs_writel(CR0_READ_SHADOW, orig_val);
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007233 return 0;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007234 } else {
7235 if (to_vmx(vcpu)->nested.vmxon &&
David Matlack38991522016-11-29 18:14:08 -08007236 !nested_host_cr0_valid(vcpu, val))
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007237 return 1;
David Matlack38991522016-11-29 18:14:08 -08007238
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007239 return kvm_set_cr0(vcpu, val);
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007240 }
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007241}
7242
7243static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
7244{
7245 if (is_guest_mode(vcpu)) {
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007246 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7247 unsigned long orig_val = val;
7248
7249 /* analogously to handle_set_cr0 */
7250 val = (val & ~vmcs12->cr4_guest_host_mask) |
7251 (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
7252 if (kvm_set_cr4(vcpu, val))
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007253 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007254 vmcs_writel(CR4_READ_SHADOW, orig_val);
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007255 return 0;
7256 } else
7257 return kvm_set_cr4(vcpu, val);
7258}
7259
Paolo Bonzini0367f202016-07-12 10:44:55 +02007260static int handle_desc(struct kvm_vcpu *vcpu)
7261{
7262 WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007263 return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
Paolo Bonzini0367f202016-07-12 10:44:55 +02007264}
7265
Avi Kivity851ba692009-08-24 11:10:17 +03007266static int handle_cr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007267{
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007268 unsigned long exit_qualification, val;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007269 int cr;
7270 int reg;
Avi Kivity49a9b072010-06-10 17:02:14 +03007271 int err;
Kyle Huey6affcbe2016-11-29 12:40:40 -08007272 int ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007273
He, Qingbfdaab02007-09-12 14:18:28 +08007274 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007275 cr = exit_qualification & 15;
7276 reg = (exit_qualification >> 8) & 15;
7277 switch ((exit_qualification >> 4) & 3) {
7278 case 0: /* mov to cr */
Nadav Amit1e32c072014-06-18 17:19:25 +03007279 val = kvm_register_readl(vcpu, reg);
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007280 trace_kvm_cr_write(cr, val);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007281 switch (cr) {
7282 case 0:
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007283 err = handle_set_cr0(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007284 return kvm_complete_insn_gp(vcpu, err);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007285 case 3:
Sean Christophersone1de91c2018-03-05 12:04:41 -08007286 WARN_ON_ONCE(enable_unrestricted_guest);
Avi Kivity23902182010-06-10 17:02:16 +03007287 err = kvm_set_cr3(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007288 return kvm_complete_insn_gp(vcpu, err);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007289 case 4:
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007290 err = handle_set_cr4(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007291 return kvm_complete_insn_gp(vcpu, err);
Gleb Natapov0a5fff192009-04-21 17:45:06 +03007292 case 8: {
7293 u8 cr8_prev = kvm_get_cr8(vcpu);
Nadav Amit1e32c072014-06-18 17:19:25 +03007294 u8 cr8 = (u8)val;
Andre Przywaraeea1cff2010-12-21 11:12:00 +01007295 err = kvm_set_cr8(vcpu, cr8);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007296 ret = kvm_complete_insn_gp(vcpu, err);
Paolo Bonzini35754c92015-07-29 12:05:37 +02007297 if (lapic_in_kernel(vcpu))
Kyle Huey6affcbe2016-11-29 12:40:40 -08007298 return ret;
Gleb Natapov0a5fff192009-04-21 17:45:06 +03007299 if (cr8_prev <= cr8)
Kyle Huey6affcbe2016-11-29 12:40:40 -08007300 return ret;
7301 /*
7302 * TODO: we might be squashing a
7303 * KVM_GUESTDBG_SINGLESTEP-triggered
7304 * KVM_EXIT_DEBUG here.
7305 */
Avi Kivity851ba692009-08-24 11:10:17 +03007306 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
Gleb Natapov0a5fff192009-04-21 17:45:06 +03007307 return 0;
7308 }
Peter Senna Tschudin4b8073e2012-09-18 18:36:14 +02007309 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08007310 break;
Anthony Liguori25c4c272007-04-27 09:29:21 +03007311 case 2: /* clts */
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08007312 WARN_ONCE(1, "Guest should always own CR0.TS");
7313 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
Avi Kivity4d4ec082009-12-29 18:07:30 +02007314 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
Kyle Huey6affcbe2016-11-29 12:40:40 -08007315 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007316 case 1: /*mov from cr*/
7317 switch (cr) {
7318 case 3:
Sean Christophersone1de91c2018-03-05 12:04:41 -08007319 WARN_ON_ONCE(enable_unrestricted_guest);
Avi Kivity9f8fe502010-12-05 17:30:00 +02007320 val = kvm_read_cr3(vcpu);
7321 kvm_register_write(vcpu, reg, val);
7322 trace_kvm_cr_read(cr, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007323 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007324 case 8:
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007325 val = kvm_get_cr8(vcpu);
7326 kvm_register_write(vcpu, reg, val);
7327 trace_kvm_cr_read(cr, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007328 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007329 }
7330 break;
7331 case 3: /* lmsw */
Avi Kivitya1f83a72009-12-29 17:33:58 +02007332 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
Avi Kivity4d4ec082009-12-29 18:07:30 +02007333 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
Avi Kivitya1f83a72009-12-29 17:33:58 +02007334 kvm_lmsw(vcpu, val);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007335
Kyle Huey6affcbe2016-11-29 12:40:40 -08007336 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007337 default:
7338 break;
7339 }
Avi Kivity851ba692009-08-24 11:10:17 +03007340 vcpu->run->exit_reason = 0;
Christoffer Dalla737f252012-06-03 21:17:48 +03007341 vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
Avi Kivity6aa8b732006-12-10 02:21:36 -08007342 (int)(exit_qualification >> 4) & 3, cr);
7343 return 0;
7344}
7345
Avi Kivity851ba692009-08-24 11:10:17 +03007346static int handle_dr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007347{
He, Qingbfdaab02007-09-12 14:18:28 +08007348 unsigned long exit_qualification;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03007349 int dr, dr7, reg;
7350
7351 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7352 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
7353
7354 /* First, if DR does not exist, trigger UD */
7355 if (!kvm_require_dr(vcpu, dr))
7356 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007357
Jan Kiszkaf2483412010-01-20 18:20:20 +01007358 /* Do not handle if the CPL > 0, will trigger GP on re-entry */
Avi Kivity0a79b002009-09-01 12:03:25 +03007359 if (!kvm_require_cpl(vcpu, 0))
7360 return 1;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03007361 dr7 = vmcs_readl(GUEST_DR7);
7362 if (dr7 & DR7_GD) {
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007363 /*
7364 * As the vm-exit takes precedence over the debug trap, we
7365 * need to emulate the latter, either for the host or the
7366 * guest debugging itself.
7367 */
7368 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
Avi Kivity851ba692009-08-24 11:10:17 +03007369 vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03007370 vcpu->run->debug.arch.dr7 = dr7;
Nadav Amit82b32772014-11-02 11:54:45 +02007371 vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
Avi Kivity851ba692009-08-24 11:10:17 +03007372 vcpu->run->debug.arch.exception = DB_VECTOR;
7373 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007374 return 0;
7375 } else {
Nadav Amit7305eb52014-11-02 11:54:44 +02007376 vcpu->arch.dr6 &= ~15;
Nadav Amit6f43ed02014-07-15 17:37:46 +03007377 vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007378 kvm_queue_exception(vcpu, DB_VECTOR);
7379 return 1;
7380 }
7381 }
7382
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007383 if (vcpu->guest_debug == 0) {
Paolo Bonzini8f223722016-02-26 12:09:49 +01007384 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
7385 CPU_BASED_MOV_DR_EXITING);
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007386
7387 /*
7388 * No more DR vmexits; force a reload of the debug registers
7389 * and reenter on this instruction. The next vmexit will
7390 * retrieve the full state of the debug registers.
7391 */
7392 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
7393 return 1;
7394 }
7395
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007396 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
7397 if (exit_qualification & TYPE_MOV_FROM_DR) {
Gleb Natapov020df072010-04-13 10:05:23 +03007398 unsigned long val;
Jan Kiszka4c4d5632013-12-18 19:16:24 +01007399
7400 if (kvm_get_dr(vcpu, dr, &val))
7401 return 1;
7402 kvm_register_write(vcpu, reg, val);
Gleb Natapov020df072010-04-13 10:05:23 +03007403 } else
Nadav Amit57773922014-06-18 17:19:23 +03007404 if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
Jan Kiszka4c4d5632013-12-18 19:16:24 +01007405 return 1;
7406
Kyle Huey6affcbe2016-11-29 12:40:40 -08007407 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007408}
7409
Jan Kiszka73aaf249e2014-01-04 18:47:16 +01007410static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
7411{
7412 return vcpu->arch.dr6;
7413}
7414
7415static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
7416{
7417}
7418
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007419static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
7420{
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007421 get_debugreg(vcpu->arch.db[0], 0);
7422 get_debugreg(vcpu->arch.db[1], 1);
7423 get_debugreg(vcpu->arch.db[2], 2);
7424 get_debugreg(vcpu->arch.db[3], 3);
7425 get_debugreg(vcpu->arch.dr6, 6);
7426 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
7427
7428 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
Paolo Bonzini8f223722016-02-26 12:09:49 +01007429 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007430}
7431
Gleb Natapov020df072010-04-13 10:05:23 +03007432static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
7433{
7434 vmcs_writel(GUEST_DR7, val);
7435}
7436
Avi Kivity851ba692009-08-24 11:10:17 +03007437static int handle_cpuid(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007438{
Kyle Huey6a908b62016-11-29 12:40:37 -08007439 return kvm_emulate_cpuid(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007440}
7441
Avi Kivity851ba692009-08-24 11:10:17 +03007442static int handle_rdmsr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007443{
Zhang Xiantaoad312c72007-12-13 23:50:52 +08007444 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007445 struct msr_data msr_info;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007446
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007447 msr_info.index = ecx;
7448 msr_info.host_initiated = false;
7449 if (vmx_get_msr(vcpu, &msr_info)) {
Avi Kivity59200272010-01-25 19:47:02 +02007450 trace_kvm_msr_read_ex(ecx);
Avi Kivityc1a5d4f2007-11-25 14:12:03 +02007451 kvm_inject_gp(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007452 return 1;
7453 }
7454
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007455 trace_kvm_msr_read(ecx, msr_info.data);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04007456
Avi Kivity6aa8b732006-12-10 02:21:36 -08007457 /* FIXME: handling of bits 32:63 of rax, rdx */
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007458 vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
7459 vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
Kyle Huey6affcbe2016-11-29 12:40:40 -08007460 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007461}
7462
Avi Kivity851ba692009-08-24 11:10:17 +03007463static int handle_wrmsr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007464{
Will Auld8fe8ab42012-11-29 12:42:12 -08007465 struct msr_data msr;
Zhang Xiantaoad312c72007-12-13 23:50:52 +08007466 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
7467 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
7468 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007469
Will Auld8fe8ab42012-11-29 12:42:12 -08007470 msr.data = data;
7471 msr.index = ecx;
7472 msr.host_initiated = false;
Nadav Amit854e8bb2014-09-16 03:24:05 +03007473 if (kvm_set_msr(vcpu, &msr) != 0) {
Avi Kivity59200272010-01-25 19:47:02 +02007474 trace_kvm_msr_write_ex(ecx, data);
Avi Kivityc1a5d4f2007-11-25 14:12:03 +02007475 kvm_inject_gp(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007476 return 1;
7477 }
7478
Avi Kivity59200272010-01-25 19:47:02 +02007479 trace_kvm_msr_write(ecx, data);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007480 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007481}
7482
Avi Kivity851ba692009-08-24 11:10:17 +03007483static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08007484{
Paolo Bonzinieb90f342016-12-18 14:02:21 +01007485 kvm_apic_update_ppr(vcpu);
Yang, Sheng6e5d8652007-09-12 18:03:11 +08007486 return 1;
7487}
7488
Avi Kivity851ba692009-08-24 11:10:17 +03007489static int handle_interrupt_window(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007490{
Paolo Bonzini47c01522016-12-19 11:44:07 +01007491 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
7492 CPU_BASED_VIRTUAL_INTR_PENDING);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04007493
Avi Kivity3842d132010-07-27 12:30:24 +03007494 kvm_make_request(KVM_REQ_EVENT, vcpu);
7495
Jan Kiszkaa26bf122008-09-26 09:30:45 +02007496 ++vcpu->stat.irq_window_exits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007497 return 1;
7498}
7499
Avi Kivity851ba692009-08-24 11:10:17 +03007500static int handle_halt(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007501{
Avi Kivityd3bef152007-06-05 15:53:05 +03007502 return kvm_emulate_halt(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007503}
7504
Avi Kivity851ba692009-08-24 11:10:17 +03007505static int handle_vmcall(struct kvm_vcpu *vcpu)
Ingo Molnarc21415e2007-02-19 14:37:47 +02007506{
Andrey Smetanin0d9c0552016-02-11 16:44:59 +03007507 return kvm_emulate_hypercall(vcpu);
Ingo Molnarc21415e2007-02-19 14:37:47 +02007508}
7509
Gleb Natapovec25d5e2010-11-01 15:35:01 +02007510static int handle_invd(struct kvm_vcpu *vcpu)
7511{
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007512 return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
Gleb Natapovec25d5e2010-11-01 15:35:01 +02007513}
7514
Avi Kivity851ba692009-08-24 11:10:17 +03007515static int handle_invlpg(struct kvm_vcpu *vcpu)
Marcelo Tosattia7052892008-09-23 13:18:35 -03007516{
Sheng Yangf9c617f2009-03-25 10:08:52 +08007517 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Marcelo Tosattia7052892008-09-23 13:18:35 -03007518
7519 kvm_mmu_invlpg(vcpu, exit_qualification);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007520 return kvm_skip_emulated_instruction(vcpu);
Marcelo Tosattia7052892008-09-23 13:18:35 -03007521}
7522
Avi Kivityfee84b02011-11-10 14:57:25 +02007523static int handle_rdpmc(struct kvm_vcpu *vcpu)
7524{
7525 int err;
7526
7527 err = kvm_rdpmc(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007528 return kvm_complete_insn_gp(vcpu, err);
Avi Kivityfee84b02011-11-10 14:57:25 +02007529}
7530
Avi Kivity851ba692009-08-24 11:10:17 +03007531static int handle_wbinvd(struct kvm_vcpu *vcpu)
Eddie Donge5edaa02007-11-11 12:28:35 +02007532{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007533 return kvm_emulate_wbinvd(vcpu);
Eddie Donge5edaa02007-11-11 12:28:35 +02007534}
7535
Dexuan Cui2acf9232010-06-10 11:27:12 +08007536static int handle_xsetbv(struct kvm_vcpu *vcpu)
7537{
7538 u64 new_bv = kvm_read_edx_eax(vcpu);
7539 u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
7540
7541 if (kvm_set_xcr(vcpu, index, new_bv) == 0)
Kyle Huey6affcbe2016-11-29 12:40:40 -08007542 return kvm_skip_emulated_instruction(vcpu);
Dexuan Cui2acf9232010-06-10 11:27:12 +08007543 return 1;
7544}
7545
Wanpeng Lif53cd632014-12-02 19:14:58 +08007546static int handle_xsaves(struct kvm_vcpu *vcpu)
7547{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007548 kvm_skip_emulated_instruction(vcpu);
Wanpeng Lif53cd632014-12-02 19:14:58 +08007549 WARN(1, "this should never happen\n");
7550 return 1;
7551}
7552
7553static int handle_xrstors(struct kvm_vcpu *vcpu)
7554{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007555 kvm_skip_emulated_instruction(vcpu);
Wanpeng Lif53cd632014-12-02 19:14:58 +08007556 WARN(1, "this should never happen\n");
7557 return 1;
7558}
7559
Avi Kivity851ba692009-08-24 11:10:17 +03007560static int handle_apic_access(struct kvm_vcpu *vcpu)
Sheng Yangf78e0e22007-10-29 09:40:42 +08007561{
Kevin Tian58fbbf22011-08-30 13:56:17 +03007562 if (likely(fasteoi)) {
7563 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7564 int access_type, offset;
7565
7566 access_type = exit_qualification & APIC_ACCESS_TYPE;
7567 offset = exit_qualification & APIC_ACCESS_OFFSET;
7568 /*
7569 * Sane guest uses MOV to write EOI, with written value
7570 * not cared. So make a short-circuit here by avoiding
7571 * heavy instruction emulation.
7572 */
7573 if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
7574 (offset == APIC_EOI)) {
7575 kvm_lapic_set_eoi(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007576 return kvm_skip_emulated_instruction(vcpu);
Kevin Tian58fbbf22011-08-30 13:56:17 +03007577 }
7578 }
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007579 return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
Sheng Yangf78e0e22007-10-29 09:40:42 +08007580}
7581
Yang Zhangc7c9c562013-01-25 10:18:51 +08007582static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
7583{
7584 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7585 int vector = exit_qualification & 0xff;
7586
7587 /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
7588 kvm_apic_set_eoi_accelerated(vcpu, vector);
7589 return 1;
7590}
7591
Yang Zhang83d4c282013-01-25 10:18:49 +08007592static int handle_apic_write(struct kvm_vcpu *vcpu)
7593{
7594 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7595 u32 offset = exit_qualification & 0xfff;
7596
7597 /* APIC-write VM exit is trap-like and thus no need to adjust IP */
7598 kvm_apic_write_nodecode(vcpu, offset);
7599 return 1;
7600}
7601
Avi Kivity851ba692009-08-24 11:10:17 +03007602static int handle_task_switch(struct kvm_vcpu *vcpu)
Izik Eidus37817f22008-03-24 23:14:53 +02007603{
Jan Kiszka60637aa2008-09-26 09:30:47 +02007604 struct vcpu_vmx *vmx = to_vmx(vcpu);
Izik Eidus37817f22008-03-24 23:14:53 +02007605 unsigned long exit_qualification;
Jan Kiszkae269fb22010-04-14 15:51:09 +02007606 bool has_error_code = false;
7607 u32 error_code = 0;
Izik Eidus37817f22008-03-24 23:14:53 +02007608 u16 tss_selector;
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01007609 int reason, type, idt_v, idt_index;
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007610
7611 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01007612 idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007613 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
Izik Eidus37817f22008-03-24 23:14:53 +02007614
7615 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7616
7617 reason = (u32)exit_qualification >> 30;
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007618 if (reason == TASK_SWITCH_GATE && idt_v) {
7619 switch (type) {
7620 case INTR_TYPE_NMI_INTR:
7621 vcpu->arch.nmi_injected = false;
Avi Kivity654f06f2011-03-23 15:02:47 +02007622 vmx_set_nmi_mask(vcpu, true);
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007623 break;
7624 case INTR_TYPE_EXT_INTR:
Gleb Natapov66fd3f72009-05-11 13:35:50 +03007625 case INTR_TYPE_SOFT_INTR:
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007626 kvm_clear_interrupt_queue(vcpu);
7627 break;
7628 case INTR_TYPE_HARD_EXCEPTION:
Jan Kiszkae269fb22010-04-14 15:51:09 +02007629 if (vmx->idt_vectoring_info &
7630 VECTORING_INFO_DELIVER_CODE_MASK) {
7631 has_error_code = true;
7632 error_code =
7633 vmcs_read32(IDT_VECTORING_ERROR_CODE);
7634 }
7635 /* fall through */
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007636 case INTR_TYPE_SOFT_EXCEPTION:
7637 kvm_clear_exception_queue(vcpu);
7638 break;
7639 default:
7640 break;
7641 }
Jan Kiszka60637aa2008-09-26 09:30:47 +02007642 }
Izik Eidus37817f22008-03-24 23:14:53 +02007643 tss_selector = exit_qualification;
7644
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007645 if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
7646 type != INTR_TYPE_EXT_INTR &&
7647 type != INTR_TYPE_NMI_INTR))
7648 skip_emulated_instruction(vcpu);
7649
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01007650 if (kvm_task_switch(vcpu, tss_selector,
7651 type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
7652 has_error_code, error_code) == EMULATE_FAIL) {
Gleb Natapovacb54512010-04-15 21:03:50 +03007653 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7654 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
7655 vcpu->run->internal.ndata = 0;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007656 return 0;
Gleb Natapovacb54512010-04-15 21:03:50 +03007657 }
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007658
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007659 /*
7660 * TODO: What about debug traps on tss switch?
7661 * Are we supposed to inject them and update dr6?
7662 */
7663
7664 return 1;
Izik Eidus37817f22008-03-24 23:14:53 +02007665}
7666
Avi Kivity851ba692009-08-24 11:10:17 +03007667static int handle_ept_violation(struct kvm_vcpu *vcpu)
Sheng Yang14394422008-04-28 12:24:45 +08007668{
Sheng Yangf9c617f2009-03-25 10:08:52 +08007669 unsigned long exit_qualification;
Sheng Yang14394422008-04-28 12:24:45 +08007670 gpa_t gpa;
Paolo Bonzinieebed242016-11-28 14:39:58 +01007671 u64 error_code;
Sheng Yang14394422008-04-28 12:24:45 +08007672
Sheng Yangf9c617f2009-03-25 10:08:52 +08007673 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Sheng Yang14394422008-04-28 12:24:45 +08007674
Gleb Natapov0be9c7a2013-09-15 11:07:23 +03007675 /*
7676 * EPT violation happened while executing iret from NMI,
7677 * "blocked by NMI" bit has to be set before next VM entry.
7678 * There are errata that may cause this bit to not be set:
7679 * AAK134, BY25.
7680 */
Gleb Natapovbcd1c292013-09-25 10:58:22 +03007681 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007682 enable_vnmi &&
Gleb Natapovbcd1c292013-09-25 10:58:22 +03007683 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
Gleb Natapov0be9c7a2013-09-15 11:07:23 +03007684 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
7685
Sheng Yang14394422008-04-28 12:24:45 +08007686 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007687 trace_kvm_page_fault(gpa, exit_qualification);
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007688
Junaid Shahid27959a42016-12-06 16:46:10 -08007689 /* Is it a read fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08007690 error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
Junaid Shahid27959a42016-12-06 16:46:10 -08007691 ? PFERR_USER_MASK : 0;
7692 /* Is it a write fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08007693 error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
Junaid Shahid27959a42016-12-06 16:46:10 -08007694 ? PFERR_WRITE_MASK : 0;
7695 /* Is it a fetch fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08007696 error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
Junaid Shahid27959a42016-12-06 16:46:10 -08007697 ? PFERR_FETCH_MASK : 0;
7698 /* ept page table entry is present? */
7699 error_code |= (exit_qualification &
7700 (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE |
7701 EPT_VIOLATION_EXECUTABLE))
7702 ? PFERR_PRESENT_MASK : 0;
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007703
Paolo Bonzinieebed242016-11-28 14:39:58 +01007704 error_code |= (exit_qualification & 0x100) != 0 ?
7705 PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
Yang Zhang25d92082013-08-06 12:00:32 +03007706
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007707 vcpu->arch.exit_qualification = exit_qualification;
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007708 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
Sheng Yang14394422008-04-28 12:24:45 +08007709}
7710
Avi Kivity851ba692009-08-24 11:10:17 +03007711static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007712{
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007713 gpa_t gpa;
7714
Paolo Bonzini9034e6e2017-08-17 18:36:58 +02007715 /*
7716 * A nested guest cannot optimize MMIO vmexits, because we have an
7717 * nGPA here instead of the required GPA.
7718 */
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007719 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
Paolo Bonzini9034e6e2017-08-17 18:36:58 +02007720 if (!is_guest_mode(vcpu) &&
7721 !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
Jason Wang931c33b2015-09-15 14:41:58 +08007722 trace_kvm_fast_mmio(gpa);
Vitaly Kuznetsovd391f122018-01-25 16:37:07 +01007723 /*
7724 * Doing kvm_skip_emulated_instruction() depends on undefined
7725 * behavior: Intel's manual doesn't mandate
7726 * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
7727 * occurs and while on real hardware it was observed to be set,
7728 * other hypervisors (namely Hyper-V) don't set it, we end up
7729 * advancing IP with some random value. Disable fast mmio when
7730 * running nested and keep it for real hardware in hope that
7731 * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
7732 */
7733 if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
7734 return kvm_skip_emulated_instruction(vcpu);
7735 else
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007736 return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
Sean Christophersonc4409902018-08-23 13:56:46 -07007737 EMULATE_DONE;
Michael S. Tsirkin68c3b4d2014-03-31 21:50:44 +03007738 }
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007739
Sean Christophersonc75d0edc2018-03-29 14:48:31 -07007740 return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007741}
7742
Avi Kivity851ba692009-08-24 11:10:17 +03007743static int handle_nmi_window(struct kvm_vcpu *vcpu)
Sheng Yangf08864b2008-05-15 18:23:25 +08007744{
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007745 WARN_ON_ONCE(!enable_vnmi);
Paolo Bonzini47c01522016-12-19 11:44:07 +01007746 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
7747 CPU_BASED_VIRTUAL_NMI_PENDING);
Sheng Yangf08864b2008-05-15 18:23:25 +08007748 ++vcpu->stat.nmi_window_exits;
Avi Kivity3842d132010-07-27 12:30:24 +03007749 kvm_make_request(KVM_REQ_EVENT, vcpu);
Sheng Yangf08864b2008-05-15 18:23:25 +08007750
7751 return 1;
7752}
7753
Mohammed Gamal80ced182009-09-01 12:48:18 +02007754static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007755{
Avi Kivity8b3079a2009-01-05 12:10:54 +02007756 struct vcpu_vmx *vmx = to_vmx(vcpu);
7757 enum emulation_result err = EMULATE_DONE;
Mohammed Gamal80ced182009-09-01 12:48:18 +02007758 int ret = 1;
Avi Kivity49e9d552010-09-19 14:34:08 +02007759 u32 cpu_exec_ctrl;
7760 bool intr_window_requested;
Avi Kivityb8405c12012-06-07 17:08:48 +03007761 unsigned count = 130;
Avi Kivity49e9d552010-09-19 14:34:08 +02007762
Sean Christopherson2bb8caf2018-03-12 10:56:13 -07007763 /*
7764 * We should never reach the point where we are emulating L2
7765 * due to invalid guest state as that means we incorrectly
7766 * allowed a nested VMEntry with an invalid vmcs12.
7767 */
7768 WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
7769
Avi Kivity49e9d552010-09-19 14:34:08 +02007770 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
7771 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007772
Paolo Bonzini98eb2f82014-03-27 09:51:52 +01007773 while (vmx->emulation_required && count-- != 0) {
Avi Kivitybdea48e2012-06-10 18:07:57 +03007774 if (intr_window_requested && vmx_interrupt_allowed(vcpu))
Avi Kivity49e9d552010-09-19 14:34:08 +02007775 return handle_interrupt_window(&vmx->vcpu);
7776
Radim Krčmář72875d82017-04-26 22:32:19 +02007777 if (kvm_test_request(KVM_REQ_EVENT, vcpu))
Avi Kivityde87dcdd2012-06-12 20:21:38 +03007778 return 1;
7779
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007780 err = kvm_emulate_instruction(vcpu, 0);
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007781
Paolo Bonziniac0a48c2013-06-25 18:24:41 +02007782 if (err == EMULATE_USER_EXIT) {
Paolo Bonzini94452b92013-08-27 15:41:42 +02007783 ++vcpu->stat.mmio_exits;
Mohammed Gamal80ced182009-09-01 12:48:18 +02007784 ret = 0;
7785 goto out;
7786 }
Guillaume Thouvenin1d5a4d92008-10-29 09:39:42 +01007787
Sean Christophersonadd5ff72018-03-23 09:34:00 -07007788 if (err != EMULATE_DONE)
7789 goto emulation_error;
7790
7791 if (vmx->emulation_required && !vmx->rmode.vm86_active &&
7792 vcpu->arch.exception.pending)
7793 goto emulation_error;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007794
Gleb Natapov8d76c492013-05-08 18:38:44 +03007795 if (vcpu->arch.halt_request) {
7796 vcpu->arch.halt_request = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -06007797 ret = kvm_vcpu_halt(vcpu);
Gleb Natapov8d76c492013-05-08 18:38:44 +03007798 goto out;
7799 }
7800
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007801 if (signal_pending(current))
Mohammed Gamal80ced182009-09-01 12:48:18 +02007802 goto out;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007803 if (need_resched())
7804 schedule();
7805 }
7806
Mohammed Gamal80ced182009-09-01 12:48:18 +02007807out:
7808 return ret;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007809
Sean Christophersonadd5ff72018-03-23 09:34:00 -07007810emulation_error:
7811 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7812 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
7813 vcpu->run->internal.ndata = 0;
7814 return 0;
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007815}
7816
7817static void grow_ple_window(struct kvm_vcpu *vcpu)
7818{
7819 struct vcpu_vmx *vmx = to_vmx(vcpu);
7820 int old = vmx->ple_window;
7821
Babu Mogerc8e88712018-03-16 16:37:24 -04007822 vmx->ple_window = __grow_ple_window(old, ple_window,
7823 ple_window_grow,
7824 ple_window_max);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007825
7826 if (vmx->ple_window != old)
7827 vmx->ple_window_dirty = true;
Radim Krčmář7b462682014-08-21 18:08:09 +02007828
7829 trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007830}
7831
7832static void shrink_ple_window(struct kvm_vcpu *vcpu)
7833{
7834 struct vcpu_vmx *vmx = to_vmx(vcpu);
7835 int old = vmx->ple_window;
7836
Babu Mogerc8e88712018-03-16 16:37:24 -04007837 vmx->ple_window = __shrink_ple_window(old, ple_window,
7838 ple_window_shrink,
7839 ple_window);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007840
7841 if (vmx->ple_window != old)
7842 vmx->ple_window_dirty = true;
Radim Krčmář7b462682014-08-21 18:08:09 +02007843
7844 trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007845}
7846
7847/*
Feng Wubf9f6ac2015-09-18 22:29:55 +08007848 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
7849 */
7850static void wakeup_handler(void)
7851{
7852 struct kvm_vcpu *vcpu;
7853 int cpu = smp_processor_id();
7854
7855 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
7856 list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
7857 blocked_vcpu_list) {
7858 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
7859
7860 if (pi_test_on(pi_desc) == 1)
7861 kvm_vcpu_kick(vcpu);
7862 }
7863 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
7864}
7865
Peng Haoe01bca22018-04-07 05:47:32 +08007866static void vmx_enable_tdp(void)
Junaid Shahidf160c7b2016-12-06 16:46:16 -08007867{
7868 kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
7869 enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
7870 enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
7871 0ull, VMX_EPT_EXECUTABLE_MASK,
7872 cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
Tom Lendackyd0ec49d2017-07-17 16:10:27 -05007873 VMX_EPT_RWX_MASK, 0ull);
Junaid Shahidf160c7b2016-12-06 16:46:16 -08007874
7875 ept_set_mmio_spte_mask();
7876 kvm_enable_tdp();
7877}
7878
Tiejun Chenf2c76482014-10-28 10:14:47 +08007879static __init int hardware_setup(void)
7880{
Sean Christophersoncf81a7e2018-07-11 09:54:30 -07007881 unsigned long host_bndcfgs;
Paolo Bonzini904e14f2018-01-16 16:51:18 +01007882 int r = -ENOMEM, i;
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007883
7884 rdmsrl_safe(MSR_EFER, &host_efer);
7885
7886 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
7887 kvm_define_shared_msr(i, vmx_msr_index[i]);
7888
Radim Krčmář23611332016-09-29 22:41:33 +02007889 for (i = 0; i < VMX_BITMAP_NR; i++) {
7890 vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
7891 if (!vmx_bitmap[i])
7892 goto out;
7893 }
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007894
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007895 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
7896 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
7897
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007898 if (setup_vmcs_config(&vmcs_config) < 0) {
7899 r = -EIO;
Radim Krčmář23611332016-09-29 22:41:33 +02007900 goto out;
Tiejun Chenbaa03522014-12-23 16:21:11 +08007901 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08007902
7903 if (boot_cpu_has(X86_FEATURE_NX))
7904 kvm_enable_efer_bits(EFER_NX);
7905
Sean Christophersoncf81a7e2018-07-11 09:54:30 -07007906 if (boot_cpu_has(X86_FEATURE_MPX)) {
7907 rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
7908 WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
7909 }
7910
Wanpeng Li08d839c2017-03-23 05:30:08 -07007911 if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
7912 !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
Tiejun Chenf2c76482014-10-28 10:14:47 +08007913 enable_vpid = 0;
Wanpeng Li08d839c2017-03-23 05:30:08 -07007914
Tiejun Chenf2c76482014-10-28 10:14:47 +08007915 if (!cpu_has_vmx_ept() ||
David Hildenbrand42aa53b2017-08-10 23:15:29 +02007916 !cpu_has_vmx_ept_4levels() ||
David Hildenbrandf5f51582017-08-24 20:51:30 +02007917 !cpu_has_vmx_ept_mt_wb() ||
Wanpeng Li8ad81822017-10-09 15:51:53 -07007918 !cpu_has_vmx_invept_global())
Tiejun Chenf2c76482014-10-28 10:14:47 +08007919 enable_ept = 0;
Tiejun Chenf2c76482014-10-28 10:14:47 +08007920
Wanpeng Lifce6ac42017-05-11 02:58:56 -07007921 if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
Tiejun Chenf2c76482014-10-28 10:14:47 +08007922 enable_ept_ad_bits = 0;
7923
Wanpeng Li8ad81822017-10-09 15:51:53 -07007924 if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
Tiejun Chenf2c76482014-10-28 10:14:47 +08007925 enable_unrestricted_guest = 0;
7926
Paolo Bonziniad15a292015-01-30 16:18:49 +01007927 if (!cpu_has_vmx_flexpriority())
Tiejun Chenf2c76482014-10-28 10:14:47 +08007928 flexpriority_enabled = 0;
7929
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007930 if (!cpu_has_virtual_nmis())
7931 enable_vnmi = 0;
7932
Paolo Bonziniad15a292015-01-30 16:18:49 +01007933 /*
7934 * set_apic_access_page_addr() is used to reload apic access
7935 * page upon invalidation. No need to do anything if not
7936 * using the APIC_ACCESS_ADDR VMCS field.
7937 */
7938 if (!flexpriority_enabled)
Tiejun Chenf2c76482014-10-28 10:14:47 +08007939 kvm_x86_ops->set_apic_access_page_addr = NULL;
Tiejun Chenf2c76482014-10-28 10:14:47 +08007940
7941 if (!cpu_has_vmx_tpr_shadow())
7942 kvm_x86_ops->update_cr8_intercept = NULL;
7943
7944 if (enable_ept && !cpu_has_vmx_ept_2m_page())
7945 kvm_disable_largepages();
7946
Tianyu Lan877ad952018-07-19 08:40:23 +00007947#if IS_ENABLED(CONFIG_HYPERV)
7948 if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
7949 && enable_ept)
7950 kvm_x86_ops->tlb_remote_flush = vmx_hv_remote_flush_tlb;
7951#endif
7952
Wanpeng Li0f107682017-09-28 18:06:24 -07007953 if (!cpu_has_vmx_ple()) {
Tiejun Chenf2c76482014-10-28 10:14:47 +08007954 ple_gap = 0;
Wanpeng Li0f107682017-09-28 18:06:24 -07007955 ple_window = 0;
7956 ple_window_grow = 0;
7957 ple_window_max = 0;
7958 ple_window_shrink = 0;
7959 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08007960
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01007961 if (!cpu_has_vmx_apicv()) {
Tiejun Chenf2c76482014-10-28 10:14:47 +08007962 enable_apicv = 0;
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01007963 kvm_x86_ops->sync_pir_to_irr = NULL;
7964 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08007965
Haozhong Zhang64903d62015-10-20 15:39:09 +08007966 if (cpu_has_vmx_tsc_scaling()) {
7967 kvm_has_tsc_control = true;
7968 kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
7969 kvm_tsc_scaling_ratio_frac_bits = 48;
7970 }
7971
Wanpeng Li04bb92e2015-09-16 19:31:11 +08007972 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
7973
Junaid Shahidf160c7b2016-12-06 16:46:16 -08007974 if (enable_ept)
7975 vmx_enable_tdp();
7976 else
Tiejun Chenbaa03522014-12-23 16:21:11 +08007977 kvm_disable_tdp();
7978
Jim Mattson8fcc4b52018-07-10 11:27:20 +02007979 if (!nested) {
7980 kvm_x86_ops->get_nested_state = NULL;
7981 kvm_x86_ops->set_nested_state = NULL;
7982 }
7983
Kai Huang843e4332015-01-28 10:54:28 +08007984 /*
7985 * Only enable PML when hardware supports PML feature, and both EPT
7986 * and EPT A/D bit features are enabled -- PML depends on them to work.
7987 */
7988 if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
7989 enable_pml = 0;
7990
7991 if (!enable_pml) {
7992 kvm_x86_ops->slot_enable_log_dirty = NULL;
7993 kvm_x86_ops->slot_disable_log_dirty = NULL;
7994 kvm_x86_ops->flush_log_dirty = NULL;
7995 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
7996 }
7997
Sean Christophersond264ee02018-08-27 15:21:12 -07007998 if (!cpu_has_vmx_preemption_timer())
7999 kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
8000
Yunhong Jiang64672c92016-06-13 14:19:59 -07008001 if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
8002 u64 vmx_msr;
8003
8004 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
8005 cpu_preemption_timer_multi =
8006 vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
8007 } else {
8008 kvm_x86_ops->set_hv_timer = NULL;
8009 kvm_x86_ops->cancel_hv_timer = NULL;
8010 }
8011
Paolo Bonzinic5d167b2017-12-13 11:05:19 +01008012 if (!cpu_has_vmx_shadow_vmcs())
8013 enable_shadow_vmcs = 0;
8014 if (enable_shadow_vmcs)
8015 init_vmcs_shadow_fields();
8016
Feng Wubf9f6ac2015-09-18 22:29:55 +08008017 kvm_set_posted_intr_wakeup_handler(wakeup_handler);
Paolo Bonzini13893092018-02-26 13:40:09 +01008018 nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv);
Feng Wubf9f6ac2015-09-18 22:29:55 +08008019
Ashok Rajc45dcc72016-06-22 14:59:56 +08008020 kvm_mce_cap_supported |= MCG_LMCE_P;
8021
Tiejun Chenf2c76482014-10-28 10:14:47 +08008022 return alloc_kvm_area();
Tiejun Chen34a1cd62014-10-28 10:14:48 +08008023
Tiejun Chen34a1cd62014-10-28 10:14:48 +08008024out:
Radim Krčmář23611332016-09-29 22:41:33 +02008025 for (i = 0; i < VMX_BITMAP_NR; i++)
8026 free_page((unsigned long)vmx_bitmap[i]);
Tiejun Chen34a1cd62014-10-28 10:14:48 +08008027
8028 return r;
Tiejun Chenf2c76482014-10-28 10:14:47 +08008029}
8030
8031static __exit void hardware_unsetup(void)
8032{
Radim Krčmář23611332016-09-29 22:41:33 +02008033 int i;
8034
8035 for (i = 0; i < VMX_BITMAP_NR; i++)
8036 free_page((unsigned long)vmx_bitmap[i]);
Tiejun Chen34a1cd62014-10-28 10:14:48 +08008037
Tiejun Chenf2c76482014-10-28 10:14:47 +08008038 free_kvm_area();
8039}
8040
Avi Kivity6aa8b732006-12-10 02:21:36 -08008041/*
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08008042 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
8043 * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
8044 */
Marcelo Tosatti9fb41ba2009-10-12 19:37:31 -03008045static int handle_pause(struct kvm_vcpu *vcpu)
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08008046{
Wanpeng Lib31c1142018-03-12 04:53:04 -07008047 if (!kvm_pause_in_guest(vcpu->kvm))
Radim Krčmářb4a2d312014-08-21 18:08:08 +02008048 grow_ple_window(vcpu);
8049
Longpeng(Mike)de63ad42017-08-08 12:05:33 +08008050 /*
8051 * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
8052 * VM-execution control is ignored if CPL > 0. OTOH, KVM
8053 * never set PAUSE_EXITING and just set PLE if supported,
8054 * so the vcpu must be CPL=0 if it gets a PAUSE exit.
8055 */
8056 kvm_vcpu_on_spin(vcpu, true);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008057 return kvm_skip_emulated_instruction(vcpu);
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08008058}
8059
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04008060static int handle_nop(struct kvm_vcpu *vcpu)
Sheng Yang59708672009-12-15 13:29:54 +08008061{
Kyle Huey6affcbe2016-11-29 12:40:40 -08008062 return kvm_skip_emulated_instruction(vcpu);
Sheng Yang59708672009-12-15 13:29:54 +08008063}
8064
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04008065static int handle_mwait(struct kvm_vcpu *vcpu)
8066{
8067 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
8068 return handle_nop(vcpu);
8069}
8070
Jim Mattson45ec3682017-08-23 16:32:04 -07008071static int handle_invalid_op(struct kvm_vcpu *vcpu)
8072{
8073 kvm_queue_exception(vcpu, UD_VECTOR);
8074 return 1;
8075}
8076
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03008077static int handle_monitor_trap(struct kvm_vcpu *vcpu)
8078{
8079 return 1;
8080}
8081
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04008082static int handle_monitor(struct kvm_vcpu *vcpu)
8083{
8084 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
8085 return handle_nop(vcpu);
8086}
8087
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08008088/*
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008089 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008090 * set the success or error code of an emulated VMX instruction (as specified
8091 * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
8092 * instruction.
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008093 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008094static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008095{
8096 vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
8097 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
8098 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008099 return kvm_skip_emulated_instruction(vcpu);
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008100}
8101
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008102static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008103{
8104 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
8105 & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
8106 X86_EFLAGS_SF | X86_EFLAGS_OF))
8107 | X86_EFLAGS_CF);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008108 return kvm_skip_emulated_instruction(vcpu);
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008109}
8110
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008111static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
8112 u32 vm_instruction_error)
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008113{
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008114 /*
8115 * failValid writes the error number to the current VMCS, which
8116 * can't be done if there isn't a current VMCS.
8117 */
8118 if (to_vmx(vcpu)->nested.current_vmptr == -1ull)
8119 return nested_vmx_failInvalid(vcpu);
8120
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008121 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
8122 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
8123 X86_EFLAGS_SF | X86_EFLAGS_OF))
8124 | X86_EFLAGS_ZF);
8125 get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
8126 /*
8127 * We don't need to force a shadow sync because
8128 * VM_INSTRUCTION_ERROR is not shadowed
8129 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008130 return kvm_skip_emulated_instruction(vcpu);
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008131}
Abel Gordon145c28d2013-04-18 14:36:55 +03008132
Wincy Vanff651cb2014-12-11 08:52:58 +03008133static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
8134{
8135 /* TODO: not to reset guest simply here. */
8136 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Paolo Bonzinibbe41b92016-08-19 17:51:20 +02008137 pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
Wincy Vanff651cb2014-12-11 08:52:58 +03008138}
8139
Jan Kiszkaf4124502014-03-07 20:03:13 +01008140static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
8141{
8142 struct vcpu_vmx *vmx =
8143 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
8144
8145 vmx->nested.preemption_timer_expired = true;
8146 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
8147 kvm_vcpu_kick(&vmx->vcpu);
8148
8149 return HRTIMER_NORESTART;
8150}
8151
Nadav Har'Elff2f6fe2011-05-25 23:05:27 +03008152/*
Bandan Das19677e32014-05-06 02:19:15 -04008153 * Decode the memory-address operand of a vmx instruction, as recorded on an
8154 * exit caused by such an instruction (run by a guest hypervisor).
8155 * On success, returns 0. When the operand is invalid, returns 1 and throws
8156 * #UD or #GP.
8157 */
8158static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
8159 unsigned long exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008160 u32 vmx_instruction_info, bool wr, gva_t *ret)
Bandan Das19677e32014-05-06 02:19:15 -04008161{
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008162 gva_t off;
8163 bool exn;
8164 struct kvm_segment s;
8165
Bandan Das19677e32014-05-06 02:19:15 -04008166 /*
8167 * According to Vol. 3B, "Information for VM Exits Due to Instruction
8168 * Execution", on an exit, vmx_instruction_info holds most of the
8169 * addressing components of the operand. Only the displacement part
8170 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
8171 * For how an actual address is calculated from all these components,
8172 * refer to Vol. 1, "Operand Addressing".
8173 */
8174 int scaling = vmx_instruction_info & 3;
8175 int addr_size = (vmx_instruction_info >> 7) & 7;
8176 bool is_reg = vmx_instruction_info & (1u << 10);
8177 int seg_reg = (vmx_instruction_info >> 15) & 7;
8178 int index_reg = (vmx_instruction_info >> 18) & 0xf;
8179 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
8180 int base_reg = (vmx_instruction_info >> 23) & 0xf;
8181 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
8182
8183 if (is_reg) {
8184 kvm_queue_exception(vcpu, UD_VECTOR);
8185 return 1;
8186 }
8187
8188 /* Addr = segment_base + offset */
8189 /* offset = base + [index * scale] + displacement */
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008190 off = exit_qualification; /* holds the displacement */
Bandan Das19677e32014-05-06 02:19:15 -04008191 if (base_is_valid)
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008192 off += kvm_register_read(vcpu, base_reg);
Bandan Das19677e32014-05-06 02:19:15 -04008193 if (index_is_valid)
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008194 off += kvm_register_read(vcpu, index_reg)<<scaling;
8195 vmx_get_segment(vcpu, &s, seg_reg);
8196 *ret = s.base + off;
Bandan Das19677e32014-05-06 02:19:15 -04008197
8198 if (addr_size == 1) /* 32 bit */
8199 *ret &= 0xffffffff;
8200
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008201 /* Checks for #GP/#SS exceptions. */
8202 exn = false;
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02008203 if (is_long_mode(vcpu)) {
8204 /* Long mode: #GP(0)/#SS(0) if the memory address is in a
8205 * non-canonical form. This is the only check on the memory
8206 * destination for long mode!
8207 */
Yu Zhangfd8cb432017-08-24 20:27:56 +08008208 exn = is_noncanonical_address(*ret, vcpu);
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02008209 } else if (is_protmode(vcpu)) {
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008210 /* Protected mode: apply checks for segment validity in the
8211 * following order:
8212 * - segment type check (#GP(0) may be thrown)
8213 * - usability check (#GP(0)/#SS(0))
8214 * - limit check (#GP(0)/#SS(0))
8215 */
8216 if (wr)
8217 /* #GP(0) if the destination operand is located in a
8218 * read-only data segment or any code segment.
8219 */
8220 exn = ((s.type & 0xa) == 0 || (s.type & 8));
8221 else
8222 /* #GP(0) if the source operand is located in an
8223 * execute-only code segment
8224 */
8225 exn = ((s.type & 0xa) == 8);
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02008226 if (exn) {
8227 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
8228 return 1;
8229 }
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008230 /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
8231 */
8232 exn = (s.unusable != 0);
8233 /* Protected mode: #GP(0)/#SS(0) if the memory
8234 * operand is outside the segment limit.
8235 */
8236 exn = exn || (off + sizeof(u64) > s.limit);
8237 }
8238 if (exn) {
8239 kvm_queue_exception_e(vcpu,
8240 seg_reg == VCPU_SREG_SS ?
8241 SS_VECTOR : GP_VECTOR,
8242 0);
8243 return 1;
8244 }
8245
Bandan Das19677e32014-05-06 02:19:15 -04008246 return 0;
8247}
8248
Radim Krčmářcbf71272017-05-19 15:48:51 +02008249static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
Bandan Das3573e222014-05-06 02:19:16 -04008250{
8251 gva_t gva;
Bandan Das3573e222014-05-06 02:19:16 -04008252 struct x86_exception e;
Bandan Das3573e222014-05-06 02:19:16 -04008253
8254 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008255 vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
Bandan Das3573e222014-05-06 02:19:16 -04008256 return 1;
8257
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02008258 if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
Bandan Das3573e222014-05-06 02:19:16 -04008259 kvm_inject_page_fault(vcpu, &e);
8260 return 1;
8261 }
8262
Bandan Das3573e222014-05-06 02:19:16 -04008263 return 0;
8264}
8265
Liran Alonabfc52c2018-06-23 02:35:13 +03008266/*
8267 * Allocate a shadow VMCS and associate it with the currently loaded
8268 * VMCS, unless such a shadow VMCS already exists. The newly allocated
8269 * VMCS is also VMCLEARed, so that it is ready for use.
8270 */
8271static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
8272{
8273 struct vcpu_vmx *vmx = to_vmx(vcpu);
8274 struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
8275
8276 /*
8277 * We should allocate a shadow vmcs for vmcs01 only when L1
8278 * executes VMXON and free it when L1 executes VMXOFF.
8279 * As it is invalid to execute VMXON twice, we shouldn't reach
8280 * here when vmcs01 already have an allocated shadow vmcs.
8281 */
8282 WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
8283
8284 if (!loaded_vmcs->shadow_vmcs) {
8285 loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
8286 if (loaded_vmcs->shadow_vmcs)
8287 vmcs_clear(loaded_vmcs->shadow_vmcs);
8288 }
8289 return loaded_vmcs->shadow_vmcs;
8290}
8291
Jim Mattsone29acc52016-11-30 12:03:43 -08008292static int enter_vmx_operation(struct kvm_vcpu *vcpu)
8293{
8294 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzinif21f1652018-01-11 12:16:15 +01008295 int r;
Jim Mattsone29acc52016-11-30 12:03:43 -08008296
Paolo Bonzinif21f1652018-01-11 12:16:15 +01008297 r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
8298 if (r < 0)
Jim Mattsonde3a0022017-11-27 17:22:25 -06008299 goto out_vmcs02;
Jim Mattsone29acc52016-11-30 12:03:43 -08008300
8301 vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
8302 if (!vmx->nested.cached_vmcs12)
8303 goto out_cached_vmcs12;
8304
Liran Alon61ada742018-06-23 02:35:08 +03008305 vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
8306 if (!vmx->nested.cached_shadow_vmcs12)
8307 goto out_cached_shadow_vmcs12;
8308
Liran Alonabfc52c2018-06-23 02:35:13 +03008309 if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
8310 goto out_shadow_vmcs;
Jim Mattsone29acc52016-11-30 12:03:43 -08008311
Jim Mattsone29acc52016-11-30 12:03:43 -08008312 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
8313 HRTIMER_MODE_REL_PINNED);
8314 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
8315
Roman Kagan63aff652018-07-19 21:59:07 +03008316 vmx->nested.vpid02 = allocate_vpid();
8317
Sean Christopherson9d6105b22018-09-26 09:23:51 -07008318 vmx->nested.vmcs02_initialized = false;
Jim Mattsone29acc52016-11-30 12:03:43 -08008319 vmx->nested.vmxon = true;
8320 return 0;
8321
8322out_shadow_vmcs:
Liran Alon61ada742018-06-23 02:35:08 +03008323 kfree(vmx->nested.cached_shadow_vmcs12);
8324
8325out_cached_shadow_vmcs12:
Jim Mattsone29acc52016-11-30 12:03:43 -08008326 kfree(vmx->nested.cached_vmcs12);
8327
8328out_cached_vmcs12:
Jim Mattsonde3a0022017-11-27 17:22:25 -06008329 free_loaded_vmcs(&vmx->nested.vmcs02);
Jim Mattsone29acc52016-11-30 12:03:43 -08008330
Jim Mattsonde3a0022017-11-27 17:22:25 -06008331out_vmcs02:
Jim Mattsone29acc52016-11-30 12:03:43 -08008332 return -ENOMEM;
8333}
8334
Bandan Das3573e222014-05-06 02:19:16 -04008335/*
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008336 * Emulate the VMXON instruction.
8337 * Currently, we just remember that VMX is active, and do not save or even
8338 * inspect the argument to VMXON (the so-called "VMXON pointer") because we
8339 * do not currently need to store anything in that guest-allocated memory
8340 * region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their
8341 * argument is different from the VMXON pointer (which the spec says they do).
8342 */
8343static int handle_vmon(struct kvm_vcpu *vcpu)
8344{
Jim Mattsone29acc52016-11-30 12:03:43 -08008345 int ret;
Radim Krčmářcbf71272017-05-19 15:48:51 +02008346 gpa_t vmptr;
8347 struct page *page;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008348 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'Elb3897a42013-07-08 19:12:35 +08008349 const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
8350 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008351
Jim Mattson70f3aac2017-04-26 08:53:46 -07008352 /*
8353 * The Intel VMX Instruction Reference lists a bunch of bits that are
8354 * prerequisite to running VMXON, most notably cr4.VMXE must be set to
8355 * 1 (see vmx_set_cr4() for when we allow the guest to set this).
8356 * Otherwise, we should fail with #UD. But most faulting conditions
8357 * have already been checked by hardware, prior to the VM-exit for
8358 * VMXON. We do test guest cr4.VMXE because processor CR4 always has
8359 * that bit set to 1 in non-root mode.
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008360 */
Jim Mattson70f3aac2017-04-26 08:53:46 -07008361 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008362 kvm_queue_exception(vcpu, UD_VECTOR);
8363 return 1;
8364 }
8365
Felix Wilhelm727ba742018-06-11 09:43:44 +02008366 /* CPL=0 must be checked manually. */
8367 if (vmx_get_cpl(vcpu)) {
Jim Mattson36090bf2018-07-27 09:18:50 -07008368 kvm_inject_gp(vcpu, 0);
Felix Wilhelm727ba742018-06-11 09:43:44 +02008369 return 1;
8370 }
8371
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008372 if (vmx->nested.vmxon)
8373 return nested_vmx_failValid(vcpu,
8374 VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
Nadav Har'Elb3897a42013-07-08 19:12:35 +08008375
Haozhong Zhang3b840802016-06-22 14:59:54 +08008376 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
Nadav Har'Elb3897a42013-07-08 19:12:35 +08008377 != VMXON_NEEDED_FEATURES) {
8378 kvm_inject_gp(vcpu, 0);
8379 return 1;
8380 }
8381
Radim Krčmářcbf71272017-05-19 15:48:51 +02008382 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Jim Mattson21e7fbe2016-12-22 15:49:55 -08008383 return 1;
Radim Krčmářcbf71272017-05-19 15:48:51 +02008384
8385 /*
8386 * SDM 3: 24.11.5
8387 * The first 4 bytes of VMXON region contain the supported
8388 * VMCS revision identifier
8389 *
8390 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
8391 * which replaces physical address width with 32
8392 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008393 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
8394 return nested_vmx_failInvalid(vcpu);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008395
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02008396 page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008397 if (is_error_page(page))
8398 return nested_vmx_failInvalid(vcpu);
8399
Radim Krčmářcbf71272017-05-19 15:48:51 +02008400 if (*(u32 *)kmap(page) != VMCS12_REVISION) {
8401 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008402 kvm_release_page_clean(page);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008403 return nested_vmx_failInvalid(vcpu);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008404 }
8405 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008406 kvm_release_page_clean(page);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008407
8408 vmx->nested.vmxon_ptr = vmptr;
Jim Mattsone29acc52016-11-30 12:03:43 -08008409 ret = enter_vmx_operation(vcpu);
8410 if (ret)
8411 return ret;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008412
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008413 return nested_vmx_succeed(vcpu);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008414}
8415
8416/*
8417 * Intel's VMX Instruction Reference specifies a common set of prerequisites
8418 * for running VMX instructions (except VMXON, whose prerequisites are
8419 * slightly different). It also specifies what exception to inject otherwise.
Jim Mattson70f3aac2017-04-26 08:53:46 -07008420 * Note that many of these exceptions have priority over VM exits, so they
8421 * don't have to be checked again here.
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008422 */
8423static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
8424{
Jim Mattson70f3aac2017-04-26 08:53:46 -07008425 if (!to_vmx(vcpu)->nested.vmxon) {
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008426 kvm_queue_exception(vcpu, UD_VECTOR);
8427 return 0;
8428 }
Jim Mattsone49fcb82018-07-27 13:44:45 -07008429
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008430 if (vmx_get_cpl(vcpu)) {
Jim Mattson36090bf2018-07-27 09:18:50 -07008431 kvm_inject_gp(vcpu, 0);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008432 return 0;
8433 }
8434
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008435 return 1;
8436}
8437
David Matlack8ca44e82017-08-01 14:00:39 -07008438static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
8439{
8440 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
8441 vmcs_write64(VMCS_LINK_POINTER, -1ull);
8442}
8443
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008444static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
Abel Gordone7953d72013-04-18 14:37:55 +03008445{
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008446 struct vcpu_vmx *vmx = to_vmx(vcpu);
8447
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008448 if (vmx->nested.current_vmptr == -1ull)
8449 return;
8450
Abel Gordon012f83c2013-04-18 14:39:25 +03008451 if (enable_shadow_vmcs) {
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008452 /* copy to memory all shadowed fields in case
8453 they were modified */
8454 copy_shadow_to_vmcs12(vmx);
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +02008455 vmx->nested.need_vmcs12_sync = false;
David Matlack8ca44e82017-08-01 14:00:39 -07008456 vmx_disable_shadow_vmcs(vmx);
Abel Gordon012f83c2013-04-18 14:39:25 +03008457 }
Wincy Van705699a2015-02-03 23:58:17 +08008458 vmx->nested.posted_intr_nv = -1;
David Matlack4f2777b2016-07-13 17:16:37 -07008459
8460 /* Flush VMCS12 to guest memory */
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008461 kvm_vcpu_write_guest_page(vcpu,
Paolo Bonzini9f744c52017-07-27 15:54:46 +02008462 vmx->nested.current_vmptr >> PAGE_SHIFT,
8463 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
David Matlack4f2777b2016-07-13 17:16:37 -07008464
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008465 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
8466
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008467 vmx->nested.current_vmptr = -1ull;
Abel Gordone7953d72013-04-18 14:37:55 +03008468}
8469
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008470/*
8471 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
8472 * just stops using VMX.
8473 */
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008474static void free_nested(struct kvm_vcpu *vcpu)
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008475{
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008476 struct vcpu_vmx *vmx = to_vmx(vcpu);
8477
Wanpeng Lib7455822017-11-22 14:04:00 -08008478 if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008479 return;
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008480
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008481 vmx->nested.vmxon = false;
Wanpeng Lib7455822017-11-22 14:04:00 -08008482 vmx->nested.smm.vmxon = false;
Wanpeng Li5c614b32015-10-13 09:18:36 -07008483 free_vpid(vmx->nested.vpid02);
David Matlack8ca44e82017-08-01 14:00:39 -07008484 vmx->nested.posted_intr_nv = -1;
8485 vmx->nested.current_vmptr = -1ull;
Jim Mattson355f4fb2016-10-28 08:29:39 -07008486 if (enable_shadow_vmcs) {
David Matlack8ca44e82017-08-01 14:00:39 -07008487 vmx_disable_shadow_vmcs(vmx);
Jim Mattson355f4fb2016-10-28 08:29:39 -07008488 vmcs_clear(vmx->vmcs01.shadow_vmcs);
8489 free_vmcs(vmx->vmcs01.shadow_vmcs);
8490 vmx->vmcs01.shadow_vmcs = NULL;
8491 }
David Matlack4f2777b2016-07-13 17:16:37 -07008492 kfree(vmx->nested.cached_vmcs12);
Liran Alon61ada742018-06-23 02:35:08 +03008493 kfree(vmx->nested.cached_shadow_vmcs12);
Jim Mattsonde3a0022017-11-27 17:22:25 -06008494 /* Unpin physical memory we referred to in the vmcs02 */
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03008495 if (vmx->nested.apic_access_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +02008496 kvm_release_page_dirty(vmx->nested.apic_access_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +02008497 vmx->nested.apic_access_page = NULL;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03008498 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +08008499 if (vmx->nested.virtual_apic_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +02008500 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +02008501 vmx->nested.virtual_apic_page = NULL;
Wanpeng Lia7c0b072014-08-21 19:46:50 +08008502 }
Wincy Van705699a2015-02-03 23:58:17 +08008503 if (vmx->nested.pi_desc_page) {
8504 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008505 kvm_release_page_dirty(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +08008506 vmx->nested.pi_desc_page = NULL;
8507 vmx->nested.pi_desc = NULL;
8508 }
Nadav Har'Elff2f6fe2011-05-25 23:05:27 +03008509
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008510 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
8511
Jim Mattsonde3a0022017-11-27 17:22:25 -06008512 free_loaded_vmcs(&vmx->nested.vmcs02);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008513}
8514
8515/* Emulate the VMXOFF instruction */
8516static int handle_vmoff(struct kvm_vcpu *vcpu)
8517{
8518 if (!nested_vmx_check_permission(vcpu))
8519 return 1;
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008520 free_nested(vcpu);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008521 return nested_vmx_succeed(vcpu);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008522}
8523
Nadav Har'El27d6c862011-05-25 23:06:59 +03008524/* Emulate the VMCLEAR instruction */
8525static int handle_vmclear(struct kvm_vcpu *vcpu)
8526{
8527 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattson587d7e722017-03-02 12:41:48 -08008528 u32 zero = 0;
Nadav Har'El27d6c862011-05-25 23:06:59 +03008529 gpa_t vmptr;
Nadav Har'El27d6c862011-05-25 23:06:59 +03008530
8531 if (!nested_vmx_check_permission(vcpu))
8532 return 1;
8533
Radim Krčmářcbf71272017-05-19 15:48:51 +02008534 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Nadav Har'El27d6c862011-05-25 23:06:59 +03008535 return 1;
8536
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008537 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
8538 return nested_vmx_failValid(vcpu,
8539 VMXERR_VMCLEAR_INVALID_ADDRESS);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008540
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008541 if (vmptr == vmx->nested.vmxon_ptr)
8542 return nested_vmx_failValid(vcpu,
8543 VMXERR_VMCLEAR_VMXON_POINTER);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008544
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008545 if (vmptr == vmx->nested.current_vmptr)
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008546 nested_release_vmcs12(vcpu);
Nadav Har'El27d6c862011-05-25 23:06:59 +03008547
Jim Mattson587d7e722017-03-02 12:41:48 -08008548 kvm_vcpu_write_guest(vcpu,
8549 vmptr + offsetof(struct vmcs12, launch_state),
8550 &zero, sizeof(zero));
Nadav Har'El27d6c862011-05-25 23:06:59 +03008551
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008552 return nested_vmx_succeed(vcpu);
Nadav Har'El27d6c862011-05-25 23:06:59 +03008553}
8554
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03008555static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
8556
8557/* Emulate the VMLAUNCH instruction */
8558static int handle_vmlaunch(struct kvm_vcpu *vcpu)
8559{
8560 return nested_vmx_run(vcpu, true);
8561}
8562
8563/* Emulate the VMRESUME instruction */
8564static int handle_vmresume(struct kvm_vcpu *vcpu)
8565{
8566
8567 return nested_vmx_run(vcpu, false);
8568}
8569
Nadav Har'El49f705c2011-05-25 23:08:30 +03008570/*
8571 * Read a vmcs12 field. Since these can have varying lengths and we return
8572 * one type, we chose the biggest type (u64) and zero-extend the return value
8573 * to that size. Note that the caller, handle_vmread, might need to use only
8574 * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
8575 * 64-bit fields are to be returned).
8576 */
Liran Alone2536742018-06-23 02:35:02 +03008577static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008578 unsigned long field, u64 *ret)
Nadav Har'El49f705c2011-05-25 23:08:30 +03008579{
8580 short offset = vmcs_field_to_offset(field);
8581 char *p;
8582
8583 if (offset < 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008584 return offset;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008585
Liran Alone2536742018-06-23 02:35:02 +03008586 p = (char *)vmcs12 + offset;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008587
Jim Mattsond37f4262017-12-22 12:12:16 -08008588 switch (vmcs_field_width(field)) {
8589 case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008590 *ret = *((natural_width *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008591 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008592 case VMCS_FIELD_WIDTH_U16:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008593 *ret = *((u16 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008594 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008595 case VMCS_FIELD_WIDTH_U32:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008596 *ret = *((u32 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008597 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008598 case VMCS_FIELD_WIDTH_U64:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008599 *ret = *((u64 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008600 return 0;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008601 default:
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008602 WARN_ON(1);
8603 return -ENOENT;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008604 }
8605}
8606
Abel Gordon20b97fe2013-04-18 14:36:25 +03008607
Liran Alone2536742018-06-23 02:35:02 +03008608static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008609 unsigned long field, u64 field_value){
Abel Gordon20b97fe2013-04-18 14:36:25 +03008610 short offset = vmcs_field_to_offset(field);
Liran Alone2536742018-06-23 02:35:02 +03008611 char *p = (char *)vmcs12 + offset;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008612 if (offset < 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008613 return offset;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008614
Jim Mattsond37f4262017-12-22 12:12:16 -08008615 switch (vmcs_field_width(field)) {
8616 case VMCS_FIELD_WIDTH_U16:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008617 *(u16 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008618 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008619 case VMCS_FIELD_WIDTH_U32:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008620 *(u32 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008621 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008622 case VMCS_FIELD_WIDTH_U64:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008623 *(u64 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008624 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008625 case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008626 *(natural_width *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008627 return 0;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008628 default:
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008629 WARN_ON(1);
8630 return -ENOENT;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008631 }
8632
8633}
8634
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +02008635static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
8636{
8637 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
8638 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
8639
8640 /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
8641 vmcs12->tpr_threshold = evmcs->tpr_threshold;
8642 vmcs12->guest_rip = evmcs->guest_rip;
8643
8644 if (unlikely(!(evmcs->hv_clean_fields &
8645 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
8646 vmcs12->guest_rsp = evmcs->guest_rsp;
8647 vmcs12->guest_rflags = evmcs->guest_rflags;
8648 vmcs12->guest_interruptibility_info =
8649 evmcs->guest_interruptibility_info;
8650 }
8651
8652 if (unlikely(!(evmcs->hv_clean_fields &
8653 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
8654 vmcs12->cpu_based_vm_exec_control =
8655 evmcs->cpu_based_vm_exec_control;
8656 }
8657
8658 if (unlikely(!(evmcs->hv_clean_fields &
8659 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
8660 vmcs12->exception_bitmap = evmcs->exception_bitmap;
8661 }
8662
8663 if (unlikely(!(evmcs->hv_clean_fields &
8664 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
8665 vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
8666 }
8667
8668 if (unlikely(!(evmcs->hv_clean_fields &
8669 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
8670 vmcs12->vm_entry_intr_info_field =
8671 evmcs->vm_entry_intr_info_field;
8672 vmcs12->vm_entry_exception_error_code =
8673 evmcs->vm_entry_exception_error_code;
8674 vmcs12->vm_entry_instruction_len =
8675 evmcs->vm_entry_instruction_len;
8676 }
8677
8678 if (unlikely(!(evmcs->hv_clean_fields &
8679 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
8680 vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
8681 vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
8682 vmcs12->host_cr0 = evmcs->host_cr0;
8683 vmcs12->host_cr3 = evmcs->host_cr3;
8684 vmcs12->host_cr4 = evmcs->host_cr4;
8685 vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
8686 vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
8687 vmcs12->host_rip = evmcs->host_rip;
8688 vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
8689 vmcs12->host_es_selector = evmcs->host_es_selector;
8690 vmcs12->host_cs_selector = evmcs->host_cs_selector;
8691 vmcs12->host_ss_selector = evmcs->host_ss_selector;
8692 vmcs12->host_ds_selector = evmcs->host_ds_selector;
8693 vmcs12->host_fs_selector = evmcs->host_fs_selector;
8694 vmcs12->host_gs_selector = evmcs->host_gs_selector;
8695 vmcs12->host_tr_selector = evmcs->host_tr_selector;
8696 }
8697
8698 if (unlikely(!(evmcs->hv_clean_fields &
8699 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
8700 vmcs12->pin_based_vm_exec_control =
8701 evmcs->pin_based_vm_exec_control;
8702 vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
8703 vmcs12->secondary_vm_exec_control =
8704 evmcs->secondary_vm_exec_control;
8705 }
8706
8707 if (unlikely(!(evmcs->hv_clean_fields &
8708 HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
8709 vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
8710 vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
8711 }
8712
8713 if (unlikely(!(evmcs->hv_clean_fields &
8714 HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
8715 vmcs12->msr_bitmap = evmcs->msr_bitmap;
8716 }
8717
8718 if (unlikely(!(evmcs->hv_clean_fields &
8719 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
8720 vmcs12->guest_es_base = evmcs->guest_es_base;
8721 vmcs12->guest_cs_base = evmcs->guest_cs_base;
8722 vmcs12->guest_ss_base = evmcs->guest_ss_base;
8723 vmcs12->guest_ds_base = evmcs->guest_ds_base;
8724 vmcs12->guest_fs_base = evmcs->guest_fs_base;
8725 vmcs12->guest_gs_base = evmcs->guest_gs_base;
8726 vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
8727 vmcs12->guest_tr_base = evmcs->guest_tr_base;
8728 vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
8729 vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
8730 vmcs12->guest_es_limit = evmcs->guest_es_limit;
8731 vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
8732 vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
8733 vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
8734 vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
8735 vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
8736 vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
8737 vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
8738 vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
8739 vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
8740 vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
8741 vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
8742 vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
8743 vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
8744 vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
8745 vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
8746 vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
8747 vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
8748 vmcs12->guest_es_selector = evmcs->guest_es_selector;
8749 vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
8750 vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
8751 vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
8752 vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
8753 vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
8754 vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
8755 vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
8756 }
8757
8758 if (unlikely(!(evmcs->hv_clean_fields &
8759 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
8760 vmcs12->tsc_offset = evmcs->tsc_offset;
8761 vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
8762 vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
8763 }
8764
8765 if (unlikely(!(evmcs->hv_clean_fields &
8766 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
8767 vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
8768 vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
8769 vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
8770 vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
8771 vmcs12->guest_cr0 = evmcs->guest_cr0;
8772 vmcs12->guest_cr3 = evmcs->guest_cr3;
8773 vmcs12->guest_cr4 = evmcs->guest_cr4;
8774 vmcs12->guest_dr7 = evmcs->guest_dr7;
8775 }
8776
8777 if (unlikely(!(evmcs->hv_clean_fields &
8778 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
8779 vmcs12->host_fs_base = evmcs->host_fs_base;
8780 vmcs12->host_gs_base = evmcs->host_gs_base;
8781 vmcs12->host_tr_base = evmcs->host_tr_base;
8782 vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
8783 vmcs12->host_idtr_base = evmcs->host_idtr_base;
8784 vmcs12->host_rsp = evmcs->host_rsp;
8785 }
8786
8787 if (unlikely(!(evmcs->hv_clean_fields &
8788 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
8789 vmcs12->ept_pointer = evmcs->ept_pointer;
8790 vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
8791 }
8792
8793 if (unlikely(!(evmcs->hv_clean_fields &
8794 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
8795 vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
8796 vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
8797 vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
8798 vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
8799 vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
8800 vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
8801 vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
8802 vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
8803 vmcs12->guest_pending_dbg_exceptions =
8804 evmcs->guest_pending_dbg_exceptions;
8805 vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
8806 vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
8807 vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
8808 vmcs12->guest_activity_state = evmcs->guest_activity_state;
8809 vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
8810 }
8811
8812 /*
8813 * Not used?
8814 * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
8815 * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
8816 * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
8817 * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
8818 * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
8819 * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
8820 * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
8821 * vmcs12->page_fault_error_code_mask =
8822 * evmcs->page_fault_error_code_mask;
8823 * vmcs12->page_fault_error_code_match =
8824 * evmcs->page_fault_error_code_match;
8825 * vmcs12->cr3_target_count = evmcs->cr3_target_count;
8826 * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
8827 * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
8828 * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
8829 */
8830
8831 /*
8832 * Read only fields:
8833 * vmcs12->guest_physical_address = evmcs->guest_physical_address;
8834 * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
8835 * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
8836 * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
8837 * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
8838 * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
8839 * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
8840 * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
8841 * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
8842 * vmcs12->exit_qualification = evmcs->exit_qualification;
8843 * vmcs12->guest_linear_address = evmcs->guest_linear_address;
8844 *
8845 * Not present in struct vmcs12:
8846 * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
8847 * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
8848 * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
8849 * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
8850 */
8851
8852 return 0;
8853}
8854
8855static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
8856{
8857 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
8858 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
8859
8860 /*
8861 * Should not be changed by KVM:
8862 *
8863 * evmcs->host_es_selector = vmcs12->host_es_selector;
8864 * evmcs->host_cs_selector = vmcs12->host_cs_selector;
8865 * evmcs->host_ss_selector = vmcs12->host_ss_selector;
8866 * evmcs->host_ds_selector = vmcs12->host_ds_selector;
8867 * evmcs->host_fs_selector = vmcs12->host_fs_selector;
8868 * evmcs->host_gs_selector = vmcs12->host_gs_selector;
8869 * evmcs->host_tr_selector = vmcs12->host_tr_selector;
8870 * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
8871 * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
8872 * evmcs->host_cr0 = vmcs12->host_cr0;
8873 * evmcs->host_cr3 = vmcs12->host_cr3;
8874 * evmcs->host_cr4 = vmcs12->host_cr4;
8875 * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
8876 * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
8877 * evmcs->host_rip = vmcs12->host_rip;
8878 * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
8879 * evmcs->host_fs_base = vmcs12->host_fs_base;
8880 * evmcs->host_gs_base = vmcs12->host_gs_base;
8881 * evmcs->host_tr_base = vmcs12->host_tr_base;
8882 * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
8883 * evmcs->host_idtr_base = vmcs12->host_idtr_base;
8884 * evmcs->host_rsp = vmcs12->host_rsp;
8885 * sync_vmcs12() doesn't read these:
8886 * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
8887 * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
8888 * evmcs->msr_bitmap = vmcs12->msr_bitmap;
8889 * evmcs->ept_pointer = vmcs12->ept_pointer;
8890 * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
8891 * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
8892 * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
8893 * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
8894 * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0;
8895 * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1;
8896 * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2;
8897 * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3;
8898 * evmcs->tpr_threshold = vmcs12->tpr_threshold;
8899 * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
8900 * evmcs->exception_bitmap = vmcs12->exception_bitmap;
8901 * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
8902 * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
8903 * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
8904 * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
8905 * evmcs->page_fault_error_code_mask =
8906 * vmcs12->page_fault_error_code_mask;
8907 * evmcs->page_fault_error_code_match =
8908 * vmcs12->page_fault_error_code_match;
8909 * evmcs->cr3_target_count = vmcs12->cr3_target_count;
8910 * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
8911 * evmcs->tsc_offset = vmcs12->tsc_offset;
8912 * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
8913 * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
8914 * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
8915 * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
8916 * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
8917 * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
8918 * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
8919 * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
8920 *
8921 * Not present in struct vmcs12:
8922 * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
8923 * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
8924 * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
8925 * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
8926 */
8927
8928 evmcs->guest_es_selector = vmcs12->guest_es_selector;
8929 evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
8930 evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
8931 evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
8932 evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
8933 evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
8934 evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
8935 evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
8936
8937 evmcs->guest_es_limit = vmcs12->guest_es_limit;
8938 evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
8939 evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
8940 evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
8941 evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
8942 evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
8943 evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
8944 evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
8945 evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
8946 evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
8947
8948 evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
8949 evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
8950 evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
8951 evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
8952 evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
8953 evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
8954 evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
8955 evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
8956
8957 evmcs->guest_es_base = vmcs12->guest_es_base;
8958 evmcs->guest_cs_base = vmcs12->guest_cs_base;
8959 evmcs->guest_ss_base = vmcs12->guest_ss_base;
8960 evmcs->guest_ds_base = vmcs12->guest_ds_base;
8961 evmcs->guest_fs_base = vmcs12->guest_fs_base;
8962 evmcs->guest_gs_base = vmcs12->guest_gs_base;
8963 evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
8964 evmcs->guest_tr_base = vmcs12->guest_tr_base;
8965 evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
8966 evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
8967
8968 evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
8969 evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
8970
8971 evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
8972 evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
8973 evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
8974 evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
8975
8976 evmcs->guest_pending_dbg_exceptions =
8977 vmcs12->guest_pending_dbg_exceptions;
8978 evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
8979 evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
8980
8981 evmcs->guest_activity_state = vmcs12->guest_activity_state;
8982 evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
8983
8984 evmcs->guest_cr0 = vmcs12->guest_cr0;
8985 evmcs->guest_cr3 = vmcs12->guest_cr3;
8986 evmcs->guest_cr4 = vmcs12->guest_cr4;
8987 evmcs->guest_dr7 = vmcs12->guest_dr7;
8988
8989 evmcs->guest_physical_address = vmcs12->guest_physical_address;
8990
8991 evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
8992 evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
8993 evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
8994 evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
8995 evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
8996 evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
8997 evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
8998 evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
8999
9000 evmcs->exit_qualification = vmcs12->exit_qualification;
9001
9002 evmcs->guest_linear_address = vmcs12->guest_linear_address;
9003 evmcs->guest_rsp = vmcs12->guest_rsp;
9004 evmcs->guest_rflags = vmcs12->guest_rflags;
9005
9006 evmcs->guest_interruptibility_info =
9007 vmcs12->guest_interruptibility_info;
9008 evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
9009 evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
9010 evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
9011 evmcs->vm_entry_exception_error_code =
9012 vmcs12->vm_entry_exception_error_code;
9013 evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
9014
9015 evmcs->guest_rip = vmcs12->guest_rip;
9016
9017 evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
9018
9019 return 0;
9020}
9021
Jim Mattsonf4160e42018-05-29 09:11:33 -07009022/*
9023 * Copy the writable VMCS shadow fields back to the VMCS12, in case
9024 * they have been modified by the L1 guest. Note that the "read-only"
9025 * VM-exit information fields are actually writable if the vCPU is
9026 * configured to support "VMWRITE to any supported field in the VMCS."
9027 */
Abel Gordon16f5b902013-04-18 14:38:25 +03009028static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
9029{
Jim Mattsonf4160e42018-05-29 09:11:33 -07009030 const u16 *fields[] = {
9031 shadow_read_write_fields,
9032 shadow_read_only_fields
9033 };
9034 const int max_fields[] = {
9035 max_shadow_read_write_fields,
9036 max_shadow_read_only_fields
9037 };
9038 int i, q;
Abel Gordon16f5b902013-04-18 14:38:25 +03009039 unsigned long field;
9040 u64 field_value;
Jim Mattson355f4fb2016-10-28 08:29:39 -07009041 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Abel Gordon16f5b902013-04-18 14:38:25 +03009042
Jan Kiszka282da872014-10-08 18:05:39 +02009043 preempt_disable();
9044
Abel Gordon16f5b902013-04-18 14:38:25 +03009045 vmcs_load(shadow_vmcs);
9046
Jim Mattsonf4160e42018-05-29 09:11:33 -07009047 for (q = 0; q < ARRAY_SIZE(fields); q++) {
9048 for (i = 0; i < max_fields[q]; i++) {
9049 field = fields[q][i];
9050 field_value = __vmcs_readl(field);
Liran Alone2536742018-06-23 02:35:02 +03009051 vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value);
Jim Mattsonf4160e42018-05-29 09:11:33 -07009052 }
9053 /*
9054 * Skip the VM-exit information fields if they are read-only.
9055 */
9056 if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
9057 break;
Abel Gordon16f5b902013-04-18 14:38:25 +03009058 }
9059
9060 vmcs_clear(shadow_vmcs);
9061 vmcs_load(vmx->loaded_vmcs->vmcs);
Jan Kiszka282da872014-10-08 18:05:39 +02009062
9063 preempt_enable();
Abel Gordon16f5b902013-04-18 14:38:25 +03009064}
9065
Abel Gordonc3114422013-04-18 14:38:55 +03009066static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
9067{
Paolo Bonzini44900ba2017-12-13 12:58:02 +01009068 const u16 *fields[] = {
Mathias Krausec2bae892013-06-26 20:36:21 +02009069 shadow_read_write_fields,
9070 shadow_read_only_fields
Abel Gordonc3114422013-04-18 14:38:55 +03009071 };
Mathias Krausec2bae892013-06-26 20:36:21 +02009072 const int max_fields[] = {
Abel Gordonc3114422013-04-18 14:38:55 +03009073 max_shadow_read_write_fields,
9074 max_shadow_read_only_fields
9075 };
9076 int i, q;
9077 unsigned long field;
9078 u64 field_value = 0;
Jim Mattson355f4fb2016-10-28 08:29:39 -07009079 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Abel Gordonc3114422013-04-18 14:38:55 +03009080
9081 vmcs_load(shadow_vmcs);
9082
Mathias Krausec2bae892013-06-26 20:36:21 +02009083 for (q = 0; q < ARRAY_SIZE(fields); q++) {
Abel Gordonc3114422013-04-18 14:38:55 +03009084 for (i = 0; i < max_fields[q]; i++) {
9085 field = fields[q][i];
Liran Alone2536742018-06-23 02:35:02 +03009086 vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value);
Paolo Bonzini44900ba2017-12-13 12:58:02 +01009087 __vmcs_writel(field, field_value);
Abel Gordonc3114422013-04-18 14:38:55 +03009088 }
9089 }
9090
9091 vmcs_clear(shadow_vmcs);
9092 vmcs_load(vmx->loaded_vmcs->vmcs);
9093}
9094
Nadav Har'El49f705c2011-05-25 23:08:30 +03009095static int handle_vmread(struct kvm_vcpu *vcpu)
9096{
9097 unsigned long field;
9098 u64 field_value;
9099 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9100 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9101 gva_t gva = 0;
Liran Alon6d894f42018-06-23 02:35:09 +03009102 struct vmcs12 *vmcs12;
Nadav Har'El49f705c2011-05-25 23:08:30 +03009103
Kyle Hueyeb277562016-11-29 12:40:39 -08009104 if (!nested_vmx_check_permission(vcpu))
Nadav Har'El49f705c2011-05-25 23:08:30 +03009105 return 1;
9106
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009107 if (to_vmx(vcpu)->nested.current_vmptr == -1ull)
9108 return nested_vmx_failInvalid(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -08009109
Liran Alon6d894f42018-06-23 02:35:09 +03009110 if (!is_guest_mode(vcpu))
9111 vmcs12 = get_vmcs12(vcpu);
9112 else {
9113 /*
9114 * When vmcs->vmcs_link_pointer is -1ull, any VMREAD
9115 * to shadowed-field sets the ALU flags for VMfailInvalid.
9116 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009117 if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
9118 return nested_vmx_failInvalid(vcpu);
Liran Alon6d894f42018-06-23 02:35:09 +03009119 vmcs12 = get_shadow_vmcs12(vcpu);
9120 }
9121
Nadav Har'El49f705c2011-05-25 23:08:30 +03009122 /* Decode instruction info and find the field to read */
Nadav Amit27e6fb52014-06-18 17:19:26 +03009123 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
Nadav Har'El49f705c2011-05-25 23:08:30 +03009124 /* Read the field, zero-extended to a u64 field_value */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009125 if (vmcs12_read_any(vmcs12, field, &field_value) < 0)
9126 return nested_vmx_failValid(vcpu,
9127 VMXERR_UNSUPPORTED_VMCS_COMPONENT);
9128
Nadav Har'El49f705c2011-05-25 23:08:30 +03009129 /*
9130 * Now copy part of this value to register or memory, as requested.
9131 * Note that the number of bits actually copied is 32 or 64 depending
9132 * on the guest's mode (32 or 64 bit), not on the given field's length.
9133 */
9134 if (vmx_instruction_info & (1u << 10)) {
Nadav Amit27e6fb52014-06-18 17:19:26 +03009135 kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
Nadav Har'El49f705c2011-05-25 23:08:30 +03009136 field_value);
9137 } else {
9138 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00009139 vmx_instruction_info, true, &gva))
Nadav Har'El49f705c2011-05-25 23:08:30 +03009140 return 1;
Felix Wilhelm727ba742018-06-11 09:43:44 +02009141 /* _system ok, nested_vmx_check_permission has verified cpl=0 */
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02009142 kvm_write_guest_virt_system(vcpu, gva, &field_value,
9143 (is_long_mode(vcpu) ? 8 : 4), NULL);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009144 }
9145
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009146 return nested_vmx_succeed(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009147}
9148
9149
9150static int handle_vmwrite(struct kvm_vcpu *vcpu)
9151{
9152 unsigned long field;
9153 gva_t gva;
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009154 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009155 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9156 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009157
Nadav Har'El49f705c2011-05-25 23:08:30 +03009158 /* The value to write might be 32 or 64 bits, depending on L1's long
9159 * mode, and eventually we need to write that into a field of several
9160 * possible lengths. The code below first zero-extends the value to 64
Adam Buchbinder6a6256f2016-02-23 15:34:30 -08009161 * bit (field_value), and then copies only the appropriate number of
Nadav Har'El49f705c2011-05-25 23:08:30 +03009162 * bits into the vmcs12 field.
9163 */
9164 u64 field_value = 0;
9165 struct x86_exception e;
Liran Alon6d894f42018-06-23 02:35:09 +03009166 struct vmcs12 *vmcs12;
Nadav Har'El49f705c2011-05-25 23:08:30 +03009167
Kyle Hueyeb277562016-11-29 12:40:39 -08009168 if (!nested_vmx_check_permission(vcpu))
Nadav Har'El49f705c2011-05-25 23:08:30 +03009169 return 1;
9170
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009171 if (vmx->nested.current_vmptr == -1ull)
9172 return nested_vmx_failInvalid(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -08009173
Nadav Har'El49f705c2011-05-25 23:08:30 +03009174 if (vmx_instruction_info & (1u << 10))
Nadav Amit27e6fb52014-06-18 17:19:26 +03009175 field_value = kvm_register_readl(vcpu,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009176 (((vmx_instruction_info) >> 3) & 0xf));
9177 else {
9178 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00009179 vmx_instruction_info, false, &gva))
Nadav Har'El49f705c2011-05-25 23:08:30 +03009180 return 1;
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02009181 if (kvm_read_guest_virt(vcpu, gva, &field_value,
9182 (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
Nadav Har'El49f705c2011-05-25 23:08:30 +03009183 kvm_inject_page_fault(vcpu, &e);
9184 return 1;
9185 }
9186 }
9187
9188
Nadav Amit27e6fb52014-06-18 17:19:26 +03009189 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
Jim Mattsonf4160e42018-05-29 09:11:33 -07009190 /*
9191 * If the vCPU supports "VMWRITE to any supported field in the
9192 * VMCS," then the "read-only" fields are actually read/write.
9193 */
9194 if (vmcs_field_readonly(field) &&
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009195 !nested_cpu_has_vmwrite_any_field(vcpu))
9196 return nested_vmx_failValid(vcpu,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009197 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009198
Liran Alon6d894f42018-06-23 02:35:09 +03009199 if (!is_guest_mode(vcpu))
9200 vmcs12 = get_vmcs12(vcpu);
9201 else {
9202 /*
9203 * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE
9204 * to shadowed-field sets the ALU flags for VMfailInvalid.
9205 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009206 if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
9207 return nested_vmx_failInvalid(vcpu);
Liran Alon6d894f42018-06-23 02:35:09 +03009208 vmcs12 = get_shadow_vmcs12(vcpu);
Liran Alon6d894f42018-06-23 02:35:09 +03009209 }
9210
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009211 if (vmcs12_write_any(vmcs12, field, field_value) < 0)
9212 return nested_vmx_failValid(vcpu,
9213 VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009214
Liran Alon6d894f42018-06-23 02:35:09 +03009215 /*
9216 * Do not track vmcs12 dirty-state if in guest-mode
9217 * as we actually dirty shadow vmcs12 instead of vmcs12.
9218 */
9219 if (!is_guest_mode(vcpu)) {
9220 switch (field) {
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009221#define SHADOW_FIELD_RW(x) case x:
9222#include "vmx_shadow_fields.h"
Liran Alon6d894f42018-06-23 02:35:09 +03009223 /*
9224 * The fields that can be updated by L1 without a vmexit are
9225 * always updated in the vmcs02, the others go down the slow
9226 * path of prepare_vmcs02.
9227 */
9228 break;
9229 default:
9230 vmx->nested.dirty_vmcs12 = true;
9231 break;
9232 }
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009233 }
9234
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009235 return nested_vmx_succeed(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009236}
9237
Jim Mattsona8bc2842016-11-30 12:03:44 -08009238static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
9239{
9240 vmx->nested.current_vmptr = vmptr;
9241 if (enable_shadow_vmcs) {
9242 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
9243 SECONDARY_EXEC_SHADOW_VMCS);
9244 vmcs_write64(VMCS_LINK_POINTER,
9245 __pa(vmx->vmcs01.shadow_vmcs));
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +02009246 vmx->nested.need_vmcs12_sync = true;
Jim Mattsona8bc2842016-11-30 12:03:44 -08009247 }
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009248 vmx->nested.dirty_vmcs12 = true;
Jim Mattsona8bc2842016-11-30 12:03:44 -08009249}
9250
Nadav Har'El63846662011-05-25 23:07:29 +03009251/* Emulate the VMPTRLD instruction */
9252static int handle_vmptrld(struct kvm_vcpu *vcpu)
9253{
9254 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03009255 gpa_t vmptr;
Nadav Har'El63846662011-05-25 23:07:29 +03009256
9257 if (!nested_vmx_check_permission(vcpu))
9258 return 1;
9259
Radim Krčmářcbf71272017-05-19 15:48:51 +02009260 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Nadav Har'El63846662011-05-25 23:07:29 +03009261 return 1;
9262
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009263 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
9264 return nested_vmx_failValid(vcpu,
9265 VMXERR_VMPTRLD_INVALID_ADDRESS);
Radim Krčmářcbf71272017-05-19 15:48:51 +02009266
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009267 if (vmptr == vmx->nested.vmxon_ptr)
9268 return nested_vmx_failValid(vcpu,
9269 VMXERR_VMPTRLD_VMXON_POINTER);
Radim Krčmářcbf71272017-05-19 15:48:51 +02009270
Nadav Har'El63846662011-05-25 23:07:29 +03009271 if (vmx->nested.current_vmptr != vmptr) {
9272 struct vmcs12 *new_vmcs12;
9273 struct page *page;
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009274 page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009275 if (is_error_page(page))
9276 return nested_vmx_failInvalid(vcpu);
9277
Nadav Har'El63846662011-05-25 23:07:29 +03009278 new_vmcs12 = kmap(page);
Liran Alon392b2f22018-06-23 02:35:01 +03009279 if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
Liran Alonfa97d7d2018-07-18 14:07:59 +02009280 (new_vmcs12->hdr.shadow_vmcs &&
9281 !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
Nadav Har'El63846662011-05-25 23:07:29 +03009282 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02009283 kvm_release_page_clean(page);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009284 return nested_vmx_failValid(vcpu,
Nadav Har'El63846662011-05-25 23:07:29 +03009285 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
Nadav Har'El63846662011-05-25 23:07:29 +03009286 }
Nadav Har'El63846662011-05-25 23:07:29 +03009287
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02009288 nested_release_vmcs12(vcpu);
9289
David Matlack4f2777b2016-07-13 17:16:37 -07009290 /*
9291 * Load VMCS12 from guest memory since it is not already
9292 * cached.
9293 */
Paolo Bonzini9f744c52017-07-27 15:54:46 +02009294 memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
9295 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02009296 kvm_release_page_clean(page);
Paolo Bonzini9f744c52017-07-27 15:54:46 +02009297
Jim Mattsona8bc2842016-11-30 12:03:44 -08009298 set_current_vmptr(vmx, vmptr);
Nadav Har'El63846662011-05-25 23:07:29 +03009299 }
9300
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009301 return nested_vmx_succeed(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03009302}
9303
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009304/* Emulate the VMPTRST instruction */
9305static int handle_vmptrst(struct kvm_vcpu *vcpu)
9306{
Sean Christopherson0a06d422018-07-19 10:31:00 -07009307 unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION);
9308 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9309 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009310 struct x86_exception e;
Sean Christopherson0a06d422018-07-19 10:31:00 -07009311 gva_t gva;
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009312
9313 if (!nested_vmx_check_permission(vcpu))
9314 return 1;
9315
Sean Christopherson0a06d422018-07-19 10:31:00 -07009316 if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009317 return 1;
Felix Wilhelm727ba742018-06-11 09:43:44 +02009318 /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
Sean Christopherson0a06d422018-07-19 10:31:00 -07009319 if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
9320 sizeof(gpa_t), &e)) {
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009321 kvm_inject_page_fault(vcpu, &e);
9322 return 1;
9323 }
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009324 return nested_vmx_succeed(vcpu);
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009325}
9326
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009327/* Emulate the INVEPT instruction */
9328static int handle_invept(struct kvm_vcpu *vcpu)
9329{
Wincy Vanb9c237b2015-02-03 23:56:30 +08009330 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009331 u32 vmx_instruction_info, types;
9332 unsigned long type;
9333 gva_t gva;
9334 struct x86_exception e;
9335 struct {
9336 u64 eptp, gpa;
9337 } operand;
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009338
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009339 if (!(vmx->nested.msrs.secondary_ctls_high &
Wincy Vanb9c237b2015-02-03 23:56:30 +08009340 SECONDARY_EXEC_ENABLE_EPT) ||
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009341 !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009342 kvm_queue_exception(vcpu, UD_VECTOR);
9343 return 1;
9344 }
9345
9346 if (!nested_vmx_check_permission(vcpu))
9347 return 1;
9348
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009349 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Nadav Amit27e6fb52014-06-18 17:19:26 +03009350 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009351
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009352 types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009353
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009354 if (type >= 32 || !(types & (1 << type)))
9355 return nested_vmx_failValid(vcpu,
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009356 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009357
9358 /* According to the Intel VMX instruction reference, the memory
9359 * operand is read even if it isn't needed (e.g., for type==global)
9360 */
9361 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00009362 vmx_instruction_info, false, &gva))
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009363 return 1;
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02009364 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009365 kvm_inject_page_fault(vcpu, &e);
9366 return 1;
9367 }
9368
9369 switch (type) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009370 case VMX_EPT_EXTENT_GLOBAL:
Bandan Das45e11812016-08-02 16:32:36 -04009371 /*
9372 * TODO: track mappings and invalidate
9373 * single context requests appropriately
9374 */
9375 case VMX_EPT_EXTENT_CONTEXT:
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009376 kvm_mmu_sync_roots(vcpu);
Liang Chen77c39132014-09-18 12:38:37 -04009377 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009378 break;
9379 default:
9380 BUG_ON(1);
9381 break;
9382 }
9383
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009384 return nested_vmx_succeed(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009385}
9386
Liran Alon3d5bdae2018-10-08 23:42:18 +03009387static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
9388{
9389 struct vcpu_vmx *vmx = to_vmx(vcpu);
9390
9391 return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
9392}
9393
Petr Matouseka642fc32014-09-23 20:22:30 +02009394static int handle_invvpid(struct kvm_vcpu *vcpu)
9395{
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009396 struct vcpu_vmx *vmx = to_vmx(vcpu);
9397 u32 vmx_instruction_info;
9398 unsigned long type, types;
9399 gva_t gva;
9400 struct x86_exception e;
Jim Mattson40352602017-06-28 09:37:37 -07009401 struct {
9402 u64 vpid;
9403 u64 gla;
9404 } operand;
Liran Alon3d5bdae2018-10-08 23:42:18 +03009405 u16 vpid02;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009406
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009407 if (!(vmx->nested.msrs.secondary_ctls_high &
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009408 SECONDARY_EXEC_ENABLE_VPID) ||
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009409 !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009410 kvm_queue_exception(vcpu, UD_VECTOR);
9411 return 1;
9412 }
9413
9414 if (!nested_vmx_check_permission(vcpu))
9415 return 1;
9416
9417 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9418 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
9419
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009420 types = (vmx->nested.msrs.vpid_caps &
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009421 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009422
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009423 if (type >= 32 || !(types & (1 << type)))
9424 return nested_vmx_failValid(vcpu,
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009425 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009426
9427 /* according to the intel vmx instruction reference, the memory
9428 * operand is read even if it isn't needed (e.g., for type==global)
9429 */
9430 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
9431 vmx_instruction_info, false, &gva))
9432 return 1;
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02009433 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009434 kvm_inject_page_fault(vcpu, &e);
9435 return 1;
9436 }
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009437 if (operand.vpid >> 16)
9438 return nested_vmx_failValid(vcpu,
Jim Mattson40352602017-06-28 09:37:37 -07009439 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009440
Liran Alon3d5bdae2018-10-08 23:42:18 +03009441 vpid02 = nested_get_vpid02(vcpu);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009442 switch (type) {
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009443 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
Liran Aloncd9a4912018-05-22 17:16:15 +03009444 if (!operand.vpid ||
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009445 is_noncanonical_address(operand.gla, vcpu))
9446 return nested_vmx_failValid(vcpu,
Jim Mattson40352602017-06-28 09:37:37 -07009447 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Liran Alon3d5bdae2018-10-08 23:42:18 +03009448 if (cpu_has_vmx_invvpid_individual_addr()) {
Liran Aloncd9a4912018-05-22 17:16:15 +03009449 __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
Liran Alon3d5bdae2018-10-08 23:42:18 +03009450 vpid02, operand.gla);
Liran Aloncd9a4912018-05-22 17:16:15 +03009451 } else
Liran Alon327c0722018-10-08 23:42:19 +03009452 __vmx_flush_tlb(vcpu, vpid02, false);
Liran Aloncd9a4912018-05-22 17:16:15 +03009453 break;
Paolo Bonzinief697a72016-03-18 16:58:38 +01009454 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009455 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009456 if (!operand.vpid)
9457 return nested_vmx_failValid(vcpu,
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009458 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Liran Alon327c0722018-10-08 23:42:19 +03009459 __vmx_flush_tlb(vcpu, vpid02, false);
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009460 break;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009461 case VMX_VPID_EXTENT_ALL_CONTEXT:
Liran Alon327c0722018-10-08 23:42:19 +03009462 __vmx_flush_tlb(vcpu, vpid02, false);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009463 break;
9464 default:
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009465 WARN_ON_ONCE(1);
Kyle Huey6affcbe2016-11-29 12:40:40 -08009466 return kvm_skip_emulated_instruction(vcpu);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009467 }
9468
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009469 return nested_vmx_succeed(vcpu);
Petr Matouseka642fc32014-09-23 20:22:30 +02009470}
9471
Junaid Shahideb4b2482018-06-27 14:59:14 -07009472static int handle_invpcid(struct kvm_vcpu *vcpu)
9473{
9474 u32 vmx_instruction_info;
9475 unsigned long type;
9476 bool pcid_enabled;
9477 gva_t gva;
9478 struct x86_exception e;
Junaid Shahidb94742c2018-06-27 14:59:20 -07009479 unsigned i;
9480 unsigned long roots_to_free = 0;
Junaid Shahideb4b2482018-06-27 14:59:14 -07009481 struct {
9482 u64 pcid;
9483 u64 gla;
9484 } operand;
9485
9486 if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
9487 kvm_queue_exception(vcpu, UD_VECTOR);
9488 return 1;
9489 }
9490
9491 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9492 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
9493
9494 if (type > 3) {
9495 kvm_inject_gp(vcpu, 0);
9496 return 1;
9497 }
9498
9499 /* According to the Intel instruction reference, the memory operand
9500 * is read even if it isn't needed (e.g., for type==all)
9501 */
9502 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
9503 vmx_instruction_info, false, &gva))
9504 return 1;
9505
9506 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
9507 kvm_inject_page_fault(vcpu, &e);
9508 return 1;
9509 }
9510
9511 if (operand.pcid >> 12 != 0) {
9512 kvm_inject_gp(vcpu, 0);
9513 return 1;
9514 }
9515
9516 pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
9517
9518 switch (type) {
9519 case INVPCID_TYPE_INDIV_ADDR:
9520 if ((!pcid_enabled && (operand.pcid != 0)) ||
9521 is_noncanonical_address(operand.gla, vcpu)) {
9522 kvm_inject_gp(vcpu, 0);
9523 return 1;
9524 }
9525 kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
9526 return kvm_skip_emulated_instruction(vcpu);
9527
9528 case INVPCID_TYPE_SINGLE_CTXT:
9529 if (!pcid_enabled && (operand.pcid != 0)) {
9530 kvm_inject_gp(vcpu, 0);
9531 return 1;
9532 }
9533
9534 if (kvm_get_active_pcid(vcpu) == operand.pcid) {
9535 kvm_mmu_sync_roots(vcpu);
9536 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
9537 }
9538
Junaid Shahidb94742c2018-06-27 14:59:20 -07009539 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +02009540 if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
Junaid Shahidb94742c2018-06-27 14:59:20 -07009541 == operand.pcid)
9542 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
Junaid Shahidade61e22018-06-27 14:59:15 -07009543
Vitaly Kuznetsov6a82cd12018-10-08 21:28:07 +02009544 kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
Junaid Shahideb4b2482018-06-27 14:59:14 -07009545 /*
Junaid Shahidb94742c2018-06-27 14:59:20 -07009546 * If neither the current cr3 nor any of the prev_roots use the
Junaid Shahidade61e22018-06-27 14:59:15 -07009547 * given PCID, then nothing needs to be done here because a
9548 * resync will happen anyway before switching to any other CR3.
Junaid Shahideb4b2482018-06-27 14:59:14 -07009549 */
9550
9551 return kvm_skip_emulated_instruction(vcpu);
9552
9553 case INVPCID_TYPE_ALL_NON_GLOBAL:
9554 /*
9555 * Currently, KVM doesn't mark global entries in the shadow
9556 * page tables, so a non-global flush just degenerates to a
9557 * global flush. If needed, we could optimize this later by
9558 * keeping track of global entries in shadow page tables.
9559 */
9560
9561 /* fall-through */
9562 case INVPCID_TYPE_ALL_INCL_GLOBAL:
9563 kvm_mmu_unload(vcpu);
9564 return kvm_skip_emulated_instruction(vcpu);
9565
9566 default:
9567 BUG(); /* We have already checked above that type <= 3 */
9568 }
9569}
9570
Kai Huang843e4332015-01-28 10:54:28 +08009571static int handle_pml_full(struct kvm_vcpu *vcpu)
9572{
9573 unsigned long exit_qualification;
9574
9575 trace_kvm_pml_full(vcpu->vcpu_id);
9576
9577 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9578
9579 /*
9580 * PML buffer FULL happened while executing iret from NMI,
9581 * "blocked by NMI" bit has to be set before next VM entry.
9582 */
9583 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01009584 enable_vnmi &&
Kai Huang843e4332015-01-28 10:54:28 +08009585 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
9586 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
9587 GUEST_INTR_STATE_NMI);
9588
9589 /*
9590 * PML buffer already flushed at beginning of VMEXIT. Nothing to do
9591 * here.., and there's no userspace involvement needed for PML.
9592 */
9593 return 1;
9594}
9595
Yunhong Jiang64672c92016-06-13 14:19:59 -07009596static int handle_preemption_timer(struct kvm_vcpu *vcpu)
9597{
Sean Christophersond264ee02018-08-27 15:21:12 -07009598 if (!to_vmx(vcpu)->req_immediate_exit)
9599 kvm_lapic_expired_hv_timer(vcpu);
Yunhong Jiang64672c92016-06-13 14:19:59 -07009600 return 1;
9601}
9602
Bandan Das41ab9372017-08-03 15:54:43 -04009603static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
9604{
9605 struct vcpu_vmx *vmx = to_vmx(vcpu);
Bandan Das41ab9372017-08-03 15:54:43 -04009606 int maxphyaddr = cpuid_maxphyaddr(vcpu);
9607
9608 /* Check for memory type validity */
David Hildenbrandbb97a012017-08-10 23:15:28 +02009609 switch (address & VMX_EPTP_MT_MASK) {
9610 case VMX_EPTP_MT_UC:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009611 if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
Bandan Das41ab9372017-08-03 15:54:43 -04009612 return false;
9613 break;
David Hildenbrandbb97a012017-08-10 23:15:28 +02009614 case VMX_EPTP_MT_WB:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009615 if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
Bandan Das41ab9372017-08-03 15:54:43 -04009616 return false;
9617 break;
9618 default:
9619 return false;
9620 }
9621
David Hildenbrandbb97a012017-08-10 23:15:28 +02009622 /* only 4 levels page-walk length are valid */
9623 if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)
Bandan Das41ab9372017-08-03 15:54:43 -04009624 return false;
9625
9626 /* Reserved bits should not be set */
9627 if (address >> maxphyaddr || ((address >> 7) & 0x1f))
9628 return false;
9629
9630 /* AD, if set, should be supported */
David Hildenbrandbb97a012017-08-10 23:15:28 +02009631 if (address & VMX_EPTP_AD_ENABLE_BIT) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009632 if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
Bandan Das41ab9372017-08-03 15:54:43 -04009633 return false;
9634 }
9635
9636 return true;
9637}
9638
9639static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
9640 struct vmcs12 *vmcs12)
9641{
9642 u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
9643 u64 address;
9644 bool accessed_dirty;
9645 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
9646
9647 if (!nested_cpu_has_eptp_switching(vmcs12) ||
9648 !nested_cpu_has_ept(vmcs12))
9649 return 1;
9650
9651 if (index >= VMFUNC_EPTP_ENTRIES)
9652 return 1;
9653
9654
9655 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
9656 &address, index * 8, 8))
9657 return 1;
9658
David Hildenbrandbb97a012017-08-10 23:15:28 +02009659 accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT);
Bandan Das41ab9372017-08-03 15:54:43 -04009660
9661 /*
9662 * If the (L2) guest does a vmfunc to the currently
9663 * active ept pointer, we don't have to do anything else
9664 */
9665 if (vmcs12->ept_pointer != address) {
9666 if (!valid_ept_address(vcpu, address))
9667 return 1;
9668
9669 kvm_mmu_unload(vcpu);
9670 mmu->ept_ad = accessed_dirty;
Vitaly Kuznetsov36d95942018-10-08 21:28:10 +02009671 mmu->mmu_role.base.ad_disabled = !accessed_dirty;
Bandan Das41ab9372017-08-03 15:54:43 -04009672 vmcs12->ept_pointer = address;
9673 /*
9674 * TODO: Check what's the correct approach in case
9675 * mmu reload fails. Currently, we just let the next
9676 * reload potentially fail
9677 */
9678 kvm_mmu_reload(vcpu);
9679 }
9680
9681 return 0;
9682}
9683
Bandan Das2a499e42017-08-03 15:54:41 -04009684static int handle_vmfunc(struct kvm_vcpu *vcpu)
9685{
Bandan Das27c42a12017-08-03 15:54:42 -04009686 struct vcpu_vmx *vmx = to_vmx(vcpu);
9687 struct vmcs12 *vmcs12;
9688 u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
9689
9690 /*
9691 * VMFUNC is only supported for nested guests, but we always enable the
9692 * secondary control for simplicity; for non-nested mode, fake that we
9693 * didn't by injecting #UD.
9694 */
9695 if (!is_guest_mode(vcpu)) {
9696 kvm_queue_exception(vcpu, UD_VECTOR);
9697 return 1;
9698 }
9699
9700 vmcs12 = get_vmcs12(vcpu);
9701 if ((vmcs12->vm_function_control & (1 << function)) == 0)
9702 goto fail;
Bandan Das41ab9372017-08-03 15:54:43 -04009703
9704 switch (function) {
9705 case 0:
9706 if (nested_vmx_eptp_switching(vcpu, vmcs12))
9707 goto fail;
9708 break;
9709 default:
9710 goto fail;
9711 }
9712 return kvm_skip_emulated_instruction(vcpu);
Bandan Das27c42a12017-08-03 15:54:42 -04009713
9714fail:
9715 nested_vmx_vmexit(vcpu, vmx->exit_reason,
9716 vmcs_read32(VM_EXIT_INTR_INFO),
9717 vmcs_readl(EXIT_QUALIFICATION));
Bandan Das2a499e42017-08-03 15:54:41 -04009718 return 1;
9719}
9720
Sean Christopherson0b665d32018-08-14 09:33:34 -07009721static int handle_encls(struct kvm_vcpu *vcpu)
9722{
9723 /*
9724 * SGX virtualization is not yet supported. There is no software
9725 * enable bit for SGX, so we have to trap ENCLS and inject a #UD
9726 * to prevent the guest from executing ENCLS.
9727 */
9728 kvm_queue_exception(vcpu, UD_VECTOR);
9729 return 1;
9730}
9731
Nadav Har'El0140cae2011-05-25 23:06:28 +03009732/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08009733 * The exit handlers return 1 if the exit was handled fully and guest execution
9734 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
9735 * to be done to userspace and return 0.
9736 */
Mathias Krause772e0312012-08-30 01:30:19 +02009737static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
Avi Kivity6aa8b732006-12-10 02:21:36 -08009738 [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
9739 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
Avi Kivity988ad742007-02-12 00:54:36 -08009740 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
Sheng Yangf08864b2008-05-15 18:23:25 +08009741 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
Avi Kivity6aa8b732006-12-10 02:21:36 -08009742 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
Avi Kivity6aa8b732006-12-10 02:21:36 -08009743 [EXIT_REASON_CR_ACCESS] = handle_cr,
9744 [EXIT_REASON_DR_ACCESS] = handle_dr,
9745 [EXIT_REASON_CPUID] = handle_cpuid,
9746 [EXIT_REASON_MSR_READ] = handle_rdmsr,
9747 [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
9748 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
9749 [EXIT_REASON_HLT] = handle_halt,
Gleb Natapovec25d5e2010-11-01 15:35:01 +02009750 [EXIT_REASON_INVD] = handle_invd,
Marcelo Tosattia7052892008-09-23 13:18:35 -03009751 [EXIT_REASON_INVLPG] = handle_invlpg,
Avi Kivityfee84b02011-11-10 14:57:25 +02009752 [EXIT_REASON_RDPMC] = handle_rdpmc,
Ingo Molnarc21415e2007-02-19 14:37:47 +02009753 [EXIT_REASON_VMCALL] = handle_vmcall,
Nadav Har'El27d6c862011-05-25 23:06:59 +03009754 [EXIT_REASON_VMCLEAR] = handle_vmclear,
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03009755 [EXIT_REASON_VMLAUNCH] = handle_vmlaunch,
Nadav Har'El63846662011-05-25 23:07:29 +03009756 [EXIT_REASON_VMPTRLD] = handle_vmptrld,
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009757 [EXIT_REASON_VMPTRST] = handle_vmptrst,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009758 [EXIT_REASON_VMREAD] = handle_vmread,
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03009759 [EXIT_REASON_VMRESUME] = handle_vmresume,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009760 [EXIT_REASON_VMWRITE] = handle_vmwrite,
Nadav Har'Elec378ae2011-05-25 23:02:54 +03009761 [EXIT_REASON_VMOFF] = handle_vmoff,
9762 [EXIT_REASON_VMON] = handle_vmon,
Sheng Yangf78e0e22007-10-29 09:40:42 +08009763 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
9764 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
Yang Zhang83d4c282013-01-25 10:18:49 +08009765 [EXIT_REASON_APIC_WRITE] = handle_apic_write,
Yang Zhangc7c9c562013-01-25 10:18:51 +08009766 [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
Eddie Donge5edaa02007-11-11 12:28:35 +02009767 [EXIT_REASON_WBINVD] = handle_wbinvd,
Dexuan Cui2acf9232010-06-10 11:27:12 +08009768 [EXIT_REASON_XSETBV] = handle_xsetbv,
Izik Eidus37817f22008-03-24 23:14:53 +02009769 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
Andi Kleena0861c02009-06-08 17:37:09 +08009770 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
Paolo Bonzini0367f202016-07-12 10:44:55 +02009771 [EXIT_REASON_GDTR_IDTR] = handle_desc,
9772 [EXIT_REASON_LDTR_TR] = handle_desc,
Marcelo Tosatti68f89402009-06-11 12:07:43 -03009773 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
9774 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08009775 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04009776 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03009777 [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04009778 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009779 [EXIT_REASON_INVEPT] = handle_invept,
Petr Matouseka642fc32014-09-23 20:22:30 +02009780 [EXIT_REASON_INVVPID] = handle_invvpid,
Jim Mattson45ec3682017-08-23 16:32:04 -07009781 [EXIT_REASON_RDRAND] = handle_invalid_op,
Jim Mattson75f4fc82017-08-23 16:32:03 -07009782 [EXIT_REASON_RDSEED] = handle_invalid_op,
Wanpeng Lif53cd632014-12-02 19:14:58 +08009783 [EXIT_REASON_XSAVES] = handle_xsaves,
9784 [EXIT_REASON_XRSTORS] = handle_xrstors,
Kai Huang843e4332015-01-28 10:54:28 +08009785 [EXIT_REASON_PML_FULL] = handle_pml_full,
Junaid Shahideb4b2482018-06-27 14:59:14 -07009786 [EXIT_REASON_INVPCID] = handle_invpcid,
Bandan Das2a499e42017-08-03 15:54:41 -04009787 [EXIT_REASON_VMFUNC] = handle_vmfunc,
Yunhong Jiang64672c92016-06-13 14:19:59 -07009788 [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
Sean Christopherson0b665d32018-08-14 09:33:34 -07009789 [EXIT_REASON_ENCLS] = handle_encls,
Avi Kivity6aa8b732006-12-10 02:21:36 -08009790};
9791
9792static const int kvm_vmx_max_exit_handlers =
Robert P. J. Day50a34852007-06-03 13:35:29 -04009793 ARRAY_SIZE(kvm_vmx_exit_handlers);
Avi Kivity6aa8b732006-12-10 02:21:36 -08009794
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009795static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
9796 struct vmcs12 *vmcs12)
9797{
9798 unsigned long exit_qualification;
9799 gpa_t bitmap, last_bitmap;
9800 unsigned int port;
9801 int size;
9802 u8 b;
9803
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009804 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
Zhihui Zhang2f0a6392013-12-30 15:56:29 -05009805 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009806
9807 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9808
9809 port = exit_qualification >> 16;
9810 size = (exit_qualification & 7) + 1;
9811
9812 last_bitmap = (gpa_t)-1;
9813 b = -1;
9814
9815 while (size > 0) {
9816 if (port < 0x8000)
9817 bitmap = vmcs12->io_bitmap_a;
9818 else if (port < 0x10000)
9819 bitmap = vmcs12->io_bitmap_b;
9820 else
Joe Perches1d804d02015-03-30 16:46:09 -07009821 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009822 bitmap += (port & 0x7fff) / 8;
9823
9824 if (last_bitmap != bitmap)
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009825 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
Joe Perches1d804d02015-03-30 16:46:09 -07009826 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009827 if (b & (1 << (port & 7)))
Joe Perches1d804d02015-03-30 16:46:09 -07009828 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009829
9830 port++;
9831 size--;
9832 last_bitmap = bitmap;
9833 }
9834
Joe Perches1d804d02015-03-30 16:46:09 -07009835 return false;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009836}
9837
Nadav Har'El644d7112011-05-25 23:12:35 +03009838/*
9839 * Return 1 if we should exit from L2 to L1 to handle an MSR access access,
9840 * rather than handle it ourselves in L0. I.e., check whether L1 expressed
9841 * disinterest in the current event (read or write a specific MSR) by using an
9842 * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
9843 */
9844static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
9845 struct vmcs12 *vmcs12, u32 exit_reason)
9846{
9847 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
9848 gpa_t bitmap;
9849
Jan Kiszkacbd29cb2013-02-11 12:19:28 +01009850 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
Joe Perches1d804d02015-03-30 16:46:09 -07009851 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009852
9853 /*
9854 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
9855 * for the four combinations of read/write and low/high MSR numbers.
9856 * First we need to figure out which of the four to use:
9857 */
9858 bitmap = vmcs12->msr_bitmap;
9859 if (exit_reason == EXIT_REASON_MSR_WRITE)
9860 bitmap += 2048;
9861 if (msr_index >= 0xc0000000) {
9862 msr_index -= 0xc0000000;
9863 bitmap += 1024;
9864 }
9865
9866 /* Then read the msr_index'th bit from this bitmap: */
9867 if (msr_index < 1024*8) {
9868 unsigned char b;
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009869 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
Joe Perches1d804d02015-03-30 16:46:09 -07009870 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009871 return 1 & (b >> (msr_index & 7));
9872 } else
Joe Perches1d804d02015-03-30 16:46:09 -07009873 return true; /* let L1 handle the wrong parameter */
Nadav Har'El644d7112011-05-25 23:12:35 +03009874}
9875
9876/*
9877 * Return 1 if we should exit from L2 to L1 to handle a CR access exit,
9878 * rather than handle it ourselves in L0. I.e., check if L1 wanted to
9879 * intercept (via guest_host_mask etc.) the current event.
9880 */
9881static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
9882 struct vmcs12 *vmcs12)
9883{
9884 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9885 int cr = exit_qualification & 15;
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02009886 int reg;
9887 unsigned long val;
Nadav Har'El644d7112011-05-25 23:12:35 +03009888
9889 switch ((exit_qualification >> 4) & 3) {
9890 case 0: /* mov to cr */
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02009891 reg = (exit_qualification >> 8) & 15;
9892 val = kvm_register_readl(vcpu, reg);
Nadav Har'El644d7112011-05-25 23:12:35 +03009893 switch (cr) {
9894 case 0:
9895 if (vmcs12->cr0_guest_host_mask &
9896 (val ^ vmcs12->cr0_read_shadow))
Joe Perches1d804d02015-03-30 16:46:09 -07009897 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009898 break;
9899 case 3:
9900 if ((vmcs12->cr3_target_count >= 1 &&
9901 vmcs12->cr3_target_value0 == val) ||
9902 (vmcs12->cr3_target_count >= 2 &&
9903 vmcs12->cr3_target_value1 == val) ||
9904 (vmcs12->cr3_target_count >= 3 &&
9905 vmcs12->cr3_target_value2 == val) ||
9906 (vmcs12->cr3_target_count >= 4 &&
9907 vmcs12->cr3_target_value3 == val))
Joe Perches1d804d02015-03-30 16:46:09 -07009908 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009909 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
Joe Perches1d804d02015-03-30 16:46:09 -07009910 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009911 break;
9912 case 4:
9913 if (vmcs12->cr4_guest_host_mask &
9914 (vmcs12->cr4_read_shadow ^ val))
Joe Perches1d804d02015-03-30 16:46:09 -07009915 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009916 break;
9917 case 8:
9918 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
Joe Perches1d804d02015-03-30 16:46:09 -07009919 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009920 break;
9921 }
9922 break;
9923 case 2: /* clts */
9924 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
9925 (vmcs12->cr0_read_shadow & X86_CR0_TS))
Joe Perches1d804d02015-03-30 16:46:09 -07009926 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009927 break;
9928 case 1: /* mov from cr */
9929 switch (cr) {
9930 case 3:
9931 if (vmcs12->cpu_based_vm_exec_control &
9932 CPU_BASED_CR3_STORE_EXITING)
Joe Perches1d804d02015-03-30 16:46:09 -07009933 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009934 break;
9935 case 8:
9936 if (vmcs12->cpu_based_vm_exec_control &
9937 CPU_BASED_CR8_STORE_EXITING)
Joe Perches1d804d02015-03-30 16:46:09 -07009938 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009939 break;
9940 }
9941 break;
9942 case 3: /* lmsw */
9943 /*
9944 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
9945 * cr0. Other attempted changes are ignored, with no exit.
9946 */
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02009947 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
Nadav Har'El644d7112011-05-25 23:12:35 +03009948 if (vmcs12->cr0_guest_host_mask & 0xe &
9949 (val ^ vmcs12->cr0_read_shadow))
Joe Perches1d804d02015-03-30 16:46:09 -07009950 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009951 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
9952 !(vmcs12->cr0_read_shadow & 0x1) &&
9953 (val & 0x1))
Joe Perches1d804d02015-03-30 16:46:09 -07009954 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009955 break;
9956 }
Joe Perches1d804d02015-03-30 16:46:09 -07009957 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009958}
9959
Liran Alona7cde482018-06-23 02:35:10 +03009960static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
9961 struct vmcs12 *vmcs12, gpa_t bitmap)
9962{
9963 u32 vmx_instruction_info;
9964 unsigned long field;
9965 u8 b;
9966
9967 if (!nested_cpu_has_shadow_vmcs(vmcs12))
9968 return true;
9969
9970 /* Decode instruction info and find the field to access */
9971 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9972 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
9973
9974 /* Out-of-range fields always cause a VM exit from L2 to L1 */
9975 if (field >> 15)
9976 return true;
9977
9978 if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
9979 return true;
9980
9981 return 1 & (b >> (field & 7));
9982}
9983
Nadav Har'El644d7112011-05-25 23:12:35 +03009984/*
9985 * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we
9986 * should handle it ourselves in L0 (and then continue L2). Only call this
9987 * when in is_guest_mode (L2).
9988 */
Paolo Bonzini7313c692017-07-27 10:31:25 +02009989static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
Nadav Har'El644d7112011-05-25 23:12:35 +03009990{
Nadav Har'El644d7112011-05-25 23:12:35 +03009991 u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
9992 struct vcpu_vmx *vmx = to_vmx(vcpu);
9993 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
9994
Jim Mattson4f350c62017-09-14 16:31:44 -07009995 if (vmx->nested.nested_run_pending)
9996 return false;
9997
9998 if (unlikely(vmx->fail)) {
9999 pr_info_ratelimited("%s failed vm entry %x\n", __func__,
10000 vmcs_read32(VM_INSTRUCTION_ERROR));
10001 return true;
10002 }
Jan Kiszka542060e2014-01-04 18:47:21 +010010003
David Matlackc9f04402017-08-01 14:00:40 -070010004 /*
10005 * The host physical addresses of some pages of guest memory
Jim Mattsonde3a0022017-11-27 17:22:25 -060010006 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
10007 * Page). The CPU may write to these pages via their host
10008 * physical address while L2 is running, bypassing any
10009 * address-translation-based dirty tracking (e.g. EPT write
10010 * protection).
David Matlackc9f04402017-08-01 14:00:40 -070010011 *
10012 * Mark them dirty on every exit from L2 to prevent them from
10013 * getting out of sync with dirty tracking.
10014 */
10015 nested_mark_vmcs12_pages_dirty(vcpu);
10016
Jim Mattson4f350c62017-09-14 16:31:44 -070010017 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
10018 vmcs_readl(EXIT_QUALIFICATION),
10019 vmx->idt_vectoring_info,
10020 intr_info,
10021 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
10022 KVM_ISA_VMX);
Nadav Har'El644d7112011-05-25 23:12:35 +030010023
10024 switch (exit_reason) {
10025 case EXIT_REASON_EXCEPTION_NMI:
Jim Mattsonef85b672016-12-12 11:01:37 -080010026 if (is_nmi(intr_info))
Joe Perches1d804d02015-03-30 16:46:09 -070010027 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010028 else if (is_page_fault(intr_info))
Wanpeng Li52a5c152017-07-13 18:30:42 -070010029 return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
Jan Kiszka6f054852016-02-09 20:15:18 +010010030 else if (is_debug(intr_info) &&
10031 vcpu->guest_debug &
10032 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
10033 return false;
10034 else if (is_breakpoint(intr_info) &&
10035 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
10036 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010037 return vmcs12->exception_bitmap &
10038 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
10039 case EXIT_REASON_EXTERNAL_INTERRUPT:
Joe Perches1d804d02015-03-30 16:46:09 -070010040 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010041 case EXIT_REASON_TRIPLE_FAULT:
Joe Perches1d804d02015-03-30 16:46:09 -070010042 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010043 case EXIT_REASON_PENDING_INTERRUPT:
Jan Kiszka3b656cf2013-04-14 12:12:45 +020010044 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
Nadav Har'El644d7112011-05-25 23:12:35 +030010045 case EXIT_REASON_NMI_WINDOW:
Jan Kiszka3b656cf2013-04-14 12:12:45 +020010046 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
Nadav Har'El644d7112011-05-25 23:12:35 +030010047 case EXIT_REASON_TASK_SWITCH:
Joe Perches1d804d02015-03-30 16:46:09 -070010048 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010049 case EXIT_REASON_CPUID:
Joe Perches1d804d02015-03-30 16:46:09 -070010050 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010051 case EXIT_REASON_HLT:
10052 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
10053 case EXIT_REASON_INVD:
Joe Perches1d804d02015-03-30 16:46:09 -070010054 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010055 case EXIT_REASON_INVLPG:
10056 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
10057 case EXIT_REASON_RDPMC:
10058 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
Paolo Bonzinia5f46452017-03-30 11:55:32 +020010059 case EXIT_REASON_RDRAND:
David Hildenbrand736fdf72017-08-24 20:51:37 +020010060 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
Paolo Bonzinia5f46452017-03-30 11:55:32 +020010061 case EXIT_REASON_RDSEED:
David Hildenbrand736fdf72017-08-24 20:51:37 +020010062 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
Jan Kiszkab3a2a902015-03-23 19:27:19 +010010063 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
Nadav Har'El644d7112011-05-25 23:12:35 +030010064 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
Liran Alona7cde482018-06-23 02:35:10 +030010065 case EXIT_REASON_VMREAD:
10066 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
10067 vmcs12->vmread_bitmap);
10068 case EXIT_REASON_VMWRITE:
10069 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
10070 vmcs12->vmwrite_bitmap);
Nadav Har'El644d7112011-05-25 23:12:35 +030010071 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
10072 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
Liran Alona7cde482018-06-23 02:35:10 +030010073 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
Nadav Har'El644d7112011-05-25 23:12:35 +030010074 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
Petr Matouseka642fc32014-09-23 20:22:30 +020010075 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
Nadav Har'El644d7112011-05-25 23:12:35 +030010076 /*
10077 * VMX instructions trap unconditionally. This allows L1 to
10078 * emulate them for its L2 guest, i.e., allows 3-level nesting!
10079 */
Joe Perches1d804d02015-03-30 16:46:09 -070010080 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010081 case EXIT_REASON_CR_ACCESS:
10082 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
10083 case EXIT_REASON_DR_ACCESS:
10084 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
10085 case EXIT_REASON_IO_INSTRUCTION:
Jan Kiszka908a7bd2013-02-18 11:21:16 +010010086 return nested_vmx_exit_handled_io(vcpu, vmcs12);
Paolo Bonzini1b073042016-10-25 16:06:30 +020010087 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
10088 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
Nadav Har'El644d7112011-05-25 23:12:35 +030010089 case EXIT_REASON_MSR_READ:
10090 case EXIT_REASON_MSR_WRITE:
10091 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
10092 case EXIT_REASON_INVALID_STATE:
Joe Perches1d804d02015-03-30 16:46:09 -070010093 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010094 case EXIT_REASON_MWAIT_INSTRUCTION:
10095 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
Mihai Donțu5f3d45e2015-07-05 20:08:57 +030010096 case EXIT_REASON_MONITOR_TRAP_FLAG:
10097 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
Nadav Har'El644d7112011-05-25 23:12:35 +030010098 case EXIT_REASON_MONITOR_INSTRUCTION:
10099 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
10100 case EXIT_REASON_PAUSE_INSTRUCTION:
10101 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
10102 nested_cpu_has2(vmcs12,
10103 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
10104 case EXIT_REASON_MCE_DURING_VMENTRY:
Joe Perches1d804d02015-03-30 16:46:09 -070010105 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010106 case EXIT_REASON_TPR_BELOW_THRESHOLD:
Wanpeng Lia7c0b072014-08-21 19:46:50 +080010107 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
Nadav Har'El644d7112011-05-25 23:12:35 +030010108 case EXIT_REASON_APIC_ACCESS:
Wincy Van82f0dd42015-02-03 23:57:18 +080010109 case EXIT_REASON_APIC_WRITE:
Wincy Van608406e2015-02-03 23:57:51 +080010110 case EXIT_REASON_EOI_INDUCED:
Jim Mattsonab5df312018-05-09 17:02:03 -040010111 /*
10112 * The controls for "virtualize APIC accesses," "APIC-
10113 * register virtualization," and "virtual-interrupt
10114 * delivery" only come from vmcs12.
10115 */
Joe Perches1d804d02015-03-30 16:46:09 -070010116 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010117 case EXIT_REASON_EPT_VIOLATION:
Nadav Har'El2b1be672013-08-05 11:07:19 +030010118 /*
10119 * L0 always deals with the EPT violation. If nested EPT is
10120 * used, and the nested mmu code discovers that the address is
10121 * missing in the guest EPT table (EPT12), the EPT violation
10122 * will be injected with nested_ept_inject_page_fault()
10123 */
Joe Perches1d804d02015-03-30 16:46:09 -070010124 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010125 case EXIT_REASON_EPT_MISCONFIG:
Nadav Har'El2b1be672013-08-05 11:07:19 +030010126 /*
10127 * L2 never uses directly L1's EPT, but rather L0's own EPT
10128 * table (shadow on EPT) or a merged EPT table that L0 built
10129 * (EPT on EPT). So any problems with the structure of the
10130 * table is L0's fault.
10131 */
Joe Perches1d804d02015-03-30 16:46:09 -070010132 return false;
Paolo Bonzini90a2db62017-07-27 13:22:13 +020010133 case EXIT_REASON_INVPCID:
10134 return
10135 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
10136 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
Nadav Har'El644d7112011-05-25 23:12:35 +030010137 case EXIT_REASON_WBINVD:
10138 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
10139 case EXIT_REASON_XSETBV:
Joe Perches1d804d02015-03-30 16:46:09 -070010140 return true;
Wanpeng Li81dc01f2014-12-04 19:11:07 +080010141 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
10142 /*
10143 * This should never happen, since it is not possible to
10144 * set XSS to a non-zero value---neither in L1 nor in L2.
10145 * If if it were, XSS would have to be checked against
10146 * the XSS exit bitmap in vmcs12.
10147 */
10148 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Wanpeng Li55123e32016-07-06 18:29:58 +080010149 case EXIT_REASON_PREEMPTION_TIMER:
10150 return false;
Ladi Prosekab007cc2017-03-31 10:19:26 +020010151 case EXIT_REASON_PML_FULL:
Bandan Das03efce62017-05-05 15:25:15 -040010152 /* We emulate PML support to L1. */
Ladi Prosekab007cc2017-03-31 10:19:26 +020010153 return false;
Bandan Das2a499e42017-08-03 15:54:41 -040010154 case EXIT_REASON_VMFUNC:
10155 /* VM functions are emulated through L2->L0 vmexits. */
10156 return false;
Sean Christopherson0b665d32018-08-14 09:33:34 -070010157 case EXIT_REASON_ENCLS:
10158 /* SGX is never exposed to L1 */
10159 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010160 default:
Joe Perches1d804d02015-03-30 16:46:09 -070010161 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010162 }
10163}
10164
Paolo Bonzini7313c692017-07-27 10:31:25 +020010165static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason)
10166{
10167 u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10168
10169 /*
10170 * At this point, the exit interruption info in exit_intr_info
10171 * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT
10172 * we need to query the in-kernel LAPIC.
10173 */
10174 WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT);
10175 if ((exit_intr_info &
10176 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
10177 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
10178 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10179 vmcs12->vm_exit_intr_error_code =
10180 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
10181 }
10182
10183 nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info,
10184 vmcs_readl(EXIT_QUALIFICATION));
10185 return 1;
10186}
10187
Avi Kivity586f9602010-11-18 13:09:54 +020010188static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
10189{
10190 *info1 = vmcs_readl(EXIT_QUALIFICATION);
10191 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
10192}
10193
Kai Huanga3eaa862015-11-04 13:46:05 +080010194static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
Kai Huang843e4332015-01-28 10:54:28 +080010195{
Kai Huanga3eaa862015-11-04 13:46:05 +080010196 if (vmx->pml_pg) {
10197 __free_page(vmx->pml_pg);
10198 vmx->pml_pg = NULL;
10199 }
Kai Huang843e4332015-01-28 10:54:28 +080010200}
10201
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010202static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
Kai Huang843e4332015-01-28 10:54:28 +080010203{
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010204 struct vcpu_vmx *vmx = to_vmx(vcpu);
Kai Huang843e4332015-01-28 10:54:28 +080010205 u64 *pml_buf;
10206 u16 pml_idx;
10207
10208 pml_idx = vmcs_read16(GUEST_PML_INDEX);
10209
10210 /* Do nothing if PML buffer is empty */
10211 if (pml_idx == (PML_ENTITY_NUM - 1))
10212 return;
10213
10214 /* PML index always points to next available PML buffer entity */
10215 if (pml_idx >= PML_ENTITY_NUM)
10216 pml_idx = 0;
10217 else
10218 pml_idx++;
10219
10220 pml_buf = page_address(vmx->pml_pg);
10221 for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
10222 u64 gpa;
10223
10224 gpa = pml_buf[pml_idx];
10225 WARN_ON(gpa & (PAGE_SIZE - 1));
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010226 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
Kai Huang843e4332015-01-28 10:54:28 +080010227 }
10228
10229 /* reset PML index */
10230 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
10231}
10232
10233/*
10234 * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
10235 * Called before reporting dirty_bitmap to userspace.
10236 */
10237static void kvm_flush_pml_buffers(struct kvm *kvm)
10238{
10239 int i;
10240 struct kvm_vcpu *vcpu;
10241 /*
10242 * We only need to kick vcpu out of guest mode here, as PML buffer
10243 * is flushed at beginning of all VMEXITs, and it's obvious that only
10244 * vcpus running in guest are possible to have unflushed GPAs in PML
10245 * buffer.
10246 */
10247 kvm_for_each_vcpu(i, vcpu, kvm)
10248 kvm_vcpu_kick(vcpu);
10249}
10250
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010251static void vmx_dump_sel(char *name, uint32_t sel)
10252{
10253 pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
Chao Peng96794e42017-02-21 03:50:01 -050010254 name, vmcs_read16(sel),
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010255 vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
10256 vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
10257 vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
10258}
10259
10260static void vmx_dump_dtsel(char *name, uint32_t limit)
10261{
10262 pr_err("%s limit=0x%08x, base=0x%016lx\n",
10263 name, vmcs_read32(limit),
10264 vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
10265}
10266
10267static void dump_vmcs(void)
10268{
10269 u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
10270 u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
10271 u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
10272 u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
10273 u32 secondary_exec_control = 0;
10274 unsigned long cr4 = vmcs_readl(GUEST_CR4);
Paolo Bonzinif3531052015-12-03 15:49:56 +010010275 u64 efer = vmcs_read64(GUEST_IA32_EFER);
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010276 int i, n;
10277
10278 if (cpu_has_secondary_exec_ctrls())
10279 secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
10280
10281 pr_err("*** Guest State ***\n");
10282 pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
10283 vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
10284 vmcs_readl(CR0_GUEST_HOST_MASK));
10285 pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
10286 cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
10287 pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
10288 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
10289 (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
10290 {
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010291 pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
10292 vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
10293 pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
10294 vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010295 }
10296 pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
10297 vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
10298 pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
10299 vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
10300 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
10301 vmcs_readl(GUEST_SYSENTER_ESP),
10302 vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
10303 vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
10304 vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
10305 vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
10306 vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
10307 vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
10308 vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
10309 vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
10310 vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
10311 vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
10312 vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
10313 if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
10314 (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010315 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
10316 efer, vmcs_read64(GUEST_IA32_PAT));
10317 pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
10318 vmcs_read64(GUEST_IA32_DEBUGCTL),
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010319 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010320 if (cpu_has_load_perf_global_ctrl &&
10321 vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010322 pr_err("PerfGlobCtl = 0x%016llx\n",
10323 vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010324 if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010325 pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010326 pr_err("Interruptibility = %08x ActivityState = %08x\n",
10327 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
10328 vmcs_read32(GUEST_ACTIVITY_STATE));
10329 if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
10330 pr_err("InterruptStatus = %04x\n",
10331 vmcs_read16(GUEST_INTR_STATUS));
10332
10333 pr_err("*** Host State ***\n");
10334 pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
10335 vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
10336 pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
10337 vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
10338 vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
10339 vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
10340 vmcs_read16(HOST_TR_SELECTOR));
10341 pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
10342 vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
10343 vmcs_readl(HOST_TR_BASE));
10344 pr_err("GDTBase=%016lx IDTBase=%016lx\n",
10345 vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
10346 pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
10347 vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
10348 vmcs_readl(HOST_CR4));
10349 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
10350 vmcs_readl(HOST_IA32_SYSENTER_ESP),
10351 vmcs_read32(HOST_IA32_SYSENTER_CS),
10352 vmcs_readl(HOST_IA32_SYSENTER_EIP));
10353 if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010354 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
10355 vmcs_read64(HOST_IA32_EFER),
10356 vmcs_read64(HOST_IA32_PAT));
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010357 if (cpu_has_load_perf_global_ctrl &&
10358 vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010359 pr_err("PerfGlobCtl = 0x%016llx\n",
10360 vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010361
10362 pr_err("*** Control State ***\n");
10363 pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
10364 pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
10365 pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
10366 pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
10367 vmcs_read32(EXCEPTION_BITMAP),
10368 vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
10369 vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
10370 pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
10371 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
10372 vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
10373 vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
10374 pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
10375 vmcs_read32(VM_EXIT_INTR_INFO),
10376 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
10377 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
10378 pr_err(" reason=%08x qualification=%016lx\n",
10379 vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
10380 pr_err("IDTVectoring: info=%08x errcode=%08x\n",
10381 vmcs_read32(IDT_VECTORING_INFO_FIELD),
10382 vmcs_read32(IDT_VECTORING_ERROR_CODE));
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010383 pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
Haozhong Zhang8cfe9862015-10-20 15:39:12 +080010384 if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010385 pr_err("TSC Multiplier = 0x%016llx\n",
10386 vmcs_read64(TSC_MULTIPLIER));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010387 if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
10388 pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
10389 if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
10390 pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
10391 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010392 pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010393 n = vmcs_read32(CR3_TARGET_COUNT);
10394 for (i = 0; i + 1 < n; i += 4)
10395 pr_err("CR3 target%u=%016lx target%u=%016lx\n",
10396 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
10397 i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
10398 if (i < n)
10399 pr_err("CR3 target%u=%016lx\n",
10400 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
10401 if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
10402 pr_err("PLE Gap=%08x Window=%08x\n",
10403 vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
10404 if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
10405 pr_err("Virtual processor ID = 0x%04x\n",
10406 vmcs_read16(VIRTUAL_PROCESSOR_ID));
10407}
10408
Avi Kivity6aa8b732006-12-10 02:21:36 -080010409/*
10410 * The guest has exited. See if we can fix it or if we need userspace
10411 * assistance.
10412 */
Avi Kivity851ba692009-08-24 11:10:17 +030010413static int vmx_handle_exit(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -080010414{
Avi Kivity29bd8a72007-09-10 17:27:03 +030010415 struct vcpu_vmx *vmx = to_vmx(vcpu);
Andi Kleena0861c02009-06-08 17:37:09 +080010416 u32 exit_reason = vmx->exit_reason;
Avi Kivity1155f762007-11-22 11:30:47 +020010417 u32 vectoring_info = vmx->idt_vectoring_info;
Avi Kivity29bd8a72007-09-10 17:27:03 +030010418
Paolo Bonzini8b89fe12015-12-10 18:37:32 +010010419 trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
10420
Kai Huang843e4332015-01-28 10:54:28 +080010421 /*
10422 * Flush logged GPAs PML buffer, this will make dirty_bitmap more
10423 * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
10424 * querying dirty_bitmap, we only need to kick all vcpus out of guest
10425 * mode as if vcpus is in root mode, the PML buffer must has been
10426 * flushed already.
10427 */
10428 if (enable_pml)
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010429 vmx_flush_pml_buffer(vcpu);
Kai Huang843e4332015-01-28 10:54:28 +080010430
Mohammed Gamal80ced182009-09-01 12:48:18 +020010431 /* If guest state is invalid, start emulating */
Gleb Natapov14168782013-01-21 15:36:49 +020010432 if (vmx->emulation_required)
Mohammed Gamal80ced182009-09-01 12:48:18 +020010433 return handle_invalid_guest_state(vcpu);
Guillaume Thouvenin1d5a4d92008-10-29 09:39:42 +010010434
Paolo Bonzini7313c692017-07-27 10:31:25 +020010435 if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
10436 return nested_vmx_reflect_vmexit(vcpu, exit_reason);
Nadav Har'El644d7112011-05-25 23:12:35 +030010437
Mohammed Gamal51207022010-05-31 22:40:54 +030010438 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010439 dump_vmcs();
Mohammed Gamal51207022010-05-31 22:40:54 +030010440 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
10441 vcpu->run->fail_entry.hardware_entry_failure_reason
10442 = exit_reason;
10443 return 0;
10444 }
10445
Avi Kivity29bd8a72007-09-10 17:27:03 +030010446 if (unlikely(vmx->fail)) {
Avi Kivity851ba692009-08-24 11:10:17 +030010447 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
10448 vcpu->run->fail_entry.hardware_entry_failure_reason
Avi Kivity29bd8a72007-09-10 17:27:03 +030010449 = vmcs_read32(VM_INSTRUCTION_ERROR);
10450 return 0;
10451 }
Avi Kivity6aa8b732006-12-10 02:21:36 -080010452
Xiao Guangrongb9bf6882012-10-17 13:46:52 +080010453 /*
10454 * Note:
10455 * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
10456 * delivery event since it indicates guest is accessing MMIO.
10457 * The vm-exit can be triggered again after return to guest that
10458 * will cause infinite loop.
10459 */
Mike Dayd77c26f2007-10-08 09:02:08 -040010460 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
Sheng Yang14394422008-04-28 12:24:45 +080010461 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
Jan Kiszka60637aa2008-09-26 09:30:47 +020010462 exit_reason != EXIT_REASON_EPT_VIOLATION &&
Cao, Leib244c9f2016-07-15 13:54:04 +000010463 exit_reason != EXIT_REASON_PML_FULL &&
Xiao Guangrongb9bf6882012-10-17 13:46:52 +080010464 exit_reason != EXIT_REASON_TASK_SWITCH)) {
10465 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
10466 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
Paolo Bonzini70bcd702017-07-05 12:38:06 +020010467 vcpu->run->internal.ndata = 3;
Xiao Guangrongb9bf6882012-10-17 13:46:52 +080010468 vcpu->run->internal.data[0] = vectoring_info;
10469 vcpu->run->internal.data[1] = exit_reason;
Paolo Bonzini70bcd702017-07-05 12:38:06 +020010470 vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
10471 if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
10472 vcpu->run->internal.ndata++;
10473 vcpu->run->internal.data[3] =
10474 vmcs_read64(GUEST_PHYSICAL_ADDRESS);
10475 }
Xiao Guangrongb9bf6882012-10-17 13:46:52 +080010476 return 0;
10477 }
Jan Kiszka3b86cd92008-09-26 09:30:57 +020010478
Paolo Bonzinid02fcf52017-11-06 13:31:13 +010010479 if (unlikely(!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010010480 vmx->loaded_vmcs->soft_vnmi_blocked)) {
10481 if (vmx_interrupt_allowed(vcpu)) {
10482 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
10483 } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
10484 vcpu->arch.nmi_pending) {
10485 /*
10486 * This CPU don't support us in finding the end of an
10487 * NMI-blocked window if the guest runs with IRQs
10488 * disabled. So we pull the trigger after 1 s of
10489 * futile waiting, but inform the user about this.
10490 */
10491 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
10492 "state on VCPU %d after 1 s timeout\n",
10493 __func__, vcpu->vcpu_id);
10494 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
10495 }
10496 }
10497
Avi Kivity6aa8b732006-12-10 02:21:36 -080010498 if (exit_reason < kvm_vmx_max_exit_handlers
10499 && kvm_vmx_exit_handlers[exit_reason])
Avi Kivity851ba692009-08-24 11:10:17 +030010500 return kvm_vmx_exit_handlers[exit_reason](vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -080010501 else {
Radim Krčmář6c6c5e02017-01-13 18:59:04 +010010502 vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
10503 exit_reason);
Michael S. Tsirkin2bc19dc2014-09-18 16:21:16 +030010504 kvm_queue_exception(vcpu, UD_VECTOR);
10505 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -080010506 }
Avi Kivity6aa8b732006-12-10 02:21:36 -080010507}
10508
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010509/*
10510 * Software based L1D cache flush which is used when microcode providing
10511 * the cache control MSR is not loaded.
10512 *
10513 * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
10514 * flush it is required to read in 64 KiB because the replacement algorithm
10515 * is not exactly LRU. This could be sized at runtime via topology
10516 * information but as all relevant affected CPUs have 32KiB L1D cache size
10517 * there is no point in doing so.
10518 */
Paolo Bonzinic595cee2018-07-02 13:07:14 +020010519static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010520{
10521 int size = PAGE_SIZE << L1D_CACHE_ORDER;
Paolo Bonzinic595cee2018-07-02 13:07:14 +020010522
10523 /*
Thomas Gleixner2f055942018-07-13 16:23:17 +020010524 * This code is only executed when the the flush mode is 'cond' or
10525 * 'always'
Paolo Bonzinic595cee2018-07-02 13:07:14 +020010526 */
Nicolai Stange427362a2018-07-21 22:25:00 +020010527 if (static_branch_likely(&vmx_l1d_flush_cond)) {
Nicolai Stange45b575c2018-07-27 13:22:16 +020010528 bool flush_l1d;
Nicolai Stange5b6ccc62018-07-21 22:35:28 +020010529
Nicolai Stange379fd0c2018-07-21 22:16:56 +020010530 /*
Nicolai Stange45b575c2018-07-27 13:22:16 +020010531 * Clear the per-vcpu flush bit, it gets set again
10532 * either from vcpu_run() or from one of the unsafe
10533 * VMEXIT handlers.
Nicolai Stange379fd0c2018-07-21 22:16:56 +020010534 */
Nicolai Stange45b575c2018-07-27 13:22:16 +020010535 flush_l1d = vcpu->arch.l1tf_flush_l1d;
Thomas Gleixner4c6523e2018-07-13 16:23:20 +020010536 vcpu->arch.l1tf_flush_l1d = false;
Nicolai Stange45b575c2018-07-27 13:22:16 +020010537
10538 /*
10539 * Clear the per-cpu flush bit, it gets set again from
10540 * the interrupt handlers.
10541 */
10542 flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
10543 kvm_clear_cpu_l1tf_flush_l1d();
10544
Nicolai Stange5b6ccc62018-07-21 22:35:28 +020010545 if (!flush_l1d)
10546 return;
Nicolai Stange379fd0c2018-07-21 22:16:56 +020010547 }
Paolo Bonzinic595cee2018-07-02 13:07:14 +020010548
10549 vcpu->stat.l1d_flush++;
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010550
Paolo Bonzini3fa045b2018-07-02 13:03:48 +020010551 if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
10552 wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
10553 return;
10554 }
10555
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010556 asm volatile(
10557 /* First ensure the pages are in the TLB */
10558 "xorl %%eax, %%eax\n"
10559 ".Lpopulate_tlb:\n\t"
Nicolai Stange288d1522018-07-18 19:07:38 +020010560 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010561 "addl $4096, %%eax\n\t"
10562 "cmpl %%eax, %[size]\n\t"
10563 "jne .Lpopulate_tlb\n\t"
10564 "xorl %%eax, %%eax\n\t"
10565 "cpuid\n\t"
10566 /* Now fill the cache */
10567 "xorl %%eax, %%eax\n"
10568 ".Lfill_cache:\n"
Nicolai Stange288d1522018-07-18 19:07:38 +020010569 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010570 "addl $64, %%eax\n\t"
10571 "cmpl %%eax, %[size]\n\t"
10572 "jne .Lfill_cache\n\t"
10573 "lfence\n"
Nicolai Stange288d1522018-07-18 19:07:38 +020010574 :: [flush_pages] "r" (vmx_l1d_flush_pages),
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010575 [size] "r" (size)
10576 : "eax", "ebx", "ecx", "edx");
10577}
10578
Gleb Natapov95ba8273132009-04-21 17:45:08 +030010579static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
Yang, Sheng6e5d8652007-09-12 18:03:11 +080010580{
Wanpeng Lia7c0b072014-08-21 19:46:50 +080010581 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10582
10583 if (is_guest_mode(vcpu) &&
10584 nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
10585 return;
10586
Gleb Natapov95ba8273132009-04-21 17:45:08 +030010587 if (irr == -1 || tpr < irr) {
Yang, Sheng6e5d8652007-09-12 18:03:11 +080010588 vmcs_write32(TPR_THRESHOLD, 0);
10589 return;
10590 }
10591
Gleb Natapov95ba8273132009-04-21 17:45:08 +030010592 vmcs_write32(TPR_THRESHOLD, irr);
Yang, Sheng6e5d8652007-09-12 18:03:11 +080010593}
10594
Jim Mattson8d860bb2018-05-09 16:56:05 -040010595static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
Yang Zhang8d146952013-01-25 10:18:50 +080010596{
10597 u32 sec_exec_control;
10598
Jim Mattson8d860bb2018-05-09 16:56:05 -040010599 if (!lapic_in_kernel(vcpu))
10600 return;
10601
Sean Christophersonfd6b6d92018-10-01 14:25:34 -070010602 if (!flexpriority_enabled &&
10603 !cpu_has_vmx_virtualize_x2apic_mode())
10604 return;
10605
Radim Krčmářdccbfcf2016-08-08 20:16:23 +020010606 /* Postpone execution until vmcs01 is the current VMCS. */
10607 if (is_guest_mode(vcpu)) {
Jim Mattson8d860bb2018-05-09 16:56:05 -040010608 to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
Radim Krčmářdccbfcf2016-08-08 20:16:23 +020010609 return;
10610 }
10611
Yang Zhang8d146952013-01-25 10:18:50 +080010612 sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
Jim Mattson8d860bb2018-05-09 16:56:05 -040010613 sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
10614 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
Yang Zhang8d146952013-01-25 10:18:50 +080010615
Jim Mattson8d860bb2018-05-09 16:56:05 -040010616 switch (kvm_get_apic_mode(vcpu)) {
10617 case LAPIC_MODE_INVALID:
10618 WARN_ONCE(true, "Invalid local APIC state");
10619 case LAPIC_MODE_DISABLED:
10620 break;
10621 case LAPIC_MODE_XAPIC:
10622 if (flexpriority_enabled) {
10623 sec_exec_control |=
10624 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
10625 vmx_flush_tlb(vcpu, true);
10626 }
10627 break;
10628 case LAPIC_MODE_X2APIC:
10629 if (cpu_has_vmx_virtualize_x2apic_mode())
10630 sec_exec_control |=
10631 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
10632 break;
Yang Zhang8d146952013-01-25 10:18:50 +080010633 }
10634 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
10635
Paolo Bonzini904e14f2018-01-16 16:51:18 +010010636 vmx_update_msr_bitmap(vcpu);
Yang Zhang8d146952013-01-25 10:18:50 +080010637}
10638
Tang Chen38b99172014-09-24 15:57:54 +080010639static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
10640{
Jim Mattsonab5df312018-05-09 17:02:03 -040010641 if (!is_guest_mode(vcpu)) {
Tang Chen38b99172014-09-24 15:57:54 +080010642 vmcs_write64(APIC_ACCESS_ADDR, hpa);
Junaid Shahida468f2d2018-04-26 13:09:50 -070010643 vmx_flush_tlb(vcpu, true);
Jim Mattsonfb6c8192017-03-16 13:53:59 -070010644 }
Tang Chen38b99172014-09-24 15:57:54 +080010645}
10646
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +020010647static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
Yang Zhangc7c9c562013-01-25 10:18:51 +080010648{
10649 u16 status;
10650 u8 old;
10651
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +020010652 if (max_isr == -1)
10653 max_isr = 0;
Yang Zhangc7c9c562013-01-25 10:18:51 +080010654
10655 status = vmcs_read16(GUEST_INTR_STATUS);
10656 old = status >> 8;
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +020010657 if (max_isr != old) {
Yang Zhangc7c9c562013-01-25 10:18:51 +080010658 status &= 0xff;
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +020010659 status |= max_isr << 8;
Yang Zhangc7c9c562013-01-25 10:18:51 +080010660 vmcs_write16(GUEST_INTR_STATUS, status);
10661 }
10662}
10663
10664static void vmx_set_rvi(int vector)
10665{
10666 u16 status;
10667 u8 old;
10668
Wei Wang4114c272014-11-05 10:53:43 +080010669 if (vector == -1)
10670 vector = 0;
10671
Yang Zhangc7c9c562013-01-25 10:18:51 +080010672 status = vmcs_read16(GUEST_INTR_STATUS);
10673 old = (u8)status & 0xff;
10674 if ((u8)vector != old) {
10675 status &= ~0xff;
10676 status |= (u8)vector;
10677 vmcs_write16(GUEST_INTR_STATUS, status);
10678 }
10679}
10680
10681static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
10682{
Liran Alon851c1a182017-12-24 18:12:56 +020010683 /*
10684 * When running L2, updating RVI is only relevant when
10685 * vmcs12 virtual-interrupt-delivery enabled.
10686 * However, it can be enabled only when L1 also
10687 * intercepts external-interrupts and in that case
10688 * we should not update vmcs02 RVI but instead intercept
10689 * interrupt. Therefore, do nothing when running L2.
10690 */
10691 if (!is_guest_mode(vcpu))
Wanpeng Li963fee12014-07-17 19:03:00 +080010692 vmx_set_rvi(max_irr);
Yang Zhangc7c9c562013-01-25 10:18:51 +080010693}
10694
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010695static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
Paolo Bonzini810e6de2016-12-19 13:05:46 +010010696{
10697 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010698 int max_irr;
Liran Alonf27a85c2017-12-24 18:12:55 +020010699 bool max_irr_updated;
Paolo Bonzini810e6de2016-12-19 13:05:46 +010010700
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010701 WARN_ON(!vcpu->arch.apicv_active);
10702 if (pi_test_on(&vmx->pi_desc)) {
10703 pi_clear_on(&vmx->pi_desc);
10704 /*
10705 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
10706 * But on x86 this is just a compiler barrier anyway.
10707 */
10708 smp_mb__after_atomic();
Liran Alonf27a85c2017-12-24 18:12:55 +020010709 max_irr_updated =
10710 kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
10711
10712 /*
10713 * If we are running L2 and L1 has a new pending interrupt
10714 * which can be injected, we should re-evaluate
10715 * what should be done with this new L1 interrupt.
Liran Alon851c1a182017-12-24 18:12:56 +020010716 * If L1 intercepts external-interrupts, we should
10717 * exit from L2 to L1. Otherwise, interrupt should be
10718 * delivered directly to L2.
Liran Alonf27a85c2017-12-24 18:12:55 +020010719 */
Liran Alon851c1a182017-12-24 18:12:56 +020010720 if (is_guest_mode(vcpu) && max_irr_updated) {
10721 if (nested_exit_on_intr(vcpu))
10722 kvm_vcpu_exiting_guest_mode(vcpu);
10723 else
10724 kvm_make_request(KVM_REQ_EVENT, vcpu);
10725 }
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010726 } else {
10727 max_irr = kvm_lapic_find_highest_irr(vcpu);
10728 }
10729 vmx_hwapic_irr_update(vcpu, max_irr);
10730 return max_irr;
Paolo Bonzini810e6de2016-12-19 13:05:46 +010010731}
10732
Paolo Bonzini7e712682018-10-03 13:44:26 +020010733static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
10734{
10735 u8 rvi = vmx_get_rvi();
10736 u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
10737
10738 return ((rvi & 0xf0) > (vppr & 0xf0));
10739}
10740
Andrey Smetanin63086302015-11-10 15:36:32 +030010741static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
Yang Zhangc7c9c562013-01-25 10:18:51 +080010742{
Andrey Smetanind62caab2015-11-10 15:36:33 +030010743 if (!kvm_vcpu_apicv_active(vcpu))
Yang Zhang3d81bc72013-04-11 19:25:13 +080010744 return;
10745
Yang Zhangc7c9c562013-01-25 10:18:51 +080010746 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
10747 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
10748 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
10749 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
10750}
10751
Paolo Bonzini967235d2016-12-19 14:03:45 +010010752static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
10753{
10754 struct vcpu_vmx *vmx = to_vmx(vcpu);
10755
10756 pi_clear_on(&vmx->pi_desc);
10757 memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
10758}
10759
Avi Kivity51aa01d2010-07-20 14:31:20 +030010760static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
Avi Kivitycf393f72008-07-01 16:20:21 +030010761{
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010762 u32 exit_intr_info = 0;
10763 u16 basic_exit_reason = (u16)vmx->exit_reason;
Avi Kivity00eba012011-03-07 17:24:54 +020010764
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010765 if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
10766 || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
Avi Kivity00eba012011-03-07 17:24:54 +020010767 return;
10768
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010769 if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
10770 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10771 vmx->exit_intr_info = exit_intr_info;
Andi Kleena0861c02009-06-08 17:37:09 +080010772
Wanpeng Li1261bfa2017-07-13 18:30:40 -070010773 /* if exit due to PF check for async PF */
10774 if (is_page_fault(exit_intr_info))
10775 vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
10776
Andi Kleena0861c02009-06-08 17:37:09 +080010777 /* Handle machine checks before interrupts are enabled */
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010778 if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
10779 is_machine_check(exit_intr_info))
Andi Kleena0861c02009-06-08 17:37:09 +080010780 kvm_machine_check();
10781
Gleb Natapov20f65982009-05-11 13:35:55 +030010782 /* We need to handle NMIs before interrupts are enabled */
Jim Mattsonef85b672016-12-12 11:01:37 -080010783 if (is_nmi(exit_intr_info)) {
Andi Kleendd60d212017-07-25 17:20:32 -070010784 kvm_before_interrupt(&vmx->vcpu);
Gleb Natapov20f65982009-05-11 13:35:55 +030010785 asm("int $2");
Andi Kleendd60d212017-07-25 17:20:32 -070010786 kvm_after_interrupt(&vmx->vcpu);
Zhang, Yanminff9d07a2010-04-19 13:32:45 +080010787 }
Avi Kivity51aa01d2010-07-20 14:31:20 +030010788}
Gleb Natapov20f65982009-05-11 13:35:55 +030010789
Yang Zhanga547c6d2013-04-11 19:25:10 +080010790static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
10791{
10792 u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10793
Yang Zhanga547c6d2013-04-11 19:25:10 +080010794 if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
10795 == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
10796 unsigned int vector;
10797 unsigned long entry;
10798 gate_desc *desc;
10799 struct vcpu_vmx *vmx = to_vmx(vcpu);
10800#ifdef CONFIG_X86_64
10801 unsigned long tmp;
10802#endif
10803
10804 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
10805 desc = (gate_desc *)vmx->host_idt_base + vector;
Thomas Gleixner64b163f2017-08-28 08:47:37 +020010806 entry = gate_offset(desc);
Yang Zhanga547c6d2013-04-11 19:25:10 +080010807 asm volatile(
10808#ifdef CONFIG_X86_64
10809 "mov %%" _ASM_SP ", %[sp]\n\t"
10810 "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
10811 "push $%c[ss]\n\t"
10812 "push %[sp]\n\t"
10813#endif
10814 "pushf\n\t"
Yang Zhanga547c6d2013-04-11 19:25:10 +080010815 __ASM_SIZE(push) " $%c[cs]\n\t"
Peter Zijlstrac940a3f2018-01-25 10:58:14 +010010816 CALL_NOSPEC
Yang Zhanga547c6d2013-04-11 19:25:10 +080010817 :
10818#ifdef CONFIG_X86_64
Chris J Arges3f62de52016-01-22 15:44:38 -060010819 [sp]"=&r"(tmp),
Yang Zhanga547c6d2013-04-11 19:25:10 +080010820#endif
Josh Poimboeuff5caf622017-09-20 16:24:33 -050010821 ASM_CALL_CONSTRAINT
Yang Zhanga547c6d2013-04-11 19:25:10 +080010822 :
Peter Zijlstrac940a3f2018-01-25 10:58:14 +010010823 THUNK_TARGET(entry),
Yang Zhanga547c6d2013-04-11 19:25:10 +080010824 [ss]"i"(__KERNEL_DS),
10825 [cs]"i"(__KERNEL_CS)
10826 );
Paolo Bonzinif2485b32016-06-15 15:23:11 +020010827 }
Yang Zhanga547c6d2013-04-11 19:25:10 +080010828}
Josh Poimboeufc207aee2017-06-28 10:11:06 -050010829STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
Yang Zhanga547c6d2013-04-11 19:25:10 +080010830
Tom Lendackybc226f02018-05-10 22:06:39 +020010831static bool vmx_has_emulated_msr(int index)
Paolo Bonzini6d396b52015-04-01 14:25:33 +020010832{
Tom Lendackybc226f02018-05-10 22:06:39 +020010833 switch (index) {
10834 case MSR_IA32_SMBASE:
10835 /*
10836 * We cannot do SMM unless we can run the guest in big
10837 * real mode.
10838 */
10839 return enable_unrestricted_guest || emulate_invalid_guest_state;
10840 case MSR_AMD64_VIRT_SPEC_CTRL:
10841 /* This is AMD only. */
10842 return false;
10843 default:
10844 return true;
10845 }
Paolo Bonzini6d396b52015-04-01 14:25:33 +020010846}
10847
Liu, Jinsongda8999d2014-02-24 10:55:46 +000010848static bool vmx_mpx_supported(void)
10849{
10850 return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
10851 (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
10852}
10853
Wanpeng Li55412b22014-12-02 19:21:30 +080010854static bool vmx_xsaves_supported(void)
10855{
10856 return vmcs_config.cpu_based_2nd_exec_ctrl &
10857 SECONDARY_EXEC_XSAVES;
10858}
10859
Avi Kivity51aa01d2010-07-20 14:31:20 +030010860static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
10861{
Avi Kivityc5ca8e52011-03-07 17:37:37 +020010862 u32 exit_intr_info;
Avi Kivity51aa01d2010-07-20 14:31:20 +030010863 bool unblock_nmi;
10864 u8 vector;
10865 bool idtv_info_valid;
10866
10867 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
Gleb Natapov20f65982009-05-11 13:35:55 +030010868
Paolo Bonzinid02fcf52017-11-06 13:31:13 +010010869 if (enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010010870 if (vmx->loaded_vmcs->nmi_known_unmasked)
10871 return;
10872 /*
10873 * Can't use vmx->exit_intr_info since we're not sure what
10874 * the exit reason is.
10875 */
10876 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10877 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
10878 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
10879 /*
10880 * SDM 3: 27.7.1.2 (September 2008)
10881 * Re-set bit "block by NMI" before VM entry if vmexit caused by
10882 * a guest IRET fault.
10883 * SDM 3: 23.2.2 (September 2008)
10884 * Bit 12 is undefined in any of the following cases:
10885 * If the VM exit sets the valid bit in the IDT-vectoring
10886 * information field.
10887 * If the VM exit is due to a double fault.
10888 */
10889 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
10890 vector != DF_VECTOR && !idtv_info_valid)
10891 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
10892 GUEST_INTR_STATE_NMI);
10893 else
10894 vmx->loaded_vmcs->nmi_known_unmasked =
10895 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
10896 & GUEST_INTR_STATE_NMI);
10897 } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
10898 vmx->loaded_vmcs->vnmi_blocked_time +=
10899 ktime_to_ns(ktime_sub(ktime_get(),
10900 vmx->loaded_vmcs->entry_time));
Avi Kivity51aa01d2010-07-20 14:31:20 +030010901}
10902
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010903static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
Avi Kivity83422e12010-07-20 14:43:23 +030010904 u32 idt_vectoring_info,
10905 int instr_len_field,
10906 int error_code_field)
Avi Kivity51aa01d2010-07-20 14:31:20 +030010907{
Avi Kivity51aa01d2010-07-20 14:31:20 +030010908 u8 vector;
10909 int type;
10910 bool idtv_info_valid;
10911
10912 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
Avi Kivity668f6122008-07-02 09:28:55 +030010913
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010914 vcpu->arch.nmi_injected = false;
10915 kvm_clear_exception_queue(vcpu);
10916 kvm_clear_interrupt_queue(vcpu);
Gleb Natapov37b96e92009-03-30 16:03:13 +030010917
10918 if (!idtv_info_valid)
10919 return;
10920
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010921 kvm_make_request(KVM_REQ_EVENT, vcpu);
Avi Kivity3842d132010-07-27 12:30:24 +030010922
Avi Kivity668f6122008-07-02 09:28:55 +030010923 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
10924 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
Gleb Natapov37b96e92009-03-30 16:03:13 +030010925
Gleb Natapov64a7ec02009-03-30 16:03:29 +030010926 switch (type) {
Gleb Natapov37b96e92009-03-30 16:03:13 +030010927 case INTR_TYPE_NMI_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010928 vcpu->arch.nmi_injected = true;
Avi Kivity668f6122008-07-02 09:28:55 +030010929 /*
Gleb Natapov7b4a25c2009-03-30 16:03:08 +030010930 * SDM 3: 27.7.1.2 (September 2008)
Gleb Natapov37b96e92009-03-30 16:03:13 +030010931 * Clear bit "block by NMI" before VM entry if a NMI
10932 * delivery faulted.
Avi Kivity668f6122008-07-02 09:28:55 +030010933 */
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010934 vmx_set_nmi_mask(vcpu, false);
Gleb Natapov37b96e92009-03-30 16:03:13 +030010935 break;
Gleb Natapov37b96e92009-03-30 16:03:13 +030010936 case INTR_TYPE_SOFT_EXCEPTION:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010937 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
Gleb Natapov66fd3f72009-05-11 13:35:50 +030010938 /* fall through */
10939 case INTR_TYPE_HARD_EXCEPTION:
Avi Kivity35920a32008-07-03 14:50:12 +030010940 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
Avi Kivity83422e12010-07-20 14:43:23 +030010941 u32 err = vmcs_read32(error_code_field);
Gleb Natapov851eb6672013-09-25 12:51:34 +030010942 kvm_requeue_exception_e(vcpu, vector, err);
Avi Kivity35920a32008-07-03 14:50:12 +030010943 } else
Gleb Natapov851eb6672013-09-25 12:51:34 +030010944 kvm_requeue_exception(vcpu, vector);
Gleb Natapov37b96e92009-03-30 16:03:13 +030010945 break;
Gleb Natapov66fd3f72009-05-11 13:35:50 +030010946 case INTR_TYPE_SOFT_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010947 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
Gleb Natapov66fd3f72009-05-11 13:35:50 +030010948 /* fall through */
Gleb Natapov37b96e92009-03-30 16:03:13 +030010949 case INTR_TYPE_EXT_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010950 kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
Gleb Natapov37b96e92009-03-30 16:03:13 +030010951 break;
10952 default:
10953 break;
Avi Kivityf7d92382008-07-03 16:14:28 +030010954 }
Avi Kivitycf393f72008-07-01 16:20:21 +030010955}
10956
Avi Kivity83422e12010-07-20 14:43:23 +030010957static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
10958{
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010959 __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
Avi Kivity83422e12010-07-20 14:43:23 +030010960 VM_EXIT_INSTRUCTION_LEN,
10961 IDT_VECTORING_ERROR_CODE);
10962}
10963
Avi Kivityb463a6f2010-07-20 15:06:17 +030010964static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
10965{
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010966 __vmx_complete_interrupts(vcpu,
Avi Kivityb463a6f2010-07-20 15:06:17 +030010967 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
10968 VM_ENTRY_INSTRUCTION_LEN,
10969 VM_ENTRY_EXCEPTION_ERROR_CODE);
10970
10971 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
10972}
10973
Gleb Natapovd7cd9792011-10-05 14:01:23 +020010974static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
10975{
10976 int i, nr_msrs;
10977 struct perf_guest_switch_msr *msrs;
10978
10979 msrs = perf_guest_get_msrs(&nr_msrs);
10980
10981 if (!msrs)
10982 return;
10983
10984 for (i = 0; i < nr_msrs; i++)
10985 if (msrs[i].host == msrs[i].guest)
10986 clear_atomic_switch_msr(vmx, msrs[i].msr);
10987 else
10988 add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -040010989 msrs[i].host, false);
Gleb Natapovd7cd9792011-10-05 14:01:23 +020010990}
10991
Sean Christophersonf459a702018-08-27 15:21:11 -070010992static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
10993{
10994 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
10995 if (!vmx->loaded_vmcs->hv_timer_armed)
10996 vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
10997 PIN_BASED_VMX_PREEMPTION_TIMER);
10998 vmx->loaded_vmcs->hv_timer_armed = true;
10999}
11000
11001static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
Yunhong Jiang64672c92016-06-13 14:19:59 -070011002{
11003 struct vcpu_vmx *vmx = to_vmx(vcpu);
11004 u64 tscl;
11005 u32 delta_tsc;
11006
Sean Christophersond264ee02018-08-27 15:21:12 -070011007 if (vmx->req_immediate_exit) {
11008 vmx_arm_hv_timer(vmx, 0);
11009 return;
11010 }
11011
Sean Christophersonf459a702018-08-27 15:21:11 -070011012 if (vmx->hv_deadline_tsc != -1) {
11013 tscl = rdtsc();
11014 if (vmx->hv_deadline_tsc > tscl)
11015 /* set_hv_timer ensures the delta fits in 32-bits */
11016 delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
11017 cpu_preemption_timer_multi);
11018 else
11019 delta_tsc = 0;
11020
11021 vmx_arm_hv_timer(vmx, delta_tsc);
Yunhong Jiang64672c92016-06-13 14:19:59 -070011022 return;
Sean Christophersonf459a702018-08-27 15:21:11 -070011023 }
Yunhong Jiang64672c92016-06-13 14:19:59 -070011024
Sean Christophersonf459a702018-08-27 15:21:11 -070011025 if (vmx->loaded_vmcs->hv_timer_armed)
11026 vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
11027 PIN_BASED_VMX_PREEMPTION_TIMER);
11028 vmx->loaded_vmcs->hv_timer_armed = false;
Yunhong Jiang64672c92016-06-13 14:19:59 -070011029}
11030
Lai Jiangshana3b5ba42011-02-11 14:29:40 +080011031static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -080011032{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040011033 struct vcpu_vmx *vmx = to_vmx(vcpu);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011034 unsigned long cr3, cr4, evmcs_rsp;
Avi Kivity104f2262010-11-18 13:12:52 +020011035
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010011036 /* Record the guest's net vcpu time for enforced NMI injections. */
Paolo Bonzinid02fcf52017-11-06 13:31:13 +010011037 if (unlikely(!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010011038 vmx->loaded_vmcs->soft_vnmi_blocked))
11039 vmx->loaded_vmcs->entry_time = ktime_get();
11040
Avi Kivity104f2262010-11-18 13:12:52 +020011041 /* Don't enter VMX if guest state is invalid, let the exit handler
11042 start emulation until we arrive back to a valid state */
Gleb Natapov14168782013-01-21 15:36:49 +020011043 if (vmx->emulation_required)
Avi Kivity104f2262010-11-18 13:12:52 +020011044 return;
11045
Radim Krčmářa7653ec2014-08-21 18:08:07 +020011046 if (vmx->ple_window_dirty) {
11047 vmx->ple_window_dirty = false;
11048 vmcs_write32(PLE_WINDOW, vmx->ple_window);
11049 }
11050
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020011051 if (vmx->nested.need_vmcs12_sync) {
11052 if (vmx->nested.hv_evmcs) {
11053 copy_vmcs12_to_enlightened(vmx);
11054 /* All fields are clean */
11055 vmx->nested.hv_evmcs->hv_clean_fields |=
11056 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
11057 } else {
11058 copy_vmcs12_to_shadow(vmx);
11059 }
11060 vmx->nested.need_vmcs12_sync = false;
Abel Gordon012f83c2013-04-18 14:39:25 +030011061 }
11062
Avi Kivity104f2262010-11-18 13:12:52 +020011063 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
11064 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
11065 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
11066 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
11067
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070011068 cr3 = __get_current_cr3_fast();
Sean Christophersond7ee0392018-07-23 12:32:47 -070011069 if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070011070 vmcs_writel(HOST_CR3, cr3);
Sean Christophersond7ee0392018-07-23 12:32:47 -070011071 vmx->loaded_vmcs->host_state.cr3 = cr3;
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070011072 }
11073
Andy Lutomirski1e02ce42014-10-24 15:58:08 -070011074 cr4 = cr4_read_shadow();
Sean Christophersond7ee0392018-07-23 12:32:47 -070011075 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
Andy Lutomirskid974baa2014-10-08 09:02:13 -070011076 vmcs_writel(HOST_CR4, cr4);
Sean Christophersond7ee0392018-07-23 12:32:47 -070011077 vmx->loaded_vmcs->host_state.cr4 = cr4;
Andy Lutomirskid974baa2014-10-08 09:02:13 -070011078 }
11079
Avi Kivity104f2262010-11-18 13:12:52 +020011080 /* When single-stepping over STI and MOV SS, we must clear the
11081 * corresponding interruptibility bits in the guest state. Otherwise
11082 * vmentry fails as it then expects bit 14 (BS) in pending debug
11083 * exceptions being set, but that's not correct for the guest debugging
11084 * case. */
11085 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
11086 vmx_set_interrupt_shadow(vcpu, 0);
11087
Paolo Bonzinib9dd21e2017-08-23 23:14:38 +020011088 if (static_cpu_has(X86_FEATURE_PKU) &&
11089 kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
11090 vcpu->arch.pkru != vmx->host_pkru)
11091 __write_pkru(vcpu->arch.pkru);
Xiao Guangrong1be0e612016-03-22 16:51:18 +080011092
Gleb Natapovd7cd9792011-10-05 14:01:23 +020011093 atomic_switch_perf_msrs(vmx);
11094
Sean Christophersonf459a702018-08-27 15:21:11 -070011095 vmx_update_hv_timer(vcpu);
Yunhong Jiang64672c92016-06-13 14:19:59 -070011096
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011097 /*
11098 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
11099 * it's non-zero. Since vmentry is serialising on affected CPUs, there
11100 * is no need to worry about the conditional branch over the wrmsr
11101 * being speculatively taken.
11102 */
Thomas Gleixnerccbcd262018-05-09 23:01:01 +020011103 x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011104
Nadav Har'Eld462b812011-05-24 15:26:10 +030011105 vmx->__launched = vmx->loaded_vmcs->launched;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011106
11107 evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
11108 (unsigned long)&current_evmcs->host_rsp : 0;
11109
Nicolai Stange5b6ccc62018-07-21 22:35:28 +020011110 if (static_branch_unlikely(&vmx_l1d_should_flush))
11111 vmx_l1d_flush(vcpu);
Paolo Bonzinic595cee2018-07-02 13:07:14 +020011112
Avi Kivity104f2262010-11-18 13:12:52 +020011113 asm(
Avi Kivity6aa8b732006-12-10 02:21:36 -080011114 /* Store host registers */
Avi Kivityb188c81f2012-09-16 15:10:58 +030011115 "push %%" _ASM_DX "; push %%" _ASM_BP ";"
11116 "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
11117 "push %%" _ASM_CX " \n\t"
11118 "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
Avi Kivity313dbd492008-07-17 18:04:30 +030011119 "je 1f \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011120 "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011121 /* Avoid VMWRITE when Enlightened VMCS is in use */
11122 "test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
11123 "jz 2f \n\t"
11124 "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
11125 "jmp 1f \n\t"
11126 "2: \n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020011127 __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
Avi Kivity313dbd492008-07-17 18:04:30 +030011128 "1: \n\t"
Avi Kivityd3edefc2009-06-16 12:33:56 +030011129 /* Reload cr2 if changed */
Avi Kivityb188c81f2012-09-16 15:10:58 +030011130 "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
11131 "mov %%cr2, %%" _ASM_DX " \n\t"
11132 "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011133 "je 3f \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011134 "mov %%" _ASM_AX", %%cr2 \n\t"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011135 "3: \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080011136 /* Check if vmlaunch of vmresume is needed */
Avi Kivitye08aa782007-11-15 18:06:18 +020011137 "cmpl $0, %c[launched](%0) \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080011138 /* Load guest registers. Don't clobber flags. */
Avi Kivityb188c81f2012-09-16 15:10:58 +030011139 "mov %c[rax](%0), %%" _ASM_AX " \n\t"
11140 "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
11141 "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
11142 "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
11143 "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
11144 "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
Avi Kivity05b3e0c2006-12-13 00:33:45 -080011145#ifdef CONFIG_X86_64
Avi Kivitye08aa782007-11-15 18:06:18 +020011146 "mov %c[r8](%0), %%r8 \n\t"
11147 "mov %c[r9](%0), %%r9 \n\t"
11148 "mov %c[r10](%0), %%r10 \n\t"
11149 "mov %c[r11](%0), %%r11 \n\t"
11150 "mov %c[r12](%0), %%r12 \n\t"
11151 "mov %c[r13](%0), %%r13 \n\t"
11152 "mov %c[r14](%0), %%r14 \n\t"
11153 "mov %c[r15](%0), %%r15 \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080011154#endif
Avi Kivityb188c81f2012-09-16 15:10:58 +030011155 "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
Avi Kivityc8019492008-07-14 14:44:59 +030011156
Avi Kivity6aa8b732006-12-10 02:21:36 -080011157 /* Enter guest mode */
Avi Kivity83287ea422012-09-16 15:10:57 +030011158 "jne 1f \n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020011159 __ex("vmlaunch") "\n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +030011160 "jmp 2f \n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020011161 "1: " __ex("vmresume") "\n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +030011162 "2: "
Avi Kivity6aa8b732006-12-10 02:21:36 -080011163 /* Save guest registers, load host registers, keep flags */
Avi Kivityb188c81f2012-09-16 15:10:58 +030011164 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
Avi Kivity40712fa2011-01-06 18:09:12 +020011165 "pop %0 \n\t"
Jim Mattson0cb5b302018-01-03 14:31:38 -080011166 "setbe %c[fail](%0)\n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011167 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
11168 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
11169 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
11170 "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
11171 "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
11172 "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
11173 "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
Avi Kivity05b3e0c2006-12-13 00:33:45 -080011174#ifdef CONFIG_X86_64
Avi Kivitye08aa782007-11-15 18:06:18 +020011175 "mov %%r8, %c[r8](%0) \n\t"
11176 "mov %%r9, %c[r9](%0) \n\t"
11177 "mov %%r10, %c[r10](%0) \n\t"
11178 "mov %%r11, %c[r11](%0) \n\t"
11179 "mov %%r12, %c[r12](%0) \n\t"
11180 "mov %%r13, %c[r13](%0) \n\t"
11181 "mov %%r14, %c[r14](%0) \n\t"
11182 "mov %%r15, %c[r15](%0) \n\t"
Jim Mattson0cb5b302018-01-03 14:31:38 -080011183 "xor %%r8d, %%r8d \n\t"
11184 "xor %%r9d, %%r9d \n\t"
11185 "xor %%r10d, %%r10d \n\t"
11186 "xor %%r11d, %%r11d \n\t"
11187 "xor %%r12d, %%r12d \n\t"
11188 "xor %%r13d, %%r13d \n\t"
11189 "xor %%r14d, %%r14d \n\t"
11190 "xor %%r15d, %%r15d \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080011191#endif
Avi Kivityb188c81f2012-09-16 15:10:58 +030011192 "mov %%cr2, %%" _ASM_AX " \n\t"
11193 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
Avi Kivityc8019492008-07-14 14:44:59 +030011194
Jim Mattson0cb5b302018-01-03 14:31:38 -080011195 "xor %%eax, %%eax \n\t"
11196 "xor %%ebx, %%ebx \n\t"
11197 "xor %%esi, %%esi \n\t"
11198 "xor %%edi, %%edi \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011199 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +030011200 ".pushsection .rodata \n\t"
11201 ".global vmx_return \n\t"
11202 "vmx_return: " _ASM_PTR " 2b \n\t"
11203 ".popsection"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011204 : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
Nadav Har'Eld462b812011-05-24 15:26:10 +030011205 [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
Avi Kivitye08aa782007-11-15 18:06:18 +020011206 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
Avi Kivity313dbd492008-07-17 18:04:30 +030011207 [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
Zhang Xiantaoad312c72007-12-13 23:50:52 +080011208 [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
11209 [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
11210 [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
11211 [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
11212 [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
11213 [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
11214 [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
Avi Kivity05b3e0c2006-12-13 00:33:45 -080011215#ifdef CONFIG_X86_64
Zhang Xiantaoad312c72007-12-13 23:50:52 +080011216 [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
11217 [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
11218 [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
11219 [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
11220 [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
11221 [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
11222 [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
11223 [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
Avi Kivity6aa8b732006-12-10 02:21:36 -080011224#endif
Avi Kivity40712fa2011-01-06 18:09:12 +020011225 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
11226 [wordsize]"i"(sizeof(ulong))
Laurent Vivierc2036302007-10-25 14:18:52 +020011227 : "cc", "memory"
11228#ifdef CONFIG_X86_64
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011229 , "rax", "rbx", "rdi"
Laurent Vivierc2036302007-10-25 14:18:52 +020011230 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011231#else
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011232 , "eax", "ebx", "edi"
Laurent Vivierc2036302007-10-25 14:18:52 +020011233#endif
11234 );
Avi Kivity6aa8b732006-12-10 02:21:36 -080011235
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011236 /*
11237 * We do not use IBRS in the kernel. If this vCPU has used the
11238 * SPEC_CTRL MSR it may have left it on; save the value and
11239 * turn it off. This is much more efficient than blindly adding
11240 * it to the atomic save/restore list. Especially as the former
11241 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
11242 *
11243 * For non-nested case:
11244 * If the L01 MSR bitmap does not intercept the MSR, then we need to
11245 * save it.
11246 *
11247 * For nested case:
11248 * If the L02 MSR bitmap does not intercept the MSR, then we need to
11249 * save it.
11250 */
Paolo Bonzini946fbbc2018-02-22 16:43:18 +010011251 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
Paolo Bonziniecb586b2018-02-22 16:43:17 +010011252 vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011253
Thomas Gleixnerccbcd262018-05-09 23:01:01 +020011254 x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011255
David Woodhouse117cc7a2018-01-12 11:11:27 +000011256 /* Eliminate branch target predictions from guest mode */
11257 vmexit_fill_RSB();
11258
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011259 /* All fields are clean at this point */
11260 if (static_branch_unlikely(&enable_evmcs))
11261 current_evmcs->hv_clean_fields |=
11262 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
11263
Gleb Natapov2a7921b2012-08-12 16:12:29 +030011264 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
Wanpeng Li74c55932017-11-29 01:31:20 -080011265 if (vmx->host_debugctlmsr)
11266 update_debugctlmsr(vmx->host_debugctlmsr);
Gleb Natapov2a7921b2012-08-12 16:12:29 +030011267
Avi Kivityaa67f602012-08-01 16:48:03 +030011268#ifndef CONFIG_X86_64
11269 /*
11270 * The sysexit path does not restore ds/es, so we must set them to
11271 * a reasonable value ourselves.
11272 *
Sean Christopherson6d6095b2018-07-23 12:32:44 -070011273 * We can't defer this to vmx_prepare_switch_to_host() since that
11274 * function may be executed in interrupt context, which saves and
11275 * restore segments around it, nullifying its effect.
Avi Kivityaa67f602012-08-01 16:48:03 +030011276 */
11277 loadsegment(ds, __USER_DS);
11278 loadsegment(es, __USER_DS);
11279#endif
11280
Avi Kivity6de4f3a2009-05-31 22:58:47 +030011281 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
Avi Kivity6de12732011-03-07 12:51:22 +020011282 | (1 << VCPU_EXREG_RFLAGS)
Avi Kivityaff48ba2010-12-05 18:56:11 +020011283 | (1 << VCPU_EXREG_PDPTR)
Avi Kivity2fb92db2011-04-27 19:42:18 +030011284 | (1 << VCPU_EXREG_SEGMENTS)
Avi Kivityaff48ba2010-12-05 18:56:11 +020011285 | (1 << VCPU_EXREG_CR3));
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -030011286 vcpu->arch.regs_dirty = 0;
11287
Gleb Natapove0b890d2013-09-25 12:51:33 +030011288 /*
Xiao Guangrong1be0e612016-03-22 16:51:18 +080011289 * eager fpu is enabled if PKEY is supported and CR4 is switched
11290 * back on host, so it is safe to read guest PKRU from current
11291 * XSAVE.
11292 */
Paolo Bonzinib9dd21e2017-08-23 23:14:38 +020011293 if (static_cpu_has(X86_FEATURE_PKU) &&
11294 kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
11295 vcpu->arch.pkru = __read_pkru();
11296 if (vcpu->arch.pkru != vmx->host_pkru)
Xiao Guangrong1be0e612016-03-22 16:51:18 +080011297 __write_pkru(vmx->host_pkru);
Xiao Guangrong1be0e612016-03-22 16:51:18 +080011298 }
11299
Gleb Natapove0b890d2013-09-25 12:51:33 +030011300 vmx->nested.nested_run_pending = 0;
Jim Mattsonb060ca32017-09-14 16:31:42 -070011301 vmx->idt_vectoring_info = 0;
11302
11303 vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
11304 if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
11305 return;
11306
11307 vmx->loaded_vmcs->launched = 1;
11308 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
Gleb Natapove0b890d2013-09-25 12:51:33 +030011309
Avi Kivity51aa01d2010-07-20 14:31:20 +030011310 vmx_complete_atomic_exit(vmx);
11311 vmx_recover_nmi_blocking(vmx);
Avi Kivitycf393f72008-07-01 16:20:21 +030011312 vmx_complete_interrupts(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -080011313}
Josh Poimboeufc207aee2017-06-28 10:11:06 -050011314STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
Avi Kivity6aa8b732006-12-10 02:21:36 -080011315
Sean Christopherson434a1e92018-03-20 12:17:18 -070011316static struct kvm *vmx_vm_alloc(void)
11317{
Marc Orrd1e5b0e2018-05-15 04:37:37 -070011318 struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
Sean Christopherson40bbb9d2018-03-20 12:17:20 -070011319 return &kvm_vmx->kvm;
Sean Christopherson434a1e92018-03-20 12:17:18 -070011320}
11321
11322static void vmx_vm_free(struct kvm *kvm)
11323{
Marc Orrd1e5b0e2018-05-15 04:37:37 -070011324 vfree(to_kvm_vmx(kvm));
Sean Christopherson434a1e92018-03-20 12:17:18 -070011325}
11326
David Hildenbrand1279a6b12017-03-20 10:00:08 +010011327static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011328{
11329 struct vcpu_vmx *vmx = to_vmx(vcpu);
11330 int cpu;
11331
David Hildenbrand1279a6b12017-03-20 10:00:08 +010011332 if (vmx->loaded_vmcs == vmcs)
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011333 return;
11334
11335 cpu = get_cpu();
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011336 vmx_vcpu_put(vcpu);
Sean Christophersonbd9966d2018-07-23 12:32:42 -070011337 vmx->loaded_vmcs = vmcs;
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011338 vmx_vcpu_load(vcpu, cpu);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011339 put_cpu();
Sean Christophersonb7031fd2018-09-26 09:23:42 -070011340
11341 vm_entry_controls_reset_shadow(vmx);
11342 vm_exit_controls_reset_shadow(vmx);
11343 vmx_segment_cache_clear(vmx);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011344}
11345
Jim Mattson2f1fe812016-07-08 15:36:06 -070011346/*
11347 * Ensure that the current vmcs of the logical processor is the
11348 * vmcs01 of the vcpu before calling free_nested().
11349 */
11350static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
11351{
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +020011352 vcpu_load(vcpu);
11353 vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
11354 free_nested(vcpu);
11355 vcpu_put(vcpu);
Jim Mattson2f1fe812016-07-08 15:36:06 -070011356}
11357
Avi Kivity6aa8b732006-12-10 02:21:36 -080011358static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
11359{
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011360 struct vcpu_vmx *vmx = to_vmx(vcpu);
11361
Kai Huang843e4332015-01-28 10:54:28 +080011362 if (enable_pml)
Kai Huanga3eaa862015-11-04 13:46:05 +080011363 vmx_destroy_pml_buffer(vmx);
Wanpeng Li991e7a02015-09-16 17:30:05 +080011364 free_vpid(vmx->vpid);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011365 leave_guest_mode(vcpu);
Jim Mattson2f1fe812016-07-08 15:36:06 -070011366 vmx_free_vcpu_nested(vcpu);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011367 free_loaded_vmcs(vmx->loaded_vmcs);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011368 kfree(vmx->guest_msrs);
11369 kvm_vcpu_uninit(vcpu);
Rusty Russella4770342007-08-01 14:46:11 +100011370 kmem_cache_free(kvm_vcpu_cache, vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -080011371}
11372
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011373static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
Avi Kivity6aa8b732006-12-10 02:21:36 -080011374{
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011375 int err;
Rusty Russellc16f8622007-07-30 21:12:19 +100011376 struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
Paolo Bonzini904e14f2018-01-16 16:51:18 +010011377 unsigned long *msr_bitmap;
Avi Kivity15ad7142007-07-11 18:17:21 +030011378 int cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -080011379
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040011380 if (!vmx)
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011381 return ERR_PTR(-ENOMEM);
11382
Wanpeng Li991e7a02015-09-16 17:30:05 +080011383 vmx->vpid = allocate_vpid();
Sheng Yang2384d2b2008-01-17 15:14:33 +080011384
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011385 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
11386 if (err)
11387 goto free_vcpu;
Ingo Molnar965b58a2007-01-05 16:36:23 -080011388
Peter Feiner4e595162016-07-07 14:49:58 -070011389 err = -ENOMEM;
11390
11391 /*
11392 * If PML is turned on, failure on enabling PML just results in failure
11393 * of creating the vcpu, therefore we can simplify PML logic (by
11394 * avoiding dealing with cases, such as enabling PML partially on vcpus
11395 * for the guest, etc.
11396 */
11397 if (enable_pml) {
11398 vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
11399 if (!vmx->pml_pg)
11400 goto uninit_vcpu;
11401 }
11402
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040011403 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
Paolo Bonzini03916db2014-07-24 14:21:57 +020011404 BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
11405 > PAGE_SIZE);
Nadav Amit0123be42014-07-24 15:06:56 +030011406
Peter Feiner4e595162016-07-07 14:49:58 -070011407 if (!vmx->guest_msrs)
11408 goto free_pml;
Ingo Molnar965b58a2007-01-05 16:36:23 -080011409
Paolo Bonzinif21f1652018-01-11 12:16:15 +010011410 err = alloc_loaded_vmcs(&vmx->vmcs01);
11411 if (err < 0)
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011412 goto free_msrs;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040011413
Paolo Bonzini904e14f2018-01-16 16:51:18 +010011414 msr_bitmap = vmx->vmcs01.msr_bitmap;
11415 vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
11416 vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
11417 vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
11418 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
11419 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
11420 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
11421 vmx->msr_bitmap_mode = 0;
11422
Paolo Bonzinif21f1652018-01-11 12:16:15 +010011423 vmx->loaded_vmcs = &vmx->vmcs01;
Avi Kivity15ad7142007-07-11 18:17:21 +030011424 cpu = get_cpu();
11425 vmx_vcpu_load(&vmx->vcpu, cpu);
Zachary Amsdene48672f2010-08-19 22:07:23 -100011426 vmx->vcpu.cpu = cpu;
David Hildenbrand12d79912017-08-24 20:51:26 +020011427 vmx_vcpu_setup(vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011428 vmx_vcpu_put(&vmx->vcpu);
Avi Kivity15ad7142007-07-11 18:17:21 +030011429 put_cpu();
Paolo Bonzini35754c92015-07-29 12:05:37 +020011430 if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
Jan Kiszkabe6d05c2011-04-13 01:27:55 +020011431 err = alloc_apic_access_page(kvm);
11432 if (err)
Marcelo Tosatti5e4a0b32008-02-14 21:21:43 -020011433 goto free_vmcs;
Jan Kiszkaa63cb562013-04-08 11:07:46 +020011434 }
Ingo Molnar965b58a2007-01-05 16:36:23 -080011435
Sean Christophersone90008d2018-03-05 12:04:37 -080011436 if (enable_ept && !enable_unrestricted_guest) {
Tang Chenf51770e2014-09-16 18:41:59 +080011437 err = init_rmode_identity_map(kvm);
11438 if (err)
Gleb Natapov93ea5382011-02-21 12:07:59 +020011439 goto free_vmcs;
Sheng Yangb927a3c2009-07-21 10:42:48 +080011440 }
Sheng Yangb7ebfb02008-04-25 21:44:52 +080011441
Roman Kagan63aff652018-07-19 21:59:07 +030011442 if (nested)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010011443 nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
11444 kvm_vcpu_apicv_active(&vmx->vcpu));
Wincy Vanb9c237b2015-02-03 23:56:30 +080011445
Wincy Van705699a2015-02-03 23:58:17 +080011446 vmx->nested.posted_intr_nv = -1;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +030011447 vmx->nested.current_vmptr = -1ull;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +030011448
Haozhong Zhang37e4c992016-06-22 14:59:55 +080011449 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
11450
Paolo Bonzini31afb2e2017-06-06 12:57:06 +020011451 /*
11452 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
11453 * or POSTED_INTR_WAKEUP_VECTOR.
11454 */
11455 vmx->pi_desc.nv = POSTED_INTR_VECTOR;
11456 vmx->pi_desc.sn = 1;
11457
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011458 return &vmx->vcpu;
Ingo Molnar965b58a2007-01-05 16:36:23 -080011459
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011460free_vmcs:
Xiao Guangrong5f3fbc32012-05-14 14:58:58 +080011461 free_loaded_vmcs(vmx->loaded_vmcs);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011462free_msrs:
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011463 kfree(vmx->guest_msrs);
Peter Feiner4e595162016-07-07 14:49:58 -070011464free_pml:
11465 vmx_destroy_pml_buffer(vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011466uninit_vcpu:
11467 kvm_vcpu_uninit(&vmx->vcpu);
11468free_vcpu:
Wanpeng Li991e7a02015-09-16 17:30:05 +080011469 free_vpid(vmx->vpid);
Rusty Russella4770342007-08-01 14:46:11 +100011470 kmem_cache_free(kvm_vcpu_cache, vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011471 return ERR_PTR(err);
Avi Kivity6aa8b732006-12-10 02:21:36 -080011472}
11473
Jiri Kosinad90a7a02018-07-13 16:23:25 +020011474#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
11475#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
Konrad Rzeszutek Wilk26acfb62018-06-20 11:29:53 -040011476
Wanpeng Lib31c1142018-03-12 04:53:04 -070011477static int vmx_vm_init(struct kvm *kvm)
11478{
Tianyu Lan877ad952018-07-19 08:40:23 +000011479 spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
11480
Wanpeng Lib31c1142018-03-12 04:53:04 -070011481 if (!ple_gap)
11482 kvm->arch.pause_in_guest = true;
Konrad Rzeszutek Wilk26acfb62018-06-20 11:29:53 -040011483
Jiri Kosinad90a7a02018-07-13 16:23:25 +020011484 if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
11485 switch (l1tf_mitigation) {
11486 case L1TF_MITIGATION_OFF:
11487 case L1TF_MITIGATION_FLUSH_NOWARN:
11488 /* 'I explicitly don't care' is set */
11489 break;
11490 case L1TF_MITIGATION_FLUSH:
11491 case L1TF_MITIGATION_FLUSH_NOSMT:
11492 case L1TF_MITIGATION_FULL:
11493 /*
11494 * Warn upon starting the first VM in a potentially
11495 * insecure environment.
11496 */
11497 if (cpu_smt_control == CPU_SMT_ENABLED)
11498 pr_warn_once(L1TF_MSG_SMT);
11499 if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
11500 pr_warn_once(L1TF_MSG_L1D);
11501 break;
11502 case L1TF_MITIGATION_FULL_FORCE:
11503 /* Flush is enforced */
11504 break;
Konrad Rzeszutek Wilk26acfb62018-06-20 11:29:53 -040011505 }
Konrad Rzeszutek Wilk26acfb62018-06-20 11:29:53 -040011506 }
Wanpeng Lib31c1142018-03-12 04:53:04 -070011507 return 0;
11508}
11509
Yang, Sheng002c7f72007-07-31 14:23:01 +030011510static void __init vmx_check_processor_compat(void *rtn)
11511{
11512 struct vmcs_config vmcs_conf;
11513
11514 *(int *)rtn = 0;
11515 if (setup_vmcs_config(&vmcs_conf) < 0)
11516 *(int *)rtn = -EIO;
Paolo Bonzini13893092018-02-26 13:40:09 +010011517 nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv);
Yang, Sheng002c7f72007-07-31 14:23:01 +030011518 if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
11519 printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
11520 smp_processor_id());
11521 *(int *)rtn = -EIO;
11522 }
11523}
11524
Sheng Yang4b12f0d2009-04-27 20:35:42 +080011525static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
Sheng Yang64d4d522008-10-09 16:01:57 +080011526{
Xiao Guangrongb18d5432015-06-15 16:55:21 +080011527 u8 cache;
11528 u64 ipat = 0;
Sheng Yang4b12f0d2009-04-27 20:35:42 +080011529
Sheng Yang522c68c2009-04-27 20:35:43 +080011530 /* For VT-d and EPT combination
Paolo Bonzini606decd2015-10-01 13:12:47 +020011531 * 1. MMIO: always map as UC
Sheng Yang522c68c2009-04-27 20:35:43 +080011532 * 2. EPT with VT-d:
11533 * a. VT-d without snooping control feature: can't guarantee the
Paolo Bonzini606decd2015-10-01 13:12:47 +020011534 * result, try to trust guest.
Sheng Yang522c68c2009-04-27 20:35:43 +080011535 * b. VT-d with snooping control feature: snooping control feature of
11536 * VT-d engine can guarantee the cache correctness. Just set it
11537 * to WB to keep consistent with host. So the same as item 3.
Sheng Yanga19a6d12010-02-09 16:41:53 +080011538 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
Sheng Yang522c68c2009-04-27 20:35:43 +080011539 * consistent with host MTRR
11540 */
Paolo Bonzini606decd2015-10-01 13:12:47 +020011541 if (is_mmio) {
11542 cache = MTRR_TYPE_UNCACHABLE;
11543 goto exit;
11544 }
11545
11546 if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
Xiao Guangrongb18d5432015-06-15 16:55:21 +080011547 ipat = VMX_EPT_IPAT_BIT;
11548 cache = MTRR_TYPE_WRBACK;
11549 goto exit;
11550 }
11551
11552 if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
11553 ipat = VMX_EPT_IPAT_BIT;
Paolo Bonzini0da029e2015-07-23 08:24:42 +020011554 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
Xiao Guangrongfb2799502015-07-16 03:25:56 +080011555 cache = MTRR_TYPE_WRBACK;
11556 else
11557 cache = MTRR_TYPE_UNCACHABLE;
Xiao Guangrongb18d5432015-06-15 16:55:21 +080011558 goto exit;
11559 }
11560
Xiao Guangrongff536042015-06-15 16:55:22 +080011561 cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
Xiao Guangrongb18d5432015-06-15 16:55:21 +080011562
11563exit:
11564 return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
Sheng Yang64d4d522008-10-09 16:01:57 +080011565}
11566
Sheng Yang17cc3932010-01-05 19:02:27 +080011567static int vmx_get_lpage_level(void)
Joerg Roedel344f4142009-07-27 16:30:48 +020011568{
Sheng Yang878403b2010-01-05 19:02:29 +080011569 if (enable_ept && !cpu_has_vmx_ept_1g_page())
11570 return PT_DIRECTORY_LEVEL;
11571 else
11572 /* For shadow and EPT supported 1GB page */
11573 return PT_PDPE_LEVEL;
Joerg Roedel344f4142009-07-27 16:30:48 +020011574}
11575
Xiao Guangrongfeda8052015-09-09 14:05:55 +080011576static void vmcs_set_secondary_exec_control(u32 new_ctl)
11577{
11578 /*
11579 * These bits in the secondary execution controls field
11580 * are dynamic, the others are mostly based on the hypervisor
11581 * architecture and the guest's CPUID. Do not touch the
11582 * dynamic bits.
11583 */
11584 u32 mask =
11585 SECONDARY_EXEC_SHADOW_VMCS |
11586 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Paolo Bonzini0367f202016-07-12 10:44:55 +020011587 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
11588 SECONDARY_EXEC_DESC;
Xiao Guangrongfeda8052015-09-09 14:05:55 +080011589
11590 u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
11591
11592 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
11593 (new_ctl & ~mask) | (cur_ctl & mask));
11594}
11595
David Matlack8322ebb2016-11-29 18:14:09 -080011596/*
11597 * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
11598 * (indicating "allowed-1") if they are supported in the guest's CPUID.
11599 */
11600static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
11601{
11602 struct vcpu_vmx *vmx = to_vmx(vcpu);
11603 struct kvm_cpuid_entry2 *entry;
11604
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010011605 vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
11606 vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
David Matlack8322ebb2016-11-29 18:14:09 -080011607
11608#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
11609 if (entry && (entry->_reg & (_cpuid_mask))) \
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010011610 vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
David Matlack8322ebb2016-11-29 18:14:09 -080011611} while (0)
11612
11613 entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
11614 cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
11615 cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
11616 cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
11617 cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
11618 cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
11619 cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
11620 cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
11621 cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
11622 cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
11623 cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
11624 cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
11625 cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
11626 cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
11627 cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
11628
11629 entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
11630 cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
11631 cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
11632 cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
11633 cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
Paolo Bonzinic4ad77e2017-11-13 14:23:59 +010011634 cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
David Matlack8322ebb2016-11-29 18:14:09 -080011635
11636#undef cr4_fixed1_update
11637}
11638
Liran Alon5f76f6f2018-09-14 03:25:52 +030011639static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
11640{
11641 struct vcpu_vmx *vmx = to_vmx(vcpu);
11642
11643 if (kvm_mpx_supported()) {
11644 bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
11645
11646 if (mpx_enabled) {
11647 vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
11648 vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
11649 } else {
11650 vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
11651 vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
11652 }
11653 }
11654}
11655
Sheng Yang0e851882009-12-18 16:48:46 +080011656static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
11657{
Sheng Yang4e47c7a2009-12-18 16:48:47 +080011658 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sheng Yang4e47c7a2009-12-18 16:48:47 +080011659
Paolo Bonzini80154d72017-08-24 13:55:35 +020011660 if (cpu_has_secondary_exec_ctrls()) {
11661 vmx_compute_secondary_exec_control(vmx);
11662 vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
Sheng Yang4e47c7a2009-12-18 16:48:47 +080011663 }
Mao, Junjiead756a12012-07-02 01:18:48 +000011664
Haozhong Zhang37e4c992016-06-22 14:59:55 +080011665 if (nested_vmx_allowed(vcpu))
11666 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
11667 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
11668 else
11669 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
11670 ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
David Matlack8322ebb2016-11-29 18:14:09 -080011671
Liran Alon5f76f6f2018-09-14 03:25:52 +030011672 if (nested_vmx_allowed(vcpu)) {
David Matlack8322ebb2016-11-29 18:14:09 -080011673 nested_vmx_cr_fixed1_bits_update(vcpu);
Liran Alon5f76f6f2018-09-14 03:25:52 +030011674 nested_vmx_entry_exit_ctls_update(vcpu);
11675 }
Sheng Yang0e851882009-12-18 16:48:46 +080011676}
11677
Joerg Roedeld4330ef2010-04-22 12:33:11 +020011678static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
11679{
Nadav Har'El7b8050f2011-05-25 23:16:10 +030011680 if (func == 1 && nested)
11681 entry->ecx |= bit(X86_FEATURE_VMX);
Joerg Roedeld4330ef2010-04-22 12:33:11 +020011682}
11683
Yang Zhang25d92082013-08-06 12:00:32 +030011684static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
11685 struct x86_exception *fault)
11686{
Jan Kiszka533558b2014-01-04 18:47:20 +010011687 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Bandan Dasc5f983f2017-05-05 15:25:14 -040011688 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jan Kiszka533558b2014-01-04 18:47:20 +010011689 u32 exit_reason;
Bandan Dasc5f983f2017-05-05 15:25:14 -040011690 unsigned long exit_qualification = vcpu->arch.exit_qualification;
Yang Zhang25d92082013-08-06 12:00:32 +030011691
Bandan Dasc5f983f2017-05-05 15:25:14 -040011692 if (vmx->nested.pml_full) {
11693 exit_reason = EXIT_REASON_PML_FULL;
11694 vmx->nested.pml_full = false;
11695 exit_qualification &= INTR_INFO_UNBLOCK_NMI;
11696 } else if (fault->error_code & PFERR_RSVD_MASK)
Jan Kiszka533558b2014-01-04 18:47:20 +010011697 exit_reason = EXIT_REASON_EPT_MISCONFIG;
Yang Zhang25d92082013-08-06 12:00:32 +030011698 else
Jan Kiszka533558b2014-01-04 18:47:20 +010011699 exit_reason = EXIT_REASON_EPT_VIOLATION;
Bandan Dasc5f983f2017-05-05 15:25:14 -040011700
11701 nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
Yang Zhang25d92082013-08-06 12:00:32 +030011702 vmcs12->guest_physical_address = fault->address;
11703}
11704
Peter Feiner995f00a2017-06-30 17:26:32 -070011705static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
11706{
David Hildenbrandbb97a012017-08-10 23:15:28 +020011707 return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT;
Peter Feiner995f00a2017-06-30 17:26:32 -070011708}
11709
Nadav Har'El155a97a2013-08-05 11:07:16 +030011710/* Callbacks for nested_ept_init_mmu_context: */
11711
11712static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
11713{
11714 /* return the page table to be shadowed - in our case, EPT12 */
11715 return get_vmcs12(vcpu)->ept_pointer;
11716}
11717
Sean Christopherson5b8ba412018-09-26 09:23:40 -070011718static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
Nadav Har'El155a97a2013-08-05 11:07:16 +030011719{
Paolo Bonziniad896af2013-10-02 16:56:14 +020011720 WARN_ON(mmu_is_nested(vcpu));
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020011721
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +020011722 vcpu->arch.mmu = &vcpu->arch.guest_mmu;
Paolo Bonziniad896af2013-10-02 16:56:14 +020011723 kvm_init_shadow_ept_mmu(vcpu,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010011724 to_vmx(vcpu)->nested.msrs.ept_caps &
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020011725 VMX_EPT_EXECUTE_ONLY_BIT,
Junaid Shahid50c28f22018-06-27 14:59:11 -070011726 nested_ept_ad_enabled(vcpu),
11727 nested_ept_get_cr3(vcpu));
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +020011728 vcpu->arch.mmu->set_cr3 = vmx_set_cr3;
11729 vcpu->arch.mmu->get_cr3 = nested_ept_get_cr3;
11730 vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
Vitaly Kuznetsov3dc773e2018-10-08 21:28:06 +020011731 vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;
Nadav Har'El155a97a2013-08-05 11:07:16 +030011732
11733 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
Nadav Har'El155a97a2013-08-05 11:07:16 +030011734}
11735
11736static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
11737{
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +020011738 vcpu->arch.mmu = &vcpu->arch.root_mmu;
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +020011739 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
Nadav Har'El155a97a2013-08-05 11:07:16 +030011740}
11741
Eugene Korenevsky19d5f102014-12-16 22:35:53 +030011742static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
11743 u16 error_code)
11744{
11745 bool inequality, bit;
11746
11747 bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
11748 inequality =
11749 (error_code & vmcs12->page_fault_error_code_mask) !=
11750 vmcs12->page_fault_error_code_match;
11751 return inequality ^ bit;
11752}
11753
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011754static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
11755 struct x86_exception *fault)
11756{
11757 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
11758
11759 WARN_ON(!is_guest_mode(vcpu));
11760
Wanpeng Li305d0ab2017-09-28 18:16:44 -070011761 if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
11762 !to_vmx(vcpu)->nested.nested_run_pending) {
Paolo Bonzinib96fb432017-07-27 12:29:32 +020011763 vmcs12->vm_exit_intr_error_code = fault->error_code;
11764 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
11765 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
11766 INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
11767 fault->address);
Paolo Bonzini7313c692017-07-27 10:31:25 +020011768 } else {
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011769 kvm_inject_page_fault(vcpu, fault);
Paolo Bonzini7313c692017-07-27 10:31:25 +020011770 }
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011771}
11772
Paolo Bonzinic9923842017-12-13 14:16:30 +010011773static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
11774 struct vmcs12 *vmcs12);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011775
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020011776static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011777{
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020011778 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011779 struct vcpu_vmx *vmx = to_vmx(vcpu);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011780 struct page *page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011781 u64 hpa;
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011782
11783 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011784 /*
11785 * Translate L1 physical address to host physical
11786 * address for vmcs02. Keep the page pinned, so this
11787 * physical address remains valid. We keep a reference
11788 * to it so we can release it later.
11789 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011790 if (vmx->nested.apic_access_page) { /* shouldn't happen */
David Hildenbrand53a70da2017-08-03 18:11:05 +020011791 kvm_release_page_dirty(vmx->nested.apic_access_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011792 vmx->nested.apic_access_page = NULL;
11793 }
11794 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011795 /*
11796 * If translation failed, no matter: This feature asks
11797 * to exit when accessing the given address, and if it
11798 * can never be accessed, this feature won't do
11799 * anything anyway.
11800 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011801 if (!is_error_page(page)) {
11802 vmx->nested.apic_access_page = page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011803 hpa = page_to_phys(vmx->nested.apic_access_page);
11804 vmcs_write64(APIC_ACCESS_ADDR, hpa);
11805 } else {
11806 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
11807 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
11808 }
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011809 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011810
11811 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011812 if (vmx->nested.virtual_apic_page) { /* shouldn't happen */
David Hildenbrand53a70da2017-08-03 18:11:05 +020011813 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011814 vmx->nested.virtual_apic_page = NULL;
11815 }
11816 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr);
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011817
11818 /*
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011819 * If translation failed, VM entry will fail because
11820 * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
11821 * Failing the vm entry is _not_ what the processor
11822 * does but it's basically the only possibility we
11823 * have. We could still enter the guest if CR8 load
11824 * exits are enabled, CR8 store exits are enabled, and
11825 * virtualize APIC access is disabled; in this case
11826 * the processor would never use the TPR shadow and we
11827 * could simply clear the bit from the execution
11828 * control. But such a configuration is useless, so
11829 * let's keep the code simple.
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011830 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011831 if (!is_error_page(page)) {
11832 vmx->nested.virtual_apic_page = page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011833 hpa = page_to_phys(vmx->nested.virtual_apic_page);
11834 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
11835 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011836 }
11837
Wincy Van705699a2015-02-03 23:58:17 +080011838 if (nested_cpu_has_posted_intr(vmcs12)) {
Wincy Van705699a2015-02-03 23:58:17 +080011839 if (vmx->nested.pi_desc_page) { /* shouldn't happen */
11840 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020011841 kvm_release_page_dirty(vmx->nested.pi_desc_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011842 vmx->nested.pi_desc_page = NULL;
Wincy Van705699a2015-02-03 23:58:17 +080011843 }
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011844 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
11845 if (is_error_page(page))
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011846 return;
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011847 vmx->nested.pi_desc_page = page;
11848 vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +080011849 vmx->nested.pi_desc =
11850 (struct pi_desc *)((void *)vmx->nested.pi_desc +
11851 (unsigned long)(vmcs12->posted_intr_desc_addr &
11852 (PAGE_SIZE - 1)));
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011853 vmcs_write64(POSTED_INTR_DESC_ADDR,
11854 page_to_phys(vmx->nested.pi_desc_page) +
11855 (unsigned long)(vmcs12->posted_intr_desc_addr &
11856 (PAGE_SIZE - 1)));
Wincy Van705699a2015-02-03 23:58:17 +080011857 }
Linus Torvaldsd4667ca2018-02-14 17:02:15 -080011858 if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
KarimAllah Ahmed3712caeb2018-02-10 23:39:26 +000011859 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
11860 CPU_BASED_USE_MSR_BITMAPS);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011861 else
11862 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
11863 CPU_BASED_USE_MSR_BITMAPS);
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011864}
11865
Jan Kiszkaf4124502014-03-07 20:03:13 +010011866static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
11867{
11868 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
11869 struct vcpu_vmx *vmx = to_vmx(vcpu);
11870
Sean Christopherson4c008122018-08-27 15:21:10 -070011871 /*
11872 * A timer value of zero is architecturally guaranteed to cause
11873 * a VMExit prior to executing any instructions in the guest.
11874 */
11875 if (preemption_timeout == 0) {
Jan Kiszkaf4124502014-03-07 20:03:13 +010011876 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
11877 return;
11878 }
11879
Sean Christopherson4c008122018-08-27 15:21:10 -070011880 if (vcpu->arch.virtual_tsc_khz == 0)
11881 return;
11882
Jan Kiszkaf4124502014-03-07 20:03:13 +010011883 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
11884 preemption_timeout *= 1000000;
11885 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
11886 hrtimer_start(&vmx->nested.preemption_timer,
11887 ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
11888}
11889
Jim Mattson56a20512017-07-06 16:33:06 -070011890static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
11891 struct vmcs12 *vmcs12)
11892{
11893 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
11894 return 0;
11895
11896 if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
11897 !page_address_valid(vcpu, vmcs12->io_bitmap_b))
11898 return -EINVAL;
11899
11900 return 0;
11901}
11902
Wincy Van3af18d92015-02-03 23:49:31 +080011903static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
11904 struct vmcs12 *vmcs12)
11905{
Wincy Van3af18d92015-02-03 23:49:31 +080011906 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
11907 return 0;
11908
Jim Mattson5fa99cb2017-07-06 16:33:07 -070011909 if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
Wincy Van3af18d92015-02-03 23:49:31 +080011910 return -EINVAL;
11911
11912 return 0;
11913}
11914
Jim Mattson712b12d2017-08-24 13:24:47 -070011915static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
11916 struct vmcs12 *vmcs12)
11917{
11918 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
11919 return 0;
11920
11921 if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))
11922 return -EINVAL;
11923
11924 return 0;
11925}
11926
Wincy Van3af18d92015-02-03 23:49:31 +080011927/*
11928 * Merge L0's and L1's MSR bitmap, return false to indicate that
11929 * we do not use the hardware.
11930 */
Paolo Bonzinic9923842017-12-13 14:16:30 +010011931static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
11932 struct vmcs12 *vmcs12)
Wincy Van3af18d92015-02-03 23:49:31 +080011933{
Wincy Van82f0dd42015-02-03 23:57:18 +080011934 int msr;
Wincy Vanf2b93282015-02-03 23:56:03 +080011935 struct page *page;
Radim Krčmářd048c092016-08-08 20:16:22 +020011936 unsigned long *msr_bitmap_l1;
Paolo Bonzini904e14f2018-01-16 16:51:18 +010011937 unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
Ashok Raj15d45072018-02-01 22:59:43 +010011938 /*
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011939 * pred_cmd & spec_ctrl are trying to verify two things:
Ashok Raj15d45072018-02-01 22:59:43 +010011940 *
11941 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
11942 * ensures that we do not accidentally generate an L02 MSR bitmap
11943 * from the L12 MSR bitmap that is too permissive.
11944 * 2. That L1 or L2s have actually used the MSR. This avoids
11945 * unnecessarily merging of the bitmap if the MSR is unused. This
11946 * works properly because we only update the L01 MSR bitmap lazily.
11947 * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
11948 * updated to reflect this when L1 (or its L2s) actually write to
11949 * the MSR.
11950 */
KarimAllah Ahmed206587a2018-02-10 23:39:25 +000011951 bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
11952 bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
Wincy Vanf2b93282015-02-03 23:56:03 +080011953
Paolo Bonzinic9923842017-12-13 14:16:30 +010011954 /* Nothing to do if the MSR bitmap is not in use. */
11955 if (!cpu_has_vmx_msr_bitmap() ||
11956 !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
11957 return false;
11958
Ashok Raj15d45072018-02-01 22:59:43 +010011959 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011960 !pred_cmd && !spec_ctrl)
Wincy Vanf2b93282015-02-03 23:56:03 +080011961 return false;
11962
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011963 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
11964 if (is_error_page(page))
Wincy Vanf2b93282015-02-03 23:56:03 +080011965 return false;
Paolo Bonzinic9923842017-12-13 14:16:30 +010011966
Radim Krčmářd048c092016-08-08 20:16:22 +020011967 msr_bitmap_l1 = (unsigned long *)kmap(page);
Paolo Bonzinic9923842017-12-13 14:16:30 +010011968 if (nested_cpu_has_apic_reg_virt(vmcs12)) {
11969 /*
11970 * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
11971 * just lets the processor take the value from the virtual-APIC page;
11972 * take those 256 bits directly from the L1 bitmap.
11973 */
11974 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
11975 unsigned word = msr / BITS_PER_LONG;
11976 msr_bitmap_l0[word] = msr_bitmap_l1[word];
11977 msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
Wincy Van608406e2015-02-03 23:57:51 +080011978 }
Paolo Bonzinic9923842017-12-13 14:16:30 +010011979 } else {
11980 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
11981 unsigned word = msr / BITS_PER_LONG;
11982 msr_bitmap_l0[word] = ~0;
11983 msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
11984 }
11985 }
11986
11987 nested_vmx_disable_intercept_for_msr(
11988 msr_bitmap_l1, msr_bitmap_l0,
Paolo Bonzinid7231e72017-12-21 00:47:55 +010011989 X2APIC_MSR(APIC_TASKPRI),
Paolo Bonzinic9923842017-12-13 14:16:30 +010011990 MSR_TYPE_W);
11991
11992 if (nested_cpu_has_vid(vmcs12)) {
11993 nested_vmx_disable_intercept_for_msr(
11994 msr_bitmap_l1, msr_bitmap_l0,
Paolo Bonzinid7231e72017-12-21 00:47:55 +010011995 X2APIC_MSR(APIC_EOI),
Paolo Bonzinic9923842017-12-13 14:16:30 +010011996 MSR_TYPE_W);
11997 nested_vmx_disable_intercept_for_msr(
11998 msr_bitmap_l1, msr_bitmap_l0,
Paolo Bonzinid7231e72017-12-21 00:47:55 +010011999 X2APIC_MSR(APIC_SELF_IPI),
Paolo Bonzinic9923842017-12-13 14:16:30 +010012000 MSR_TYPE_W);
Wincy Van82f0dd42015-02-03 23:57:18 +080012001 }
Ashok Raj15d45072018-02-01 22:59:43 +010012002
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010012003 if (spec_ctrl)
12004 nested_vmx_disable_intercept_for_msr(
12005 msr_bitmap_l1, msr_bitmap_l0,
12006 MSR_IA32_SPEC_CTRL,
12007 MSR_TYPE_R | MSR_TYPE_W);
12008
Ashok Raj15d45072018-02-01 22:59:43 +010012009 if (pred_cmd)
12010 nested_vmx_disable_intercept_for_msr(
12011 msr_bitmap_l1, msr_bitmap_l0,
12012 MSR_IA32_PRED_CMD,
12013 MSR_TYPE_W);
12014
Wincy Vanf2b93282015-02-03 23:56:03 +080012015 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020012016 kvm_release_page_clean(page);
Wincy Vanf2b93282015-02-03 23:56:03 +080012017
12018 return true;
12019}
12020
Liran Alon61ada742018-06-23 02:35:08 +030012021static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
12022 struct vmcs12 *vmcs12)
12023{
12024 struct vmcs12 *shadow;
12025 struct page *page;
12026
12027 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
12028 vmcs12->vmcs_link_pointer == -1ull)
12029 return;
12030
12031 shadow = get_shadow_vmcs12(vcpu);
12032 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
12033
12034 memcpy(shadow, kmap(page), VMCS12_SIZE);
12035
12036 kunmap(page);
12037 kvm_release_page_clean(page);
12038}
12039
12040static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
12041 struct vmcs12 *vmcs12)
12042{
12043 struct vcpu_vmx *vmx = to_vmx(vcpu);
12044
12045 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
12046 vmcs12->vmcs_link_pointer == -1ull)
12047 return;
12048
12049 kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
12050 get_shadow_vmcs12(vcpu), VMCS12_SIZE);
12051}
12052
Krish Sadhukhanf0f4cf52018-04-11 01:10:16 -040012053static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
12054 struct vmcs12 *vmcs12)
12055{
12056 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
12057 !page_address_valid(vcpu, vmcs12->apic_access_addr))
12058 return -EINVAL;
12059 else
12060 return 0;
12061}
12062
Wincy Vanf2b93282015-02-03 23:56:03 +080012063static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
12064 struct vmcs12 *vmcs12)
12065{
Wincy Van82f0dd42015-02-03 23:57:18 +080012066 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
Wincy Van608406e2015-02-03 23:57:51 +080012067 !nested_cpu_has_apic_reg_virt(vmcs12) &&
Wincy Van705699a2015-02-03 23:58:17 +080012068 !nested_cpu_has_vid(vmcs12) &&
12069 !nested_cpu_has_posted_intr(vmcs12))
Wincy Vanf2b93282015-02-03 23:56:03 +080012070 return 0;
12071
12072 /*
12073 * If virtualize x2apic mode is enabled,
12074 * virtualize apic access must be disabled.
12075 */
Wincy Van82f0dd42015-02-03 23:57:18 +080012076 if (nested_cpu_has_virt_x2apic_mode(vmcs12) &&
12077 nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
Wincy Vanf2b93282015-02-03 23:56:03 +080012078 return -EINVAL;
12079
Wincy Van608406e2015-02-03 23:57:51 +080012080 /*
12081 * If virtual interrupt delivery is enabled,
12082 * we must exit on external interrupts.
12083 */
12084 if (nested_cpu_has_vid(vmcs12) &&
12085 !nested_exit_on_intr(vcpu))
12086 return -EINVAL;
12087
Wincy Van705699a2015-02-03 23:58:17 +080012088 /*
12089 * bits 15:8 should be zero in posted_intr_nv,
12090 * the descriptor address has been already checked
12091 * in nested_get_vmcs12_pages.
Krish Sadhukhan6de84e52018-08-23 20:03:03 -040012092 *
12093 * bits 5:0 of posted_intr_desc_addr should be zero.
Wincy Van705699a2015-02-03 23:58:17 +080012094 */
12095 if (nested_cpu_has_posted_intr(vmcs12) &&
12096 (!nested_cpu_has_vid(vmcs12) ||
12097 !nested_exit_intr_ack_set(vcpu) ||
Krish Sadhukhan6de84e52018-08-23 20:03:03 -040012098 (vmcs12->posted_intr_nv & 0xff00) ||
12099 (vmcs12->posted_intr_desc_addr & 0x3f) ||
12100 (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr))))
Wincy Van705699a2015-02-03 23:58:17 +080012101 return -EINVAL;
12102
Wincy Vanf2b93282015-02-03 23:56:03 +080012103 /* tpr shadow is needed by all apicv features. */
12104 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
12105 return -EINVAL;
12106
12107 return 0;
Wincy Van3af18d92015-02-03 23:49:31 +080012108}
12109
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012110static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
12111 unsigned long count_field,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012112 unsigned long addr_field)
Wincy Vanff651cb2014-12-11 08:52:58 +030012113{
Liran Alone2536742018-06-23 02:35:02 +030012114 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012115 int maxphyaddr;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012116 u64 count, addr;
12117
Liran Alone2536742018-06-23 02:35:02 +030012118 if (vmcs12_read_any(vmcs12, count_field, &count) ||
12119 vmcs12_read_any(vmcs12, addr_field, &addr)) {
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012120 WARN_ON(1);
12121 return -EINVAL;
12122 }
12123 if (count == 0)
12124 return 0;
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012125 maxphyaddr = cpuid_maxphyaddr(vcpu);
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012126 if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
12127 (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012128 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012129 "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
12130 addr_field, maxphyaddr, count, addr);
12131 return -EINVAL;
12132 }
12133 return 0;
12134}
12135
12136static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
12137 struct vmcs12 *vmcs12)
12138{
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012139 if (vmcs12->vm_exit_msr_load_count == 0 &&
12140 vmcs12->vm_exit_msr_store_count == 0 &&
12141 vmcs12->vm_entry_msr_load_count == 0)
12142 return 0; /* Fast path */
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012143 if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012144 VM_EXIT_MSR_LOAD_ADDR) ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012145 nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012146 VM_EXIT_MSR_STORE_ADDR) ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012147 nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012148 VM_ENTRY_MSR_LOAD_ADDR))
Wincy Vanff651cb2014-12-11 08:52:58 +030012149 return -EINVAL;
12150 return 0;
12151}
12152
Bandan Dasc5f983f2017-05-05 15:25:14 -040012153static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
12154 struct vmcs12 *vmcs12)
12155{
Krish Sadhukhan55c1dcd2018-09-27 14:33:27 -040012156 if (!nested_cpu_has_pml(vmcs12))
12157 return 0;
Bandan Dasc5f983f2017-05-05 15:25:14 -040012158
Krish Sadhukhan55c1dcd2018-09-27 14:33:27 -040012159 if (!nested_cpu_has_ept(vmcs12) ||
12160 !page_address_valid(vcpu, vmcs12->pml_address))
12161 return -EINVAL;
Bandan Dasc5f983f2017-05-05 15:25:14 -040012162
12163 return 0;
12164}
12165
Liran Alona8a7c022018-06-23 02:35:06 +030012166static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
12167 struct vmcs12 *vmcs12)
12168{
12169 if (!nested_cpu_has_shadow_vmcs(vmcs12))
12170 return 0;
12171
12172 if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) ||
12173 !page_address_valid(vcpu, vmcs12->vmwrite_bitmap))
12174 return -EINVAL;
12175
12176 return 0;
12177}
12178
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012179static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
12180 struct vmx_msr_entry *e)
12181{
12182 /* x2APIC MSR accesses are not allowed */
Jan Kiszka8a9781f2015-05-04 08:32:32 +020012183 if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012184 return -EINVAL;
12185 if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
12186 e->index == MSR_IA32_UCODE_REV)
12187 return -EINVAL;
12188 if (e->reserved != 0)
12189 return -EINVAL;
12190 return 0;
12191}
12192
12193static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
12194 struct vmx_msr_entry *e)
Wincy Vanff651cb2014-12-11 08:52:58 +030012195{
12196 if (e->index == MSR_FS_BASE ||
12197 e->index == MSR_GS_BASE ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012198 e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */
12199 nested_vmx_msr_check_common(vcpu, e))
12200 return -EINVAL;
12201 return 0;
12202}
12203
12204static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
12205 struct vmx_msr_entry *e)
12206{
12207 if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */
12208 nested_vmx_msr_check_common(vcpu, e))
Wincy Vanff651cb2014-12-11 08:52:58 +030012209 return -EINVAL;
12210 return 0;
12211}
12212
12213/*
12214 * Load guest's/host's msr at nested entry/exit.
12215 * return 0 for success, entry index for failure.
12216 */
12217static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
12218{
12219 u32 i;
12220 struct vmx_msr_entry e;
12221 struct msr_data msr;
12222
12223 msr.host_initiated = false;
12224 for (i = 0; i < count; i++) {
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020012225 if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
12226 &e, sizeof(e))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012227 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012228 "%s cannot read MSR entry (%u, 0x%08llx)\n",
12229 __func__, i, gpa + i * sizeof(e));
Wincy Vanff651cb2014-12-11 08:52:58 +030012230 goto fail;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012231 }
12232 if (nested_vmx_load_msr_check(vcpu, &e)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012233 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012234 "%s check failed (%u, 0x%x, 0x%x)\n",
12235 __func__, i, e.index, e.reserved);
12236 goto fail;
12237 }
Wincy Vanff651cb2014-12-11 08:52:58 +030012238 msr.index = e.index;
12239 msr.data = e.value;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012240 if (kvm_set_msr(vcpu, &msr)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012241 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012242 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
12243 __func__, i, e.index, e.value);
Wincy Vanff651cb2014-12-11 08:52:58 +030012244 goto fail;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012245 }
Wincy Vanff651cb2014-12-11 08:52:58 +030012246 }
12247 return 0;
12248fail:
12249 return i + 1;
12250}
12251
12252static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
12253{
12254 u32 i;
12255 struct vmx_msr_entry e;
12256
12257 for (i = 0; i < count; i++) {
Paolo Bonzini609e36d2015-04-08 15:30:38 +020012258 struct msr_data msr_info;
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020012259 if (kvm_vcpu_read_guest(vcpu,
12260 gpa + i * sizeof(e),
12261 &e, 2 * sizeof(u32))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012262 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012263 "%s cannot read MSR entry (%u, 0x%08llx)\n",
12264 __func__, i, gpa + i * sizeof(e));
Wincy Vanff651cb2014-12-11 08:52:58 +030012265 return -EINVAL;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012266 }
12267 if (nested_vmx_store_msr_check(vcpu, &e)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012268 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012269 "%s check failed (%u, 0x%x, 0x%x)\n",
12270 __func__, i, e.index, e.reserved);
Wincy Vanff651cb2014-12-11 08:52:58 +030012271 return -EINVAL;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012272 }
Paolo Bonzini609e36d2015-04-08 15:30:38 +020012273 msr_info.host_initiated = false;
12274 msr_info.index = e.index;
12275 if (kvm_get_msr(vcpu, &msr_info)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012276 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012277 "%s cannot read MSR (%u, 0x%x)\n",
12278 __func__, i, e.index);
12279 return -EINVAL;
12280 }
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020012281 if (kvm_vcpu_write_guest(vcpu,
12282 gpa + i * sizeof(e) +
12283 offsetof(struct vmx_msr_entry, value),
12284 &msr_info.data, sizeof(msr_info.data))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012285 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012286 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
Paolo Bonzini609e36d2015-04-08 15:30:38 +020012287 __func__, i, e.index, msr_info.data);
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012288 return -EINVAL;
12289 }
Wincy Vanff651cb2014-12-11 08:52:58 +030012290 }
12291 return 0;
12292}
12293
Ladi Prosek1dc35da2016-11-30 16:03:11 +010012294static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
12295{
12296 unsigned long invalid_mask;
12297
12298 invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
12299 return (val & invalid_mask) == 0;
12300}
12301
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012302/*
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012303 * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are
12304 * emulating VM entry into a guest with EPT enabled.
12305 * Returns 0 on success, 1 on failure. Invalid state exit qualification code
12306 * is assigned to entry_failure_code on failure.
12307 */
12308static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
Jim Mattsonca0bde22016-11-30 12:03:46 -080012309 u32 *entry_failure_code)
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012310{
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012311 if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
Ladi Prosek1dc35da2016-11-30 16:03:11 +010012312 if (!nested_cr3_valid(vcpu, cr3)) {
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012313 *entry_failure_code = ENTRY_FAIL_DEFAULT;
12314 return 1;
12315 }
12316
12317 /*
12318 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
12319 * must not be dereferenced.
12320 */
12321 if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
12322 !nested_ept) {
12323 if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
12324 *entry_failure_code = ENTRY_FAIL_PDPTE;
12325 return 1;
12326 }
12327 }
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012328 }
12329
Junaid Shahid50c28f22018-06-27 14:59:11 -070012330 if (!nested_ept)
Junaid Shahidade61e22018-06-27 14:59:15 -070012331 kvm_mmu_new_cr3(vcpu, cr3, false);
Junaid Shahid50c28f22018-06-27 14:59:11 -070012332
12333 vcpu->arch.cr3 = cr3;
12334 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
12335
12336 kvm_init_mmu(vcpu, false);
12337
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012338 return 0;
12339}
12340
Liran Alonefebf0a2018-10-08 23:42:20 +030012341/*
12342 * Returns if KVM is able to config CPU to tag TLB entries
12343 * populated by L2 differently than TLB entries populated
12344 * by L1.
12345 *
12346 * If L1 uses EPT, then TLB entries are tagged with different EPTP.
12347 *
12348 * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
12349 * with different VPID (L1 entries are tagged with vmx->vpid
12350 * while L2 entries are tagged with vmx->nested.vpid02).
12351 */
12352static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
12353{
12354 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
12355
12356 return nested_cpu_has_ept(vmcs12) ||
12357 (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
12358}
12359
Sean Christopherson3df5c372018-09-26 09:23:44 -070012360static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
12361{
12362 if (vmx->nested.nested_run_pending &&
12363 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
12364 return vmcs12->guest_ia32_efer;
12365 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
12366 return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
12367 else
12368 return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
12369}
12370
Sean Christopherson09abe322018-09-26 09:23:50 -070012371static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
Paolo Bonzini74a497f2017-12-20 13:55:39 +010012372{
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012373 /*
Sean Christopherson9d6105b22018-09-26 09:23:51 -070012374 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
Sean Christopherson09abe322018-09-26 09:23:50 -070012375 * according to L0's settings (vmcs12 is irrelevant here). Host
12376 * fields that come from L0 and are not constant, e.g. HOST_CR3,
12377 * will be set as needed prior to VMLAUNCH/VMRESUME.
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012378 */
Sean Christopherson9d6105b22018-09-26 09:23:51 -070012379 if (vmx->nested.vmcs02_initialized)
Sean Christopherson09abe322018-09-26 09:23:50 -070012380 return;
Sean Christopherson9d6105b22018-09-26 09:23:51 -070012381 vmx->nested.vmcs02_initialized = true;
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012382
Sean Christopherson52017602018-09-26 09:23:57 -070012383 /*
12384 * We don't care what the EPTP value is we just need to guarantee
12385 * it's valid so we don't get a false positive when doing early
12386 * consistency checks.
12387 */
12388 if (enable_ept && nested_early_check)
12389 vmcs_write64(EPT_POINTER, construct_eptp(&vmx->vcpu, 0));
12390
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012391 /* All VMFUNCs are currently emulated through L0 vmexits. */
12392 if (cpu_has_vmx_vmfunc())
12393 vmcs_write64(VM_FUNCTION_CONTROL, 0);
12394
Sean Christopherson09abe322018-09-26 09:23:50 -070012395 if (cpu_has_vmx_posted_intr())
12396 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
12397
12398 if (cpu_has_vmx_msr_bitmap())
12399 vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
12400
12401 if (enable_pml)
12402 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012403
12404 /*
Sean Christopherson09abe322018-09-26 09:23:50 -070012405 * Set the MSR load/store lists to match L0's settings. Only the
12406 * addresses are constant (for vmcs02), the counts can change based
12407 * on L2's behavior, e.g. switching to/from long mode.
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012408 */
12409 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -040012410 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -040012411 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012412
Sean Christopherson09abe322018-09-26 09:23:50 -070012413 vmx_set_constant_host_state(vmx);
12414}
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012415
Sean Christopherson09abe322018-09-26 09:23:50 -070012416static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx,
12417 struct vmcs12 *vmcs12)
12418{
12419 prepare_vmcs02_constant_state(vmx);
12420
12421 vmcs_write64(VMCS_LINK_POINTER, -1ull);
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012422
12423 if (enable_vpid) {
12424 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
12425 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
12426 else
12427 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
12428 }
Paolo Bonzini74a497f2017-12-20 13:55:39 +010012429}
12430
Sean Christopherson09abe322018-09-26 09:23:50 -070012431static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012432{
Bandan Das03efce62017-05-05 15:25:15 -040012433 u32 exec_control, vmcs12_exec_ctrl;
Sean Christopherson09abe322018-09-26 09:23:50 -070012434 u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012435
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020012436 if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
Sean Christopherson09abe322018-09-26 09:23:50 -070012437 prepare_vmcs02_early_full(vmx, vmcs12);
Sean Christopherson9d1887e2018-03-05 09:33:27 -080012438
Paolo Bonzini8665c3f2017-12-20 13:56:53 +010012439 /*
Sean Christopherson09abe322018-09-26 09:23:50 -070012440 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
12441 * entry, but only if the current (host) sp changed from the value
12442 * we wrote last (vmx->host_rsp). This cache is no longer relevant
12443 * if we switch vmcs, and rather than hold a separate cache per vmcs,
Sean Christopherson52017602018-09-26 09:23:57 -070012444 * here we just force the write to happen on entry. host_rsp will
12445 * also be written unconditionally by nested_vmx_check_vmentry_hw()
12446 * if we are doing early consistency checks via hardware.
Paolo Bonzini8665c3f2017-12-20 13:56:53 +010012447 */
Sean Christopherson09abe322018-09-26 09:23:50 -070012448 vmx->host_rsp = 0;
Paolo Bonzini8665c3f2017-12-20 13:56:53 +010012449
Sean Christopherson09abe322018-09-26 09:23:50 -070012450 /*
12451 * PIN CONTROLS
12452 */
Jan Kiszkaf4124502014-03-07 20:03:13 +010012453 exec_control = vmcs12->pin_based_vm_exec_control;
Wincy Van705699a2015-02-03 23:58:17 +080012454
Sean Christophersonf459a702018-08-27 15:21:11 -070012455 /* Preemption timer setting is computed directly in vmx_vcpu_run. */
Paolo Bonzini93140062016-07-06 13:23:51 +020012456 exec_control |= vmcs_config.pin_based_exec_ctrl;
Sean Christophersonf459a702018-08-27 15:21:11 -070012457 exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
12458 vmx->loaded_vmcs->hv_timer_armed = false;
Paolo Bonzini93140062016-07-06 13:23:51 +020012459
12460 /* Posted interrupts setting is only taken from vmcs12. */
Wincy Van705699a2015-02-03 23:58:17 +080012461 if (nested_cpu_has_posted_intr(vmcs12)) {
Wincy Van705699a2015-02-03 23:58:17 +080012462 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
12463 vmx->nested.pi_pending = false;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080012464 } else {
Wincy Van705699a2015-02-03 23:58:17 +080012465 exec_control &= ~PIN_BASED_POSTED_INTR;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080012466 }
Jan Kiszkaf4124502014-03-07 20:03:13 +010012467 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012468
Sean Christopherson09abe322018-09-26 09:23:50 -070012469 /*
12470 * EXEC CONTROLS
12471 */
12472 exec_control = vmx_exec_control(vmx); /* L0's desires */
12473 exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
12474 exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
12475 exec_control &= ~CPU_BASED_TPR_SHADOW;
12476 exec_control |= vmcs12->cpu_based_vm_exec_control;
Jan Kiszka0238ea92013-03-13 11:31:24 +010012477
Sean Christopherson09abe322018-09-26 09:23:50 -070012478 /*
12479 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
12480 * nested_get_vmcs12_pages can't fix it up, the illegal value
12481 * will result in a VM entry failure.
12482 */
12483 if (exec_control & CPU_BASED_TPR_SHADOW) {
12484 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
12485 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
12486 } else {
12487#ifdef CONFIG_X86_64
12488 exec_control |= CPU_BASED_CR8_LOAD_EXITING |
12489 CPU_BASED_CR8_STORE_EXITING;
12490#endif
12491 }
12492
12493 /*
12494 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
12495 * for I/O port accesses.
12496 */
12497 exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
12498 exec_control |= CPU_BASED_UNCOND_IO_EXITING;
12499 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
12500
12501 /*
12502 * SECONDARY EXEC CONTROLS
12503 */
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012504 if (cpu_has_secondary_exec_ctrls()) {
Paolo Bonzini80154d72017-08-24 13:55:35 +020012505 exec_control = vmx->secondary_exec_control;
Xiao Guangronge2821622015-09-09 14:05:52 +080012506
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012507 /* Take the following fields only from vmcs12 */
Paolo Bonzini696dfd92014-05-07 11:20:54 +020012508 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Paolo Bonzini90a2db62017-07-27 13:22:13 +020012509 SECONDARY_EXEC_ENABLE_INVPCID |
Jan Kiszkab3a2a902015-03-23 19:27:19 +010012510 SECONDARY_EXEC_RDTSCP |
Paolo Bonzini3db13482017-08-24 14:48:03 +020012511 SECONDARY_EXEC_XSAVES |
Paolo Bonzini696dfd92014-05-07 11:20:54 +020012512 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Bandan Das27c42a12017-08-03 15:54:42 -040012513 SECONDARY_EXEC_APIC_REGISTER_VIRT |
12514 SECONDARY_EXEC_ENABLE_VMFUNC);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012515 if (nested_cpu_has(vmcs12,
Bandan Das03efce62017-05-05 15:25:15 -040012516 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
12517 vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
12518 ~SECONDARY_EXEC_ENABLE_PML;
12519 exec_control |= vmcs12_exec_ctrl;
12520 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012521
Liran Alon32c7acf2018-06-23 02:35:11 +030012522 /* VMCS shadowing for L2 is emulated for now */
12523 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
12524
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012525 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
Wincy Van608406e2015-02-03 23:57:51 +080012526 vmcs_write16(GUEST_INTR_STATUS,
12527 vmcs12->guest_intr_status);
Wincy Van608406e2015-02-03 23:57:51 +080012528
Jim Mattson6beb7bd2016-11-30 12:03:45 -080012529 /*
12530 * Write an illegal value to APIC_ACCESS_ADDR. Later,
12531 * nested_get_vmcs12_pages will either fix it up or
12532 * remove the VM execution control.
12533 */
12534 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
12535 vmcs_write64(APIC_ACCESS_ADDR, -1ull);
12536
Sean Christopherson0b665d32018-08-14 09:33:34 -070012537 if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
12538 vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
12539
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012540 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
12541 }
12542
Jim Mattson83bafef2016-10-04 10:48:38 -070012543 /*
Sean Christopherson09abe322018-09-26 09:23:50 -070012544 * ENTRY CONTROLS
12545 *
Sean Christopherson3df5c372018-09-26 09:23:44 -070012546 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
Sean Christopherson09abe322018-09-26 09:23:50 -070012547 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
12548 * on the related bits (if supported by the CPU) in the hope that
12549 * we can avoid VMWrites during vmx_set_efer().
Sean Christopherson3df5c372018-09-26 09:23:44 -070012550 */
Sean Christopherson3df5c372018-09-26 09:23:44 -070012551 exec_control = (vmcs12->vm_entry_controls | vmcs_config.vmentry_ctrl) &
12552 ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
12553 if (cpu_has_load_ia32_efer) {
12554 if (guest_efer & EFER_LMA)
12555 exec_control |= VM_ENTRY_IA32E_MODE;
12556 if (guest_efer != host_efer)
12557 exec_control |= VM_ENTRY_LOAD_IA32_EFER;
12558 }
12559 vm_entry_controls_init(vmx, exec_control);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012560
Sean Christopherson09abe322018-09-26 09:23:50 -070012561 /*
12562 * EXIT CONTROLS
12563 *
12564 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
12565 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
12566 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
12567 */
12568 exec_control = vmcs_config.vmexit_ctrl;
12569 if (cpu_has_load_ia32_efer && guest_efer != host_efer)
12570 exec_control |= VM_EXIT_LOAD_IA32_EFER;
12571 vm_exit_controls_init(vmx, exec_control);
12572
12573 /*
12574 * Conceptually we want to copy the PML address and index from
12575 * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
12576 * since we always flush the log on each vmexit and never change
12577 * the PML address (once set), this happens to be equivalent to
12578 * simply resetting the index in vmcs02.
12579 */
12580 if (enable_pml)
12581 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
12582
12583 /*
12584 * Interrupt/Exception Fields
12585 */
12586 if (vmx->nested.nested_run_pending) {
12587 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
12588 vmcs12->vm_entry_intr_info_field);
12589 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
12590 vmcs12->vm_entry_exception_error_code);
12591 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
12592 vmcs12->vm_entry_instruction_len);
12593 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
12594 vmcs12->guest_interruptibility_info);
12595 vmx->loaded_vmcs->nmi_known_unmasked =
12596 !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
12597 } else {
12598 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
12599 }
12600}
12601
12602static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
12603{
12604 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
12605 vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
12606 vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
12607 vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
12608 vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
12609 vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
12610 vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
12611 vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
12612 vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
12613 vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
12614 vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
12615 vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
12616 vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
12617 vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
12618 vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
12619 vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
12620 vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
12621 vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
12622 vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
12623 vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
12624 vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
12625 vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
12626 vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
12627 vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
12628 vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
12629 vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
12630 vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
12631 vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
12632 vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
12633 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
12634 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
12635
12636 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
12637 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
12638 vmcs12->guest_pending_dbg_exceptions);
12639 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
12640 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
12641
12642 if (nested_cpu_has_xsaves(vmcs12))
12643 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
12644
12645 /*
12646 * Whether page-faults are trapped is determined by a combination of
12647 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
12648 * If enable_ept, L0 doesn't care about page faults and we should
12649 * set all of these to L1's desires. However, if !enable_ept, L0 does
12650 * care about (at least some) page faults, and because it is not easy
12651 * (if at all possible?) to merge L0 and L1's desires, we simply ask
12652 * to exit on each and every L2 page fault. This is done by setting
12653 * MASK=MATCH=0 and (see below) EB.PF=1.
12654 * Note that below we don't need special code to set EB.PF beyond the
12655 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
12656 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
12657 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
12658 */
12659 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
12660 enable_ept ? vmcs12->page_fault_error_code_mask : 0);
12661 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
12662 enable_ept ? vmcs12->page_fault_error_code_match : 0);
12663
12664 if (cpu_has_vmx_apicv()) {
12665 vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
12666 vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
12667 vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
12668 vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
12669 }
12670
12671 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
12672 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
12673
12674 set_cr4_guest_host_mask(vmx);
12675
12676 if (kvm_mpx_supported()) {
12677 if (vmx->nested.nested_run_pending &&
12678 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
12679 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
12680 else
12681 vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
12682 }
12683
12684 /*
12685 * L1 may access the L2's PDPTR, so save them to construct vmcs12
12686 */
12687 if (enable_ept) {
12688 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
12689 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
12690 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
12691 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
12692 }
12693}
12694
12695/*
12696 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
12697 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
12698 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
12699 * guest in a way that will both be appropriate to L1's requests, and our
12700 * needs. In addition to modifying the active vmcs (which is vmcs02), this
12701 * function also has additional necessary side-effects, like setting various
12702 * vcpu->arch fields.
12703 * Returns 0 on success, 1 on failure. Invalid state exit qualification code
12704 * is assigned to entry_failure_code on failure.
12705 */
12706static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
12707 u32 *entry_failure_code)
12708{
12709 struct vcpu_vmx *vmx = to_vmx(vcpu);
12710
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020012711 if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
Sean Christopherson09abe322018-09-26 09:23:50 -070012712 prepare_vmcs02_full(vmx, vmcs12);
12713 vmx->nested.dirty_vmcs12 = false;
12714 }
12715
12716 /*
12717 * First, the fields that are shadowed. This must be kept in sync
12718 * with vmx_shadow_fields.h.
12719 */
12720
12721 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
12722 vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
12723 vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
12724 vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
12725 vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
12726
12727 if (vmx->nested.nested_run_pending &&
12728 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
12729 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
12730 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
12731 } else {
12732 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
12733 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
12734 }
12735 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
12736
12737 vmx->nested.preemption_timer_expired = false;
12738 if (nested_cpu_has_preemption_timer(vmcs12))
12739 vmx_start_preemption_timer(vcpu);
12740
12741 /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
12742 * bitwise-or of what L1 wants to trap for L2, and what we want to
12743 * trap. Note that CR0.TS also needs updating - we do this later.
12744 */
12745 update_exception_bitmap(vcpu);
12746 vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
12747 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
12748
Jim Mattson6514dc32018-04-26 16:09:12 -070012749 if (vmx->nested.nested_run_pending &&
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080012750 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012751 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
Jan Kiszka44811c02013-08-04 17:17:27 +020012752 vcpu->arch.pat = vmcs12->guest_ia32_pat;
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080012753 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012754 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080012755 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012756
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020012757 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
12758
Peter Feinerc95ba922016-08-17 09:36:47 -070012759 if (kvm_has_tsc_control)
12760 decache_tsc_multiplier(vmx);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012761
12762 if (enable_vpid) {
12763 /*
Wanpeng Li5c614b32015-10-13 09:18:36 -070012764 * There is no direct mapping between vpid02 and vpid12, the
12765 * vpid02 is per-vCPU for L0 and reused while the value of
12766 * vpid12 is changed w/ one invvpid during nested vmentry.
12767 * The vpid12 is allocated by L1 for L2, so it will not
12768 * influence global bitmap(for vpid01 and vpid02 allocation)
12769 * even if spawn a lot of nested vCPUs.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012770 */
Liran Alonefebf0a2018-10-08 23:42:20 +030012771 if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) {
Wanpeng Li5c614b32015-10-13 09:18:36 -070012772 if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
12773 vmx->nested.last_vpid = vmcs12->virtual_processor_id;
Liran Alonefebf0a2018-10-08 23:42:20 +030012774 __vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false);
Wanpeng Li5c614b32015-10-13 09:18:36 -070012775 }
12776 } else {
Liran Alon14389212018-10-08 23:42:17 +030012777 /*
12778 * If L1 use EPT, then L0 needs to execute INVEPT on
12779 * EPTP02 instead of EPTP01. Therefore, delay TLB
12780 * flush until vmcs02->eptp is fully updated by
12781 * KVM_REQ_LOAD_CR3. Note that this assumes
12782 * KVM_REQ_TLB_FLUSH is evaluated after
12783 * KVM_REQ_LOAD_CR3 in vcpu_enter_guest().
12784 */
12785 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Wanpeng Li5c614b32015-10-13 09:18:36 -070012786 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012787 }
12788
Sean Christopherson5b8ba412018-09-26 09:23:40 -070012789 if (nested_cpu_has_ept(vmcs12))
12790 nested_ept_init_mmu_context(vcpu);
12791 else if (nested_cpu_has2(vmcs12,
12792 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
Junaid Shahida468f2d2018-04-26 13:09:50 -070012793 vmx_flush_tlb(vcpu, true);
Nadav Har'El155a97a2013-08-05 11:07:16 +030012794
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012795 /*
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080012796 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
12797 * bits which we consider mandatory enabled.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012798 * The CR0_READ_SHADOW is what L2 should have expected to read given
12799 * the specifications by L1; It's not enough to take
12800 * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we
12801 * have more bits than L1 expected.
12802 */
12803 vmx_set_cr0(vcpu, vmcs12->guest_cr0);
12804 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
12805
12806 vmx_set_cr4(vcpu, vmcs12->guest_cr4);
12807 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
12808
Sean Christopherson09abe322018-09-26 09:23:50 -070012809 vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
Sean Christopherson3df5c372018-09-26 09:23:44 -070012810 /* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
David Matlack5a6a9742016-11-29 18:14:10 -080012811 vmx_set_efer(vcpu, vcpu->arch.efer);
12812
Sean Christopherson2bb8caf2018-03-12 10:56:13 -070012813 /*
12814 * Guest state is invalid and unrestricted guest is disabled,
12815 * which means L1 attempted VMEntry to L2 with invalid state.
12816 * Fail the VMEntry.
12817 */
Paolo Bonzini3184a992018-03-21 14:20:18 +010012818 if (vmx->emulation_required) {
12819 *entry_failure_code = ENTRY_FAIL_DEFAULT;
Sean Christopherson2bb8caf2018-03-12 10:56:13 -070012820 return 1;
Paolo Bonzini3184a992018-03-21 14:20:18 +010012821 }
Sean Christopherson2bb8caf2018-03-12 10:56:13 -070012822
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012823 /* Shadow page tables on either EPT or shadow page tables. */
Ladi Prosek7ad658b2017-03-23 07:18:08 +010012824 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012825 entry_failure_code))
12826 return 1;
Ladi Prosek7ca29de2016-11-30 16:03:08 +010012827
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030012828 if (!enable_ept)
12829 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
12830
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012831 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
12832 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
Ladi Prosekee146c12016-11-30 16:03:09 +010012833 return 0;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012834}
12835
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -050012836static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
12837{
12838 if (!nested_cpu_has_nmi_exiting(vmcs12) &&
12839 nested_cpu_has_virtual_nmis(vmcs12))
12840 return -EINVAL;
12841
12842 if (!nested_cpu_has_virtual_nmis(vmcs12) &&
12843 nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
12844 return -EINVAL;
12845
12846 return 0;
12847}
12848
Jim Mattsonca0bde22016-11-30 12:03:46 -080012849static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
12850{
12851 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson64a919f2018-09-26 09:23:39 -070012852 bool ia32e;
Jim Mattsonca0bde22016-11-30 12:03:46 -080012853
12854 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
12855 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
12856 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12857
Krish Sadhukhanba8e23d2018-09-04 14:42:58 -040012858 if (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)
12859 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12860
Jim Mattson56a20512017-07-06 16:33:06 -070012861 if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
12862 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12863
Jim Mattsonca0bde22016-11-30 12:03:46 -080012864 if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
12865 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12866
Krish Sadhukhanf0f4cf52018-04-11 01:10:16 -040012867 if (nested_vmx_check_apic_access_controls(vcpu, vmcs12))
12868 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12869
Jim Mattson712b12d2017-08-24 13:24:47 -070012870 if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
12871 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12872
Jim Mattsonca0bde22016-11-30 12:03:46 -080012873 if (nested_vmx_check_apicv_controls(vcpu, vmcs12))
12874 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12875
12876 if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
12877 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12878
Bandan Dasc5f983f2017-05-05 15:25:14 -040012879 if (nested_vmx_check_pml_controls(vcpu, vmcs12))
12880 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12881
Liran Alona8a7c022018-06-23 02:35:06 +030012882 if (nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12))
12883 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12884
Jim Mattsonca0bde22016-11-30 12:03:46 -080012885 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012886 vmx->nested.msrs.procbased_ctls_low,
12887 vmx->nested.msrs.procbased_ctls_high) ||
Jim Mattson2e5b0bd2017-05-04 11:51:58 -070012888 (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
12889 !vmx_control_verify(vmcs12->secondary_vm_exec_control,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012890 vmx->nested.msrs.secondary_ctls_low,
12891 vmx->nested.msrs.secondary_ctls_high)) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080012892 !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012893 vmx->nested.msrs.pinbased_ctls_low,
12894 vmx->nested.msrs.pinbased_ctls_high) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080012895 !vmx_control_verify(vmcs12->vm_exit_controls,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012896 vmx->nested.msrs.exit_ctls_low,
12897 vmx->nested.msrs.exit_ctls_high) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080012898 !vmx_control_verify(vmcs12->vm_entry_controls,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012899 vmx->nested.msrs.entry_ctls_low,
12900 vmx->nested.msrs.entry_ctls_high))
Jim Mattsonca0bde22016-11-30 12:03:46 -080012901 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12902
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -050012903 if (nested_vmx_check_nmi_controls(vmcs12))
Jim Mattsonca0bde22016-11-30 12:03:46 -080012904 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12905
Bandan Das41ab9372017-08-03 15:54:43 -040012906 if (nested_cpu_has_vmfunc(vmcs12)) {
12907 if (vmcs12->vm_function_control &
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012908 ~vmx->nested.msrs.vmfunc_controls)
Bandan Das41ab9372017-08-03 15:54:43 -040012909 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12910
12911 if (nested_cpu_has_eptp_switching(vmcs12)) {
12912 if (!nested_cpu_has_ept(vmcs12) ||
12913 !page_address_valid(vcpu, vmcs12->eptp_list_address))
12914 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12915 }
12916 }
Bandan Das27c42a12017-08-03 15:54:42 -040012917
Jim Mattsonc7c2c702017-05-05 11:28:09 -070012918 if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
12919 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12920
Jim Mattsonca0bde22016-11-30 12:03:46 -080012921 if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
12922 !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
12923 !nested_cr3_valid(vcpu, vmcs12->host_cr3))
12924 return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
12925
Marc Orr04473782018-06-20 17:21:29 -070012926 /*
Sean Christopherson64a919f2018-09-26 09:23:39 -070012927 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
12928 * IA32_EFER MSR must be 0 in the field for that register. In addition,
12929 * the values of the LMA and LME bits in the field must each be that of
12930 * the host address-space size VM-exit control.
12931 */
12932 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
12933 ia32e = (vmcs12->vm_exit_controls &
12934 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
12935 if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
12936 ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
12937 ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
12938 return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
12939 }
12940
12941 /*
Marc Orr04473782018-06-20 17:21:29 -070012942 * From the Intel SDM, volume 3:
12943 * Fields relevant to VM-entry event injection must be set properly.
12944 * These fields are the VM-entry interruption-information field, the
12945 * VM-entry exception error code, and the VM-entry instruction length.
12946 */
12947 if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
12948 u32 intr_info = vmcs12->vm_entry_intr_info_field;
12949 u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
12950 u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
12951 bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
12952 bool should_have_error_code;
12953 bool urg = nested_cpu_has2(vmcs12,
12954 SECONDARY_EXEC_UNRESTRICTED_GUEST);
12955 bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
12956
12957 /* VM-entry interruption-info field: interruption type */
12958 if (intr_type == INTR_TYPE_RESERVED ||
12959 (intr_type == INTR_TYPE_OTHER_EVENT &&
12960 !nested_cpu_supports_monitor_trap_flag(vcpu)))
12961 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12962
12963 /* VM-entry interruption-info field: vector */
12964 if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
12965 (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
12966 (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
12967 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12968
12969 /* VM-entry interruption-info field: deliver error code */
12970 should_have_error_code =
12971 intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
12972 x86_exception_has_error_code(vector);
12973 if (has_error_code != should_have_error_code)
12974 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12975
12976 /* VM-entry exception error code */
12977 if (has_error_code &&
12978 vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
12979 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12980
12981 /* VM-entry interruption-info field: reserved bits */
12982 if (intr_info & INTR_INFO_RESVD_BITS_MASK)
12983 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12984
12985 /* VM-entry instruction length */
12986 switch (intr_type) {
12987 case INTR_TYPE_SOFT_EXCEPTION:
12988 case INTR_TYPE_SOFT_INTR:
12989 case INTR_TYPE_PRIV_SW_EXCEPTION:
12990 if ((vmcs12->vm_entry_instruction_len > 15) ||
12991 (vmcs12->vm_entry_instruction_len == 0 &&
12992 !nested_cpu_has_zero_length_injection(vcpu)))
12993 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12994 }
12995 }
12996
Sean Christopherson5b8ba412018-09-26 09:23:40 -070012997 if (nested_cpu_has_ept(vmcs12) &&
12998 !valid_ept_address(vcpu, vmcs12->ept_pointer))
12999 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13000
Jim Mattsonca0bde22016-11-30 12:03:46 -080013001 return 0;
13002}
13003
Liran Alonf145d902018-06-23 02:35:07 +030013004static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
13005 struct vmcs12 *vmcs12)
13006{
13007 int r;
13008 struct page *page;
13009 struct vmcs12 *shadow;
13010
13011 if (vmcs12->vmcs_link_pointer == -1ull)
13012 return 0;
13013
13014 if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
13015 return -EINVAL;
13016
13017 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
13018 if (is_error_page(page))
13019 return -EINVAL;
13020
13021 r = 0;
13022 shadow = kmap(page);
13023 if (shadow->hdr.revision_id != VMCS12_REVISION ||
13024 shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
13025 r = -EINVAL;
13026 kunmap(page);
13027 kvm_release_page_clean(page);
13028 return r;
13029}
13030
Jim Mattsonca0bde22016-11-30 12:03:46 -080013031static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
13032 u32 *exit_qual)
13033{
13034 bool ia32e;
13035
13036 *exit_qual = ENTRY_FAIL_DEFAULT;
13037
13038 if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
13039 !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
13040 return 1;
13041
Liran Alonf145d902018-06-23 02:35:07 +030013042 if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
Jim Mattsonca0bde22016-11-30 12:03:46 -080013043 *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
13044 return 1;
13045 }
13046
13047 /*
13048 * If the load IA32_EFER VM-entry control is 1, the following checks
13049 * are performed on the field for the IA32_EFER MSR:
13050 * - Bits reserved in the IA32_EFER MSR must be 0.
13051 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
13052 * the IA-32e mode guest VM-exit control. It must also be identical
13053 * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
13054 * CR0.PG) is 1.
13055 */
13056 if (to_vmx(vcpu)->nested.nested_run_pending &&
13057 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
13058 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
13059 if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
13060 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
13061 ((vmcs12->guest_cr0 & X86_CR0_PG) &&
13062 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
13063 return 1;
13064 }
13065
Wanpeng Lif1b026a2017-11-05 16:54:48 -080013066 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
13067 (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
13068 (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
13069 return 1;
13070
Jim Mattsonca0bde22016-11-30 12:03:46 -080013071 return 0;
13072}
13073
Sean Christopherson52017602018-09-26 09:23:57 -070013074static int __noclone nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
13075{
13076 struct vcpu_vmx *vmx = to_vmx(vcpu);
13077 unsigned long cr3, cr4;
13078
13079 if (!nested_early_check)
13080 return 0;
13081
13082 if (vmx->msr_autoload.host.nr)
13083 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
13084 if (vmx->msr_autoload.guest.nr)
13085 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
13086
13087 preempt_disable();
13088
13089 vmx_prepare_switch_to_guest(vcpu);
13090
13091 /*
13092 * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
13093 * which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to
13094 * be written (by preparve_vmcs02()) before the "real" VMEnter, i.e.
13095 * there is no need to preserve other bits or save/restore the field.
13096 */
13097 vmcs_writel(GUEST_RFLAGS, 0);
13098
13099 vmcs_writel(HOST_RIP, vmx_early_consistency_check_return);
13100
13101 cr3 = __get_current_cr3_fast();
13102 if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
13103 vmcs_writel(HOST_CR3, cr3);
13104 vmx->loaded_vmcs->host_state.cr3 = cr3;
13105 }
13106
13107 cr4 = cr4_read_shadow();
13108 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
13109 vmcs_writel(HOST_CR4, cr4);
13110 vmx->loaded_vmcs->host_state.cr4 = cr4;
13111 }
13112
13113 vmx->__launched = vmx->loaded_vmcs->launched;
13114
13115 asm(
13116 /* Set HOST_RSP */
Uros Bizjak4b1e5472018-10-11 19:40:44 +020013117 __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
Sean Christopherson52017602018-09-26 09:23:57 -070013118 "mov %%" _ASM_SP ", %c[host_rsp](%0)\n\t"
13119
13120 /* Check if vmlaunch of vmresume is needed */
13121 "cmpl $0, %c[launched](%0)\n\t"
13122 "je 1f\n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020013123 __ex("vmresume") "\n\t"
Sean Christopherson52017602018-09-26 09:23:57 -070013124 "jmp 2f\n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020013125 "1: " __ex("vmlaunch") "\n\t"
Sean Christopherson52017602018-09-26 09:23:57 -070013126 "jmp 2f\n\t"
13127 "2: "
13128
13129 /* Set vmx->fail accordingly */
13130 "setbe %c[fail](%0)\n\t"
13131
13132 ".pushsection .rodata\n\t"
13133 ".global vmx_early_consistency_check_return\n\t"
13134 "vmx_early_consistency_check_return: " _ASM_PTR " 2b\n\t"
13135 ".popsection"
13136 :
13137 : "c"(vmx), "d"((unsigned long)HOST_RSP),
13138 [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
13139 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
13140 [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp))
13141 : "rax", "cc", "memory"
13142 );
13143
13144 vmcs_writel(HOST_RIP, vmx_return);
13145
13146 preempt_enable();
13147
13148 if (vmx->msr_autoload.host.nr)
13149 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
13150 if (vmx->msr_autoload.guest.nr)
13151 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
13152
13153 if (vmx->fail) {
13154 WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
13155 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
13156 vmx->fail = 0;
13157 return 1;
13158 }
13159
13160 /*
13161 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
13162 */
13163 local_irq_enable();
13164 if (hw_breakpoint_active())
13165 set_debugreg(__this_cpu_read(cpu_dr7), 7);
13166
13167 /*
13168 * A non-failing VMEntry means we somehow entered guest mode with
13169 * an illegal RIP, and that's just the tip of the iceberg. There
13170 * is no telling what memory has been modified or what state has
13171 * been exposed to unknown code. Hitting this all but guarantees
13172 * a (very critical) hardware issue.
13173 */
13174 WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
13175 VMX_EXIT_REASONS_FAILED_VMENTRY));
13176
13177 return 0;
13178}
13179STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw);
13180
Sean Christophersona633e412018-09-26 09:23:47 -070013181static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
13182 struct vmcs12 *vmcs12);
13183
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013184/*
Sean Christophersona633e412018-09-26 09:23:47 -070013185 * If from_vmentry is false, this is being called from state restore (either RSM
Jim Mattson8fcc4b52018-07-10 11:27:20 +020013186 * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
Sean Christopherson52017602018-09-26 09:23:57 -070013187+ *
13188+ * Returns:
13189+ * 0 - success, i.e. proceed with actual VMEnter
13190+ * 1 - consistency check VMExit
13191+ * -1 - consistency check VMFail
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013192 */
Sean Christophersona633e412018-09-26 09:23:47 -070013193static int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
13194 bool from_vmentry)
Jim Mattson858e25c2016-11-30 12:03:47 -080013195{
13196 struct vcpu_vmx *vmx = to_vmx(vcpu);
13197 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Paolo Bonzini7e712682018-10-03 13:44:26 +020013198 bool evaluate_pending_interrupts;
Sean Christophersona633e412018-09-26 09:23:47 -070013199 u32 exit_reason = EXIT_REASON_INVALID_STATE;
13200 u32 exit_qual;
Jim Mattson858e25c2016-11-30 12:03:47 -080013201
Paolo Bonzini7e712682018-10-03 13:44:26 +020013202 evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
13203 (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
13204 if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
13205 evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
Liran Alonb5861e52018-09-03 15:20:22 +030013206
Jim Mattson858e25c2016-11-30 12:03:47 -080013207 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
13208 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
Liran Alon62cf9bd812018-09-14 03:25:54 +030013209 if (kvm_mpx_supported() &&
13210 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
13211 vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
Jim Mattson858e25c2016-11-30 12:03:47 -080013212
Jim Mattsonde3a0022017-11-27 17:22:25 -060013213 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
Jim Mattson858e25c2016-11-30 12:03:47 -080013214
Sean Christopherson16fb9a42018-09-26 09:23:52 -070013215 prepare_vmcs02_early(vmx, vmcs12);
13216
13217 if (from_vmentry) {
13218 nested_get_vmcs12_pages(vcpu);
13219
Sean Christopherson52017602018-09-26 09:23:57 -070013220 if (nested_vmx_check_vmentry_hw(vcpu)) {
13221 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
13222 return -1;
13223 }
13224
Sean Christopherson16fb9a42018-09-26 09:23:52 -070013225 if (check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
13226 goto vmentry_fail_vmexit;
13227 }
13228
13229 enter_guest_mode(vcpu);
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020013230 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
13231 vcpu->arch.tsc_offset += vmcs12->tsc_offset;
13232
Sean Christophersona633e412018-09-26 09:23:47 -070013233 if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
Sean Christopherson39f9c382018-09-26 09:23:48 -070013234 goto vmentry_fail_vmexit_guest_mode;
Jim Mattson858e25c2016-11-30 12:03:47 -080013235
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013236 if (from_vmentry) {
Sean Christophersona633e412018-09-26 09:23:47 -070013237 exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
13238 exit_qual = nested_vmx_load_msr(vcpu,
13239 vmcs12->vm_entry_msr_load_addr,
13240 vmcs12->vm_entry_msr_load_count);
13241 if (exit_qual)
Sean Christopherson39f9c382018-09-26 09:23:48 -070013242 goto vmentry_fail_vmexit_guest_mode;
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013243 } else {
13244 /*
13245 * The MMU is not initialized to point at the right entities yet and
13246 * "get pages" would need to read data from the guest (i.e. we will
13247 * need to perform gpa to hpa translation). Request a call
13248 * to nested_get_vmcs12_pages before the next VM-entry. The MSRs
13249 * have already been set at vmentry time and should not be reset.
13250 */
13251 kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
13252 }
Jim Mattson858e25c2016-11-30 12:03:47 -080013253
Jim Mattson858e25c2016-11-30 12:03:47 -080013254 /*
Liran Alonb5861e52018-09-03 15:20:22 +030013255 * If L1 had a pending IRQ/NMI until it executed
13256 * VMLAUNCH/VMRESUME which wasn't delivered because it was
13257 * disallowed (e.g. interrupts disabled), L0 needs to
13258 * evaluate if this pending event should cause an exit from L2
13259 * to L1 or delivered directly to L2 (e.g. In case L1 don't
13260 * intercept EXTERNAL_INTERRUPT).
13261 *
Paolo Bonzini7e712682018-10-03 13:44:26 +020013262 * Usually this would be handled by the processor noticing an
13263 * IRQ/NMI window request, or checking RVI during evaluation of
13264 * pending virtual interrupts. However, this setting was done
13265 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
13266 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
Liran Alonb5861e52018-09-03 15:20:22 +030013267 */
Paolo Bonzini7e712682018-10-03 13:44:26 +020013268 if (unlikely(evaluate_pending_interrupts))
Liran Alonb5861e52018-09-03 15:20:22 +030013269 kvm_make_request(KVM_REQ_EVENT, vcpu);
Liran Alonb5861e52018-09-03 15:20:22 +030013270
13271 /*
Jim Mattson858e25c2016-11-30 12:03:47 -080013272 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
13273 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
13274 * returned as far as L1 is concerned. It will only return (and set
13275 * the success flag) when L2 exits (see nested_vmx_vmexit()).
13276 */
13277 return 0;
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020013278
Sean Christophersona633e412018-09-26 09:23:47 -070013279 /*
13280 * A failed consistency check that leads to a VMExit during L1's
13281 * VMEnter to L2 is a variation of a normal VMexit, as explained in
13282 * 26.7 "VM-entry failures during or after loading guest state".
13283 */
Sean Christopherson39f9c382018-09-26 09:23:48 -070013284vmentry_fail_vmexit_guest_mode:
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020013285 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
13286 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
13287 leave_guest_mode(vcpu);
Sean Christopherson16fb9a42018-09-26 09:23:52 -070013288
13289vmentry_fail_vmexit:
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020013290 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Sean Christophersona633e412018-09-26 09:23:47 -070013291
13292 if (!from_vmentry)
13293 return 1;
13294
Sean Christophersona633e412018-09-26 09:23:47 -070013295 load_vmcs12_host_state(vcpu, vmcs12);
13296 vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
13297 vmcs12->exit_qualification = exit_qual;
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020013298 if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
13299 vmx->nested.need_vmcs12_sync = true;
Sean Christophersona633e412018-09-26 09:23:47 -070013300 return 1;
Jim Mattson858e25c2016-11-30 12:03:47 -080013301}
13302
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013303/*
13304 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
13305 * for running an L2 nested guest.
13306 */
13307static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
13308{
13309 struct vmcs12 *vmcs12;
13310 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattsonb3f1dfb2017-07-17 12:00:34 -070013311 u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
Jim Mattsonca0bde22016-11-30 12:03:46 -080013312 int ret;
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013313
Kyle Hueyeb277562016-11-29 12:40:39 -080013314 if (!nested_vmx_check_permission(vcpu))
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013315 return 1;
13316
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013317 if (vmx->nested.current_vmptr == -1ull)
13318 return nested_vmx_failInvalid(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -080013319
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013320 vmcs12 = get_vmcs12(vcpu);
13321
Liran Alona6192d42018-06-23 02:35:04 +030013322 /*
13323 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
13324 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
13325 * rather than RFLAGS.ZF, and no error number is stored to the
13326 * VM-instruction error field.
13327 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013328 if (vmcs12->hdr.shadow_vmcs)
13329 return nested_vmx_failInvalid(vcpu);
Liran Alona6192d42018-06-23 02:35:04 +030013330
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020013331 if (vmx->nested.hv_evmcs) {
13332 copy_enlightened_to_vmcs12(vmx);
13333 /* Enlightened VMCS doesn't have launch state */
13334 vmcs12->launch_state = !launch;
13335 } else if (enable_shadow_vmcs) {
Abel Gordon012f83c2013-04-18 14:39:25 +030013336 copy_shadow_to_vmcs12(vmx);
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020013337 }
Abel Gordon012f83c2013-04-18 14:39:25 +030013338
Nadav Har'El7c177932011-05-25 23:12:04 +030013339 /*
13340 * The nested entry process starts with enforcing various prerequisites
13341 * on vmcs12 as required by the Intel SDM, and act appropriately when
13342 * they fail: As the SDM explains, some conditions should cause the
13343 * instruction to fail, while others will cause the instruction to seem
13344 * to succeed, but return an EXIT_REASON_INVALID_STATE.
13345 * To speed up the normal (success) code path, we should avoid checking
13346 * for misconfigurations which will anyway be caught by the processor
13347 * when using the merged vmcs02.
13348 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013349 if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS)
13350 return nested_vmx_failValid(vcpu,
13351 VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
Jim Mattsonb3f1dfb2017-07-17 12:00:34 -070013352
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013353 if (vmcs12->launch_state == launch)
13354 return nested_vmx_failValid(vcpu,
Nadav Har'El7c177932011-05-25 23:12:04 +030013355 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
13356 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
Nadav Har'El7c177932011-05-25 23:12:04 +030013357
Jim Mattsonca0bde22016-11-30 12:03:46 -080013358 ret = check_vmentry_prereqs(vcpu, vmcs12);
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013359 if (ret)
13360 return nested_vmx_failValid(vcpu, ret);
Paolo Bonzini26539bd2013-04-15 15:00:27 +020013361
Nadav Har'El7c177932011-05-25 23:12:04 +030013362 /*
13363 * We're finally done with prerequisite checking, and can start with
13364 * the nested entry.
13365 */
Jim Mattson6514dc32018-04-26 16:09:12 -070013366 vmx->nested.nested_run_pending = 1;
Sean Christophersona633e412018-09-26 09:23:47 -070013367 ret = nested_vmx_enter_non_root_mode(vcpu, true);
Sean Christopherson52017602018-09-26 09:23:57 -070013368 vmx->nested.nested_run_pending = !ret;
13369 if (ret > 0)
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013370 return 1;
Sean Christopherson52017602018-09-26 09:23:57 -070013371 else if (ret)
13372 return nested_vmx_failValid(vcpu,
13373 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Wincy Vanff651cb2014-12-11 08:52:58 +030013374
Paolo Bonzinic595cee2018-07-02 13:07:14 +020013375 /* Hide L1D cache contents from the nested guest. */
13376 vmx->vcpu.arch.l1tf_flush_l1d = true;
13377
Chao Gao135a06c2018-02-11 10:06:30 +080013378 /*
Sean Christophersond63907d2018-09-26 09:23:45 -070013379 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
Liran Alon61ada742018-06-23 02:35:08 +030013380 * also be used as part of restoring nVMX state for
13381 * snapshot restore (migration).
13382 *
13383 * In this flow, it is assumed that vmcs12 cache was
13384 * trasferred as part of captured nVMX state and should
13385 * therefore not be read from guest memory (which may not
13386 * exist on destination host yet).
13387 */
13388 nested_cache_shadow_vmcs12(vcpu, vmcs12);
13389
13390 /*
Chao Gao135a06c2018-02-11 10:06:30 +080013391 * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
13392 * by event injection, halt vcpu.
13393 */
13394 if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
Jim Mattson6514dc32018-04-26 16:09:12 -070013395 !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) {
13396 vmx->nested.nested_run_pending = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -060013397 return kvm_vcpu_halt(vcpu);
Jim Mattson6514dc32018-04-26 16:09:12 -070013398 }
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013399 return 1;
13400}
13401
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013402/*
13403 * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
13404 * because L2 may have changed some cr0 bits directly (CRO_GUEST_HOST_MASK).
13405 * This function returns the new value we should put in vmcs12.guest_cr0.
13406 * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
13407 * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
13408 * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
13409 * didn't trap the bit, because if L1 did, so would L0).
13410 * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have
13411 * been modified by L2, and L1 knows it. So just leave the old value of
13412 * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
13413 * isn't relevant, because if L0 traps this bit it can set it to anything.
13414 * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
13415 * changed these bits, and therefore they need to be updated, but L0
13416 * didn't necessarily allow them to be changed in GUEST_CR0 - and rather
13417 * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
13418 */
13419static inline unsigned long
13420vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
13421{
13422 return
13423 /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
13424 /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
13425 /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
13426 vcpu->arch.cr0_guest_owned_bits));
13427}
13428
13429static inline unsigned long
13430vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
13431{
13432 return
13433 /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
13434 /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
13435 /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
13436 vcpu->arch.cr4_guest_owned_bits));
13437}
13438
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013439static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
13440 struct vmcs12 *vmcs12)
13441{
13442 u32 idt_vectoring;
13443 unsigned int nr;
13444
Wanpeng Li664f8e22017-08-24 03:35:09 -070013445 if (vcpu->arch.exception.injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013446 nr = vcpu->arch.exception.nr;
13447 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
13448
13449 if (kvm_exception_is_soft(nr)) {
13450 vmcs12->vm_exit_instruction_len =
13451 vcpu->arch.event_exit_inst_len;
13452 idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
13453 } else
13454 idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
13455
13456 if (vcpu->arch.exception.has_error_code) {
13457 idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
13458 vmcs12->idt_vectoring_error_code =
13459 vcpu->arch.exception.error_code;
13460 }
13461
13462 vmcs12->idt_vectoring_info_field = idt_vectoring;
Jan Kiszkacd2633c2013-10-23 17:42:15 +010013463 } else if (vcpu->arch.nmi_injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013464 vmcs12->idt_vectoring_info_field =
13465 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
Liran Alon04140b42018-03-23 03:01:31 +030013466 } else if (vcpu->arch.interrupt.injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013467 nr = vcpu->arch.interrupt.nr;
13468 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
13469
13470 if (vcpu->arch.interrupt.soft) {
13471 idt_vectoring |= INTR_TYPE_SOFT_INTR;
13472 vmcs12->vm_entry_instruction_len =
13473 vcpu->arch.event_exit_inst_len;
13474 } else
13475 idt_vectoring |= INTR_TYPE_EXT_INTR;
13476
13477 vmcs12->idt_vectoring_info_field = idt_vectoring;
13478 }
13479}
13480
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013481static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
13482{
13483 struct vcpu_vmx *vmx = to_vmx(vcpu);
Wanpeng Libfcf83b2017-08-24 03:35:11 -070013484 unsigned long exit_qual;
Liran Alon917dc602017-11-05 16:07:43 +020013485 bool block_nested_events =
13486 vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
Wanpeng Liacc9ab62017-02-27 04:24:39 -080013487
Wanpeng Libfcf83b2017-08-24 03:35:11 -070013488 if (vcpu->arch.exception.pending &&
13489 nested_vmx_check_exception(vcpu, &exit_qual)) {
Liran Alon917dc602017-11-05 16:07:43 +020013490 if (block_nested_events)
Wanpeng Libfcf83b2017-08-24 03:35:11 -070013491 return -EBUSY;
13492 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
Wanpeng Libfcf83b2017-08-24 03:35:11 -070013493 return 0;
13494 }
13495
Jan Kiszkaf4124502014-03-07 20:03:13 +010013496 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
13497 vmx->nested.preemption_timer_expired) {
Liran Alon917dc602017-11-05 16:07:43 +020013498 if (block_nested_events)
Jan Kiszkaf4124502014-03-07 20:03:13 +010013499 return -EBUSY;
13500 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
13501 return 0;
13502 }
13503
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013504 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
Liran Alon917dc602017-11-05 16:07:43 +020013505 if (block_nested_events)
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013506 return -EBUSY;
13507 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
13508 NMI_VECTOR | INTR_TYPE_NMI_INTR |
13509 INTR_INFO_VALID_MASK, 0);
13510 /*
13511 * The NMI-triggered VM exit counts as injection:
13512 * clear this one and block further NMIs.
13513 */
13514 vcpu->arch.nmi_pending = 0;
13515 vmx_set_nmi_mask(vcpu, true);
13516 return 0;
13517 }
13518
13519 if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
13520 nested_exit_on_intr(vcpu)) {
Liran Alon917dc602017-11-05 16:07:43 +020013521 if (block_nested_events)
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013522 return -EBUSY;
13523 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
Wincy Van705699a2015-02-03 23:58:17 +080013524 return 0;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013525 }
13526
David Hildenbrand6342c502017-01-25 11:58:58 +010013527 vmx_complete_nested_posted_interrupt(vcpu);
13528 return 0;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013529}
13530
Sean Christophersond264ee02018-08-27 15:21:12 -070013531static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
13532{
13533 to_vmx(vcpu)->req_immediate_exit = true;
13534}
13535
Jan Kiszkaf4124502014-03-07 20:03:13 +010013536static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
13537{
13538 ktime_t remaining =
13539 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
13540 u64 value;
13541
13542 if (ktime_to_ns(remaining) <= 0)
13543 return 0;
13544
13545 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
13546 do_div(value, 1000000);
13547 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
13548}
13549
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013550/*
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080013551 * Update the guest state fields of vmcs12 to reflect changes that
13552 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
13553 * VM-entry controls is also updated, since this is really a guest
13554 * state bit.)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013555 */
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080013556static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013557{
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013558 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
13559 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
13560
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013561 vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
13562 vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
13563 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
13564
13565 vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
13566 vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
13567 vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
13568 vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
13569 vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
13570 vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
13571 vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
13572 vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
13573 vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
13574 vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
13575 vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
13576 vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
13577 vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
13578 vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
13579 vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
13580 vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
13581 vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
13582 vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
13583 vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
13584 vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
13585 vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
13586 vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
13587 vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
13588 vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
13589 vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
13590 vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
13591 vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
13592 vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
13593 vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
13594 vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
13595 vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
13596 vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
13597 vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
13598 vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
13599 vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
13600 vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
13601
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013602 vmcs12->guest_interruptibility_info =
13603 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
13604 vmcs12->guest_pending_dbg_exceptions =
13605 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
Jan Kiszka3edf1e62014-01-04 18:47:24 +010013606 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
13607 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
13608 else
13609 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013610
Jan Kiszkaf4124502014-03-07 20:03:13 +010013611 if (nested_cpu_has_preemption_timer(vmcs12)) {
13612 if (vmcs12->vm_exit_controls &
13613 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
13614 vmcs12->vmx_preemption_timer_value =
13615 vmx_get_preemption_timer_value(vcpu);
13616 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
13617 }
Arthur Chunqi Li7854cbc2013-09-16 16:11:44 +080013618
Nadav Har'El3633cfc2013-08-05 11:07:07 +030013619 /*
13620 * In some cases (usually, nested EPT), L2 is allowed to change its
13621 * own CR3 without exiting. If it has changed it, we must keep it.
13622 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
13623 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
13624 *
13625 * Additionally, restore L2's PDPTR to vmcs12.
13626 */
13627 if (enable_ept) {
Paolo Bonzinif3531052015-12-03 15:49:56 +010013628 vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
Nadav Har'El3633cfc2013-08-05 11:07:07 +030013629 vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
13630 vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
13631 vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
13632 vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
13633 }
13634
Jim Mattsond281e132017-06-01 12:44:46 -070013635 vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
Jan Dakinevich119a9c02016-09-04 21:22:47 +030013636
Wincy Van608406e2015-02-03 23:57:51 +080013637 if (nested_cpu_has_vid(vmcs12))
13638 vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
13639
Jan Kiszkac18911a2013-03-13 16:06:41 +010013640 vmcs12->vm_entry_controls =
13641 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
Gleb Natapov2961e8762013-11-25 15:37:13 +020013642 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
Jan Kiszkac18911a2013-03-13 16:06:41 +010013643
Jan Kiszka2996fca2014-06-16 13:59:43 +020013644 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
13645 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
13646 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
13647 }
13648
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013649 /* TODO: These cannot have changed unless we have MSR bitmaps and
13650 * the relevant bit asks not to trap the change */
Jan Kiszkab8c07d52013-04-06 13:51:21 +020013651 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013652 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
Jan Kiszka10ba54a2013-08-08 16:26:31 +020013653 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
13654 vmcs12->guest_ia32_efer = vcpu->arch.efer;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013655 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
13656 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
13657 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
Paolo Bonzinia87036a2016-03-08 09:52:13 +010013658 if (kvm_mpx_supported())
Paolo Bonzini36be0b92014-02-24 12:30:04 +010013659 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080013660}
13661
13662/*
13663 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
13664 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
13665 * and this function updates it to reflect the changes to the guest state while
13666 * L2 was running (and perhaps made some exits which were handled directly by L0
13667 * without going back to L1), and to reflect the exit reason.
13668 * Note that we do not have to copy here all VMCS fields, just those that
13669 * could have changed by the L2 guest or the exit - i.e., the guest-state and
13670 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
13671 * which already writes to vmcs12 directly.
13672 */
13673static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
13674 u32 exit_reason, u32 exit_intr_info,
13675 unsigned long exit_qualification)
13676{
13677 /* update guest state fields: */
13678 sync_vmcs12(vcpu, vmcs12);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013679
13680 /* update exit information fields: */
13681
Jan Kiszka533558b2014-01-04 18:47:20 +010013682 vmcs12->vm_exit_reason = exit_reason;
13683 vmcs12->exit_qualification = exit_qualification;
Jan Kiszka533558b2014-01-04 18:47:20 +010013684 vmcs12->vm_exit_intr_info = exit_intr_info;
Paolo Bonzini7313c692017-07-27 10:31:25 +020013685
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013686 vmcs12->idt_vectoring_info_field = 0;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013687 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
13688 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
13689
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013690 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
Jim Mattson7cdc2d62017-07-06 16:33:05 -070013691 vmcs12->launch_state = 1;
13692
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013693 /* vm_entry_intr_info_field is cleared on exit. Emulate this
13694 * instead of reading the real value. */
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013695 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013696
13697 /*
13698 * Transfer the event that L0 or L1 may wanted to inject into
13699 * L2 to IDT_VECTORING_INFO_FIELD.
13700 */
13701 vmcs12_save_pending_event(vcpu, vmcs12);
13702 }
13703
13704 /*
13705 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
13706 * preserved above and would only end up incorrectly in L1.
13707 */
13708 vcpu->arch.nmi_injected = false;
13709 kvm_clear_exception_queue(vcpu);
13710 kvm_clear_interrupt_queue(vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013711}
13712
13713/*
13714 * A part of what we need to when the nested L2 guest exits and we want to
13715 * run its L1 parent, is to reset L1's guest state to the host state specified
13716 * in vmcs12.
13717 * This function is to be called not only on normal nested exit, but also on
13718 * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
13719 * Failures During or After Loading Guest State").
13720 * This function should be called when the active VMCS is L1's (vmcs01).
13721 */
Jan Kiszka733568f2013-02-23 15:07:47 +010013722static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
13723 struct vmcs12 *vmcs12)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013724{
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080013725 struct kvm_segment seg;
Sean Christophersonbd18bff2018-08-22 14:57:07 -070013726 u32 entry_failure_code;
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080013727
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013728 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
13729 vcpu->arch.efer = vmcs12->host_ia32_efer;
Jan Kiszkad1fa0352013-04-14 12:44:54 +020013730 else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013731 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
13732 else
13733 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
13734 vmx_set_efer(vcpu, vcpu->arch.efer);
13735
13736 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
13737 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
H. Peter Anvin1adfa762013-04-27 16:10:11 -070013738 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
Sean Christophersoncb61de22018-09-26 09:23:53 -070013739 vmx_set_interrupt_shadow(vcpu, 0);
13740
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013741 /*
13742 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080013743 * actually changed, because vmx_set_cr0 refers to efer set above.
13744 *
13745 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
13746 * (KVM doesn't change it);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013747 */
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080013748 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
Jan Kiszka9e3e4db2013-09-03 21:11:45 +020013749 vmx_set_cr0(vcpu, vmcs12->host_cr0);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013750
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080013751 /* Same as above - no reason to call set_cr4_guest_host_mask(). */
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013752 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
Haozhong Zhang8eb3f872017-10-10 15:01:22 +080013753 vmx_set_cr4(vcpu, vmcs12->host_cr4);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013754
Sean Christophersonbd18bff2018-08-22 14:57:07 -070013755 nested_ept_uninit_mmu_context(vcpu);
13756
13757 /*
13758 * Only PDPTE load can fail as the value of cr3 was checked on entry and
13759 * couldn't have changed.
13760 */
13761 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
13762 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
13763
13764 if (!enable_ept)
13765 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030013766
Liran Alon6f1e03b2018-05-22 17:16:14 +030013767 /*
Liran Alonefebf0a2018-10-08 23:42:20 +030013768 * If vmcs01 doesn't use VPID, CPU flushes TLB on every
Liran Alon6f1e03b2018-05-22 17:16:14 +030013769 * VMEntry/VMExit. Thus, no need to flush TLB.
13770 *
Liran Alonefebf0a2018-10-08 23:42:20 +030013771 * If vmcs12 doesn't use VPID, L1 expects TLB to be
13772 * flushed on every VMEntry/VMExit.
Liran Alon6f1e03b2018-05-22 17:16:14 +030013773 *
Liran Alonefebf0a2018-10-08 23:42:20 +030013774 * Otherwise, we can preserve TLB entries as long as we are
13775 * able to tag L1 TLB entries differently than L2 TLB entries.
Liran Alon14389212018-10-08 23:42:17 +030013776 *
13777 * If vmcs12 uses EPT, we need to execute this flush on EPTP01
13778 * and therefore we request the TLB flush to happen only after VMCS EPTP
13779 * has been set by KVM_REQ_LOAD_CR3.
Liran Alon6f1e03b2018-05-22 17:16:14 +030013780 */
13781 if (enable_vpid &&
Liran Alonefebf0a2018-10-08 23:42:20 +030013782 (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) {
Liran Alon14389212018-10-08 23:42:17 +030013783 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013784 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013785
13786 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
13787 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
13788 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
13789 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
13790 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
Ladi Prosek21f2d552017-10-11 16:54:42 +020013791 vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
13792 vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013793
Paolo Bonzini36be0b92014-02-24 12:30:04 +010013794 /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
13795 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
13796 vmcs_write64(GUEST_BNDCFGS, 0);
13797
Jan Kiszka44811c02013-08-04 17:17:27 +020013798 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013799 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
Jan Kiszka44811c02013-08-04 17:17:27 +020013800 vcpu->arch.pat = vmcs12->host_ia32_pat;
13801 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013802 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
13803 vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
13804 vmcs12->host_ia32_perf_global_ctrl);
Jan Kiszka503cd0c2013-03-03 13:05:44 +010013805
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080013806 /* Set L1 segment info according to Intel SDM
13807 27.5.2 Loading Host Segment and Descriptor-Table Registers */
13808 seg = (struct kvm_segment) {
13809 .base = 0,
13810 .limit = 0xFFFFFFFF,
13811 .selector = vmcs12->host_cs_selector,
13812 .type = 11,
13813 .present = 1,
13814 .s = 1,
13815 .g = 1
13816 };
13817 if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
13818 seg.l = 1;
13819 else
13820 seg.db = 1;
13821 vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
13822 seg = (struct kvm_segment) {
13823 .base = 0,
13824 .limit = 0xFFFFFFFF,
13825 .type = 3,
13826 .present = 1,
13827 .s = 1,
13828 .db = 1,
13829 .g = 1
13830 };
13831 seg.selector = vmcs12->host_ds_selector;
13832 vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
13833 seg.selector = vmcs12->host_es_selector;
13834 vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
13835 seg.selector = vmcs12->host_ss_selector;
13836 vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
13837 seg.selector = vmcs12->host_fs_selector;
13838 seg.base = vmcs12->host_fs_base;
13839 vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
13840 seg.selector = vmcs12->host_gs_selector;
13841 seg.base = vmcs12->host_gs_base;
13842 vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
13843 seg = (struct kvm_segment) {
Gleb Natapov205befd2013-08-04 15:08:06 +030013844 .base = vmcs12->host_tr_base,
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080013845 .limit = 0x67,
13846 .selector = vmcs12->host_tr_selector,
13847 .type = 11,
13848 .present = 1
13849 };
13850 vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
13851
Jan Kiszka503cd0c2013-03-03 13:05:44 +010013852 kvm_set_dr(vcpu, 7, 0x400);
13853 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
Wincy Vanff651cb2014-12-11 08:52:58 +030013854
Wincy Van3af18d92015-02-03 23:49:31 +080013855 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +010013856 vmx_update_msr_bitmap(vcpu);
Wincy Van3af18d92015-02-03 23:49:31 +080013857
Wincy Vanff651cb2014-12-11 08:52:58 +030013858 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
13859 vmcs12->vm_exit_msr_load_count))
13860 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013861}
13862
Sean Christophersonbd18bff2018-08-22 14:57:07 -070013863static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
13864{
13865 struct shared_msr_entry *efer_msr;
13866 unsigned int i;
13867
13868 if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
13869 return vmcs_read64(GUEST_IA32_EFER);
13870
13871 if (cpu_has_load_ia32_efer)
13872 return host_efer;
13873
13874 for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
13875 if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
13876 return vmx->msr_autoload.guest.val[i].value;
13877 }
13878
13879 efer_msr = find_msr_entry(vmx, MSR_EFER);
13880 if (efer_msr)
13881 return efer_msr->data;
13882
13883 return host_efer;
13884}
13885
13886static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
13887{
13888 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
13889 struct vcpu_vmx *vmx = to_vmx(vcpu);
13890 struct vmx_msr_entry g, h;
13891 struct msr_data msr;
13892 gpa_t gpa;
13893 u32 i, j;
13894
13895 vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
13896
13897 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
13898 /*
13899 * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
13900 * as vmcs01.GUEST_DR7 contains a userspace defined value
13901 * and vcpu->arch.dr7 is not squirreled away before the
13902 * nested VMENTER (not worth adding a variable in nested_vmx).
13903 */
13904 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
13905 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
13906 else
13907 WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
13908 }
13909
13910 /*
13911 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
13912 * handle a variety of side effects to KVM's software model.
13913 */
13914 vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
13915
13916 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
13917 vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
13918
13919 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
13920 vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
13921
13922 nested_ept_uninit_mmu_context(vcpu);
13923 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
13924 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
13925
13926 /*
13927 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
13928 * from vmcs01 (if necessary). The PDPTRs are not loaded on
13929 * VMFail, like everything else we just need to ensure our
13930 * software model is up-to-date.
13931 */
13932 ept_save_pdptrs(vcpu);
13933
13934 kvm_mmu_reset_context(vcpu);
13935
13936 if (cpu_has_vmx_msr_bitmap())
13937 vmx_update_msr_bitmap(vcpu);
13938
13939 /*
13940 * This nasty bit of open coding is a compromise between blindly
13941 * loading L1's MSRs using the exit load lists (incorrect emulation
13942 * of VMFail), leaving the nested VM's MSRs in the software model
13943 * (incorrect behavior) and snapshotting the modified MSRs (too
13944 * expensive since the lists are unbound by hardware). For each
13945 * MSR that was (prematurely) loaded from the nested VMEntry load
13946 * list, reload it from the exit load list if it exists and differs
13947 * from the guest value. The intent is to stuff host state as
13948 * silently as possible, not to fully process the exit load list.
13949 */
13950 msr.host_initiated = false;
13951 for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
13952 gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
13953 if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
13954 pr_debug_ratelimited(
13955 "%s read MSR index failed (%u, 0x%08llx)\n",
13956 __func__, i, gpa);
13957 goto vmabort;
13958 }
13959
13960 for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
13961 gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
13962 if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
13963 pr_debug_ratelimited(
13964 "%s read MSR failed (%u, 0x%08llx)\n",
13965 __func__, j, gpa);
13966 goto vmabort;
13967 }
13968 if (h.index != g.index)
13969 continue;
13970 if (h.value == g.value)
13971 break;
13972
13973 if (nested_vmx_load_msr_check(vcpu, &h)) {
13974 pr_debug_ratelimited(
13975 "%s check failed (%u, 0x%x, 0x%x)\n",
13976 __func__, j, h.index, h.reserved);
13977 goto vmabort;
13978 }
13979
13980 msr.index = h.index;
13981 msr.data = h.value;
13982 if (kvm_set_msr(vcpu, &msr)) {
13983 pr_debug_ratelimited(
13984 "%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
13985 __func__, j, h.index, h.value);
13986 goto vmabort;
13987 }
13988 }
13989 }
13990
13991 return;
13992
13993vmabort:
13994 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
13995}
13996
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013997/*
13998 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
13999 * and modify vmcs12 to make it see what it would expect to see there if
14000 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
14001 */
Jan Kiszka533558b2014-01-04 18:47:20 +010014002static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
14003 u32 exit_intr_info,
14004 unsigned long exit_qualification)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014005{
14006 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014007 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
14008
Jan Kiszka5f3d5792013-04-14 12:12:46 +020014009 /* trying to cancel vmlaunch/vmresume is a bug */
14010 WARN_ON_ONCE(vmx->nested.nested_run_pending);
14011
Jim Mattson4f350c62017-09-14 16:31:44 -070014012 leave_guest_mode(vcpu);
14013
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020014014 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
14015 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
14016
Jim Mattson4f350c62017-09-14 16:31:44 -070014017 if (likely(!vmx->fail)) {
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014018 if (exit_reason == -1)
14019 sync_vmcs12(vcpu, vmcs12);
14020 else
14021 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
14022 exit_qualification);
Jim Mattson4f350c62017-09-14 16:31:44 -070014023
Liran Alon61ada742018-06-23 02:35:08 +030014024 /*
14025 * Must happen outside of sync_vmcs12() as it will
14026 * also be used to capture vmcs12 cache as part of
14027 * capturing nVMX state for snapshot (migration).
14028 *
14029 * Otherwise, this flush will dirty guest memory at a
14030 * point it is already assumed by user-space to be
14031 * immutable.
14032 */
14033 nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
14034
Jim Mattson4f350c62017-09-14 16:31:44 -070014035 if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
14036 vmcs12->vm_exit_msr_store_count))
14037 nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
Sean Christopherson2768c0c2018-09-26 09:23:58 -070014038 } else {
14039 /*
14040 * The only expected VM-instruction error is "VM entry with
14041 * invalid control field(s)." Anything else indicates a
14042 * problem with L0. And we should never get here with a
14043 * VMFail of any type if early consistency checks are enabled.
14044 */
14045 WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
14046 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
14047 WARN_ON_ONCE(nested_early_check);
Bandan Das77b0f5d2014-04-19 18:17:45 -040014048 }
14049
Jim Mattson4f350c62017-09-14 16:31:44 -070014050 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jan Kiszka36c3cc42013-02-23 22:35:37 +010014051
Paolo Bonzini93140062016-07-06 13:23:51 +020014052 /* Update any VMCS fields that might have changed while L2 ran */
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -040014053 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
14054 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
Paolo Bonziniea26e4e2016-11-01 00:39:48 +010014055 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
Sean Christophersonf459a702018-08-27 15:21:11 -070014056
Peter Feinerc95ba922016-08-17 09:36:47 -070014057 if (kvm_has_tsc_control)
14058 decache_tsc_multiplier(vmx);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014059
Jim Mattson8d860bb2018-05-09 16:56:05 -040014060 if (vmx->nested.change_vmcs01_virtual_apic_mode) {
14061 vmx->nested.change_vmcs01_virtual_apic_mode = false;
14062 vmx_set_virtual_apic_mode(vcpu);
Jim Mattsonfb6c8192017-03-16 13:53:59 -070014063 } else if (!nested_cpu_has_ept(vmcs12) &&
14064 nested_cpu_has2(vmcs12,
14065 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
Junaid Shahida468f2d2018-04-26 13:09:50 -070014066 vmx_flush_tlb(vcpu, true);
Radim Krčmářdccbfcf2016-08-08 20:16:23 +020014067 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014068
14069 /* This is needed for same reason as it was needed in prepare_vmcs02 */
14070 vmx->host_rsp = 0;
14071
14072 /* Unpin physical memory we referred to in vmcs02 */
14073 if (vmx->nested.apic_access_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +020014074 kvm_release_page_dirty(vmx->nested.apic_access_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +020014075 vmx->nested.apic_access_page = NULL;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014076 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080014077 if (vmx->nested.virtual_apic_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +020014078 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +020014079 vmx->nested.virtual_apic_page = NULL;
Wanpeng Lia7c0b072014-08-21 19:46:50 +080014080 }
Wincy Van705699a2015-02-03 23:58:17 +080014081 if (vmx->nested.pi_desc_page) {
14082 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020014083 kvm_release_page_dirty(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +080014084 vmx->nested.pi_desc_page = NULL;
14085 vmx->nested.pi_desc = NULL;
14086 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014087
14088 /*
Tang Chen38b99172014-09-24 15:57:54 +080014089 * We are now running in L2, mmu_notifier will force to reload the
14090 * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1.
14091 */
Wanpeng Lic83b6d12016-09-06 17:20:33 +080014092 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
Tang Chen38b99172014-09-24 15:57:54 +080014093
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020014094 if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
14095 vmx->nested.need_vmcs12_sync = true;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010014096
14097 /* in case we halted in L2 */
14098 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
Jim Mattson4f350c62017-09-14 16:31:44 -070014099
14100 if (likely(!vmx->fail)) {
14101 /*
14102 * TODO: SDM says that with acknowledge interrupt on
14103 * exit, bit 31 of the VM-exit interrupt information
14104 * (valid interrupt) is always set to 1 on
14105 * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
14106 * need kvm_cpu_has_interrupt(). See the commit
14107 * message for details.
14108 */
14109 if (nested_exit_intr_ack_set(vcpu) &&
14110 exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
14111 kvm_cpu_has_interrupt(vcpu)) {
14112 int irq = kvm_cpu_get_interrupt(vcpu);
14113 WARN_ON(irq < 0);
14114 vmcs12->vm_exit_intr_info = irq |
14115 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
14116 }
14117
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014118 if (exit_reason != -1)
14119 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
14120 vmcs12->exit_qualification,
14121 vmcs12->idt_vectoring_info_field,
14122 vmcs12->vm_exit_intr_info,
14123 vmcs12->vm_exit_intr_error_code,
14124 KVM_ISA_VMX);
Jim Mattson4f350c62017-09-14 16:31:44 -070014125
14126 load_vmcs12_host_state(vcpu, vmcs12);
14127
14128 return;
14129 }
Sean Christopherson09abb5e2018-09-26 09:23:55 -070014130
Jim Mattson4f350c62017-09-14 16:31:44 -070014131 /*
14132 * After an early L2 VM-entry failure, we're now back
14133 * in L1 which thinks it just finished a VMLAUNCH or
14134 * VMRESUME instruction, so we need to set the failure
14135 * flag and the VM-instruction error field of the VMCS
Sean Christophersoncb61de22018-09-26 09:23:53 -070014136 * accordingly, and skip the emulated instruction.
Jim Mattson4f350c62017-09-14 16:31:44 -070014137 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -070014138 (void)nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christophersoncb61de22018-09-26 09:23:53 -070014139
Sean Christophersonbd18bff2018-08-22 14:57:07 -070014140 /*
14141 * Restore L1's host state to KVM's software model. We're here
14142 * because a consistency check was caught by hardware, which
14143 * means some amount of guest state has been propagated to KVM's
14144 * model and needs to be unwound to the host's state.
14145 */
14146 nested_vmx_restore_host_state(vcpu);
Wanpeng Li5af41572017-11-05 16:54:49 -080014147
Jim Mattson4f350c62017-09-14 16:31:44 -070014148 vmx->fail = 0;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014149}
14150
Nadav Har'El7c177932011-05-25 23:12:04 +030014151/*
Jan Kiszka42124922014-01-04 18:47:19 +010014152 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
14153 */
14154static void vmx_leave_nested(struct kvm_vcpu *vcpu)
14155{
Wanpeng Li2f707d92017-03-06 04:03:28 -080014156 if (is_guest_mode(vcpu)) {
14157 to_vmx(vcpu)->nested.nested_run_pending = 0;
Jan Kiszka533558b2014-01-04 18:47:20 +010014158 nested_vmx_vmexit(vcpu, -1, 0, 0);
Wanpeng Li2f707d92017-03-06 04:03:28 -080014159 }
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +020014160 free_nested(vcpu);
Jan Kiszka42124922014-01-04 18:47:19 +010014161}
14162
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020014163static int vmx_check_intercept(struct kvm_vcpu *vcpu,
14164 struct x86_instruction_info *info,
14165 enum x86_intercept_stage stage)
14166{
Paolo Bonzinifb6d4d32016-07-12 11:04:26 +020014167 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
14168 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
14169
14170 /*
14171 * RDPID causes #UD if disabled through secondary execution controls.
14172 * Because it is marked as EmulateOnUD, we need to intercept it here.
14173 */
14174 if (info->intercept == x86_intercept_rdtscp &&
14175 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
14176 ctxt->exception.vector = UD_VECTOR;
14177 ctxt->exception.error_code_valid = false;
14178 return X86EMUL_PROPAGATE_FAULT;
14179 }
14180
14181 /* TODO: check more intercepts... */
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020014182 return X86EMUL_CONTINUE;
14183}
14184
Yunhong Jiang64672c92016-06-13 14:19:59 -070014185#ifdef CONFIG_X86_64
14186/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
14187static inline int u64_shl_div_u64(u64 a, unsigned int shift,
14188 u64 divisor, u64 *result)
14189{
14190 u64 low = a << shift, high = a >> (64 - shift);
14191
14192 /* To avoid the overflow on divq */
14193 if (high >= divisor)
14194 return 1;
14195
14196 /* Low hold the result, high hold rem which is discarded */
14197 asm("divq %2\n\t" : "=a" (low), "=d" (high) :
14198 "rm" (divisor), "0" (low), "1" (high));
14199 *result = low;
14200
14201 return 0;
14202}
14203
14204static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
14205{
KarimAllah Ahmed386c6dd2018-04-10 14:15:46 +020014206 struct vcpu_vmx *vmx;
Wanpeng Lic5ce8232018-05-29 14:53:17 +080014207 u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
KarimAllah Ahmed386c6dd2018-04-10 14:15:46 +020014208
14209 if (kvm_mwait_in_guest(vcpu->kvm))
14210 return -EOPNOTSUPP;
14211
14212 vmx = to_vmx(vcpu);
14213 tscl = rdtsc();
14214 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
14215 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
Wanpeng Lic5ce8232018-05-29 14:53:17 +080014216 lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
14217
14218 if (delta_tsc > lapic_timer_advance_cycles)
14219 delta_tsc -= lapic_timer_advance_cycles;
14220 else
14221 delta_tsc = 0;
Yunhong Jiang64672c92016-06-13 14:19:59 -070014222
14223 /* Convert to host delta tsc if tsc scaling is enabled */
14224 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
14225 u64_shl_div_u64(delta_tsc,
14226 kvm_tsc_scaling_ratio_frac_bits,
14227 vcpu->arch.tsc_scaling_ratio,
14228 &delta_tsc))
14229 return -ERANGE;
14230
14231 /*
14232 * If the delta tsc can't fit in the 32 bit after the multi shift,
14233 * we can't use the preemption timer.
14234 * It's possible that it fits on later vmentries, but checking
14235 * on every vmentry is costly so we just use an hrtimer.
14236 */
14237 if (delta_tsc >> (cpu_preemption_timer_multi + 32))
14238 return -ERANGE;
14239
14240 vmx->hv_deadline_tsc = tscl + delta_tsc;
Wanpeng Lic8533542017-06-29 06:28:09 -070014241 return delta_tsc == 0;
Yunhong Jiang64672c92016-06-13 14:19:59 -070014242}
14243
14244static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
14245{
Sean Christophersonf459a702018-08-27 15:21:11 -070014246 to_vmx(vcpu)->hv_deadline_tsc = -1;
Yunhong Jiang64672c92016-06-13 14:19:59 -070014247}
14248#endif
14249
Paolo Bonzini48d89b92014-08-26 13:27:46 +020014250static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
Radim Krčmářae97a3b2014-08-21 18:08:06 +020014251{
Wanpeng Lib31c1142018-03-12 04:53:04 -070014252 if (!kvm_pause_in_guest(vcpu->kvm))
Radim Krčmářb4a2d312014-08-21 18:08:08 +020014253 shrink_ple_window(vcpu);
Radim Krčmářae97a3b2014-08-21 18:08:06 +020014254}
14255
Kai Huang843e4332015-01-28 10:54:28 +080014256static void vmx_slot_enable_log_dirty(struct kvm *kvm,
14257 struct kvm_memory_slot *slot)
14258{
14259 kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
14260 kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
14261}
14262
14263static void vmx_slot_disable_log_dirty(struct kvm *kvm,
14264 struct kvm_memory_slot *slot)
14265{
14266 kvm_mmu_slot_set_dirty(kvm, slot);
14267}
14268
14269static void vmx_flush_log_dirty(struct kvm *kvm)
14270{
14271 kvm_flush_pml_buffers(kvm);
14272}
14273
Bandan Dasc5f983f2017-05-05 15:25:14 -040014274static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
14275{
14276 struct vmcs12 *vmcs12;
14277 struct vcpu_vmx *vmx = to_vmx(vcpu);
14278 gpa_t gpa;
14279 struct page *page = NULL;
14280 u64 *pml_address;
14281
14282 if (is_guest_mode(vcpu)) {
14283 WARN_ON_ONCE(vmx->nested.pml_full);
14284
14285 /*
14286 * Check if PML is enabled for the nested guest.
14287 * Whether eptp bit 6 is set is already checked
14288 * as part of A/D emulation.
14289 */
14290 vmcs12 = get_vmcs12(vcpu);
14291 if (!nested_cpu_has_pml(vmcs12))
14292 return 0;
14293
Dan Carpenter47698862017-05-10 22:43:17 +030014294 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
Bandan Dasc5f983f2017-05-05 15:25:14 -040014295 vmx->nested.pml_full = true;
14296 return 1;
14297 }
14298
14299 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
14300
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020014301 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address);
14302 if (is_error_page(page))
Bandan Dasc5f983f2017-05-05 15:25:14 -040014303 return 0;
14304
14305 pml_address = kmap(page);
14306 pml_address[vmcs12->guest_pml_index--] = gpa;
14307 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020014308 kvm_release_page_clean(page);
Bandan Dasc5f983f2017-05-05 15:25:14 -040014309 }
14310
14311 return 0;
14312}
14313
Kai Huang843e4332015-01-28 10:54:28 +080014314static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
14315 struct kvm_memory_slot *memslot,
14316 gfn_t offset, unsigned long mask)
14317{
14318 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
14319}
14320
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014321static void __pi_post_block(struct kvm_vcpu *vcpu)
14322{
14323 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
14324 struct pi_desc old, new;
14325 unsigned int dest;
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014326
14327 do {
14328 old.control = new.control = pi_desc->control;
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014329 WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
14330 "Wakeup handler not enabled while the VCPU is blocked\n");
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014331
14332 dest = cpu_physical_id(vcpu->cpu);
14333
14334 if (x2apic_enabled())
14335 new.ndst = dest;
14336 else
14337 new.ndst = (dest << 8) & 0xFF00;
14338
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014339 /* set 'NV' to 'notification vector' */
14340 new.nv = POSTED_INTR_VECTOR;
Paolo Bonzinic0a16662017-09-28 17:58:41 +020014341 } while (cmpxchg64(&pi_desc->control, old.control,
14342 new.control) != old.control);
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014343
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014344 if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
14345 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014346 list_del(&vcpu->blocked_vcpu_list);
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014347 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014348 vcpu->pre_pcpu = -1;
14349 }
14350}
14351
Feng Wuefc64402015-09-18 22:29:51 +080014352/*
Feng Wubf9f6ac2015-09-18 22:29:55 +080014353 * This routine does the following things for vCPU which is going
14354 * to be blocked if VT-d PI is enabled.
14355 * - Store the vCPU to the wakeup list, so when interrupts happen
14356 * we can find the right vCPU to wake up.
14357 * - Change the Posted-interrupt descriptor as below:
14358 * 'NDST' <-- vcpu->pre_pcpu
14359 * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
14360 * - If 'ON' is set during this process, which means at least one
14361 * interrupt is posted for this vCPU, we cannot block it, in
14362 * this case, return 1, otherwise, return 0.
14363 *
14364 */
Yunhong Jiangbc225122016-06-13 14:19:58 -070014365static int pi_pre_block(struct kvm_vcpu *vcpu)
Feng Wubf9f6ac2015-09-18 22:29:55 +080014366{
Feng Wubf9f6ac2015-09-18 22:29:55 +080014367 unsigned int dest;
14368 struct pi_desc old, new;
14369 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
14370
14371 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +080014372 !irq_remapping_cap(IRQ_POSTING_CAP) ||
14373 !kvm_vcpu_apicv_active(vcpu))
Feng Wubf9f6ac2015-09-18 22:29:55 +080014374 return 0;
14375
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014376 WARN_ON(irqs_disabled());
14377 local_irq_disable();
14378 if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
14379 vcpu->pre_pcpu = vcpu->cpu;
14380 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
14381 list_add_tail(&vcpu->blocked_vcpu_list,
14382 &per_cpu(blocked_vcpu_on_cpu,
14383 vcpu->pre_pcpu));
14384 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
14385 }
Feng Wubf9f6ac2015-09-18 22:29:55 +080014386
14387 do {
14388 old.control = new.control = pi_desc->control;
14389
Feng Wubf9f6ac2015-09-18 22:29:55 +080014390 WARN((pi_desc->sn == 1),
14391 "Warning: SN field of posted-interrupts "
14392 "is set before blocking\n");
14393
14394 /*
14395 * Since vCPU can be preempted during this process,
14396 * vcpu->cpu could be different with pre_pcpu, we
14397 * need to set pre_pcpu as the destination of wakeup
14398 * notification event, then we can find the right vCPU
14399 * to wakeup in wakeup handler if interrupts happen
14400 * when the vCPU is in blocked state.
14401 */
14402 dest = cpu_physical_id(vcpu->pre_pcpu);
14403
14404 if (x2apic_enabled())
14405 new.ndst = dest;
14406 else
14407 new.ndst = (dest << 8) & 0xFF00;
14408
14409 /* set 'NV' to 'wakeup vector' */
14410 new.nv = POSTED_INTR_WAKEUP_VECTOR;
Paolo Bonzinic0a16662017-09-28 17:58:41 +020014411 } while (cmpxchg64(&pi_desc->control, old.control,
14412 new.control) != old.control);
Feng Wubf9f6ac2015-09-18 22:29:55 +080014413
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014414 /* We should not block the vCPU if an interrupt is posted for it. */
14415 if (pi_test_on(pi_desc) == 1)
14416 __pi_post_block(vcpu);
14417
14418 local_irq_enable();
14419 return (vcpu->pre_pcpu == -1);
Feng Wubf9f6ac2015-09-18 22:29:55 +080014420}
14421
Yunhong Jiangbc225122016-06-13 14:19:58 -070014422static int vmx_pre_block(struct kvm_vcpu *vcpu)
14423{
14424 if (pi_pre_block(vcpu))
14425 return 1;
14426
Yunhong Jiang64672c92016-06-13 14:19:59 -070014427 if (kvm_lapic_hv_timer_in_use(vcpu))
14428 kvm_lapic_switch_to_sw_timer(vcpu);
14429
Yunhong Jiangbc225122016-06-13 14:19:58 -070014430 return 0;
14431}
14432
14433static void pi_post_block(struct kvm_vcpu *vcpu)
Feng Wubf9f6ac2015-09-18 22:29:55 +080014434{
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014435 if (vcpu->pre_pcpu == -1)
Feng Wubf9f6ac2015-09-18 22:29:55 +080014436 return;
14437
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014438 WARN_ON(irqs_disabled());
14439 local_irq_disable();
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014440 __pi_post_block(vcpu);
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014441 local_irq_enable();
Feng Wubf9f6ac2015-09-18 22:29:55 +080014442}
14443
Yunhong Jiangbc225122016-06-13 14:19:58 -070014444static void vmx_post_block(struct kvm_vcpu *vcpu)
14445{
Yunhong Jiang64672c92016-06-13 14:19:59 -070014446 if (kvm_x86_ops->set_hv_timer)
14447 kvm_lapic_switch_to_hv_timer(vcpu);
14448
Yunhong Jiangbc225122016-06-13 14:19:58 -070014449 pi_post_block(vcpu);
14450}
14451
Feng Wubf9f6ac2015-09-18 22:29:55 +080014452/*
Feng Wuefc64402015-09-18 22:29:51 +080014453 * vmx_update_pi_irte - set IRTE for Posted-Interrupts
14454 *
14455 * @kvm: kvm
14456 * @host_irq: host irq of the interrupt
14457 * @guest_irq: gsi of the interrupt
14458 * @set: set or unset PI
14459 * returns 0 on success, < 0 on failure
14460 */
14461static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
14462 uint32_t guest_irq, bool set)
14463{
14464 struct kvm_kernel_irq_routing_entry *e;
14465 struct kvm_irq_routing_table *irq_rt;
14466 struct kvm_lapic_irq irq;
14467 struct kvm_vcpu *vcpu;
14468 struct vcpu_data vcpu_info;
Jan H. Schönherr3a8b0672017-09-07 19:02:30 +010014469 int idx, ret = 0;
Feng Wuefc64402015-09-18 22:29:51 +080014470
14471 if (!kvm_arch_has_assigned_device(kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +080014472 !irq_remapping_cap(IRQ_POSTING_CAP) ||
14473 !kvm_vcpu_apicv_active(kvm->vcpus[0]))
Feng Wuefc64402015-09-18 22:29:51 +080014474 return 0;
14475
14476 idx = srcu_read_lock(&kvm->irq_srcu);
14477 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
Jan H. Schönherr3a8b0672017-09-07 19:02:30 +010014478 if (guest_irq >= irq_rt->nr_rt_entries ||
14479 hlist_empty(&irq_rt->map[guest_irq])) {
14480 pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
14481 guest_irq, irq_rt->nr_rt_entries);
14482 goto out;
14483 }
Feng Wuefc64402015-09-18 22:29:51 +080014484
14485 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
14486 if (e->type != KVM_IRQ_ROUTING_MSI)
14487 continue;
14488 /*
14489 * VT-d PI cannot support posting multicast/broadcast
14490 * interrupts to a vCPU, we still use interrupt remapping
14491 * for these kind of interrupts.
14492 *
14493 * For lowest-priority interrupts, we only support
14494 * those with single CPU as the destination, e.g. user
14495 * configures the interrupts via /proc/irq or uses
14496 * irqbalance to make the interrupts single-CPU.
14497 *
14498 * We will support full lowest-priority interrupt later.
14499 */
14500
Radim Krčmář371313132016-07-12 22:09:27 +020014501 kvm_set_msi_irq(kvm, e, &irq);
Feng Wu23a1c252016-01-25 16:53:32 +080014502 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
14503 /*
14504 * Make sure the IRTE is in remapped mode if
14505 * we don't handle it in posted mode.
14506 */
14507 ret = irq_set_vcpu_affinity(host_irq, NULL);
14508 if (ret < 0) {
14509 printk(KERN_INFO
14510 "failed to back to remapped mode, irq: %u\n",
14511 host_irq);
14512 goto out;
14513 }
14514
Feng Wuefc64402015-09-18 22:29:51 +080014515 continue;
Feng Wu23a1c252016-01-25 16:53:32 +080014516 }
Feng Wuefc64402015-09-18 22:29:51 +080014517
14518 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
14519 vcpu_info.vector = irq.vector;
14520
hu huajun2698d822018-04-11 15:16:40 +080014521 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
Feng Wuefc64402015-09-18 22:29:51 +080014522 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
14523
14524 if (set)
14525 ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
Haozhong Zhangdc91f2e2017-09-18 09:56:49 +080014526 else
Feng Wuefc64402015-09-18 22:29:51 +080014527 ret = irq_set_vcpu_affinity(host_irq, NULL);
Feng Wuefc64402015-09-18 22:29:51 +080014528
14529 if (ret < 0) {
14530 printk(KERN_INFO "%s: failed to update PI IRTE\n",
14531 __func__);
14532 goto out;
14533 }
14534 }
14535
14536 ret = 0;
14537out:
14538 srcu_read_unlock(&kvm->irq_srcu, idx);
14539 return ret;
14540}
14541
Ashok Rajc45dcc72016-06-22 14:59:56 +080014542static void vmx_setup_mce(struct kvm_vcpu *vcpu)
14543{
14544 if (vcpu->arch.mcg_cap & MCG_LMCE_P)
14545 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
14546 FEATURE_CONTROL_LMCE;
14547 else
14548 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
14549 ~FEATURE_CONTROL_LMCE;
14550}
14551
Ladi Prosek72d7b372017-10-11 16:54:41 +020014552static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
14553{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014554 /* we need a nested vmexit to enter SMM, postpone if run is pending */
14555 if (to_vmx(vcpu)->nested.nested_run_pending)
14556 return 0;
Ladi Prosek72d7b372017-10-11 16:54:41 +020014557 return 1;
14558}
14559
Ladi Prosek0234bf82017-10-11 16:54:40 +020014560static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
14561{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014562 struct vcpu_vmx *vmx = to_vmx(vcpu);
14563
14564 vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
14565 if (vmx->nested.smm.guest_mode)
14566 nested_vmx_vmexit(vcpu, -1, 0, 0);
14567
14568 vmx->nested.smm.vmxon = vmx->nested.vmxon;
14569 vmx->nested.vmxon = false;
Wanpeng Licaa057a2018-03-12 04:53:03 -070014570 vmx_clear_hlt(vcpu);
Ladi Prosek0234bf82017-10-11 16:54:40 +020014571 return 0;
14572}
14573
14574static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
14575{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014576 struct vcpu_vmx *vmx = to_vmx(vcpu);
14577 int ret;
14578
14579 if (vmx->nested.smm.vmxon) {
14580 vmx->nested.vmxon = true;
14581 vmx->nested.smm.vmxon = false;
14582 }
14583
14584 if (vmx->nested.smm.guest_mode) {
14585 vcpu->arch.hflags &= ~HF_SMM_MASK;
Sean Christophersona633e412018-09-26 09:23:47 -070014586 ret = nested_vmx_enter_non_root_mode(vcpu, false);
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014587 vcpu->arch.hflags |= HF_SMM_MASK;
14588 if (ret)
14589 return ret;
14590
14591 vmx->nested.smm.guest_mode = false;
14592 }
Ladi Prosek0234bf82017-10-11 16:54:40 +020014593 return 0;
14594}
14595
Ladi Prosekcc3d9672017-10-17 16:02:39 +020014596static int enable_smi_window(struct kvm_vcpu *vcpu)
14597{
14598 return 0;
14599}
14600
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014601static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
14602 struct kvm_nested_state __user *user_kvm_nested_state,
14603 u32 user_data_size)
14604{
14605 struct vcpu_vmx *vmx;
14606 struct vmcs12 *vmcs12;
14607 struct kvm_nested_state kvm_state = {
14608 .flags = 0,
14609 .format = 0,
14610 .size = sizeof(kvm_state),
14611 .vmx.vmxon_pa = -1ull,
14612 .vmx.vmcs_pa = -1ull,
14613 };
14614
14615 if (!vcpu)
14616 return kvm_state.size + 2 * VMCS12_SIZE;
14617
14618 vmx = to_vmx(vcpu);
14619 vmcs12 = get_vmcs12(vcpu);
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020014620
14621 /* FIXME: Enlightened VMCS is currently unsupported */
14622 if (vmx->nested.hv_evmcs)
14623 return -ENOTSUPP;
14624
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014625 if (nested_vmx_allowed(vcpu) &&
14626 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
14627 kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
14628 kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
14629
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020014630 if (vmx->nested.current_vmptr != -1ull) {
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014631 kvm_state.size += VMCS12_SIZE;
14632
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020014633 if (is_guest_mode(vcpu) &&
14634 nested_cpu_has_shadow_vmcs(vmcs12) &&
14635 vmcs12->vmcs_link_pointer != -1ull)
14636 kvm_state.size += VMCS12_SIZE;
14637 }
14638
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014639 if (vmx->nested.smm.vmxon)
14640 kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
14641
14642 if (vmx->nested.smm.guest_mode)
14643 kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
14644
14645 if (is_guest_mode(vcpu)) {
14646 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
14647
14648 if (vmx->nested.nested_run_pending)
14649 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
14650 }
14651 }
14652
14653 if (user_data_size < kvm_state.size)
14654 goto out;
14655
14656 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
14657 return -EFAULT;
14658
14659 if (vmx->nested.current_vmptr == -1ull)
14660 goto out;
14661
14662 /*
14663 * When running L2, the authoritative vmcs12 state is in the
14664 * vmcs02. When running L1, the authoritative vmcs12 state is
14665 * in the shadow vmcs linked to vmcs01, unless
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020014666 * need_vmcs12_sync is set, in which case, the authoritative
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014667 * vmcs12 state is in the vmcs12 already.
14668 */
14669 if (is_guest_mode(vcpu))
14670 sync_vmcs12(vcpu, vmcs12);
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020014671 else if (enable_shadow_vmcs && !vmx->nested.need_vmcs12_sync)
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014672 copy_shadow_to_vmcs12(vmx);
14673
14674 if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
14675 return -EFAULT;
14676
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020014677 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
14678 vmcs12->vmcs_link_pointer != -1ull) {
14679 if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
14680 get_shadow_vmcs12(vcpu), sizeof(*vmcs12)))
14681 return -EFAULT;
14682 }
14683
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014684out:
14685 return kvm_state.size;
14686}
14687
14688static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
14689 struct kvm_nested_state __user *user_kvm_nested_state,
14690 struct kvm_nested_state *kvm_state)
14691{
14692 struct vcpu_vmx *vmx = to_vmx(vcpu);
14693 struct vmcs12 *vmcs12;
14694 u32 exit_qual;
14695 int ret;
14696
14697 if (kvm_state->format != 0)
14698 return -EINVAL;
14699
14700 if (!nested_vmx_allowed(vcpu))
14701 return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
14702
14703 if (kvm_state->vmx.vmxon_pa == -1ull) {
14704 if (kvm_state->vmx.smm.flags)
14705 return -EINVAL;
14706
14707 if (kvm_state->vmx.vmcs_pa != -1ull)
14708 return -EINVAL;
14709
14710 vmx_leave_nested(vcpu);
14711 return 0;
14712 }
14713
14714 if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
14715 return -EINVAL;
14716
14717 if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
14718 return -EINVAL;
14719
14720 if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
14721 !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
14722 return -EINVAL;
14723
14724 if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
14725 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
14726 return -EINVAL;
14727
14728 if (kvm_state->vmx.smm.flags &
14729 ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
14730 return -EINVAL;
14731
Paolo Bonzini5bea5122018-09-18 15:19:17 +020014732 /*
14733 * SMM temporarily disables VMX, so we cannot be in guest mode,
14734 * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
14735 * must be zero.
14736 */
14737 if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags)
14738 return -EINVAL;
14739
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014740 if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
14741 !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
14742 return -EINVAL;
14743
14744 vmx_leave_nested(vcpu);
14745 if (kvm_state->vmx.vmxon_pa == -1ull)
14746 return 0;
14747
14748 vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
14749 ret = enter_vmx_operation(vcpu);
14750 if (ret)
14751 return ret;
14752
14753 set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
14754
14755 if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
14756 vmx->nested.smm.vmxon = true;
14757 vmx->nested.vmxon = false;
14758
14759 if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
14760 vmx->nested.smm.guest_mode = true;
14761 }
14762
14763 vmcs12 = get_vmcs12(vcpu);
14764 if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12)))
14765 return -EFAULT;
14766
Liran Alon392b2f22018-06-23 02:35:01 +030014767 if (vmcs12->hdr.revision_id != VMCS12_REVISION)
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014768 return -EINVAL;
14769
14770 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
14771 return 0;
14772
14773 vmx->nested.nested_run_pending =
14774 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
14775
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020014776 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
14777 vmcs12->vmcs_link_pointer != -1ull) {
14778 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
14779 if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12))
14780 return -EINVAL;
14781
14782 if (copy_from_user(shadow_vmcs12,
14783 user_kvm_nested_state->data + VMCS12_SIZE,
14784 sizeof(*vmcs12)))
14785 return -EFAULT;
14786
14787 if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
14788 !shadow_vmcs12->hdr.shadow_vmcs)
14789 return -EINVAL;
14790 }
14791
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014792 if (check_vmentry_prereqs(vcpu, vmcs12) ||
14793 check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
14794 return -EINVAL;
14795
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014796 vmx->nested.dirty_vmcs12 = true;
Sean Christophersona633e412018-09-26 09:23:47 -070014797 ret = nested_vmx_enter_non_root_mode(vcpu, false);
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014798 if (ret)
14799 return -EINVAL;
14800
14801 return 0;
14802}
14803
Kees Cook404f6aa2016-08-08 16:29:06 -070014804static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
Avi Kivity6aa8b732006-12-10 02:21:36 -080014805 .cpu_has_kvm_support = cpu_has_kvm_support,
14806 .disabled_by_bios = vmx_disabled_by_bios,
14807 .hardware_setup = hardware_setup,
14808 .hardware_unsetup = hardware_unsetup,
Yang, Sheng002c7f72007-07-31 14:23:01 +030014809 .check_processor_compatibility = vmx_check_processor_compat,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014810 .hardware_enable = hardware_enable,
14811 .hardware_disable = hardware_disable,
Sheng Yang04547152009-04-01 15:52:31 +080014812 .cpu_has_accelerated_tpr = report_flexpriority,
Tom Lendackybc226f02018-05-10 22:06:39 +020014813 .has_emulated_msr = vmx_has_emulated_msr,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014814
Wanpeng Lib31c1142018-03-12 04:53:04 -070014815 .vm_init = vmx_vm_init,
Sean Christopherson434a1e92018-03-20 12:17:18 -070014816 .vm_alloc = vmx_vm_alloc,
14817 .vm_free = vmx_vm_free,
Wanpeng Lib31c1142018-03-12 04:53:04 -070014818
Avi Kivity6aa8b732006-12-10 02:21:36 -080014819 .vcpu_create = vmx_create_vcpu,
14820 .vcpu_free = vmx_free_vcpu,
Avi Kivity04d2cc72007-09-10 18:10:54 +030014821 .vcpu_reset = vmx_vcpu_reset,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014822
Sean Christopherson6d6095b2018-07-23 12:32:44 -070014823 .prepare_guest_switch = vmx_prepare_switch_to_guest,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014824 .vcpu_load = vmx_vcpu_load,
14825 .vcpu_put = vmx_vcpu_put,
14826
Paolo Bonzinia96036b2015-11-10 11:55:36 +010014827 .update_bp_intercept = update_exception_bitmap,
Tom Lendacky801e4592018-02-21 13:39:51 -060014828 .get_msr_feature = vmx_get_msr_feature,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014829 .get_msr = vmx_get_msr,
14830 .set_msr = vmx_set_msr,
14831 .get_segment_base = vmx_get_segment_base,
14832 .get_segment = vmx_get_segment,
14833 .set_segment = vmx_set_segment,
Izik Eidus2e4d2652008-03-24 19:38:34 +020014834 .get_cpl = vmx_get_cpl,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014835 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
Avi Kivitye8467fd2009-12-29 18:43:06 +020014836 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
Avi Kivityaff48ba2010-12-05 18:56:11 +020014837 .decache_cr3 = vmx_decache_cr3,
Anthony Liguori25c4c272007-04-27 09:29:21 +030014838 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014839 .set_cr0 = vmx_set_cr0,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014840 .set_cr3 = vmx_set_cr3,
14841 .set_cr4 = vmx_set_cr4,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014842 .set_efer = vmx_set_efer,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014843 .get_idt = vmx_get_idt,
14844 .set_idt = vmx_set_idt,
14845 .get_gdt = vmx_get_gdt,
14846 .set_gdt = vmx_set_gdt,
Jan Kiszka73aaf249e2014-01-04 18:47:16 +010014847 .get_dr6 = vmx_get_dr6,
14848 .set_dr6 = vmx_set_dr6,
Gleb Natapov020df072010-04-13 10:05:23 +030014849 .set_dr7 = vmx_set_dr7,
Paolo Bonzini81908bf2014-02-21 10:32:27 +010014850 .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -030014851 .cache_reg = vmx_cache_reg,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014852 .get_rflags = vmx_get_rflags,
14853 .set_rflags = vmx_set_rflags,
Huaitong Hanbe94f6b2016-03-22 16:51:20 +080014854
Avi Kivity6aa8b732006-12-10 02:21:36 -080014855 .tlb_flush = vmx_flush_tlb,
Junaid Shahidfaff8752018-06-29 13:10:05 -070014856 .tlb_flush_gva = vmx_flush_tlb_gva,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014857
Avi Kivity6aa8b732006-12-10 02:21:36 -080014858 .run = vmx_vcpu_run,
Avi Kivity6062d012009-03-23 17:35:17 +020014859 .handle_exit = vmx_handle_exit,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014860 .skip_emulated_instruction = skip_emulated_instruction,
Glauber Costa2809f5d2009-05-12 16:21:05 -040014861 .set_interrupt_shadow = vmx_set_interrupt_shadow,
14862 .get_interrupt_shadow = vmx_get_interrupt_shadow,
Ingo Molnar102d8322007-02-19 14:37:47 +020014863 .patch_hypercall = vmx_patch_hypercall,
Eddie Dong2a8067f2007-08-06 16:29:07 +030014864 .set_irq = vmx_inject_irq,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030014865 .set_nmi = vmx_inject_nmi,
Avi Kivity298101d2007-11-25 13:41:11 +020014866 .queue_exception = vmx_queue_exception,
Avi Kivityb463a6f2010-07-20 15:06:17 +030014867 .cancel_injection = vmx_cancel_injection,
Gleb Natapov78646122009-03-23 12:12:11 +020014868 .interrupt_allowed = vmx_interrupt_allowed,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030014869 .nmi_allowed = vmx_nmi_allowed,
Jan Kiszka3cfc3092009-11-12 01:04:25 +010014870 .get_nmi_mask = vmx_get_nmi_mask,
14871 .set_nmi_mask = vmx_set_nmi_mask,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030014872 .enable_nmi_window = enable_nmi_window,
14873 .enable_irq_window = enable_irq_window,
14874 .update_cr8_intercept = update_cr8_intercept,
Jim Mattson8d860bb2018-05-09 16:56:05 -040014875 .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
Tang Chen38b99172014-09-24 15:57:54 +080014876 .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
Andrey Smetanind62caab2015-11-10 15:36:33 +030014877 .get_enable_apicv = vmx_get_enable_apicv,
14878 .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
Yang Zhangc7c9c562013-01-25 10:18:51 +080014879 .load_eoi_exitmap = vmx_load_eoi_exitmap,
Paolo Bonzini967235d2016-12-19 14:03:45 +010014880 .apicv_post_state_restore = vmx_apicv_post_state_restore,
Yang Zhangc7c9c562013-01-25 10:18:51 +080014881 .hwapic_irr_update = vmx_hwapic_irr_update,
14882 .hwapic_isr_update = vmx_hwapic_isr_update,
Liran Alone6c67d82018-09-04 10:56:52 +030014883 .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
Yang Zhanga20ed542013-04-11 19:25:15 +080014884 .sync_pir_to_irr = vmx_sync_pir_to_irr,
14885 .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030014886
Izik Eiduscbc94022007-10-25 00:29:55 +020014887 .set_tss_addr = vmx_set_tss_addr,
Sean Christopherson2ac52ab2018-03-20 12:17:19 -070014888 .set_identity_map_addr = vmx_set_identity_map_addr,
Sheng Yang67253af2008-04-25 10:20:22 +080014889 .get_tdp_level = get_ept_level,
Sheng Yang4b12f0d2009-04-27 20:35:42 +080014890 .get_mt_mask = vmx_get_mt_mask,
Marcelo Tosatti229456f2009-06-17 09:22:14 -030014891
Avi Kivity586f9602010-11-18 13:09:54 +020014892 .get_exit_info = vmx_get_exit_info,
Avi Kivity586f9602010-11-18 13:09:54 +020014893
Sheng Yang17cc3932010-01-05 19:02:27 +080014894 .get_lpage_level = vmx_get_lpage_level,
Sheng Yang0e851882009-12-18 16:48:46 +080014895
14896 .cpuid_update = vmx_cpuid_update,
Sheng Yang4e47c7a2009-12-18 16:48:47 +080014897
14898 .rdtscp_supported = vmx_rdtscp_supported,
Mao, Junjiead756a12012-07-02 01:18:48 +000014899 .invpcid_supported = vmx_invpcid_supported,
Joerg Roedeld4330ef2010-04-22 12:33:11 +020014900
14901 .set_supported_cpuid = vmx_set_supported_cpuid,
Sheng Yangf5f48ee2010-06-30 12:25:15 +080014902
14903 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
Zachary Amsden99e3e302010-08-19 22:07:17 -100014904
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020014905 .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
Zachary Amsden99e3e302010-08-19 22:07:17 -100014906 .write_tsc_offset = vmx_write_tsc_offset,
Joerg Roedel1c97f0a2010-09-10 17:30:41 +020014907
14908 .set_tdp_cr3 = vmx_set_cr3,
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020014909
14910 .check_intercept = vmx_check_intercept,
Yang Zhanga547c6d2013-04-11 19:25:10 +080014911 .handle_external_intr = vmx_handle_external_intr,
Liu, Jinsongda8999d2014-02-24 10:55:46 +000014912 .mpx_supported = vmx_mpx_supported,
Wanpeng Li55412b22014-12-02 19:21:30 +080014913 .xsaves_supported = vmx_xsaves_supported,
Paolo Bonzini66336ca2016-07-12 10:36:41 +020014914 .umip_emulated = vmx_umip_emulated,
Jan Kiszkab6b8a142014-03-07 20:03:12 +010014915
14916 .check_nested_events = vmx_check_nested_events,
Sean Christophersond264ee02018-08-27 15:21:12 -070014917 .request_immediate_exit = vmx_request_immediate_exit,
Radim Krčmářae97a3b2014-08-21 18:08:06 +020014918
14919 .sched_in = vmx_sched_in,
Kai Huang843e4332015-01-28 10:54:28 +080014920
14921 .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
14922 .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
14923 .flush_log_dirty = vmx_flush_log_dirty,
14924 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
Bandan Dasc5f983f2017-05-05 15:25:14 -040014925 .write_log_dirty = vmx_write_pml_buffer,
Wei Huang25462f72015-06-19 15:45:05 +020014926
Feng Wubf9f6ac2015-09-18 22:29:55 +080014927 .pre_block = vmx_pre_block,
14928 .post_block = vmx_post_block,
14929
Wei Huang25462f72015-06-19 15:45:05 +020014930 .pmu_ops = &intel_pmu_ops,
Feng Wuefc64402015-09-18 22:29:51 +080014931
14932 .update_pi_irte = vmx_update_pi_irte,
Yunhong Jiang64672c92016-06-13 14:19:59 -070014933
14934#ifdef CONFIG_X86_64
14935 .set_hv_timer = vmx_set_hv_timer,
14936 .cancel_hv_timer = vmx_cancel_hv_timer,
14937#endif
Ashok Rajc45dcc72016-06-22 14:59:56 +080014938
14939 .setup_mce = vmx_setup_mce,
Ladi Prosek0234bf82017-10-11 16:54:40 +020014940
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014941 .get_nested_state = vmx_get_nested_state,
14942 .set_nested_state = vmx_set_nested_state,
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020014943 .get_vmcs12_pages = nested_get_vmcs12_pages,
14944
Ladi Prosek72d7b372017-10-11 16:54:41 +020014945 .smi_allowed = vmx_smi_allowed,
Ladi Prosek0234bf82017-10-11 16:54:40 +020014946 .pre_enter_smm = vmx_pre_enter_smm,
14947 .pre_leave_smm = vmx_pre_leave_smm,
Ladi Prosekcc3d9672017-10-17 16:02:39 +020014948 .enable_smi_window = enable_smi_window,
Vitaly Kuznetsov57b119d2018-10-16 18:50:01 +020014949
14950 .nested_enable_evmcs = nested_enable_evmcs,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014951};
14952
Thomas Gleixner72c6d2d2018-07-13 16:23:16 +020014953static void vmx_cleanup_l1d_flush(void)
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020014954{
14955 if (vmx_l1d_flush_pages) {
14956 free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
14957 vmx_l1d_flush_pages = NULL;
14958 }
Thomas Gleixner72c6d2d2018-07-13 16:23:16 +020014959 /* Restore state so sysfs ignores VMX */
14960 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +020014961}
14962
Thomas Gleixnera7b90202018-07-13 16:23:18 +020014963static void vmx_exit(void)
14964{
14965#ifdef CONFIG_KEXEC_CORE
14966 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
14967 synchronize_rcu();
14968#endif
14969
14970 kvm_exit();
14971
14972#if IS_ENABLED(CONFIG_HYPERV)
14973 if (static_branch_unlikely(&enable_evmcs)) {
14974 int cpu;
14975 struct hv_vp_assist_page *vp_ap;
14976 /*
14977 * Reset everything to support using non-enlightened VMCS
14978 * access later (e.g. when we reload the module with
14979 * enlightened_vmcs=0)
14980 */
14981 for_each_online_cpu(cpu) {
14982 vp_ap = hv_get_vp_assist_page(cpu);
14983
14984 if (!vp_ap)
14985 continue;
14986
14987 vp_ap->current_nested_vmcs = 0;
14988 vp_ap->enlighten_vmentry = 0;
14989 }
14990
14991 static_branch_disable(&enable_evmcs);
14992 }
14993#endif
14994 vmx_cleanup_l1d_flush();
14995}
14996module_exit(vmx_exit);
14997
Avi Kivity6aa8b732006-12-10 02:21:36 -080014998static int __init vmx_init(void)
14999{
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010015000 int r;
15001
15002#if IS_ENABLED(CONFIG_HYPERV)
15003 /*
15004 * Enlightened VMCS usage should be recommended and the host needs
15005 * to support eVMCS v1 or above. We can also disable eVMCS support
15006 * with module parameter.
15007 */
15008 if (enlightened_vmcs &&
15009 ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
15010 (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
15011 KVM_EVMCS_VERSION) {
15012 int cpu;
15013
15014 /* Check that we have assist pages on all online CPUs */
15015 for_each_online_cpu(cpu) {
15016 if (!hv_get_vp_assist_page(cpu)) {
15017 enlightened_vmcs = false;
15018 break;
15019 }
15020 }
15021
15022 if (enlightened_vmcs) {
15023 pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
15024 static_branch_enable(&enable_evmcs);
15025 }
15026 } else {
15027 enlightened_vmcs = false;
15028 }
15029#endif
15030
15031 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
Thomas Gleixnera7b90202018-07-13 16:23:18 +020015032 __alignof__(struct vcpu_vmx), THIS_MODULE);
He, Qingfdef3ad2007-04-30 09:45:24 +030015033 if (r)
Tiejun Chen34a1cd62014-10-28 10:14:48 +080015034 return r;
Sheng Yang25c5f222008-03-28 13:18:56 +080015035
Thomas Gleixnera7b90202018-07-13 16:23:18 +020015036 /*
Thomas Gleixner7db92e12018-07-13 16:23:19 +020015037 * Must be called after kvm_init() so enable_ept is properly set
15038 * up. Hand the parameter mitigation value in which was stored in
15039 * the pre module init parser. If no parameter was given, it will
15040 * contain 'auto' which will be turned into the default 'cond'
15041 * mitigation mode.
Thomas Gleixnera7b90202018-07-13 16:23:18 +020015042 */
Thomas Gleixner7db92e12018-07-13 16:23:19 +020015043 if (boot_cpu_has(X86_BUG_L1TF)) {
15044 r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
15045 if (r) {
15046 vmx_exit();
15047 return r;
15048 }
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020015049 }
15050
Dave Young2965faa2015-09-09 15:38:55 -070015051#ifdef CONFIG_KEXEC_CORE
Zhang Yanfei8f536b72012-12-06 23:43:34 +080015052 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
15053 crash_vmclear_local_loaded_vmcss);
15054#endif
Jim Mattson21ebf532018-05-01 15:40:28 -070015055 vmx_check_vmcs12_offsets();
Zhang Yanfei8f536b72012-12-06 23:43:34 +080015056
He, Qingfdef3ad2007-04-30 09:45:24 +030015057 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -080015058}
Thomas Gleixnera7b90202018-07-13 16:23:18 +020015059module_init(vmx_init);