x86: Support exposing the Host CPU topology to the Guest VM
At present the Guest generates its own CPU topology. To mitigate
cross-hyperthread speculative execution side-channel attacks, the Guest
must be able to mirror the Host's CPU topology, which is needed for
future scheduling optimization.
Add a config option "--host-cpu-topology" that requires the number of
vCPUs to be identical to the number of physical CPUs, and gives each
vCPU the same APIC ID in MADT and CPUID as the corresponding physical
CPU. Identical APIC IDs ensure an identical topology.
"--host-cpu-topology" requires the vCPU count to equal the pCPU count;
when no vCPU count is given, it defaults to the pCPU count.
"--host-cpu-topology" also sets, by default, the CPU affinity of each
vCPU to the pCPU that has the same processor ID, like 1=1:2=2:3=3:4=4,
so that each vCPU and its corresponding pCPU have the same processor ID
and the same APIC ID. The user cannot set the CPU affinity when
"--host-cpu-topology" is set.
BUG=b:197875305
TEST=Set "--host-cpu-topology" option and check Guest's /proc/cpuinfo,
lscpu, CPUID for different vCPU
Change-Id: Ibc4eb10649e89f43b81bde6d46d6e0e6c7234324
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3217035
Tested-by: kokoro <noreply+kokoro@google.com>
Commit-Queue: Chirantan Ekbote <chirantan@chromium.org>
Reviewed-by: Chirantan Ekbote <chirantan@chromium.org>
diff --git a/aarch64/src/lib.rs b/aarch64/src/lib.rs
index 953d62f..7d4eee0 100644
--- a/aarch64/src/lib.rs
+++ b/aarch64/src/lib.rs
@@ -262,6 +262,7 @@
ramoops_region: Option<arch::pstore::RamoopsRegion>,
devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
irq_chip: &mut dyn IrqChipAArch64,
+ kvm_vcpu_ids: &mut Vec<usize>,
) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
where
V: VmAArch64,
@@ -296,6 +297,7 @@
)?;
has_pvtime &= vcpu.has_pvtime_support();
vcpus.push(vcpu);
+ kvm_vcpu_ids.push(vcpu_id);
}
irq_chip
@@ -510,6 +512,7 @@
_num_cpus: usize,
_has_bios: bool,
_no_smt: bool,
+ _host_cpu_topology: bool,
) -> std::result::Result<(), Self::Error> {
// AArch64 doesn't configure vcpus on the vcpu thread, so nothing to do here.
Ok(())
diff --git a/arch/src/lib.rs b/arch/src/lib.rs
index 4d3e34a..ded5fcb 100644
--- a/arch/src/lib.rs
+++ b/arch/src/lib.rs
@@ -98,6 +98,7 @@
pub gdb: Option<(u32, Tube)>, // port and control tube.
pub dmi_path: Option<PathBuf>,
pub no_legacy: bool,
+ pub host_cpu_topology: bool,
}
/// Holds the elements needed to run a Linux VM. Created by `build_vm`.
@@ -182,6 +183,7 @@
ramoops_region: Option<pstore::RamoopsRegion>,
devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
irq_chip: &mut dyn IrqChipArch,
+ kvm_vcpu_ids: &mut Vec<usize>,
) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
where
V: VmArch,
@@ -207,6 +209,7 @@
num_cpus: usize,
has_bios: bool,
no_smt: bool,
+ host_cpu_topology: bool,
) -> Result<(), Self::Error>;
/// Configures and add a pci device into vm
diff --git a/src/crosvm.rs b/src/crosvm.rs
index 49848ba..84c0f39 100644
--- a/src/crosvm.rs
+++ b/src/crosvm.rs
@@ -383,6 +383,7 @@
pub direct_edge_irq: Vec<u32>,
pub dmi_path: Option<PathBuf>,
pub no_legacy: bool,
+ pub host_cpu_topology: bool,
}
impl Default for Config {
@@ -477,6 +478,7 @@
direct_edge_irq: Vec::new(),
dmi_path: None,
no_legacy: false,
+ host_cpu_topology: false,
}
}
}
diff --git a/src/linux.rs b/src/linux.rs
index 6958478..f5782e7 100644
--- a/src/linux.rs
+++ b/src/linux.rs
@@ -1852,6 +1852,7 @@
// Sets up a vcpu and converts it into a runnable vcpu.
fn runnable_vcpu<V>(
cpu_id: usize,
+ kvm_vcpu_id: usize,
vcpu: Option<V>,
vm: impl VmArch,
irq_chip: &mut dyn IrqChipArch,
@@ -1862,6 +1863,7 @@
has_bios: bool,
use_hypervisor_signals: bool,
enable_per_vm_core_scheduling: bool,
+ host_cpu_topology: bool,
) -> Result<(V, VcpuRunHandle)>
where
V: VcpuArch,
@@ -1872,7 +1874,7 @@
// If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
// the vcpu thread.
match vm
- .create_vcpu(cpu_id)
+ .create_vcpu(kvm_vcpu_id)
.map_err(Error::CreateVcpu)?
.downcast::<V>()
{
@@ -1901,6 +1903,7 @@
vcpu_count,
has_bios,
no_smt,
+ host_cpu_topology,
)
.map_err(Error::ConfigureVcpu)?;
@@ -2011,6 +2014,7 @@
fn run_vcpu<V>(
cpu_id: usize,
+ kvm_vcpu_id: usize,
vcpu: Option<V>,
vm: impl VmArch + 'static,
mut irq_chip: Box<dyn IrqChipArch + 'static>,
@@ -2031,6 +2035,7 @@
mpsc::Sender<VcpuDebugStatusMessage>,
>,
enable_per_vm_core_scheduling: bool,
+ host_cpu_topology: bool,
) -> Result<JoinHandle<()>>
where
V: VcpuArch + 'static,
@@ -2046,6 +2051,7 @@
let guest_mem = vm.get_memory().clone();
let runnable_vcpu = runnable_vcpu(
cpu_id,
+ kvm_vcpu_id,
vcpu,
vm,
irq_chip.as_mut(),
@@ -2056,6 +2062,7 @@
has_bios,
use_hypervisor_signals,
enable_per_vm_core_scheduling,
+ host_cpu_topology,
);
start_barrier.wait();
@@ -2369,6 +2376,7 @@
gdb: None,
dmi_path: cfg.dmi_path.clone(),
no_legacy: cfg.no_legacy,
+ host_cpu_topology: cfg.host_cpu_topology,
})
}
@@ -2618,6 +2626,9 @@
components.acpi_sdts = sdts;
}
+ // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
+ let mut kvm_vcpu_ids = Vec::new();
+
#[cfg_attr(not(feature = "direct"), allow(unused_mut))]
let mut linux = Arch::build_vm::<V, Vcpu>(
components,
@@ -2630,6 +2641,7 @@
ramoops_region,
devices,
irq_chip,
+ &mut kvm_vcpu_ids,
)
.map_err(Error::BuildVm)?;
@@ -2708,6 +2720,8 @@
Arc::clone(&map_request),
gralloc,
cfg.per_vm_core_scheduling,
+ cfg.host_cpu_topology,
+ kvm_vcpu_ids,
)
}
@@ -2801,6 +2815,8 @@
map_request: Arc<Mutex<Option<ExternalMapping>>>,
mut gralloc: RutabagaGralloc,
enable_per_vm_core_scheduling: bool,
+ host_cpu_topology: bool,
+ kvm_vcpu_ids: Vec<usize>,
) -> Result<()> {
#[derive(PollToken)]
enum Token {
@@ -2889,6 +2905,7 @@
};
let handle = run_vcpu(
cpu_id,
+ kvm_vcpu_ids[cpu_id],
vcpu,
linux.vm.try_clone().map_err(Error::CloneEvent)?,
linux.irq_chip.try_box_clone().map_err(Error::CloneEvent)?,
@@ -2908,6 +2925,7 @@
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
to_gdb_channel.clone(),
enable_per_vm_core_scheduling,
+ host_cpu_topology,
)?;
vcpu_handles.push((handle, to_vcpu_channel));
}
diff --git a/src/main.rs b/src/main.rs
index 0b0bb53..f1be103 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1979,6 +1979,10 @@
"no-legacy" => {
cfg.no_legacy = true;
}
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ "host-cpu-topology" => {
+ cfg.host_cpu_topology = true;
+ }
"help" => return Err(argument::Error::PrintHelp),
_ => unreachable!(),
}
@@ -2040,6 +2044,38 @@
));
}
}
+ if cfg.host_cpu_topology {
+ // Safe because we pass a flag for this call and the host supports this system call
+ let pcpu_count = unsafe { libc::sysconf(libc::_SC_NPROCESSORS_CONF) } as usize;
+ if cfg.vcpu_count.is_some() {
+ if pcpu_count != cfg.vcpu_count.unwrap() {
+ return Err(argument::Error::ExpectedArgument(format!(
+ "`host-cpu-topology` requires the count of vCPUs({}) to equal the \
+ count of CPUs({}) on host.",
+ cfg.vcpu_count.unwrap(),
+ pcpu_count
+ )));
+ }
+ } else {
+ cfg.vcpu_count = Some(pcpu_count);
+ }
+
+ match &cfg.vcpu_affinity {
+ None => {
+ let mut affinity_map = BTreeMap::new();
+ for cpu_id in 0..cfg.vcpu_count.unwrap() {
+ affinity_map.insert(cpu_id, vec![cpu_id]);
+ }
+ cfg.vcpu_affinity = Some(VcpuAffinity::PerVcpu(affinity_map));
+ }
+ _ => {
+ return Err(argument::Error::ExpectedArgument(
+ "`host-cpu-topology` requires not to set `cpu-affinity` at the same time"
+ .to_owned(),
+ ));
+ }
+ }
+ }
set_default_serial_parameters(&mut cfg.serial_parameters);
Ok(())
}
@@ -2248,6 +2284,8 @@
Argument::value("direct-edge-irq", "irq", "Enable interrupt passthrough"),
Argument::value("dmi", "DIR", "Directory with smbios_entry_point/DMI files"),
Argument::flag("no-legacy", "Don't use legacy KBD/RTC devices emulation"),
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ Argument::flag("host-cpu-topology", "Use mirror cpu topology of Host for Guest VM"),
Argument::short_flag('h', "help", "Print help message.")];
let mut cfg = Config::default();
diff --git a/x86_64/src/acpi.rs b/x86_64/src/acpi.rs
index ef815d0..8e7d527 100644
--- a/x86_64/src/acpi.rs
+++ b/x86_64/src/acpi.rs
@@ -276,8 +276,7 @@
/// * `acpi_dev_resource` - resouces needed by the ACPI devices for creating tables.
/// * `host_cpus` - The CPU affinity per CPU used to get corresponding CPUs' apic
/// id and set these apic id in MADT if `--host-cpu-topology`
-/// option is set. Now `--host-cpu-topology` hasn't been supported,
-/// and just set it as None.
+/// option is set.
/// * `apic_ids` - The apic id for vCPU will be sent to KVM by KVM_CREATE_VCPU ioctl.
pub fn create_acpi_tables(
guest_mem: &GuestMemory,
diff --git a/x86_64/src/cpuid.rs b/x86_64/src/cpuid.rs
index 899ab28..18f1e50 100644
--- a/x86_64/src/cpuid.rs
+++ b/x86_64/src/cpuid.rs
@@ -161,8 +161,7 @@
/// * `vcpu_id` - The vcpu index of `vcpu`.
/// * `nrcpus` - The number of vcpus being used by this VM.
/// * `no_smt` - The flag indicates whether vCPUs supports SMT.
-/// * `host_cpu_topology` - The flag indicates whether vCPUs use mirror CPU topology. Now
-/// `--host-cpu-topology` hasn't been supported, and just set it as false.
+/// * `host_cpu_topology` - The flag indicates whether vCPUs use mirror CPU topology.
pub fn setup_cpuid(
hypervisor: &dyn HypervisorX86_64,
irq_chip: &dyn IrqChipX86_64,
diff --git a/x86_64/src/lib.rs b/x86_64/src/lib.rs
index 969dec6..6a7f2f6 100644
--- a/x86_64/src/lib.rs
+++ b/x86_64/src/lib.rs
@@ -378,6 +378,7 @@
ramoops_region: Option<arch::pstore::RamoopsRegion>,
devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
irq_chip: &mut dyn IrqChipX86_64,
+ kvm_vcpu_ids: &mut Vec<usize>,
) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
where
V: VmX86_64,
@@ -478,11 +479,11 @@
mptable::setup_mptable(&mem, vcpu_count as u8, pci_irqs).map_err(Error::SetupMptable)?;
smbios::setup_smbios(&mem, components.dmi_path).map_err(Error::SetupSmbios)?;
- // Temporarily set to None to ensure the compilation independence of commit.
- // It is the CPU affinity per CPU.
- let host_cpus = None;
- // Get the APIC ID in MADT.
- let mut kvm_vcpu_ids = Vec::new();
+ let host_cpus = if components.host_cpu_topology {
+ components.vcpu_affinity.clone()
+ } else {
+ None
+ };
// TODO (tjeznach) Write RSDP to bootconfig before writing to memory
acpi::create_acpi_tables(
@@ -491,7 +492,7 @@
X86_64_SCI_IRQ,
acpi_dev_resource,
host_cpus,
- &mut kvm_vcpu_ids,
+ kvm_vcpu_ids,
)
.ok_or(Error::CreateAcpi)?;
@@ -573,9 +574,18 @@
num_cpus: usize,
has_bios: bool,
no_smt: bool,
+ host_cpu_topology: bool,
) -> Result<()> {
- cpuid::setup_cpuid(hypervisor, irq_chip, vcpu, vcpu_id, num_cpus, no_smt, false)
- .map_err(Error::SetupCpuid)?;
+ cpuid::setup_cpuid(
+ hypervisor,
+ irq_chip,
+ vcpu,
+ vcpu_id,
+ num_cpus,
+ no_smt,
+ host_cpu_topology,
+ )
+ .map_err(Error::SetupCpuid)?;
if has_bios {
return Ok(());