blob: b78d4ea178ed64edfe22bc924e02f3b93f4c7f84 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Charles William Dick0e3d4b62020-12-14 12:16:46 +09005use std::cmp::{max, min, Reverse};
Jakub Starona3411ea2019-04-24 10:55:25 -07006use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07007#[cfg(feature = "gpu")]
8use std::env;
David Tolnayfdac5ed2019-03-08 16:56:14 -08009use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080011use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070012use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070013use std::io::{self, stdin, Read};
Steven Richmanf32d0b42020-06-20 21:45:32 -070014use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070015use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080016use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070017#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070018use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090019use std::num::ParseIntError;
Michael Hoylea596a072020-11-10 19:32:45 -080020use std::os::unix::io::FromRawFd;
Zach Reiznera60744b2019-02-13 17:33:32 -080021use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090023use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070024use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070025use std::sync::{mpsc, Arc, Barrier};
26
Zach Reizner39aa26b2017-12-12 18:03:23 -080027use std::thread;
28use std::thread::JoinHandle;
Charles William Dick0bf8a552019-10-29 15:36:01 +090029use std::time::Duration;
Zach Reizner39aa26b2017-12-12 18:03:23 -080030
David Tolnay41a6f842019-03-01 16:18:44 -080031use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080032
Tomasz Jeznach42644642020-05-20 23:27:59 -070033use acpi_tables::sdt::SDT;
34
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080035use base::net::{UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
36use base::*;
Keiichi Watanabe60686582021-03-12 04:53:51 +090037use devices::virtio::vhost::user::{
Woody Chow5890b702021-02-12 14:57:02 +090038 Block as VhostUserBlock, Error as VhostUserError, Fs as VhostUserFs, Net as VhostUserNet,
Keiichi Watanabe60686582021-03-12 04:53:51 +090039};
Zach Reizner65b98f12019-11-22 17:34:58 -080040#[cfg(feature = "gpu")]
41use devices::virtio::EventDevice;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070042use devices::virtio::{self, Console, VirtioDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080043#[cfg(feature = "audio")]
44use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080045use devices::{
Steven Richman11dc6712020-09-02 15:39:14 -070046 self, HostBackendDeviceProvider, IrqChip, IrqEventIndex, KvmKernelIrqChip, PciDevice,
47 VcpuRunState, VfioContainer, VfioDevice, VfioPciDevice, VirtioPciDevice, XhciController,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080048};
Steven Richmanf32d0b42020-06-20 21:45:32 -070049use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Zach Reizner304e7312020-09-29 16:00:24 -070050use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070051use minijail::{self, Minijail};
David Tolnay2b089fc2019-03-04 15:33:22 -080052use net_util::{Error as NetError, MacAddress, Tap};
David Tolnay3df35522019-03-11 12:36:30 -070053use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080054use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080055use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070056use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080057use vm_control::*;
Dylan Reidec058d62020-07-20 20:21:11 -070058use vm_memory::{GuestAddress, GuestMemory};
Zach Reizner39aa26b2017-12-12 18:03:23 -080059
Keiichi Watanabec5262e92020-10-21 15:57:33 +090060#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
61use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090062use crate::{
Woody Chow5890b702021-02-12 14:57:02 +090063 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VhostUserFsOption,
64 VhostUserOption,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090065};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070066use arch::{
Daniel Verkampc677fb42020-09-08 13:47:49 -070067 self, LinuxArch, RunnableLinuxVm, SerialHardware, SerialParameters, VcpuAffinity,
68 VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070069};
Sonny Raoed517d12018-02-13 22:09:43 -080070
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080071#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070072use {
73 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070074 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070075 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
76};
Zach Reizner55a9e502018-10-03 10:22:32 -070077#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070078use {
Steven Richman11dc6712020-09-02 15:39:14 -070079 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
80 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070081 x86_64::X8664arch as Arch,
82};
Zach Reizner39aa26b2017-12-12 18:03:23 -080083
David Tolnay3df35522019-03-11 12:36:30 -070084#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070085#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080086pub enum Error {
Michael Hoyle6b196952020-08-02 20:09:41 -070087 AddGpuDeviceMemory(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -070088 AddIrqChipVcpu(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -070089 AddPmemDeviceMemory(base::Error),
Lepton Wu60893882018-11-21 11:06:18 -080090 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -070091 AllocatePmemDeviceAddress(resources::Error),
Charles William Dick0e3d4b62020-12-14 12:16:46 +090092 BalloonActualTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -080093 BalloonDeviceNew(virtio::BalloonError),
Michael Hoyle6b196952020-08-02 20:09:41 -070094 BlockDeviceNew(base::Error),
95 BlockSignal(base::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -080096 BuildVm(<Arch as LinuxArch>::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -070097 ChownTpmStorage(base::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -070098 CloneEvent(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -070099 CloneVcpu(base::Error),
100 ConfigureVcpu(<Arch as LinuxArch>::Error),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000101 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800102 CreateAc97(devices::PciDeviceError),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700103 CreateConsole(arch::serial::Error),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800104 CreateControlServer(io::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700105 CreateDiskError(disk::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700106 CreateEvent(base::Error),
Gurchetan Singh293913c2020-12-09 10:44:13 -0800107 CreateGrallocError(rutabaga_gfx::RutabagaError),
Zach Reiznera90649a2021-03-31 12:56:08 -0700108 CreateKvm(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700109 CreateSignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800110 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -0800111 CreateTapDevice(NetError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700112 CreateTimer(base::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800113 CreateTpmStorage(PathBuf, io::Error),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800114 CreateTube(TubeError),
Jingkui Wang100e6e42019-03-08 20:41:57 -0800115 CreateUsbProvider(devices::usb::host_backend::error::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700116 CreateVcpu(base::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800117 CreateVfioDevice(devices::vfio::VfioError),
Zach Reiznera90649a2021-03-31 12:56:08 -0700118 CreateVm(base::Error),
Michael Hoylee392c462020-10-07 03:29:24 -0700119 CreateWaitContext(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700120 DeviceJail(minijail::Error),
121 DevicePivotRoot(minijail::Error),
Tomasz Jeznach7271f752021-03-04 01:44:06 -0800122 #[cfg(feature = "direct")]
Tomasz Jeznach3ce74762021-02-26 01:01:53 -0800123 DirectIo(io::Error),
Tomasz Jeznach7271f752021-03-04 01:44:06 -0800124 #[cfg(feature = "direct")]
125 DirectIrq(devices::DirectIrqError),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800126 Disk(PathBuf, io::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700127 DiskImageLock(base::Error),
128 DropCapabilities(base::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900129 FsDeviceNew(virtio::fs::Error),
130 GetMaxOpenFiles(io::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700131 GetSignalMask(signal::Error),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900132 GuestCachedMissing(),
133 GuestCachedTooLarge(std::num::TryFromIntError),
134 GuestFreeMissing(),
135 GuestFreeTooLarge(std::num::TryFromIntError),
Zach Reiznera90649a2021-03-31 12:56:08 -0700136 GuestMemoryLayout(<Arch as LinuxArch>::Error),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900137 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
138 HandleDebugCommand(<Arch as LinuxArch>::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800139 InputDeviceNew(virtio::InputError),
140 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700141 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700142 InvalidWaylandPath,
Allen Webbf3024c82020-06-19 07:19:48 -0700143 IoJail(minijail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800144 LoadKernel(Box<dyn StdError>),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800145 MemoryTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800146 NetDeviceNew(virtio::NetError),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700147 OpenAcpiTable(PathBuf, io::Error),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800148 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700149 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800150 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800151 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800152 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800153 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900154 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800155 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700156 PmemDeviceImageTooBig,
Michael Hoyle6b196952020-08-02 20:09:41 -0700157 PmemDeviceNew(base::Error),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900158 ReadMemAvailable(io::Error),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900159 ReadStatm(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700160 RegisterBalloon(arch::DeviceRegistrationError),
161 RegisterBlock(arch::DeviceRegistrationError),
162 RegisterGpu(arch::DeviceRegistrationError),
163 RegisterNet(arch::DeviceRegistrationError),
164 RegisterP9(arch::DeviceRegistrationError),
165 RegisterRng(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700166 RegisterSignalHandler(base::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700167 RegisterWayland(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700168 ReserveGpuMemory(base::MmapError),
169 ReserveMemory(base::Error),
170 ReservePmemMemory(base::MmapError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700171 ResetTimer(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800172 RngDeviceNew(virtio::RngError),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700173 RunnableVcpu(base::Error),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900174 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
175 SendDebugStatus(Box<mpsc::SendError<VcpuDebugStatusMessage>>),
Allen Webbf3024c82020-06-19 07:19:48 -0700176 SettingGidMap(minijail::Error),
177 SettingMaxOpenFiles(minijail::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700178 SettingSignalMask(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700179 SettingUidMap(minijail::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700180 SignalFd(base::SignalFdError),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900181 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
182 SpawnGdbServer(io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800183 SpawnVcpu(io::Error),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700184 Timer(base::Error),
Michael Hoylea596a072020-11-10 19:32:45 -0800185 ValidateRawDescriptor(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800186 VhostNetDeviceNew(virtio::vhost::Error),
Keiichi Watanabe60686582021-03-12 04:53:51 +0900187 VhostUserBlockDeviceNew(VhostUserError),
Woody Chow5890b702021-02-12 14:57:02 +0900188 VhostUserFsDeviceNew(VhostUserError),
Keiichi Watanabe60686582021-03-12 04:53:51 +0900189 VhostUserNetDeviceNew(VhostUserError),
190 VhostUserNetWithNetArgs,
David Tolnay2b089fc2019-03-04 15:33:22 -0800191 VhostVsockDeviceNew(virtio::vhost::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700192 VirtioPciDev(base::Error),
Michael Hoylee392c462020-10-07 03:29:24 -0700193 WaitContextAdd(base::Error),
194 WaitContextDelete(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700195 WaylandDeviceNew(base::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800196}
197
David Tolnayc69f9752019-03-01 18:07:56 -0800198impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700199 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800200 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800201 use self::Error::*;
202
David Tolnay3df35522019-03-11 12:36:30 -0700203 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800204 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800205 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700206 AddIrqChipVcpu(e) => write!(f, "failed to add vcpu to irq chip: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700207 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800208 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700209 AllocatePmemDeviceAddress(e) => {
210 write!(f, "failed to allocate memory for pmem device: {}", e)
211 }
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900212 BalloonActualTooLarge => write!(f, "balloon actual size is too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800213 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
214 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
215 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800216 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800217 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700218 CloneEvent(e) => write!(f, "failed to clone event: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700219 CloneVcpu(e) => write!(f, "failed to clone vcpu: {}", e),
220 ConfigureVcpu(e) => write!(f, "failed to configure vcpu: {}", e),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000221 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800222 CreateAc97(e) => write!(f, "failed to create ac97 device: {}", e),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700223 CreateConsole(e) => write!(f, "failed to create console device: {}", e),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800224 CreateControlServer(e) => write!(f, "failed to create control server: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700225 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700226 CreateEvent(e) => write!(f, "failed to create event: {}", e),
Gurchetan Singh293913c2020-12-09 10:44:13 -0800227 CreateGrallocError(e) => write!(f, "failed to create gralloc: {}", e),
Zach Reiznera90649a2021-03-31 12:56:08 -0700228 CreateKvm(e) => write!(f, "failed to create kvm: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800229 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
230 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
231 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700232 CreateTimer(e) => write!(f, "failed to create Timer: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800233 CreateTpmStorage(p, e) => {
234 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
235 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800236 CreateTube(e) => write!(f, "failed to create tube: {}", e),
Jingkui Wang100e6e42019-03-08 20:41:57 -0800237 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700238 CreateVcpu(e) => write!(f, "failed to create vcpu: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800239 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
Zach Reiznera90649a2021-03-31 12:56:08 -0700240 CreateVm(e) => write!(f, "failed to create vm: {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700241 CreateWaitContext(e) => write!(f, "failed to create wait context: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800242 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
243 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
Tomasz Jeznach7271f752021-03-04 01:44:06 -0800244 #[cfg(feature = "direct")]
Tomasz Jeznach3ce74762021-02-26 01:01:53 -0800245 DirectIo(e) => write!(f, "failed to open direct io device: {}", e),
Tomasz Jeznach7271f752021-03-04 01:44:06 -0800246 #[cfg(feature = "direct")]
247 DirectIrq(e) => write!(f, "failed to enable interrupt forwarding: {}", e),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800248 Disk(p, e) => write!(f, "failed to load disk image {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800249 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800250 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900251 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
252 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700253 GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900254 GuestCachedMissing() => write!(f, "guest cached is missing from balloon stats"),
255 GuestCachedTooLarge(e) => write!(f, "guest cached is too large: {}", e),
256 GuestFreeMissing() => write!(f, "guest free is missing from balloon stats"),
257 GuestFreeTooLarge(e) => write!(f, "guest free is too large: {}", e),
Zach Reiznera90649a2021-03-31 12:56:08 -0700258 GuestMemoryLayout(e) => write!(f, "failed to create guest memory layout: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900259 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
260 HandleDebugCommand(e) => write!(f, "failed to handle a gdb command: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700261 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
262 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800263 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
264 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800265 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800266 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800267 MemoryTooLarge => write!(f, "requested memory size too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800268 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700269 OpenAcpiTable(p, e) => write!(f, "failed to open ACPI file {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800270 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800271 f,
272 "failed to open android fstab file {}: {}",
273 p.display(),
274 e
275 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700276 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700277 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
278 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800279 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800280 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900281 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800282 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700283 PmemDeviceImageTooBig => {
284 write!(f, "failed to create pmem device: pmem device image too big")
285 }
286 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900287 ReadMemAvailable(e) => write!(
288 f,
289 "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}",
290 e
291 ),
292 ReadStatm(e) => write!(f, "failed to read /proc/self/statm: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800293 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
294 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
295 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
296 RegisterNet(e) => write!(f, "error registering net device: {}", e),
297 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
298 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
299 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
300 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800301 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
302 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700303 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700304 ResetTimer(e) => write!(f, "failed to reset Timer: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800305 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700306 RunnableVcpu(e) => write!(f, "failed to set thread id for vcpu: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900307 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
308 SendDebugStatus(e) => write!(f, "failed to send a debug status to GDB thread: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800309 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900310 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700311 SettingSignalMask(e) => write!(f, "failed to set the signal mask for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800312 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
313 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900314 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
315 SpawnGdbServer(e) => write!(f, "failed to spawn GDB thread: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800316 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700317 Timer(e) => write!(f, "failed to read timer fd: {}", e),
Michael Hoylea596a072020-11-10 19:32:45 -0800318 ValidateRawDescriptor(e) => write!(f, "failed to validate raw descriptor: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800319 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900320 VhostUserBlockDeviceNew(e) => {
321 write!(f, "failed to set up vhost-user block device: {}", e)
322 }
Tomasz Jeznachccb26942021-03-30 22:44:11 -0700323 VhostUserFsDeviceNew(e) => write!(f, "failed to set up vhost-user fs device: {}", e),
324 VhostUserNetDeviceNew(e) => write!(f, "failed to set up vhost-user net device: {}", e),
325 VhostUserNetWithNetArgs => write!(
326 f,
327 "vhost-user-net cannot be used with any of --host_ip, --netmask or --mac"
328 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800329 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
330 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700331 WaitContextAdd(e) => write!(f, "failed to add descriptor to wait context: {}", e),
332 WaitContextDelete(e) => {
333 write!(f, "failed to remove descriptor from wait context: {}", e)
334 }
David Tolnayc69f9752019-03-01 18:07:56 -0800335 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800336 }
337 }
338}
339
Allen Webbf3024c82020-06-19 07:19:48 -0700340impl From<minijail::Error> for Error {
341 fn from(err: minijail::Error) -> Self {
David Tolnayfd0971d2019-03-04 17:15:57 -0800342 Error::IoJail(err)
343 }
344}
345
David Tolnayc69f9752019-03-01 18:07:56 -0800346impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700347
Zach Reizner39aa26b2017-12-12 18:03:23 -0800348type Result<T> = std::result::Result<T, Error>;
349
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800350enum TaggedControlTube {
351 Fs(Tube),
352 Vm(Tube),
353 VmMemory(Tube),
354 VmIrq(Tube),
355 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700356}
357
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800358impl AsRef<Tube> for TaggedControlTube {
359 fn as_ref(&self) -> &Tube {
360 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -0700361 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800362 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700363 }
364 }
365}
366
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800367impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700368 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800369 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700370 }
371}
372
Andrew Walbranf50bab62020-07-07 13:22:53 +0100373fn get_max_open_files() -> Result<u64> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900374 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900375
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900376 // Safe because this will only modify `buf` and we check the return value.
377 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
378 if res == 0 {
379 // Safe because the kernel guarantees that the struct is fully initialized.
380 let limit = unsafe { buf.assume_init() };
381 Ok(limit.rlim_max)
382 } else {
383 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
384 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900385}
386
/// Sandbox settings consumed by `create_base_minijail` when a device is jailed.
struct SandboxConfig<'a> {
    /// When set, the jail is configured with an empty capability set (`use_caps(0)`).
    limit_caps: bool,
    /// When set, seccomp failures are logged and the text `.policy` file is always used instead
    /// of a pre-compiled `.bpf` program.
    log_failures: bool,
    /// Seccomp policy path; its extension is replaced with `bpf` or `policy` before loading.
    seccomp_policy: &'a Path,
    /// Optional UID map string handed to the jail's `uidmap`.
    uid_map: Option<&'a str>,
    /// Optional GID map string handed to the jail's `gidmap`.
    gid_map: Option<&'a str>,
}
394
Zach Reizner44863792019-06-26 14:22:08 -0700395fn create_base_minijail(
396 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800397 r_limit: Option<u64>,
398 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700399) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800400 // All child jails run in a new user namespace without any users mapped,
401 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800402 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800403
404 if let Some(config) = config {
405 j.namespace_pids();
406 j.namespace_user();
407 j.namespace_user_disable_setgroups();
408 if config.limit_caps {
409 // Don't need any capabilities.
410 j.use_caps(0);
411 }
412 if let Some(uid_map) = config.uid_map {
413 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
414 }
415 if let Some(gid_map) = config.gid_map {
416 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
417 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900418 // Run in a new mount namespace.
419 j.namespace_vfs();
420
Matt Delcoc24ad782020-02-14 13:24:36 -0800421 // Run in an empty network namespace.
422 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900423
424 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800425 j.no_new_privs();
426
427 // By default we'll prioritize using the pre-compiled .bpf over the .policy
428 // file (the .bpf is expected to be compiled using "trap" as the failure
429 // behavior instead of the default "kill" behavior).
430 // Refer to the code comment for the "seccomp-log-failures"
431 // command-line parameter for an explanation about why the |log_failures|
432 // flag forces the use of .policy files (and the build-time alternative to
433 // this run-time flag).
434 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
435 if bpf_policy_file.exists() && !config.log_failures {
436 j.parse_seccomp_program(&bpf_policy_file)
437 .map_err(Error::DeviceJail)?;
438 } else {
439 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
440 // which will correctly kill the entire device process if a worker
441 // thread commits a seccomp violation.
442 j.set_seccomp_filter_tsync();
443 if config.log_failures {
444 j.log_seccomp_filter_failures();
445 }
446 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
447 .map_err(Error::DeviceJail)?;
448 }
449 j.use_seccomp_filter();
450 // Don't do init setup.
451 j.run_as_init();
452 }
453
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900454 // Only pivot_root if we are not re-using the current root directory.
455 if root != Path::new("/") {
456 // It's safe to call `namespace_vfs` multiple times.
457 j.namespace_vfs();
458 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
459 }
Matt Delco45caf912019-11-13 08:11:09 -0800460
Matt Delcoc24ad782020-02-14 13:24:36 -0800461 // Most devices don't need to open many fds.
462 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
463 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
464 .map_err(Error::SettingMaxOpenFiles)?;
465
Zach Reizner39aa26b2017-12-12 18:03:23 -0800466 Ok(j)
467}
468
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800469fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700470 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800471 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
472 // A directory for a jailed device's pivot root.
473 let root_path = Path::new(pivot_root);
474 if !root_path.exists() {
475 return Err(Error::PivotRootDoesntExist(pivot_root));
476 }
477 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800478 let config = SandboxConfig {
479 limit_caps: true,
480 log_failures: cfg.seccomp_log_failures,
481 seccomp_policy: &policy_path,
482 uid_map: None,
483 gid_map: None,
484 };
485 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800486 } else {
487 Ok(None)
488 }
489}
490
/// Result type returned by the device constructors in this file. The success type defaults
/// to `VirtioDeviceStub` and the error type is this file's `Error`.
type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800492
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800493fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800494 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
495 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
496 // Safe because we will validate |raw_fd|.
Michael Hoylea596a072020-11-10 19:32:45 -0800497 unsafe { File::from_raw_descriptor(raw_descriptor_from_path(&disk.path)?) }
David Tolnay2b089fc2019-03-04 15:33:22 -0800498 } else {
499 OpenOptions::new()
500 .read(true)
501 .write(!disk.read_only)
502 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800503 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
David Tolnay2b089fc2019-03-04 15:33:22 -0800504 };
505 // Lock the disk image to prevent other crosvm instances from using it.
506 let lock_op = if disk.read_only {
507 FlockOperation::LockShared
508 } else {
509 FlockOperation::LockExclusive
510 };
511 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
512
Dylan Reid503c5ab2020-07-17 11:20:07 -0700513 let dev = if disk::async_ok(&raw_image).map_err(Error::CreateDiskError)? {
514 let async_file = disk::create_async_disk_file(raw_image).map_err(Error::CreateDiskError)?;
515 Box::new(
516 virtio::BlockAsync::new(
517 virtio::base_features(cfg.protected_vm),
518 async_file,
519 disk.read_only,
520 disk.sparse,
521 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700522 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800523 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700524 )
525 .map_err(Error::BlockDeviceNew)?,
526 ) as Box<dyn VirtioDevice>
527 } else {
528 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
529 Box::new(
530 virtio::Block::new(
531 virtio::base_features(cfg.protected_vm),
532 disk_file,
533 disk.read_only,
534 disk.sparse,
535 disk.block_size,
536 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800537 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700538 )
539 .map_err(Error::BlockDeviceNew)?,
540 ) as Box<dyn VirtioDevice>
541 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800542
543 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700544 dev,
Matt Delco45caf912019-11-13 08:11:09 -0800545 jail: simple_jail(&cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800546 })
547}
548
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900549fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
550 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
551 .map_err(Error::VhostUserBlockDeviceNew)?;
552
553 Ok(VirtioDeviceStub {
554 dev: Box::new(dev),
555 // no sandbox here because virtqueue handling is exported to a different process.
556 jail: None,
557 })
558}
559
Woody Chow5890b702021-02-12 14:57:02 +0900560fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
561 let dev = VhostUserFs::new(
562 virtio::base_features(cfg.protected_vm),
563 &option.socket,
564 &option.tag,
565 )
566 .map_err(Error::VhostUserFsDeviceNew)?;
567
568 Ok(VirtioDeviceStub {
569 dev: Box::new(dev),
570 // no sandbox here because virtqueue handling is exported to a different process.
571 jail: None,
572 })
573}
574
David Tolnay2b089fc2019-03-04 15:33:22 -0800575fn create_rng_device(cfg: &Config) -> DeviceResult {
Keiichi Watanabef70350b2020-11-24 21:57:53 +0900576 let dev =
577 virtio::Rng::new(virtio::base_features(cfg.protected_vm)).map_err(Error::RngDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800578
579 Ok(VirtioDeviceStub {
580 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800581 jail: simple_jail(&cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800582 })
583}
584
/// Creates the virtio TPM device and prepares the directory used for its storage.
///
/// With sandboxing enabled, a per-process directory `/run/vm/tpm.<pid>` is created, chowned
/// to the ids returned by `add_crosvm_user_to_jail`, and bind-mounted writable into the jail.
/// Without a jail, `/tmp/tpm-simulator` is used as the storage path and nothing is created
/// here.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = match tpm_jail.as_mut() {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                tmpfs_flags,
                "size=20480",
            )?;

            let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage)
                .map_err(|e| Error::CreateTpmStorage(storage.clone(), e))?;

            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .map_err(Error::ChownTpmStorage)?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(virtio::Tpm::new(tpm_storage)),
        jail: tpm_jail,
    })
}
632
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800633fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800634 let socket = single_touch_spec
635 .get_path()
636 .into_unix_stream()
637 .map_err(|e| {
638 error!("failed configuring virtio single touch: {:?}", e);
639 e
640 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800641
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800642 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700643 let dev = virtio::new_single_touch(
644 socket,
645 width,
646 height,
647 virtio::base_features(cfg.protected_vm),
648 )
649 .map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800650 Ok(VirtioDeviceStub {
651 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800652 jail: simple_jail(&cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800653 })
654}
655
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000656fn create_multi_touch_device(cfg: &Config, multi_touch_spec: &TouchDeviceOption) -> DeviceResult {
657 let socket = multi_touch_spec
658 .get_path()
659 .into_unix_stream()
660 .map_err(|e| {
661 error!("failed configuring virtio multi touch: {:?}", e);
662 e
663 })?;
664
665 let (width, height) = multi_touch_spec.get_size();
666 let dev = virtio::new_multi_touch(
667 socket,
668 width,
669 height,
670 virtio::base_features(cfg.protected_vm),
671 )
672 .map_err(Error::InputDeviceNew)?;
673
674 Ok(VirtioDeviceStub {
675 dev: Box::new(dev),
676 jail: simple_jail(&cfg, "input_device")?,
677 })
678}
679
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800680fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800681 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800682 error!("failed configuring virtio trackpad: {}", e);
683 e
684 })?;
685
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800686 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700687 let dev = virtio::new_trackpad(
688 socket,
689 width,
690 height,
691 virtio::base_features(cfg.protected_vm),
692 )
693 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800694
695 Ok(VirtioDeviceStub {
696 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800697 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800698 })
699}
700
Zach Reizner65b98f12019-11-22 17:34:58 -0800701fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T) -> DeviceResult {
702 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800703 error!("failed configuring virtio mouse: {}", e);
704 e
705 })?;
706
Noah Goldd4ca29b2020-10-27 12:21:52 -0700707 let dev = virtio::new_mouse(socket, virtio::base_features(cfg.protected_vm))
708 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800709
710 Ok(VirtioDeviceStub {
711 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800712 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800713 })
714}
715
Zach Reizner65b98f12019-11-22 17:34:58 -0800716fn create_keyboard_device<T: IntoUnixStream>(cfg: &Config, keyboard_socket: T) -> DeviceResult {
717 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800718 error!("failed configuring virtio keyboard: {}", e);
719 e
720 })?;
721
Noah Goldd4ca29b2020-10-27 12:21:52 -0700722 let dev = virtio::new_keyboard(socket, virtio::base_features(cfg.protected_vm))
723 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800724
725 Ok(VirtioDeviceStub {
726 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800727 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800728 })
729}
730
Daniel Norman5e23df72021-03-11 10:11:02 -0800731fn create_switches_device<T: IntoUnixStream>(cfg: &Config, switches_socket: T) -> DeviceResult {
732 let socket = switches_socket.into_unix_stream().map_err(|e| {
733 error!("failed configuring virtio switches: {}", e);
734 e
735 })?;
736
737 let dev = virtio::new_switches(socket, virtio::base_features(cfg.protected_vm))
738 .map_err(Error::InputDeviceNew)?;
739
740 Ok(VirtioDeviceStub {
741 dev: Box::new(dev),
742 jail: simple_jail(&cfg, "input_device")?,
743 })
744}
745
David Tolnay2b089fc2019-03-04 15:33:22 -0800746fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
747 let dev_file = OpenOptions::new()
748 .read(true)
749 .write(true)
750 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800751 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800752
Noah Goldd4ca29b2020-10-27 12:21:52 -0700753 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
754 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800755
756 Ok(VirtioDeviceStub {
757 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800758 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800759 })
760}
761
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800762fn create_balloon_device(cfg: &Config, tube: Tube) -> DeviceResult {
763 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), tube)
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100764 .map_err(Error::BalloonDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800765
766 Ok(VirtioDeviceStub {
767 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800768 jail: simple_jail(&cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800769 })
770}
771
Michael Hoylea596a072020-11-10 19:32:45 -0800772fn create_tap_net_device(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800773 // Safe because we ensure that we get a unique handle to the fd.
774 let tap = unsafe {
Michael Hoylea596a072020-11-10 19:32:45 -0800775 Tap::from_raw_descriptor(
776 validate_raw_descriptor(tap_fd).map_err(Error::ValidateRawDescriptor)?,
777 )
778 .map_err(Error::CreateTapDevice)?
David Tolnay2b089fc2019-03-04 15:33:22 -0800779 };
780
Xiong Zhang773c7072020-03-20 10:39:55 +0800781 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
782 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700783 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800784 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
785 vq_pairs = 1;
786 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100787 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100788 let dev = virtio::Net::from(features, tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800789
790 Ok(VirtioDeviceStub {
791 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800792 jail: simple_jail(&cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800793 })
794}
795
796fn create_net_device(
797 cfg: &Config,
798 host_ip: Ipv4Addr,
799 netmask: Ipv4Addr,
800 mac_address: MacAddress,
801 mem: &GuestMemory,
802) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800803 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
804 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700805 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800806 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
807 vq_pairs = 1;
808 }
809
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100810 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800811 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100812 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
Christian Blichmann2f5d4b62021-03-10 18:08:08 +0100813 &cfg.vhost_net_device_path,
Will Deacon81d5adb2020-10-06 18:37:48 +0100814 features,
815 host_ip,
816 netmask,
817 mac_address,
818 mem,
819 )
820 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800821 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800822 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100823 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Xiong Zhang773c7072020-03-20 10:39:55 +0800824 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800825 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800826 };
827
828 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800829 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800830 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800831 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800832 };
833
834 Ok(VirtioDeviceStub {
835 dev,
836 jail: simple_jail(&cfg, policy)?,
837 })
838}
839
Keiichi Watanabe60686582021-03-12 04:53:51 +0900840fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
841 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
842 .map_err(Error::VhostUserNetDeviceNew)?;
843
844 Ok(VirtioDeviceStub {
845 dev: Box::new(dev),
846 // no sandbox here because virtqueue handling is exported to a different process.
847 jail: None,
848 })
849}
850
/// Creates the virtio GPU device and, when sandboxing is enabled, the jail it runs in.
///
/// The display backend list is Wayland (only when `wayland_socket_path` is given), then X,
/// then the stub backend. Under sandboxing the Wayland backend is pointed at `/wayland-0`,
/// the location the host socket is bind-mounted to inside the jail.
///
/// The jail receives a tmpfs root plus bind mounts for the DRM sysfs and device nodes, the
/// optional shader cache directory, vendor GPU nodes, driver library directories, the Wayland
/// socket, a read-only `/proc`, and the perfetto IPC directory. Host paths that are checked
/// with `exists()` are skipped when absent.
///
/// Configuring the shader cache sets `MESA_GLSL_CACHE_*` variables in this process's
/// environment.
///
/// # Panics
///
/// Panics if `cfg.gpu_parameters` is `None`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_tube: Tube,
    resource_bridges: Vec<Tube>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
    mem: &GuestMemory,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");

    // Wayland goes first when a socket is available, followed by X and the stub.
    let mut display_backends = Vec::with_capacity(3);
    if let Some(socket_path) = wayland_socket_path {
        let visible_path = if cfg.sandbox {
            jailed_wayland_path.to_owned()
        } else {
            socket_path.to_owned()
        };
        display_backends.push(virtio::DisplayBackend::Wayland(Some(visible_path)));
    }
    display_backends.push(virtio::DisplayBackend::X(x_display));
    display_backends.push(virtio::DisplayBackend::Stub);

    let exit_evt = exit_evt.try_clone().map_err(Error::CloneEvent)?;
    let gpu_params = cfg.gpu_parameters.as_ref().unwrap();
    let dev = virtio::Gpu::new(
        exit_evt,
        Some(gpu_device_tube),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        resource_bridges,
        display_backends,
        gpu_params,
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
        mem.clone(),
    );

    let mut jail = simple_jail(cfg, "gpu_device")?;
    if let Some(jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        for sysfs_dir in &["/sys/dev/char", "/sys/devices"] {
            let sysfs_dir = Path::new(sysfs_dir);
            jail.mount_bind(sysfs_dir, sysfs_dir, false)?;
        }

        let drm_dri_path = Path::new("/dev/dri");
        if drm_dri_path.exists() {
            jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
        }

        // Prepare GPU shader disk cache directory.
        if let Some(cache_dir) = gpu_params.cache_path.as_ref() {
            let caching_unsupported =
                cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox;
            if caching_unsupported {
                warn!("shader caching not yet supported on ARM with sandbox enabled");
                env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
            } else {
                env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                if let Some(cache_size) = gpu_params.cache_size.as_ref() {
                    env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                }
                let shadercache_path = Path::new(cache_dir);
                jail.mount_bind(shadercache_path, shadercache_path, true)?;
            }
        }

        // If the ARM specific devices or the udmabuf driver exist on the host, bind mount
        // them in.
        for node in &["/dev/mali0", "/dev/pvr_sync", "/dev/udmabuf"] {
            let node_path = Path::new(node);
            if node_path.exists() {
                jail.mount_bind(node_path, node_path, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        let lib_dirs = &[
            "/usr/lib",
            "/usr/lib64",
            "/lib",
            "/lib64",
            "/usr/share/vulkan",
        ];
        for dir in lib_dirs {
            let dir_path = Path::new(dir);
            if dir_path.exists() {
                jail.mount_bind(dir_path, dir_path, false)?;
            }
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket. Don't bind mount the camera socket
        // since it seems to cause problems on ARCVM (b/180126126) + Mali. It's unclear if
        // camera team will opt for virtio-camera or continue using virtio-wl, so this should
        // be fine for now.
        if let Some(path) = wayland_socket_path {
            jail.mount_bind(path, jailed_wayland_path, true)?;
        }

        add_crosvm_user_to_jail(jail, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        let proc_flags =
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
        jail.mount(proc_path, proc_path, "proc", proc_flags)?;

        // To enable perfetto tracing, we need to give access to the perfetto service IPC
        // endpoints.
        let perfetto_path = Path::new("/run/perfetto");
        if perfetto_path.exists() {
            jail.mount_bind(perfetto_path, perfetto_path, true)?;
        }
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
1012
1013fn create_wayland_device(
1014 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001015 control_tube: Tube,
1016 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001017) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001018 let wayland_socket_dirs = cfg
1019 .wayland_socket_paths
1020 .iter()
1021 .map(|(_name, path)| path.parent())
1022 .collect::<Option<Vec<_>>>()
1023 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001024
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001025 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +01001026 let dev = virtio::Wl::new(
1027 features,
1028 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001029 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +01001030 resource_bridge,
1031 )
1032 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001033
Matt Delco45caf912019-11-13 08:11:09 -08001034 let jail = match simple_jail(&cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -08001035 Some(mut jail) => {
1036 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
1037 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
1038 jail.mount_with_data(
1039 Path::new("none"),
1040 Path::new("/"),
1041 "tmpfs",
1042 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1043 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -08001044 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001045
1046 // Bind mount the wayland socket's directory into jail's root. This is necessary since
1047 // each new wayland context must open() the socket. If the wayland socket is ever
1048 // destroyed and remade in the same host directory, new connections will be possible
1049 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001050 for dir in &wayland_socket_dirs {
1051 jail.mount_bind(dir, dir, true)?;
1052 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001053 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
1054
1055 Some(jail)
1056 }
1057 None => None,
1058 };
1059
1060 Ok(VirtioDeviceStub {
1061 dev: Box::new(dev),
1062 jail,
1063 })
1064}
1065
/// Creates a virtio video device of kind `typ` (decoder or encoder) and, when sandboxing is
/// enabled, its jail.
///
/// The jail registers the crosvm user under "video-decoder" or "video-encoder" depending on
/// `typ`, mounts a tmpfs root, and bind-mounts the render node, `/dev/urandom` and the system
/// D-Bus socket. On x86/x86_64 builds it also bind-mounts the sysfs device trees and
/// `/usr/lib64`. None of these mounts are guarded by an existence check.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let mut jail = simple_jail(cfg, "video_device")?;
    if let Some(jail) = jail.as_mut() {
        let jail_user = match typ {
            devices::virtio::VideoDeviceType::Decoder => "video-decoder",
            devices::virtio::VideoDeviceType::Encoder => "video-encoder",
        };
        add_crosvm_user_to_jail(jail, jail_user)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        let render_node = Path::new("/dev/dri/renderD128");
        jail.mount_bind(render_node, render_node, false)?;

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Device nodes used by libdrm through minigbm in libvda on AMD devices.
            let sys_dev_char = Path::new("/sys/dev/char");
            jail.mount_bind(sys_dev_char, sys_dev_char, false)?;
            let sys_devices = Path::new("/sys/devices");
            jail.mount_bind(sys_devices, sys_devices, false)?;

            // Required for loading dri libraries loaded by minigbm on AMD devices.
            let lib64 = Path::new("/usr/lib64");
            jail.mount_bind(lib64, lib64, false)?;
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        jail.mount_bind(urandom, urandom, false)?;
        let system_bus_socket = Path::new("/run/dbus/system_bus_socket");
        jail.mount_bind(system_bus_socket, system_bus_socket, true)?;
    }

    let video = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        Some(resource_bridge),
    );
    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail,
    })
}
1129
/// Builds a video device of kind `typ` using `video_tube` and appends it to `devs`.
///
/// Nothing is appended if the device cannot be created; the error is returned instead.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let video_device = create_video_device(cfg, typ, video_tube)?;
    devs.push(video_device);
    Ok(())
}
1140
David Tolnay2b089fc2019-03-04 15:33:22 -08001141fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001142 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann2f5d4b62021-03-10 18:08:08 +01001143 let dev = virtio::vhost::Vsock::new(&cfg.vhost_vsock_device_path, features, cid, mem)
1144 .map_err(Error::VhostVsockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001145
1146 Ok(VirtioDeviceStub {
1147 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001148 jail: simple_jail(&cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001149 })
1150}
1151
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001152fn create_fs_device(
1153 cfg: &Config,
1154 uid_map: &str,
1155 gid_map: &str,
1156 src: &Path,
1157 tag: &str,
1158 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001159 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001160) -> DeviceResult {
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001161 let max_open_files = get_max_open_files()?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001162 let j = if cfg.sandbox {
1163 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1164 let config = SandboxConfig {
1165 limit_caps: false,
1166 uid_map: Some(uid_map),
1167 gid_map: Some(gid_map),
1168 log_failures: cfg.seccomp_log_failures,
1169 seccomp_policy: &seccomp_policy,
1170 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001171 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1172 // We want bind mounts from the parent namespaces to propagate into the fs device's
1173 // namespace.
1174 jail.set_remount_mode(libc::MS_SLAVE);
1175
1176 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001177 } else {
1178 create_base_minijail(src, Some(max_open_files), None)?
1179 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001180
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001181 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001182 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1183 // when num_queues > 1.
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001184 let dev =
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001185 virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube).map_err(Error::FsDeviceNew)?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001186
1187 Ok(VirtioDeviceStub {
1188 dev: Box::new(dev),
1189 jail: Some(j),
1190 })
1191}
1192
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001193fn create_9p_device(
1194 cfg: &Config,
1195 uid_map: &str,
1196 gid_map: &str,
1197 src: &Path,
1198 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001199 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001200) -> DeviceResult {
1201 let max_open_files = get_max_open_files()?;
1202 let (jail, root) = if cfg.sandbox {
1203 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1204 let config = SandboxConfig {
1205 limit_caps: false,
1206 uid_map: Some(uid_map),
1207 gid_map: Some(gid_map),
1208 log_failures: cfg.seccomp_log_failures,
1209 seccomp_policy: &seccomp_policy,
1210 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001211
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001212 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1213 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1214 // namespace.
1215 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001216
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001217 // The shared directory becomes the root of the device's file system.
1218 let root = Path::new("/");
1219 (Some(jail), root)
1220 } else {
1221 // There's no mount namespace so we tell the server to treat the source directory as the
1222 // root.
1223 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001224 };
1225
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001226 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001227 p9_cfg.root = root.into();
1228 let dev = virtio::P9::new(features, tag, p9_cfg).map_err(Error::P9DeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001229
1230 Ok(VirtioDeviceStub {
1231 dev: Box::new(dev),
1232 jail,
1233 })
1234}
1235
Jakub Starona3411ea2019-04-24 10:55:25 -07001236fn create_pmem_device(
1237 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001238 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001239 resources: &mut SystemAllocator,
1240 disk: &DiskOption,
1241 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001242 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001243) -> DeviceResult {
Mike Gerowec618a52021-02-26 20:57:14 +00001244 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
1245 let fd: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
1246 // Safe because we will validate |raw_fd|.
1247 unsafe { File::from_raw_descriptor(raw_descriptor_from_path(&disk.path)?) }
1248 } else {
1249 OpenOptions::new()
1250 .read(true)
1251 .write(!disk.read_only)
1252 .open(&disk.path)
1253 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
1254 };
Jakub Starona3411ea2019-04-24 10:55:25 -07001255
Iliyan Malcheved149862020-04-17 23:57:47 +00001256 let arena_size = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001257 let metadata =
1258 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001259 let disk_len = metadata.len();
1260 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1261 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1262 // we just align the size of the file to 2 MiB then access beyond the last page of the
1263 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1264 // padding up to 2 MiB.
1265 let alignment = 2 * 1024 * 1024;
1266 let align_adjust = if disk_len % alignment != 0 {
1267 alignment - (disk_len % alignment)
1268 } else {
1269 0
1270 };
Iliyan Malcheved149862020-04-17 23:57:47 +00001271 disk_len
1272 .checked_add(align_adjust)
1273 .ok_or(Error::PmemDeviceImageTooBig)?
Jakub Starona3411ea2019-04-24 10:55:25 -07001274 };
1275
1276 let protection = {
1277 if disk.read_only {
1278 Protection::read()
1279 } else {
1280 Protection::read_write()
1281 }
1282 };
1283
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001284 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001285 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001286 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001287
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001288 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1289 arena
Iliyan Malcheved149862020-04-17 23:57:47 +00001290 .add_fd_offset_protection(0, arena_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001291 .map_err(Error::ReservePmemMemory)?;
1292 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001293 };
1294
1295 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001296 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001297 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001298 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001299 Alloc::PmemDevice(index),
1300 format!("pmem_disk_image_{}", index),
1301 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1302 128 * 1024 * 1024, /* 128 MiB */
1303 )
1304 .map_err(Error::AllocatePmemDeviceAddress)?;
1305
Daniel Verkampe1980a92020-02-07 11:00:55 -08001306 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001307 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001308 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001309 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001310 /* read_only = */ disk.read_only,
1311 /* log_dirty_pages = */ false,
1312 )
1313 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001314
Daniel Verkampe1980a92020-02-07 11:00:55 -08001315 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001316 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001317 fd,
1318 GuestAddress(mapping_address),
1319 slot,
1320 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001321 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001322 )
1323 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001324
1325 Ok(VirtioDeviceStub {
1326 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Matt Delco45caf912019-11-13 08:11:09 -08001327 jail: simple_jail(&cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001328 })
1329}
1330
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001331fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001332 let mut keep_rds = Vec::new();
Michael Hoyle685316f2020-09-16 15:29:20 -07001333 let evt = Event::new().map_err(Error::CreateEvent)?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001334 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001335 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001336 .map_err(Error::CreateConsole)?;
1337
Nicholas Verne71e73d82020-07-08 17:19:55 +10001338 let jail = match simple_jail(&cfg, "serial")? {
1339 Some(mut jail) => {
1340 // Create a tmpfs in the device's root directory so that we can bind mount the
1341 // log socket directory into it.
1342 // The size=67108864 is size=64*1024*1024 or size=64MB.
1343 jail.mount_with_data(
1344 Path::new("none"),
1345 Path::new("/"),
1346 "tmpfs",
1347 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1348 "size=67108864",
1349 )?;
1350 add_crosvm_user_to_jail(&mut jail, "serial")?;
1351 let res = param.add_bind_mounts(&mut jail);
1352 if res.is_err() {
1353 error!("failed to add bind mounts for console device");
1354 }
1355 Some(jail)
1356 }
1357 None => None,
1358 };
1359
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001360 Ok(VirtioDeviceStub {
1361 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001362 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001363 })
1364}
1365
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001366// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001367#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001368fn create_virtio_devices(
1369 cfg: &Config,
Zach Reizner55a9e502018-10-03 10:22:32 -07001370 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001371 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001372 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001373 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001374 wayland_device_tube: Tube,
1375 gpu_device_tube: Tube,
1376 balloon_device_tube: Tube,
1377 disk_device_tubes: &mut Vec<Tube>,
1378 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001379 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001380 fs_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001381) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001382 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001383
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001384 for (_, param) in cfg
1385 .serial_parameters
1386 .iter()
1387 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1388 {
1389 let dev = create_console_device(cfg, param)?;
1390 devs.push(dev);
1391 }
1392
Zach Reizner8fb52112017-12-13 16:04:39 -08001393 for disk in &cfg.disks {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001394 let disk_device_tube = disk_device_tubes.remove(0);
1395 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001396 }
1397
Keiichi Watanabef3a37f42021-01-21 15:41:11 +09001398 for blk in &cfg.vhost_user_blk {
1399 devs.push(create_vhost_user_block_device(cfg, blk)?);
1400 }
1401
Jakub Starona3411ea2019-04-24 10:55:25 -07001402 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001403 let pmem_device_tube = pmem_device_tubes.remove(0);
Daniel Verkampe1980a92020-02-07 11:00:55 -08001404 devs.push(create_pmem_device(
1405 cfg,
1406 vm,
1407 resources,
1408 pmem_disk,
1409 index,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001410 pmem_device_tube,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001411 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001412 }
1413
David Tolnay2b089fc2019-03-04 15:33:22 -08001414 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001415
David Tolnayde6b29a2018-12-20 11:49:46 -08001416 #[cfg(feature = "tpm")]
1417 {
David Tolnay43f8e212019-02-13 17:28:16 -08001418 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001419 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001420 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001421 }
1422
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001423 if let Some(single_touch_spec) = &cfg.virtio_single_touch {
1424 devs.push(create_single_touch_device(cfg, single_touch_spec)?);
1425 }
1426
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001427 if let Some(multi_touch_spec) = &cfg.virtio_multi_touch {
1428 devs.push(create_multi_touch_device(cfg, multi_touch_spec)?);
1429 }
1430
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001431 if let Some(trackpad_spec) = &cfg.virtio_trackpad {
David Tolnay2b089fc2019-03-04 15:33:22 -08001432 devs.push(create_trackpad_device(cfg, trackpad_spec)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001433 }
1434
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001435 if let Some(mouse_socket) = &cfg.virtio_mouse {
David Tolnay2b089fc2019-03-04 15:33:22 -08001436 devs.push(create_mouse_device(cfg, mouse_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001437 }
1438
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001439 if let Some(keyboard_socket) = &cfg.virtio_keyboard {
David Tolnay2b089fc2019-03-04 15:33:22 -08001440 devs.push(create_keyboard_device(cfg, keyboard_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001441 }
1442
Daniel Norman5e23df72021-03-11 10:11:02 -08001443 if let Some(switches_socket) = &cfg.virtio_switches {
1444 devs.push(create_switches_device(cfg, switches_socket)?);
1445 }
1446
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001447 for dev_path in &cfg.virtio_input_evdevs {
David Tolnay2b089fc2019-03-04 15:33:22 -08001448 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001449 }
1450
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001451 devs.push(create_balloon_device(cfg, balloon_device_tube)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001452
Zach Reizner39aa26b2017-12-12 18:03:23 -08001453 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001454 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -08001455 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001456 }
1457
David Tolnay2b089fc2019-03-04 15:33:22 -08001458 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1459 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1460 {
Keiichi Watanabe60686582021-03-12 04:53:51 +09001461 if !cfg.vhost_user_net.is_empty() {
1462 return Err(Error::VhostUserNetWithNetArgs);
1463 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001464 devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001465 }
1466
Keiichi Watanabe60686582021-03-12 04:53:51 +09001467 for net in &cfg.vhost_user_net {
1468 devs.push(create_vhost_user_net_device(cfg, net)?);
1469 }
1470
David Tolnayfa701712019-02-13 16:42:54 -08001471 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001472 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001473
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001474 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001475 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001476 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001477
1478 #[cfg(feature = "gpu")]
1479 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001480 if cfg.gpu_parameters.is_some() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001481 let (wl_socket, gpu_socket) = Tube::pair().map_err(Error::CreateTube)?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001482 resource_bridges.push(gpu_socket);
1483 wl_resource_bridge = Some(wl_socket);
1484 }
1485 }
1486
1487 devs.push(create_wayland_device(
1488 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001489 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001490 wl_resource_bridge,
1491 )?);
1492 }
David Tolnayfa701712019-02-13 16:42:54 -08001493
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001494 #[cfg(feature = "video-decoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001495 let video_dec_tube = if cfg.video_dec {
1496 let (video_tube, gpu_tube) = Tube::pair().map_err(Error::CreateTube)?;
1497 resource_bridges.push(gpu_tube);
1498 Some(video_tube)
1499 } else {
1500 None
1501 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001502
1503 #[cfg(feature = "video-encoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001504 let video_enc_tube = if cfg.video_enc {
1505 let (video_tube, gpu_tube) = Tube::pair().map_err(Error::CreateTube)?;
1506 resource_bridges.push(gpu_tube);
1507 Some(video_tube)
1508 } else {
1509 None
1510 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001511
Zach Reizner3a8100a2017-09-13 19:15:43 -07001512 #[cfg(feature = "gpu")]
1513 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001514 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Zach Reizner65b98f12019-11-22 17:34:58 -08001515 let mut event_devices = Vec::new();
1516 if cfg.display_window_mouse {
1517 let (event_device_socket, virtio_dev_socket) =
1518 UnixStream::pair().map_err(Error::CreateSocket)?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001519 let (multi_touch_width, multi_touch_height) = cfg
1520 .virtio_multi_touch
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001521 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001522 .map(|multi_touch_spec| multi_touch_spec.get_size())
Noah Golddc7f52b2020-02-01 13:01:58 -08001523 .unwrap_or((gpu_parameters.display_width, gpu_parameters.display_height));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001524 let dev = virtio::new_multi_touch(
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001525 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001526 multi_touch_width,
1527 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001528 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001529 )
1530 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001531 devs.push(VirtioDeviceStub {
1532 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001533 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001534 });
1535 event_devices.push(EventDevice::touchscreen(event_device_socket));
1536 }
1537 if cfg.display_window_keyboard {
1538 let (event_device_socket, virtio_dev_socket) =
1539 UnixStream::pair().map_err(Error::CreateSocket)?;
Noah Goldd4ca29b2020-10-27 12:21:52 -07001540 let dev = virtio::new_keyboard(
1541 virtio_dev_socket,
1542 virtio::base_features(cfg.protected_vm),
1543 )
1544 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001545 devs.push(VirtioDeviceStub {
1546 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001547 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001548 });
1549 event_devices.push(EventDevice::keyboard(event_device_socket));
1550 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001551 devs.push(create_gpu_device(
1552 cfg,
1553 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001554 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001555 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001556 // Use the unnamed socket for GPU display screens.
1557 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001558 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001559 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001560 map_request,
Gurchetan Singhdb174782019-10-01 15:16:15 -07001561 mem,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001562 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001563 }
1564 }
1565
Daniel Verkampffb59122021-03-18 14:06:15 -07001566 #[cfg(feature = "video-decoder")]
1567 {
1568 if let Some(video_dec_tube) = video_dec_tube {
1569 register_video_device(
1570 &mut devs,
1571 video_dec_tube,
1572 cfg,
1573 devices::virtio::VideoDeviceType::Decoder,
1574 )?;
1575 }
1576 }
1577
1578 #[cfg(feature = "video-encoder")]
1579 {
1580 if let Some(video_enc_tube) = video_enc_tube {
1581 register_video_device(
1582 &mut devs,
1583 video_enc_tube,
1584 cfg,
1585 devices::virtio::VideoDeviceType::Encoder,
1586 )?;
1587 }
1588 }
1589
Zach Reizneraa575662018-08-15 10:46:32 -07001590 if let Some(cid) = cfg.cid {
David Tolnay2b089fc2019-03-04 15:33:22 -08001591 devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001592 }
1593
Woody Chow5890b702021-02-12 14:57:02 +09001594 for vhost_user_fs in &cfg.vhost_user_fs {
1595 devs.push(create_vhost_user_fs_device(cfg, &vhost_user_fs)?);
1596 }
1597
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001598 for shared_dir in &cfg.shared_dirs {
1599 let SharedDir {
1600 src,
1601 tag,
1602 kind,
1603 uid_map,
1604 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001605 fs_cfg,
1606 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001607 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001608
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001609 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001610 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001611 let device_tube = fs_device_tubes.remove(0);
1612 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001613 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001614 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001615 };
1616 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001617 }
1618
1619 Ok(devs)
1620}
1621
1622fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001623 cfg: &Config,
David Tolnay2b089fc2019-03-04 15:33:22 -08001624 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001625 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001626 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001627 exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001628 control_tubes: &mut Vec<TaggedControlTube>,
1629 wayland_device_tube: Tube,
1630 gpu_device_tube: Tube,
1631 balloon_device_tube: Tube,
1632 disk_device_tubes: &mut Vec<Tube>,
1633 pmem_device_tubes: &mut Vec<Tube>,
1634 fs_device_tubes: &mut Vec<Tube>,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001635 usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001636 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001637) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001638 let stubs = create_virtio_devices(
1639 &cfg,
1640 mem,
Jakub Starona3411ea2019-04-24 10:55:25 -07001641 vm,
1642 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001643 exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001644 wayland_device_tube,
1645 gpu_device_tube,
1646 balloon_device_tube,
1647 disk_device_tubes,
1648 pmem_device_tubes,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001649 map_request,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001650 fs_device_tubes,
David Tolnay2b089fc2019-03-04 15:33:22 -08001651 )?;
1652
1653 let mut pci_devices = Vec::new();
1654
1655 for stub in stubs {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001656 let (msi_host_tube, msi_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
1657 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
1658 let dev = VirtioPciDevice::new(mem.clone(), stub.dev, msi_device_tube)
Daniel Verkampbb712d62019-11-19 09:47:33 -08001659 .map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -08001660 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001661 pci_devices.push((dev, stub.jail));
1662 }
1663
Andrew Scull1590e6f2020-03-18 18:00:47 +00001664 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001665 for ac97_param in &cfg.ac97_parameters {
1666 let dev = Ac97Dev::try_new(mem.clone(), ac97_param.clone()).map_err(Error::CreateAc97)?;
paulhsiace17e6e2020-08-28 18:37:45 +08001667 let jail = simple_jail(&cfg, dev.minijail_policy())?;
1668 pci_devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001669 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001670
Jingkui Wang100e6e42019-03-08 20:41:57 -08001671 // Create xhci controller.
1672 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
Matt Delco45caf912019-11-13 08:11:09 -08001673 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001674
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001675 if !cfg.vfio.is_empty() {
Xiong Zhangea6cf662019-11-11 18:32:02 +08001676 let vfio_container = Arc::new(Mutex::new(
1677 VfioContainer::new().map_err(Error::CreateVfioDevice)?,
1678 ));
1679
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001680 for vfio_path in &cfg.vfio {
Daniel Verkamp10154a92020-09-28 17:44:40 -07001681 // create MSI, MSI-X, and Mem request sockets for each vfio device
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001682 let (vfio_host_tube_msi, vfio_device_tube_msi) =
1683 Tube::pair().map_err(Error::CreateTube)?;
1684 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
Daniel Verkamp10154a92020-09-28 17:44:40 -07001685
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001686 let (vfio_host_tube_msix, vfio_device_tube_msix) =
1687 Tube::pair().map_err(Error::CreateTube)?;
1688 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001689
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001690 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1691 Tube::pair().map_err(Error::CreateTube)?;
1692 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
Xiong Zhang85abeff2019-04-23 17:15:24 +08001693
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001694 let vfiodevice = VfioDevice::new(vfio_path.as_path(), vm, mem, vfio_container.clone())
1695 .map_err(Error::CreateVfioDevice)?;
Tomasz Jeznach502b5de2021-02-03 21:45:47 -08001696 let mut vfiopcidevice = Box::new(VfioPciDevice::new(
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001697 vfiodevice,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001698 vfio_device_tube_msi,
1699 vfio_device_tube_msix,
1700 vfio_device_tube_mem,
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001701 ));
Tomasz Jeznach502b5de2021-02-03 21:45:47 -08001702 // early reservation for pass-through PCI devices.
1703 if vfiopcidevice.allocate_address(resources).is_err() {
1704 warn!(
1705 "address reservation failed for vfio {}",
1706 vfiopcidevice.debug_label()
1707 );
1708 }
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001709 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device")?));
1710 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001711 }
1712
David Tolnay2b089fc2019-03-04 15:33:22 -08001713 Ok(pci_devices)
1714}
1715
1716#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001717#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001718struct Ids {
1719 uid: uid_t,
1720 gid: gid_t,
1721}
1722
David Tolnay48c48292019-03-01 16:54:25 -08001723// Set the uid/gid for the jailed process and give a basic id map. This is
1724// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001725fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001726 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1727
1728 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1729 Ok(u) => u,
1730 Err(e) => {
1731 warn!("falling back to current user id for {}: {}", feature, e);
1732 geteuid()
1733 }
1734 };
1735
1736 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1737 Ok(u) => u,
1738 Err(e) => {
1739 warn!("falling back to current group id for {}: {}", feature, e);
1740 getegid()
1741 }
1742 };
1743
1744 jail.change_uid(crosvm_uid);
1745 jail.change_gid(crosvm_gid);
1746 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1747 .map_err(Error::SettingUidMap)?;
1748 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1749 .map_err(Error::SettingGidMap)?;
1750
David Tolnay41a6f842019-03-01 16:18:44 -08001751 Ok(Ids {
1752 uid: crosvm_uid,
1753 gid: crosvm_gid,
1754 })
David Tolnay48c48292019-03-01 16:54:25 -08001755}
1756
Michael Hoylea596a072020-11-10 19:32:45 -08001757fn raw_descriptor_from_path(path: &Path) -> Result<RawDescriptor> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001758 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001759 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001760 }
Michael Hoylea596a072020-11-10 19:32:45 -08001761 let raw_descriptor = path
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001762 .file_name()
1763 .and_then(|fd_osstr| fd_osstr.to_str())
1764 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1765 .ok_or(Error::InvalidFdPath)?;
Michael Hoylea596a072020-11-10 19:32:45 -08001766 validate_raw_descriptor(raw_descriptor).map_err(Error::ValidateRawDescriptor)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001767}
1768
Zach Reizner65b98f12019-11-22 17:34:58 -08001769trait IntoUnixStream {
1770 fn into_unix_stream(self) -> Result<UnixStream>;
1771}
1772
1773impl<'a> IntoUnixStream for &'a Path {
1774 fn into_unix_stream(self) -> Result<UnixStream> {
1775 if self.parent() == Some(Path::new("/proc/self/fd")) {
1776 // Safe because we will validate |raw_fd|.
Michael Hoylea596a072020-11-10 19:32:45 -08001777 unsafe { Ok(UnixStream::from_raw_fd(raw_descriptor_from_path(self)?)) }
Zach Reizner65b98f12019-11-22 17:34:58 -08001778 } else {
1779 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1780 }
1781 }
1782}
1783impl<'a> IntoUnixStream for &'a PathBuf {
1784 fn into_unix_stream(self) -> Result<UnixStream> {
1785 self.as_path().into_unix_stream()
1786 }
1787}
1788
1789impl IntoUnixStream for UnixStream {
1790 fn into_unix_stream(self) -> Result<UnixStream> {
1791 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001792 }
1793}
1794
Steven Richmanf32d0b42020-06-20 21:45:32 -07001795fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1796 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001797 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001798 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07001799 // Our signal handler does nothing and is trivially async signal safe.
1800 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1801 .map_err(Error::RegisterSignalHandler)?;
1802 }
1803 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1804 } else {
1805 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001806 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001807 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001808 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001809 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001810 .map_err(Error::RegisterSignalHandler)?;
1811 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001812 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001813 Ok(())
1814}
1815
Steven Richmanf32d0b42020-06-20 21:45:32 -07001816// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001817fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001818 cpu_id: usize,
1819 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001820 vm: impl VmArch,
1821 irq_chip: &mut impl IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001822 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001823 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001824 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001825 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001826 has_bios: bool,
1827 use_hypervisor_signals: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001828) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001829where
Zach Reizner2c770e62020-09-30 16:49:59 -07001830 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001831{
Zach Reizner304e7312020-09-29 16:00:24 -07001832 let mut vcpu = match vcpu {
1833 Some(v) => v,
1834 None => {
1835 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1836 // the vcpu thread.
1837 match vm
1838 .create_vcpu(cpu_id)
1839 .map_err(Error::CreateVcpu)?
1840 .downcast::<V>()
1841 {
1842 Ok(v) => *v,
1843 Err(_) => panic!("VM created wrong type of VCPU"),
1844 }
1845 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001846 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001847
Steven Richmanf32d0b42020-06-20 21:45:32 -07001848 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001849 .add_vcpu(cpu_id, &vcpu)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001850 .map_err(Error::AddIrqChipVcpu)?;
1851
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001852 if !vcpu_affinity.is_empty() {
1853 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1854 error!("Failed to set CPU affinity: {}", e);
1855 }
1856 }
1857
Steven Richmanf32d0b42020-06-20 21:45:32 -07001858 Arch::configure_vcpu(
1859 vm.get_memory(),
1860 vm.get_hypervisor(),
1861 irq_chip,
1862 &mut vcpu,
1863 cpu_id,
1864 vcpu_count,
1865 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001866 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001867 )
1868 .map_err(Error::ConfigureVcpu)?;
1869
Steven Richmanf32d0b42020-06-20 21:45:32 -07001870 #[cfg(feature = "chromeos")]
1871 if let Err(e) = base::sched::enable_core_scheduling() {
1872 error!("Failed to enable core scheduling: {}", e);
1873 }
1874
Kansho Nishidaab205af2020-08-13 18:17:50 +09001875 if run_rt {
1876 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1877 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1878 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1879 {
1880 warn!("Failed to set vcpu to real time: {}", e);
1881 }
1882 }
1883
Steven Richmanf32d0b42020-06-20 21:45:32 -07001884 if use_hypervisor_signals {
1885 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1886 v.retain(|&x| x != SIGRTMIN() + 0);
1887 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1888 }
1889
Zach Reizner2c770e62020-09-30 16:49:59 -07001890 let vcpu_run_handle = vcpu
1891 .take_run_handle(Some(SIGRTMIN() + 0))
1892 .map_err(Error::RunnableVcpu)?;
1893
1894 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001895}
1896
// Executes one debugger request `d` against `vcpu` and sends the outcome to `reply_tube`.
//
// Every request produces exactly one `VcpuDebugStatusMessage` tagged with `cpu_id`:
// register reads reply with `RegValues`, memory reads with `MemoryRegion`, and all other
// requests with `CommandComplete`.
//
// A failing memory read is not an error: it is answered with an empty region. Any other failing
// arch operation returns `Error::HandleDebugCommand` without replying, and a failed send returns
// `Error::SendDebugStatus`.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // First perform the requested operation and work out which status to report.
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu).map_err(Error::HandleDebugCommand)?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => VcpuDebugStatus::MemoryRegion(
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
        ),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
    };

    // Then send the single reply.
    reply_tube
        .send(VcpuDebugStatusMessage {
            cpu: cpu_id,
            msg: status,
        })
        .map_err(|e| Error::SendDebugStatus(Box::new(e)))
}
1972
Zach Reizner2c770e62020-09-30 16:49:59 -07001973fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001974 cpu_id: usize,
1975 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001976 vm: impl VmArch + 'static,
1977 mut irq_chip: impl IrqChipArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001978 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001979 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001980 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001981 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001982 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001983 has_bios: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001984 io_bus: devices::Bus,
1985 mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07001986 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001987 requires_pvclock_ctrl: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001988 from_main_tube: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001989 use_hypervisor_signals: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001990 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001991 mpsc::Sender<VcpuDebugStatusMessage>,
1992 >,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001993) -> Result<JoinHandle<()>>
1994where
Zach Reizner2c770e62020-09-30 16:49:59 -07001995 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001996{
Zach Reizner8fb52112017-12-13 16:04:39 -08001997 thread::Builder::new()
1998 .name(format!("crosvm_vcpu{}", cpu_id))
1999 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08002000 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
2001 // implementation accomplishes that.
2002 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
2003
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002004 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2005 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07002006 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002007 cpu_id,
2008 vcpu,
2009 vm,
2010 &mut irq_chip,
2011 vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002012 run_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002013 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002014 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002015 has_bios,
2016 use_hypervisor_signals,
2017 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002018
Zach Reizner8fb52112017-12-13 16:04:39 -08002019 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002020
Zach Reizner2c770e62020-09-30 16:49:59 -07002021 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002022 Ok(v) => v,
2023 Err(e) => {
2024 error!("failed to start vcpu {}: {}", cpu_id, e);
2025 return;
2026 }
2027 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002028
Dylan Reidb0492662019-05-17 14:50:13 -07002029 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002030 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002031 if to_gdb_tube.is_some() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002032 // Wait until a GDB client attaches
2033 run_mode = VmRunMode::Breakpoint;
2034 }
2035
Dylan Reidb0492662019-05-17 14:50:13 -07002036 let mut interrupted_by_signal = false;
2037
2038 'vcpu_loop: loop {
2039 // Start by checking for messages to process and the run state of the CPU.
2040 // An extra check here for Running so there isn't a need to call recv unless a
2041 // message is likely to be ready because a signal was sent.
2042 if interrupted_by_signal || run_mode != VmRunMode::Running {
2043 'state_loop: loop {
2044 // Tries to get a pending message without blocking first.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002045 let msg = match from_main_tube.try_recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002046 Ok(m) => m,
2047 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
2048 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002049 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07002050 break 'state_loop;
2051 }
2052 Err(mpsc::TryRecvError::Empty) => {
2053 // If the VM is not running, wait until a message is ready.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002054 match from_main_tube.recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002055 Ok(m) => m,
2056 Err(mpsc::RecvError) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002057 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002058 break 'vcpu_loop;
2059 }
2060 }
2061 }
2062 Err(mpsc::TryRecvError::Disconnected) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002063 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002064 break 'vcpu_loop;
2065 }
2066 };
2067
2068 // Collect all pending messages.
2069 let mut messages = vec![msg];
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002070 messages.append(&mut from_main_tube.try_iter().collect());
Dylan Reidb0492662019-05-17 14:50:13 -07002071
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002072 for msg in messages {
2073 match msg {
2074 VcpuControl::RunState(new_mode) => {
2075 run_mode = new_mode;
2076 match run_mode {
2077 VmRunMode::Running => break 'state_loop,
2078 VmRunMode::Suspending => {
2079 // On KVM implementations that use a paravirtualized
2080 // clock (e.g. x86), a flag must be set to indicate to
2081 // the guest kernel that a vCPU was suspended. The guest
2082 // kernel will use this flag to prevent the soft lockup
2083 // detection from triggering when this vCPU resumes,
2084 // which could happen days later in realtime.
2085 if requires_pvclock_ctrl {
2086 if let Err(e) = vcpu.pvclock_ctrl() {
2087 error!(
2088 "failed to tell hypervisor vcpu {} is suspending: {}",
2089 cpu_id, e
2090 );
2091 }
2092 }
2093 }
2094 VmRunMode::Breakpoint => {}
2095 VmRunMode::Exiting => break 'vcpu_loop,
2096 }
2097 }
2098 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2099 VcpuControl::Debug(d) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002100 match &to_gdb_tube {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002101 Some(ref ch) => {
2102 if let Err(e) = handle_debug_msg(
2103 cpu_id, &vcpu, &guest_mem, d, &ch,
2104 ) {
2105 error!("Failed to handle gdb message: {}", e);
2106 }
2107 },
2108 None => {
2109 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07002110 }
2111 }
2112 }
Dylan Reidb0492662019-05-17 14:50:13 -07002113 }
2114 }
2115 }
2116 }
2117
2118 interrupted_by_signal = false;
2119
Steven Richman11dc6712020-09-02 15:39:14 -07002120 // Vcpus may have run a HLT instruction, which puts them into a state other than
2121 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
2122 // until either the irqchip receives an interrupt for this vcpu, or until the main
2123 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
2124 // implementations HLT instructions do not make it to crosvm, and thus this is a
2125 // no-op that always returns VcpuRunState::Runnable.
2126 match irq_chip.wait_until_runnable(&vcpu) {
2127 Ok(VcpuRunState::Runnable) => {}
2128 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
2129 Err(e) => error!(
2130 "error waiting for vcpu {} to become runnable: {}",
2131 cpu_id, e
2132 ),
2133 }
2134
2135 if !interrupted_by_signal {
2136 match vcpu.run(&vcpu_run_handle) {
2137 Ok(VcpuExit::IoIn { port, mut size }) => {
2138 let mut data = [0; 8];
2139 if size > data.len() {
2140 error!("unsupported IoIn size of {} bytes", size);
2141 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002142 }
Steven Richman11dc6712020-09-02 15:39:14 -07002143 io_bus.read(port as u64, &mut data[..size]);
2144 if let Err(e) = vcpu.set_data(&data[..size]) {
2145 error!("failed to set return data for IoIn: {}", e);
2146 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002147 }
Steven Richman11dc6712020-09-02 15:39:14 -07002148 Ok(VcpuExit::IoOut {
2149 port,
2150 mut size,
2151 data,
2152 }) => {
2153 if size > data.len() {
2154 error!("unsupported IoOut size of {} bytes", size);
2155 size = data.len();
2156 }
2157 io_bus.write(port as u64, &data[..size]);
2158 }
2159 Ok(VcpuExit::MmioRead { address, size }) => {
2160 let mut data = [0; 8];
2161 mmio_bus.read(address, &mut data[..size]);
2162 // Setting data for mmio can not fail.
2163 let _ = vcpu.set_data(&data[..size]);
2164 }
2165 Ok(VcpuExit::MmioWrite {
2166 address,
2167 size,
2168 data,
2169 }) => {
2170 mmio_bus.write(address, &data[..size]);
2171 }
2172 Ok(VcpuExit::IoapicEoi { vector }) => {
2173 if let Err(e) = irq_chip.broadcast_eoi(vector) {
2174 error!(
2175 "failed to broadcast eoi {} on vcpu {}: {}",
2176 vector, cpu_id, e
2177 );
2178 }
2179 }
2180 Ok(VcpuExit::IrqWindowOpen) => {}
2181 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
2182 Ok(VcpuExit::Shutdown) => break,
2183 Ok(VcpuExit::FailEntry {
2184 hardware_entry_failure_reason,
2185 }) => {
2186 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002187 break;
2188 }
Steven Richman11dc6712020-09-02 15:39:14 -07002189 Ok(VcpuExit::SystemEvent(_, _)) => break,
2190 Ok(VcpuExit::Debug { .. }) => {
2191 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2192 {
2193 let msg = VcpuDebugStatusMessage {
2194 cpu: cpu_id as usize,
2195 msg: VcpuDebugStatus::HitBreakPoint,
2196 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002197 if let Some(ref ch) = to_gdb_tube {
Steven Richman11dc6712020-09-02 15:39:14 -07002198 if let Err(e) = ch.send(msg) {
2199 error!("failed to notify breakpoint to GDB thread: {}", e);
2200 break;
2201 }
2202 }
2203 run_mode = VmRunMode::Breakpoint;
2204 }
2205 }
2206 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
2207 Err(e) => match e.errno() {
2208 libc::EINTR => interrupted_by_signal = true,
2209 libc::EAGAIN => {}
2210 _ => {
2211 error!("vcpu hit unknown error: {}", e);
2212 break;
2213 }
2214 },
2215 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002216 }
2217
2218 if interrupted_by_signal {
2219 if use_hypervisor_signals {
2220 // Try to clear the signal that we use to kick VCPU if it is pending before
2221 // attempting to handle pause requests.
2222 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2223 error!("failed to clear pending signal: {}", e);
2224 break;
2225 }
2226 } else {
2227 vcpu.set_immediate_exit(false);
2228 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002229 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002230
Steven Richman11dc6712020-09-02 15:39:14 -07002231 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2232 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2233 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002234 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002235 })
2236 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08002237}
2238
// Reads the contents of a file and converts the whitespace-separated fields into a Vec of i64s.
//
// The whole file is read, so fields are never cut off at a buffer boundary. An empty or
// whitespace-only file yields an empty Vec.
//
// Returns an error if the file cannot be opened or read, and an `InvalidData` error if the
// contents are not valid UTF-8 or any of the fields fail to parse.
fn file_fields_to_i64<P: AsRef<Path>>(path: P) -> io::Result<Vec<i64>> {
    let mut bytes = Vec::new();
    File::open(path)?.read_to_end(&mut bytes)?;

    let content =
        str::from_utf8(&bytes).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
    // `split_whitespace` already ignores leading and trailing whitespace.
    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<i64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
2258
2259// Reads the contents of a file and converts them into a u64, and if there
2260// are multiple fields it only returns the first one.
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002261fn file_to_i64<P: AsRef<Path>>(path: P, nth: usize) -> io::Result<i64> {
Charles William Dick0bf8a552019-10-29 15:36:01 +09002262 file_fields_to_i64(path)?
Sonny Raod5f66082019-04-24 12:24:38 -07002263 .into_iter()
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002264 .nth(nth)
Sonny Raod5f66082019-04-24 12:24:38 -07002265 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002266}
2267
Steven Richmanf32d0b42020-06-20 21:45:32 -07002268fn create_kvm_kernel_irq_chip(
2269 vm: &KvmVm,
2270 vcpu_count: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002271 _ioapic_device_tube: Tube,
Zach Reizner304e7312020-09-29 16:00:24 -07002272) -> base::Result<impl IrqChipArch> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002273 let irq_chip = KvmKernelIrqChip::new(vm.try_clone()?, vcpu_count)?;
2274 Ok(irq_chip)
2275}
2276
2277#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2278fn create_kvm_split_irq_chip(
2279 vm: &KvmVm,
2280 vcpu_count: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002281 ioapic_device_tube: Tube,
Zach Reizner304e7312020-09-29 16:00:24 -07002282) -> base::Result<impl IrqChipArch> {
Tomasz Jeznacheb1114c2021-02-26 20:33:11 -08002283 let irq_chip =
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002284 KvmSplitIrqChip::new(vm.try_clone()?, vcpu_count, ioapic_device_tube, Some(120))?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002285 Ok(irq_chip)
2286}
2287
Dylan Reid059a1882018-07-23 17:58:09 -07002288pub fn run_config(cfg: Config) -> Result<()> {
Zach Reiznera90649a2021-03-31 12:56:08 -07002289 let components = setup_vm_components(&cfg)?;
2290
2291 let guest_mem_layout =
2292 Arch::guest_memory_layout(&components).map_err(Error::GuestMemoryLayout)?;
2293 let guest_mem = GuestMemory::new(&guest_mem_layout).unwrap();
2294 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).map_err(Error::CreateKvm)?;
2295 let vm = KvmVm::new(&kvm, guest_mem).map_err(Error::CreateVm)?;
2296
Steven Richmanf32d0b42020-06-20 21:45:32 -07002297 if cfg.split_irqchip {
2298 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
2299 {
2300 unimplemented!("KVM split irqchip mode only supported on x86 processors")
2301 }
2302
2303 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2304 {
Zach Reiznera90649a2021-03-31 12:56:08 -07002305 run_vm::<KvmVcpu, _, _, _>(cfg, components, vm, create_kvm_split_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07002306 }
2307 } else {
Zach Reiznera90649a2021-03-31 12:56:08 -07002308 run_vm::<KvmVcpu, _, _, _>(cfg, components, vm, create_kvm_kernel_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07002309 }
2310}
2311
Zach Reiznera90649a2021-03-31 12:56:08 -07002312fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002313 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
2314 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002315 } else {
2316 None
2317 };
2318
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002319 let vm_image = match cfg.executable_path {
2320 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
2321 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
2322 ),
2323 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
2324 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
2325 ),
2326 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2327 };
2328
Zach Reiznera90649a2021-03-31 12:56:08 -07002329 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002330 memory_size: cfg
2331 .memory
2332 .unwrap_or(256)
2333 .checked_mul(1024 * 1024)
2334 .ok_or(Error::MemoryTooLarge)?,
Dylan Reid059a1882018-07-23 17:58:09 -07002335 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002336 vcpu_affinity: cfg.vcpu_affinity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002337 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002338 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002339 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002340 android_fstab: cfg
2341 .android_fstab
2342 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08002343 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002344 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002345 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002346 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002347 extra_kernel_params: cfg.params.clone(),
2348 wayland_dmabuf: cfg.wayland_dmabuf,
Tomasz Jeznach42644642020-05-20 23:27:59 -07002349 acpi_sdts: cfg
2350 .acpi_tables
2351 .iter()
2352 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
2353 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002354 rt_cpus: cfg.rt_cpus.clone(),
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002355 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002356 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002357 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002358 dmi_path: cfg.dmi_path.clone(),
Zach Reiznera90649a2021-03-31 12:56:08 -07002359 })
2360}
2361
2362fn run_vm<Vcpu, V, I, FI>(
2363 cfg: Config,
2364 #[allow(unused_mut)] mut components: VmComponents,
2365 vm: V,
2366 create_irq_chip: FI,
2367) -> Result<()>
2368where
2369 Vcpu: VcpuArch + 'static,
2370 V: VmArch + 'static,
2371 I: IrqChipArch + 'static,
2372 FI: FnOnce(
2373 &V,
2374 usize, // vcpu_count
2375 Tube, // ioapic_device_tube
2376 ) -> base::Result<I>,
2377{
2378 if cfg.sandbox {
2379 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2380 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2381 // access to those files will not be possible.
2382 info!("crosvm entering multiprocess mode");
2383 }
2384
2385 let (usb_control_tube, usb_provider) =
2386 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
2387 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2388 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2389 // quickly.
2390 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002391
Zach Reiznera60744b2019-02-13 17:33:32 -08002392 let control_server_socket = match &cfg.socket_path {
2393 Some(path) => Some(UnlinkUnixSeqpacketListener(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002394 UnixSeqpacketListener::bind(path).map_err(Error::CreateControlServer)?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002395 )),
2396 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002397 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002398
Zach Reiznera90649a2021-03-31 12:56:08 -07002399 let mut control_tubes = Vec::new();
2400
2401 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2402 if let Some(port) = cfg.gdb {
2403 // GDB needs a control socket to interrupt vcpus.
2404 let (gdb_host_tube, gdb_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
2405 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2406 components.gdb = Some((port, gdb_control_tube));
2407 }
2408
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002409 let (wayland_host_tube, wayland_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2410 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Dylan Reid059a1882018-07-23 17:58:09 -07002411 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002412 let (balloon_host_tube, balloon_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002413
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002414 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002415 let mut disk_device_tubes = Vec::new();
2416 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002417 let disk_count = cfg.disks.len();
2418 for _ in 0..disk_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002419 let (disk_host_tub, disk_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2420 disk_host_tubes.push(disk_host_tub);
2421 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002422 }
2423
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002424 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002425 let pmem_count = cfg.pmem_devices.len();
2426 for _ in 0..pmem_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002427 let (pmem_host_tube, pmem_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2428 pmem_device_tubes.push(pmem_device_tube);
2429 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08002430 }
2431
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002432 let (gpu_host_tube, gpu_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2433 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002434
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002435 let (ioapic_host_tube, ioapic_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2436 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002437
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002438 let battery = if cfg.battery_type.is_some() {
Alex Lauf408c732020-11-10 18:24:04 +09002439 let jail = match simple_jail(&cfg, "battery")? {
2440 #[cfg_attr(not(feature = "powerd-monitor-powerd"), allow(unused_mut))]
2441 Some(mut jail) => {
2442 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2443 #[cfg(feature = "power-monitor-powerd")]
2444 {
2445 add_crosvm_user_to_jail(&mut jail, "battery")?;
2446
2447 // Create a tmpfs in the device's root directory so that we can bind mount files.
2448 jail.mount_with_data(
2449 Path::new("none"),
2450 Path::new("/"),
2451 "tmpfs",
2452 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2453 "size=67108864",
2454 )?;
2455
2456 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2457 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2458 }
2459 Some(jail)
2460 }
2461 None => None,
2462 };
2463 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002464 } else {
2465 (&cfg.battery_type, None)
2466 };
2467
Gurchetan Singh293913c2020-12-09 10:44:13 -08002468 let gralloc = RutabagaGralloc::new().map_err(Error::CreateGrallocError)?;
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002469 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2470
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002471 let fs_count = cfg
2472 .shared_dirs
2473 .iter()
2474 .filter(|sd| sd.kind == SharedDirKind::FS)
2475 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002476 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002477 for _ in 0..fs_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002478 let (fs_host_tube, fs_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2479 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
2480 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002481 }
2482
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08002483 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002484 let mut linux: RunnableLinuxVm<_, Vcpu, _> = Arch::build_vm(
Trent Begin17ccaad2019-04-17 13:51:25 -06002485 components,
Trent Begin17ccaad2019-04-17 13:51:25 -06002486 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002487 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002488 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07002489 vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07002490 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06002491 create_devices(
2492 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07002493 mem,
2494 vm,
2495 sys_allocator,
2496 exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002497 &mut control_tubes,
2498 wayland_device_tube,
2499 gpu_device_tube,
2500 balloon_device_tube,
2501 &mut disk_device_tubes,
2502 &mut pmem_device_tubes,
2503 &mut fs_device_tubes,
Trent Begin17ccaad2019-04-17 13:51:25 -06002504 usb_provider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002505 Arc::clone(&map_request),
Trent Begin17ccaad2019-04-17 13:51:25 -06002506 )
2507 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002508 |vm, vcpu_count| create_irq_chip(vm, vcpu_count, ioapic_device_tube),
Trent Begin17ccaad2019-04-17 13:51:25 -06002509 )
David Tolnaybe034262019-03-04 17:48:36 -08002510 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08002511
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002512 #[cfg(feature = "direct")]
2513 if let Some(pmio) = &cfg.direct_pmio {
2514 let direct_io =
2515 Arc::new(devices::DirectIo::new(&pmio.path, false).map_err(Error::DirectIo)?);
2516 for range in pmio.ranges.iter() {
2517 linux
2518 .io_bus
2519 .insert_sync(direct_io.clone(), range.0, range.1)
2520 .unwrap();
2521 }
2522 };
2523
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002524 #[cfg(feature = "direct")]
2525 let mut irqs = Vec::new();
2526
2527 #[cfg(feature = "direct")]
2528 for irq in &cfg.direct_level_irq {
2529 if !linux.resources.reserve_irq(*irq) {
2530 warn!("irq {} already reserved.", irq);
2531 }
2532 let trigger = Event::new().map_err(Error::CreateEvent)?;
2533 let resample = Event::new().map_err(Error::CreateEvent)?;
2534 linux
2535 .irq_chip
2536 .register_irq_event(*irq, &trigger, Some(&resample))
2537 .unwrap();
2538 let direct_irq =
2539 devices::DirectIrq::new(trigger, Some(resample)).map_err(Error::DirectIrq)?;
2540 direct_irq.irq_enable(*irq).map_err(Error::DirectIrq)?;
2541 irqs.push(direct_irq);
2542 }
2543
2544 #[cfg(feature = "direct")]
2545 for irq in &cfg.direct_edge_irq {
2546 if !linux.resources.reserve_irq(*irq) {
2547 warn!("irq {} already reserved.", irq);
2548 }
2549 let trigger = Event::new().map_err(Error::CreateEvent)?;
2550 linux
2551 .irq_chip
2552 .register_irq_event(*irq, &trigger, None)
2553 .unwrap();
2554 let direct_irq = devices::DirectIrq::new(trigger, None).map_err(Error::DirectIrq)?;
2555 direct_irq.irq_enable(*irq).map_err(Error::DirectIrq)?;
2556 irqs.push(direct_irq);
2557 }
2558
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002559 run_control(
2560 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08002561 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002562 control_tubes,
2563 balloon_host_tube,
2564 &disk_host_tubes,
2565 usb_control_tube,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002566 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002567 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002568 Arc::clone(&map_request),
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002569 cfg.balloon_bias,
Gurchetan Singh293913c2020-12-09 10:44:13 -08002570 gralloc,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002571 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002572}
2573
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002574/// Signals all running VCPUs to vmexit, sends VmRunMode message to each VCPU tube, and tells
2575/// `irq_chip` to stop blocking halted VCPUs. The tube message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07002576/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
2577/// loop.
2578fn kick_all_vcpus(
2579 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
2580 irq_chip: &impl IrqChip,
2581 run_mode: &VmRunMode,
2582) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002583 for (handle, tube) in vcpu_handles {
2584 if let Err(e) = tube.send(VcpuControl::RunState(run_mode.clone())) {
Steven Richman11dc6712020-09-02 15:39:14 -07002585 error!("failed to send VmRunMode: {}", e);
2586 }
2587 let _ = handle.kill(SIGRTMIN() + 0);
2588 }
2589 irq_chip.kick_halted_vcpus();
2590}
2591
/// State used to decide what size the virtio balloon should be set to.
struct BalloonPolicy {
    /// Estimate for when the guest starts aggressively freeing memory.
    critical_guest_available: i64,
    /// ChromeOS critical margin on the host.
    critical_host_available: i64,
    /// Bias applied to the guest critical margin, scaled by how full the balloon is.
    guest_available_bias: i64,
    /// The largest the balloon has ever been observed.
    max_balloon_actual: i64,
    /// How full the balloon was, in percent, at the previous timestep.
    prev_balloon_full_percent: i64,
    /// Available memory in the guest at the previous timestep.
    prev_guest_available: i64,
}
2602
/// Number of bytes in one KiB.
const ONE_KB: i64 = 1024;
/// Number of bytes in one MiB.
const ONE_MB: i64 = ONE_KB * 1024;

/// File from which the host's available memory is read.
const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
/// File from which the host's critical memory margin is read.
const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin";
2608
2609// BalloonPolicy implements the virtio balloon sizing logic.
2610// The balloon is sized with the following heuristics:
2611// Balance Available
2612// The balloon is sized to balance the amount of available memory above a
2613// critical margin. The critical margin is the level at which memory is
2614// freed. In the host, this is the ChromeOS available critical margin, which
2615// is the trigger to kill tabs. In the guest, we estimate this level by
2616// tracking the minimum amount of available memory, discounting sharp
2617// 'valleys'. If the guest manages to keep available memory above a given
2618// level even with some pressure, then we determine that this is the
2619// 'critical' level for the guest. We don't update this critical value if
2620// the balloon is fully inflated because in that case, the guest may be out
2621// of memory to free.
2622// guest_available_bias
2623// Even if available memory is perfectly balanced between host and guest,
2624// The size of the balloon will still drift randomly depending on whether
2625// those host or guest reclaims memory first/faster every time memory is
2626// low. To encourage large balloons to shrink and small balloons to grow,
2627// the following bias is added to the guest critical margin:
2628// (guest_available_bias * balloon_full_percent) / 100
2629// This give the guest more memory when the balloon is full.
2630impl BalloonPolicy {
2631 fn new(
2632 memory_size: i64,
2633 critical_host_available: i64,
2634 guest_available_bias: i64,
2635 ) -> BalloonPolicy {
2636 // Estimate some reasonable initial maximum for balloon size.
2637 let max_balloon_actual = (memory_size * 3) / 4;
2638 // 400MB is above the zone min margin even for Crostini VMs on 16GB
2639 // devices (~85MB), and is above when Android Low Memory Killer kills
2640 // apps (~250MB).
2641 let critical_guest_available = 400 * ONE_MB;
2642
2643 BalloonPolicy {
2644 critical_guest_available,
2645 critical_host_available,
2646 guest_available_bias,
2647 max_balloon_actual,
2648 prev_balloon_full_percent: 0,
2649 prev_guest_available: 0,
2650 }
2651 }
2652 fn delta(&mut self, stats: BalloonStats, balloon_actual_u: u64) -> Result<i64> {
2653 let guest_free = stats
2654 .free_memory
2655 .map(i64::try_from)
2656 .ok_or(Error::GuestFreeMissing())?
2657 .map_err(Error::GuestFreeTooLarge)?;
2658 let guest_cached = stats
2659 .disk_caches
2660 .map(i64::try_from)
2661 .ok_or(Error::GuestFreeMissing())?
2662 .map_err(Error::GuestFreeTooLarge)?;
2663 let balloon_actual = match balloon_actual_u {
2664 size if size < i64::max_value() as u64 => size as i64,
2665 _ => return Err(Error::BalloonActualTooLarge),
2666 };
2667 let guest_available = guest_free + guest_cached;
2668 // Available memory is reported in MB, and we need bytes.
2669 let host_available =
2670 file_to_i64(LOWMEM_AVAILABLE, 0).map_err(Error::ReadMemAvailable)? * ONE_MB;
2671 if self.max_balloon_actual < balloon_actual {
2672 self.max_balloon_actual = balloon_actual;
2673 info!(
2674 "balloon updated max_balloon_actual to {} MiB",
2675 self.max_balloon_actual / ONE_MB,
2676 );
2677 }
2678 let balloon_full_percent = balloon_actual * 100 / self.max_balloon_actual;
2679 // Update critical_guest_available if we see a lower available with the
2680 // balloon not fully inflated. If the balloon is completely inflated
2681 // there is a risk that the low available level we see comes at the cost
2682 // of stability. The Linux OOM Killer might have been forced to kill
2683 // something important, or page reclaim was so aggressive that there are
2684 // long UI hangs.
2685 if guest_available < self.critical_guest_available && balloon_full_percent < 95 {
2686 // To ignore temporary low memory states, we require that two guest
2687 // available measurements in a row are low.
2688 if self.prev_guest_available < self.critical_guest_available
2689 && self.prev_balloon_full_percent < 95
2690 {
2691 self.critical_guest_available = self.prev_guest_available;
2692 info!(
2693 "balloon updated critical_guest_available to {} MiB",
2694 self.critical_guest_available / ONE_MB,
2695 );
2696 }
2697 }
2698
2699 // Compute the difference in available memory above the host and guest
2700 // critical thresholds.
2701 let bias = (self.guest_available_bias * balloon_full_percent) / 100;
2702 let guest_above_critical = guest_available - self.critical_guest_available - bias;
2703 let host_above_critical = host_available - self.critical_host_available;
2704 let balloon_delta = guest_above_critical - host_above_critical;
2705 // Only let the balloon take up MAX_CRITICAL_DELTA of available memory
2706 // below the critical level in host or guest.
2707 const MAX_CRITICAL_DELTA: i64 = 10 * ONE_MB;
2708 let balloon_delta_capped = if balloon_delta < 0 {
2709 // The balloon is deflating, taking memory from the host. Don't let
2710 // it take more than the amount of available memory above the
2711 // critical margin, plus MAX_CRITICAL_DELTA.
2712 max(
2713 balloon_delta,
2714 -(host_available - self.critical_host_available + MAX_CRITICAL_DELTA),
2715 )
2716 } else {
2717 // The balloon is inflating, taking memory from the guest. Don't let
2718 // it take more than the amount of available memory above the
2719 // critical margin, plus MAX_CRITICAL_DELTA.
2720 min(
2721 balloon_delta,
2722 guest_available - self.critical_guest_available + MAX_CRITICAL_DELTA,
2723 )
2724 };
2725
2726 self.prev_balloon_full_percent = balloon_full_percent;
2727 self.prev_guest_available = guest_available;
2728
2729 // Only return a value if target would change available above critical
2730 // by more than 1%, or we are within 1 MB of critical in host or guest.
2731 if guest_above_critical < ONE_MB
2732 || host_above_critical < ONE_MB
2733 || (balloon_delta.abs() * 100) / guest_above_critical > 1
2734 || (balloon_delta.abs() * 100) / host_above_critical > 1
2735 {
2736 // Finally, make sure the balloon delta won't cause a negative size.
2737 let result = max(balloon_delta_capped, -balloon_actual);
2738 if result != 0 {
2739 info!(
2740 "balloon delta={:<6} ha={:<6} hc={:<6} ga={:<6} gc={:<6} bias={:<6} full={:>3}%",
2741 result / ONE_MB,
2742 host_available / ONE_MB,
2743 self.critical_host_available / ONE_MB,
2744 guest_available / ONE_MB,
2745 self.critical_guest_available / ONE_MB,
2746 bias / ONE_MB,
2747 balloon_full_percent,
2748 );
2749 }
2750 return Ok(result);
2751 }
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00002752 Ok(0)
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002753 }
2754}
2755
Zach Reizner304e7312020-09-29 16:00:24 -07002756fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static, I: IrqChipArch + 'static>(
2757 mut linux: RunnableLinuxVm<V, Vcpu, I>,
Zach Reiznera60744b2019-02-13 17:33:32 -08002758 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002759 mut control_tubes: Vec<TaggedControlTube>,
2760 balloon_host_tube: Tube,
2761 disk_host_tubes: &[Tube],
2762 usb_control_tube: Tube,
Zach Reizner55a9e502018-10-03 10:22:32 -07002763 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002764 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002765 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002766 balloon_bias: i64,
Gurchetan Singh293913c2020-12-09 10:44:13 -08002767 mut gralloc: RutabagaGralloc,
Zach Reizner55a9e502018-10-03 10:22:32 -07002768) -> Result<()> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002769 #[derive(PollToken)]
2770 enum Token {
2771 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002772 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002773 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002774 IrqFd { index: IrqEventIndex },
Charles William Dick0bf8a552019-10-29 15:36:01 +09002775 BalanceMemory,
2776 BalloonResult,
Zach Reiznera60744b2019-02-13 17:33:32 -08002777 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002778 VmControl { index: usize },
2779 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002780
Zach Reizner19ad1f32019-12-12 18:58:50 -08002781 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002782 .set_raw_mode()
2783 .expect("failed to set terminal raw mode");
2784
Michael Hoylee392c462020-10-07 03:29:24 -07002785 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerb2110be2019-07-23 15:55:03 -07002786 (&linux.exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002787 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002788 (&sigchld_fd, Token::ChildSignal),
2789 ])
Michael Hoylee392c462020-10-07 03:29:24 -07002790 .map_err(Error::WaitContextAdd)?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07002791
Zach Reiznera60744b2019-02-13 17:33:32 -08002792 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07002793 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002794 .add(socket_server, Token::VmControlServer)
Michael Hoylee392c462020-10-07 03:29:24 -07002795 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002796 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002797 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07002798 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07002799 .add(socket.as_ref(), Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002800 .map_err(Error::WaitContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002801 }
2802
Steven Richmanf32d0b42020-06-20 21:45:32 -07002803 let events = linux
2804 .irq_chip
2805 .irq_event_tokens()
Michael Hoylee392c462020-10-07 03:29:24 -07002806 .map_err(Error::WaitContextAdd)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002807
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002808 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07002809 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002810 .add(&evt, Token::IrqFd { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002811 .map_err(Error::WaitContextAdd)?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002812 }
2813
Charles William Dick0bf8a552019-10-29 15:36:01 +09002814 // Balance available memory between guest and host every second.
Michael Hoyle08d86a42020-08-19 14:45:21 -07002815 let mut balancemem_timer = Timer::new().map_err(Error::CreateTimer)?;
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002816 let mut balloon_policy = if let Ok(critical_margin) = file_to_i64(LOWMEM_MARGIN, 0) {
Charles William Dick0bf8a552019-10-29 15:36:01 +09002817 // Create timer request balloon stats every 1s.
Michael Hoylee392c462020-10-07 03:29:24 -07002818 wait_ctx
Charles William Dick0bf8a552019-10-29 15:36:01 +09002819 .add(&balancemem_timer, Token::BalanceMemory)
Michael Hoylee392c462020-10-07 03:29:24 -07002820 .map_err(Error::WaitContextAdd)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002821 let balancemem_dur = Duration::from_secs(1);
2822 let balancemem_int = Duration::from_secs(1);
2823 balancemem_timer
2824 .reset(balancemem_dur, Some(balancemem_int))
Michael Hoyle08d86a42020-08-19 14:45:21 -07002825 .map_err(Error::ResetTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002826
2827 // Listen for balloon statistics from the guest so we can balance.
Michael Hoylee392c462020-10-07 03:29:24 -07002828 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002829 .add(&balloon_host_tube, Token::BalloonResult)
Michael Hoylee392c462020-10-07 03:29:24 -07002830 .map_err(Error::WaitContextAdd)?;
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002831 Some(BalloonPolicy::new(
2832 linux.vm.get_memory().memory_size() as i64,
2833 critical_margin * ONE_MB,
2834 balloon_bias,
2835 ))
Charles William Dick0bf8a552019-10-29 15:36:01 +09002836 } else {
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002837 warn!("Unable to open low mem margin, maybe not a chrome os kernel");
2838 None
2839 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002840
Lepton Wu20333e42019-03-14 10:48:03 -07002841 if sandbox {
2842 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2843 drop_capabilities().map_err(Error::DropCapabilities)?;
2844 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002845
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002846 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2847 // Create a channel for GDB thread.
2848 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
2849 let (s, r) = mpsc::channel();
2850 (Some(s), Some(r))
2851 } else {
2852 (None, None)
2853 };
2854
Steven Richmanf32d0b42020-06-20 21:45:32 -07002855 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2856 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07002857 let use_hypervisor_signals = !linux
2858 .vm
2859 .get_hypervisor()
2860 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07002861 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002862
Zach Reizner304e7312020-09-29 16:00:24 -07002863 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00002864 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002865 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2866 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07002867 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07002868 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07002869 let vcpu_affinity = match linux.vcpu_affinity.clone() {
2870 Some(VcpuAffinity::Global(v)) => v,
2871 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
2872 None => Default::default(),
2873 };
Zach Reizner55a9e502018-10-03 10:22:32 -07002874 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002875 cpu_id,
Zach Reizner55a9e502018-10-03 10:22:32 -07002876 vcpu,
Michael Hoyle685316f2020-09-16 15:29:20 -07002877 linux.vm.try_clone().map_err(Error::CloneEvent)?,
2878 linux.irq_chip.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002879 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002880 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07002881 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002882 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07002883 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002884 linux.has_bios,
Zach Reizner55a9e502018-10-03 10:22:32 -07002885 linux.io_bus.clone(),
2886 linux.mmio_bus.clone(),
Michael Hoyle685316f2020-09-16 15:29:20 -07002887 linux.exit_evt.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002888 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07002889 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002890 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002891 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2892 to_gdb_channel.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07002893 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07002894 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07002895 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002896
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002897 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2898 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002899 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002900 let to_vcpu_channels = vcpu_handles
2901 .iter()
2902 .map(|(_handle, channel)| channel.clone())
2903 .collect();
2904 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002905 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002906 to_vcpu_channels,
2907 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
2908 );
2909 thread::Builder::new()
2910 .name("gdb".to_owned())
2911 .spawn(move || gdb_thread(target, gdb_port_num))
2912 .map_err(Error::SpawnGdbServer)?;
2913 };
2914
Dylan Reid059a1882018-07-23 17:58:09 -07002915 vcpu_thread_barrier.wait();
2916
Michael Hoylee392c462020-10-07 03:29:24 -07002917 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002918 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07002919 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002920 Ok(v) => v,
2921 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08002922 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002923 break;
2924 }
2925 }
2926 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002927
Steven Richmanf32d0b42020-06-20 21:45:32 -07002928 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
2929 warn!("can't deliver delayed irqs: {}", e);
2930 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002931
Zach Reiznera60744b2019-02-13 17:33:32 -08002932 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07002933 for event in events.iter().filter(|e| e.is_readable) {
2934 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002935 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002936 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07002937 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002938 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002939 Token::Suspend => {
2940 info!("VM requested suspend");
2941 linux.suspend_evt.read().unwrap();
Steven Richman11dc6712020-09-02 15:39:14 -07002942 kick_all_vcpus(&vcpu_handles, &linux.irq_chip, &VmRunMode::Suspending);
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002943 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002944 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002945 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08002946 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08002947 let pid = siginfo.ssi_pid;
2948 let pid_label = match linux.pid_debug_label_map.get(&pid) {
2949 Some(label) => format!("{} (pid {})", label, pid),
2950 None => format!("pid {}", pid),
2951 };
David Tolnayf5032762018-12-03 10:46:45 -08002952 error!(
2953 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08002954 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08002955 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002956 }
Michael Hoylee392c462020-10-07 03:29:24 -07002957 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002958 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002959 Token::IrqFd { index } => {
2960 if let Err(e) = linux.irq_chip.service_irq_event(index) {
2961 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002962 }
2963 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002964 Token::BalanceMemory => {
Michael Hoyle08d86a42020-08-19 14:45:21 -07002965 balancemem_timer.wait().map_err(Error::Timer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002966 let command = BalloonControlCommand::Stats {};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002967 if let Err(e) = balloon_host_tube.send(&command) {
Charles William Dick0bf8a552019-10-29 15:36:01 +09002968 warn!("failed to send stats request to balloon device: {}", e);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002969 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002970 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002971 Token::BalloonResult => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002972 match balloon_host_tube.recv() {
Charles William Dick0bf8a552019-10-29 15:36:01 +09002973 Ok(BalloonControlResult::Stats {
2974 stats,
2975 balloon_actual: balloon_actual_u,
2976 }) => {
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002977 match balloon_policy
2978 .as_mut()
2979 .map(|p| p.delta(stats, balloon_actual_u))
2980 {
2981 None => {
2982 error!(
2983 "got result from balloon stats, but no policy is running"
2984 );
Charles William Dick0bf8a552019-10-29 15:36:01 +09002985 }
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002986 Some(Err(e)) => {
2987 warn!("failed to run balloon policy {}", e);
2988 }
2989 Some(Ok(delta)) if delta != 0 => {
2990 let target = max((balloon_actual_u as i64) + delta, 0) as u64;
2991 let command =
2992 BalloonControlCommand::Adjust { num_bytes: target };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002993 if let Err(e) = balloon_host_tube.send(&command) {
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002994 warn!(
2995 "failed to send memory value to balloon device: {}",
2996 e
2997 );
2998 }
2999 }
3000 Some(Ok(_)) => {}
Charles William Dick0bf8a552019-10-29 15:36:01 +09003001 }
3002 }
3003 Err(e) => {
3004 error!("failed to recv BalloonControlResult: {}", e);
3005 }
3006 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07003007 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003008 Token::VmControlServer => {
3009 if let Some(socket_server) = &control_server_socket {
3010 match socket_server.accept() {
3011 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07003012 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003013 .add(
3014 &socket,
3015 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003016 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08003017 },
3018 )
Michael Hoylee392c462020-10-07 03:29:24 -07003019 .map_err(Error::WaitContextAdd)?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003020 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08003021 }
3022 Err(e) => error!("failed to accept socket: {}", e),
3023 }
3024 }
3025 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003026 Token::VmControl { index } => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003027 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003028 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003029 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003030 Ok(request) => {
3031 let mut run_mode_opt = None;
3032 let response = request.execute(
3033 &mut run_mode_opt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003034 &balloon_host_tube,
3035 disk_host_tubes,
3036 &usb_control_tube,
Chuanxiao Dong256be3a2020-04-27 16:39:33 +08003037 &mut linux.bat_control,
Jakub Starond99cd0a2019-04-11 14:09:39 -07003038 );
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003039 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003040 error!("failed to send VmResponse: {}", e);
3041 }
3042 if let Some(run_mode) = run_mode_opt {
3043 info!("control socket changed run mode to {}", run_mode);
3044 match run_mode {
3045 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07003046 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07003047 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003048 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08003049 if other == VmRunMode::Running {
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003050 linux.io_bus.notify_resume();
3051 }
Steven Richman11dc6712020-09-02 15:39:14 -07003052 kick_all_vcpus(
3053 &vcpu_handles,
3054 &linux.irq_chip,
3055 &other,
3056 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08003057 }
3058 }
3059 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003060 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003061 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003062 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003063 vm_control_indices_to_remove.push(index);
3064 } else {
3065 error!("failed to recv VmRequest: {}", e);
3066 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003067 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003068 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003069 TaggedControlTube::VmMemory(tube) => {
3070 match tube.recv::<VmMemoryRequest>() {
3071 Ok(request) => {
3072 let response = request.execute(
3073 &mut linux.vm,
3074 &mut linux.resources,
3075 Arc::clone(&map_request),
3076 &mut gralloc,
3077 );
3078 if let Err(e) = tube.send(&response) {
3079 error!("failed to send VmMemoryControlResponse: {}", e);
3080 }
3081 }
3082 Err(e) => {
3083 if let TubeError::Disconnected = e {
3084 vm_control_indices_to_remove.push(index);
3085 } else {
3086 error!("failed to recv VmMemoryControlRequest: {}", e);
3087 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003088 }
3089 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003090 }
3091 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08003092 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07003093 let response = {
3094 let irq_chip = &mut linux.irq_chip;
3095 request.execute(
3096 |setup| match setup {
3097 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003098 if let Some(event_index) = irq_chip
3099 .register_irq_event(irq, ev, None)?
3100 {
3101 match wait_ctx.add(
3102 ev,
3103 Token::IrqFd {
3104 index: event_index
3105 },
3106 ) {
3107 Err(e) => {
3108 warn!("failed to add IrqFd to poll context: {}", e);
3109 Err(e)
3110 },
3111 Ok(_) => {
3112 Ok(())
3113 }
3114 }
3115 } else {
3116 Ok(())
3117 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003118 }
3119 IrqSetup::Route(route) => irq_chip.route_irq(route),
3120 },
3121 &mut linux.resources,
3122 )
3123 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003124 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08003125 error!("failed to send VmIrqResponse: {}", e);
3126 }
3127 }
3128 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003129 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08003130 vm_control_indices_to_remove.push(index);
3131 } else {
3132 error!("failed to recv VmIrqRequest: {}", e);
3133 }
3134 }
3135 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003136 TaggedControlTube::VmMsync(tube) => {
3137 match tube.recv::<VmMsyncRequest>() {
3138 Ok(request) => {
3139 let response = request.execute(&mut linux.vm);
3140 if let Err(e) = tube.send(&response) {
3141 error!("failed to send VmMsyncResponse: {}", e);
3142 }
3143 }
3144 Err(e) => {
3145 if let TubeError::Disconnected = e {
3146 vm_control_indices_to_remove.push(index);
3147 } else {
3148 error!("failed to recv VmMsyncRequest: {}", e);
3149 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08003150 }
3151 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003152 }
3153 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003154 Ok(request) => {
3155 let response =
3156 request.execute(&mut linux.vm, &mut linux.resources);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003157 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003158 error!("failed to send VmResponse: {}", e);
3159 }
3160 }
3161 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003162 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003163 vm_control_indices_to_remove.push(index);
3164 } else {
3165 error!("failed to recv VmResponse: {}", e);
3166 }
3167 }
3168 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08003169 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003170 }
3171 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003172 }
3173 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003174
Michael Hoylee392c462020-10-07 03:29:24 -07003175 for event in events.iter().filter(|e| e.is_hungup) {
3176 match event.token {
Zach Reiznera60744b2019-02-13 17:33:32 -08003177 Token::Exit => {}
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003178 Token::Suspend => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08003179 Token::ChildSignal => {}
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003180 Token::IrqFd { index: _ } => {}
Charles William Dick0bf8a552019-10-29 15:36:01 +09003181 Token::BalanceMemory => {}
3182 Token::BalloonResult => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08003183 Token::VmControlServer => {}
3184 Token::VmControl { index } => {
3185 // It's possible more data is readable and buffered while the socket is hungup,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003186 // so don't delete the tube from the poll context until we're sure all the
Zach Reiznera60744b2019-02-13 17:33:32 -08003187 // data is read.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003188 if control_tubes
Jakub Starond99cd0a2019-04-11 14:09:39 -07003189 .get(index)
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003190 .map(|s| !s.as_ref().is_packet_ready())
3191 .unwrap_or(false)
Jakub Starond99cd0a2019-04-11 14:09:39 -07003192 {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003193 vm_control_indices_to_remove.push(index);
Zach Reizner55a9e502018-10-03 10:22:32 -07003194 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003195 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003196 }
3197 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003198
3199 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08003200 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07003201 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08003202 vm_control_indices_to_remove.dedup();
3203 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07003204 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3205 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08003206 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003207 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003208 // that has already been closed. Because the token associated with that spurious event
3209 // now belongs to a different socket, the control loop will start to interact with
3210 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3211 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003212 if let Some(socket) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003213 wait_ctx.delete(socket).map_err(Error::WaitContextDelete)?;
Zide Chen89584072019-11-14 10:33:51 -08003214 }
3215
3216 // This line implicitly drops the socket at `index` when it gets returned by
3217 // `swap_remove`. After this line, the socket at `index` is not the one from
3218 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003219 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003220 control_tubes.swap_remove(index);
3221 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003222 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003223 .modify(tube, EventType::Read, Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07003224 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003225 }
3226 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003227 }
3228
Steven Richman11dc6712020-09-02 15:39:14 -07003229 kick_all_vcpus(&vcpu_handles, &linux.irq_chip, &VmRunMode::Exiting);
3230 for (handle, _) in vcpu_handles {
3231 if let Err(e) = handle.join() {
3232 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003233 }
3234 }
3235
Daniel Verkamp94c35272019-09-12 13:31:30 -07003236 // Explicitly drop the VM structure here to allow the devices to clean up before the
3237 // control sockets are closed when this function exits.
3238 mem::drop(linux);
3239
Zach Reizner19ad1f32019-12-12 18:58:50 -08003240 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003241 .set_canon_mode()
3242 .expect("failed to restore canonical mode for terminal");
3243
3244 Ok(())
3245}