blob: 9ac3e64417737622548abbb54e778102249e5c7c [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Charles William Dick0e3d4b62020-12-14 12:16:46 +09005use std::cmp::{max, min, Reverse};
Jakub Starona3411ea2019-04-24 10:55:25 -07006use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07007#[cfg(feature = "gpu")]
8use std::env;
David Tolnayfdac5ed2019-03-08 16:56:14 -08009use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080011use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070012use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070013use std::io::{self, stdin, Read};
Steven Richmanf32d0b42020-06-20 21:45:32 -070014use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070015use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080016use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070017#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070018use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090019use std::num::ParseIntError;
Michael Hoylea596a072020-11-10 19:32:45 -080020use std::os::unix::io::FromRawFd;
Zach Reiznera60744b2019-02-13 17:33:32 -080021use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090023use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070024use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070025use std::sync::{mpsc, Arc, Barrier};
26
Zach Reizner39aa26b2017-12-12 18:03:23 -080027use std::thread;
28use std::thread::JoinHandle;
Charles William Dick0bf8a552019-10-29 15:36:01 +090029use std::time::Duration;
Zach Reizner39aa26b2017-12-12 18:03:23 -080030
David Tolnay41a6f842019-03-01 16:18:44 -080031use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080032
Tomasz Jeznach42644642020-05-20 23:27:59 -070033use acpi_tables::sdt::SDT;
34
Michael Hoyle6b196952020-08-02 20:09:41 -070035use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Keiichi Watanabe60686582021-03-12 04:53:51 +090036use devices::virtio::vhost::user::{
Woody Chow5890b702021-02-12 14:57:02 +090037 Block as VhostUserBlock, Error as VhostUserError, Fs as VhostUserFs, Net as VhostUserNet,
Keiichi Watanabe60686582021-03-12 04:53:51 +090038};
Zach Reizner65b98f12019-11-22 17:34:58 -080039#[cfg(feature = "gpu")]
40use devices::virtio::EventDevice;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070041use devices::virtio::{self, Console, VirtioDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080042#[cfg(feature = "audio")]
43use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080044use devices::{
Steven Richman11dc6712020-09-02 15:39:14 -070045 self, HostBackendDeviceProvider, IrqChip, IrqEventIndex, KvmKernelIrqChip, PciDevice,
46 VcpuRunState, VfioContainer, VfioDevice, VfioPciDevice, VirtioPciDevice, XhciController,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080047};
Steven Richmanf32d0b42020-06-20 21:45:32 -070048use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Zach Reizner304e7312020-09-29 16:00:24 -070049use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070050use minijail::{self, Minijail};
Zach Reiznera60744b2019-02-13 17:33:32 -080051use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080052use net_util::{Error as NetError, MacAddress, Tap};
David Tolnay3df35522019-03-11 12:36:30 -070053use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080054use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080055use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070056use sync::Mutex;
Jakub Starona3411ea2019-04-24 10:55:25 -070057
Michael Hoyle6b196952020-08-02 20:09:41 -070058use base::{
David Tolnay633426a2019-04-12 12:18:35 -070059 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
Fletcher Woodruff82ff3972019-10-02 13:11:34 -060060 get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
Michael Hoylea596a072020-11-10 19:32:45 -080061 set_cpu_affinity, set_rt_prio_limit, set_rt_round_robin, signal, validate_raw_descriptor, warn,
62 AsRawDescriptor, Event, EventType, ExternalMapping, FlockOperation, FromRawDescriptor,
63 Killable, MemoryMappingArena, PollToken, Protection, RawDescriptor, ScopedEvent, SignalFd,
64 Terminal, Timer, WaitContext, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080065};
Jakub Starone7c59052019-04-09 12:31:14 -070066use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070067 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Charles William Dick0e3d4b62020-12-14 12:16:46 +090068 BalloonControlResult, BalloonStats, DiskControlCommand, DiskControlRequestSocket,
69 DiskControlResponseSocket, DiskControlResult, FsMappingRequest, FsMappingRequestSocket,
70 FsMappingResponseSocket, IrqSetup, UsbControlSocket, VcpuControl, VmControlResponseSocket,
71 VmIrqRequest, VmIrqRequestSocket, VmIrqResponse, VmIrqResponseSocket,
72 VmMemoryControlRequestSocket, VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse,
73 VmMsyncRequest, VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmResponse,
74 VmRunMode,
Jakub Starone7c59052019-04-09 12:31:14 -070075};
Keiichi Watanabec5262e92020-10-21 15:57:33 +090076#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +090077use vm_control::{VcpuDebug, VcpuDebugStatus, VcpuDebugStatusMessage, VmRequest};
Dylan Reidec058d62020-07-20 20:21:11 -070078use vm_memory::{GuestAddress, GuestMemory};
Zach Reizner39aa26b2017-12-12 18:03:23 -080079
Keiichi Watanabec5262e92020-10-21 15:57:33 +090080#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
81use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090082use crate::{
Woody Chow5890b702021-02-12 14:57:02 +090083 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VhostUserFsOption,
84 VhostUserOption,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090085};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070086use arch::{
Daniel Verkampc677fb42020-09-08 13:47:49 -070087 self, LinuxArch, RunnableLinuxVm, SerialHardware, SerialParameters, VcpuAffinity,
88 VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070089};
Sonny Raoed517d12018-02-13 22:09:43 -080090
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080091#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070092use {
93 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070094 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070095 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
96};
Zach Reizner55a9e502018-10-03 10:22:32 -070097#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070098use {
Steven Richman11dc6712020-09-02 15:39:14 -070099 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
100 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -0700101 x86_64::X8664arch as Arch,
102};
Zach Reizner39aa26b2017-12-12 18:03:23 -0800103
David Tolnay3df35522019-03-11 12:36:30 -0700104#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -0700105#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800106pub enum Error {
Michael Hoyle6b196952020-08-02 20:09:41 -0700107 AddGpuDeviceMemory(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700108 AddIrqChipVcpu(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700109 AddPmemDeviceMemory(base::Error),
Lepton Wu60893882018-11-21 11:06:18 -0800110 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -0700111 AllocatePmemDeviceAddress(resources::Error),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900112 BalloonActualTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800113 BalloonDeviceNew(virtio::BalloonError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700114 BlockDeviceNew(base::Error),
115 BlockSignal(base::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -0800116 BuildVm(<Arch as LinuxArch>::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700117 ChownTpmStorage(base::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700118 CloneEvent(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700119 CloneVcpu(base::Error),
120 ConfigureVcpu(<Arch as LinuxArch>::Error),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000121 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800122 CreateAc97(devices::PciDeviceError),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700123 CreateConsole(arch::serial::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700124 CreateDiskError(disk::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700125 CreateEvent(base::Error),
Gurchetan Singh293913c2020-12-09 10:44:13 -0800126 CreateGrallocError(rutabaga_gfx::RutabagaError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700127 CreateSignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800128 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -0800129 CreateTapDevice(NetError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700130 CreateTimer(base::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800131 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -0800132 CreateUsbProvider(devices::usb::host_backend::error::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700133 CreateVcpu(base::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800134 CreateVfioDevice(devices::vfio::VfioError),
Michael Hoylee392c462020-10-07 03:29:24 -0700135 CreateWaitContext(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700136 DeviceJail(minijail::Error),
137 DevicePivotRoot(minijail::Error),
Tomasz Jeznach3ce74762021-02-26 01:01:53 -0800138 DirectIo(io::Error),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800139 Disk(PathBuf, io::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700140 DiskImageLock(base::Error),
141 DropCapabilities(base::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900142 FsDeviceNew(virtio::fs::Error),
143 GetMaxOpenFiles(io::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700144 GetSignalMask(signal::Error),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900145 GuestCachedMissing(),
146 GuestCachedTooLarge(std::num::TryFromIntError),
147 GuestFreeMissing(),
148 GuestFreeTooLarge(std::num::TryFromIntError),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900149 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
150 HandleDebugCommand(<Arch as LinuxArch>::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800151 InputDeviceNew(virtio::InputError),
152 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700153 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700154 InvalidWaylandPath,
Allen Webbf3024c82020-06-19 07:19:48 -0700155 IoJail(minijail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800156 LoadKernel(Box<dyn StdError>),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800157 MemoryTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800158 NetDeviceNew(virtio::NetError),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700159 OpenAcpiTable(PathBuf, io::Error),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800160 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700161 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800162 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800163 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800164 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800165 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900166 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800167 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700168 PmemDeviceImageTooBig,
Michael Hoyle6b196952020-08-02 20:09:41 -0700169 PmemDeviceNew(base::Error),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900170 ReadMemAvailable(io::Error),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900171 ReadStatm(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700172 RegisterBalloon(arch::DeviceRegistrationError),
173 RegisterBlock(arch::DeviceRegistrationError),
174 RegisterGpu(arch::DeviceRegistrationError),
175 RegisterNet(arch::DeviceRegistrationError),
176 RegisterP9(arch::DeviceRegistrationError),
177 RegisterRng(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700178 RegisterSignalHandler(base::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700179 RegisterWayland(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700180 ReserveGpuMemory(base::MmapError),
181 ReserveMemory(base::Error),
182 ReservePmemMemory(base::MmapError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700183 ResetTimer(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800184 RngDeviceNew(virtio::RngError),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700185 RunnableVcpu(base::Error),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900186 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
187 SendDebugStatus(Box<mpsc::SendError<VcpuDebugStatusMessage>>),
Allen Webbf3024c82020-06-19 07:19:48 -0700188 SettingGidMap(minijail::Error),
189 SettingMaxOpenFiles(minijail::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700190 SettingSignalMask(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700191 SettingUidMap(minijail::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700192 SignalFd(base::SignalFdError),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900193 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
194 SpawnGdbServer(io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800195 SpawnVcpu(io::Error),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700196 Timer(base::Error),
Michael Hoylea596a072020-11-10 19:32:45 -0800197 ValidateRawDescriptor(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800198 VhostNetDeviceNew(virtio::vhost::Error),
Keiichi Watanabe60686582021-03-12 04:53:51 +0900199 VhostUserBlockDeviceNew(VhostUserError),
Woody Chow5890b702021-02-12 14:57:02 +0900200 VhostUserFsDeviceNew(VhostUserError),
Keiichi Watanabe60686582021-03-12 04:53:51 +0900201 VhostUserNetDeviceNew(VhostUserError),
202 VhostUserNetWithNetArgs,
David Tolnay2b089fc2019-03-04 15:33:22 -0800203 VhostVsockDeviceNew(virtio::vhost::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700204 VirtioPciDev(base::Error),
Michael Hoylee392c462020-10-07 03:29:24 -0700205 WaitContextAdd(base::Error),
206 WaitContextDelete(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700207 WaylandDeviceNew(base::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800208}
209
David Tolnayc69f9752019-03-01 18:07:56 -0800210impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700211 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800212 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800213 use self::Error::*;
214
David Tolnay3df35522019-03-11 12:36:30 -0700215 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800216 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800217 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700218 AddIrqChipVcpu(e) => write!(f, "failed to add vcpu to irq chip: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700219 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800220 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700221 AllocatePmemDeviceAddress(e) => {
222 write!(f, "failed to allocate memory for pmem device: {}", e)
223 }
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900224 BalloonActualTooLarge => write!(f, "balloon actual size is too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800225 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
226 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
227 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800228 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800229 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700230 CloneEvent(e) => write!(f, "failed to clone event: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700231 CloneVcpu(e) => write!(f, "failed to clone vcpu: {}", e),
232 ConfigureVcpu(e) => write!(f, "failed to configure vcpu: {}", e),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000233 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800234 CreateAc97(e) => write!(f, "failed to create ac97 device: {}", e),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700235 CreateConsole(e) => write!(f, "failed to create console device: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700236 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700237 CreateEvent(e) => write!(f, "failed to create event: {}", e),
Gurchetan Singh293913c2020-12-09 10:44:13 -0800238 CreateGrallocError(e) => write!(f, "failed to create gralloc: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800239 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
240 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
241 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700242 CreateTimer(e) => write!(f, "failed to create Timer: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800243 CreateTpmStorage(p, e) => {
244 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
245 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800246 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700247 CreateVcpu(e) => write!(f, "failed to create vcpu: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800248 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700249 CreateWaitContext(e) => write!(f, "failed to create wait context: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800250 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
251 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
Tomasz Jeznach3ce74762021-02-26 01:01:53 -0800252 DirectIo(e) => write!(f, "failed to open direct io device: {}", e),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800253 Disk(p, e) => write!(f, "failed to load disk image {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800254 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800255 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900256 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
257 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700258 GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900259 GuestCachedMissing() => write!(f, "guest cached is missing from balloon stats"),
260 GuestCachedTooLarge(e) => write!(f, "guest cached is too large: {}", e),
261 GuestFreeMissing() => write!(f, "guest free is missing from balloon stats"),
262 GuestFreeTooLarge(e) => write!(f, "guest free is too large: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900263 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
264 HandleDebugCommand(e) => write!(f, "failed to handle a gdb command: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700265 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
266 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800267 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
268 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800269 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800270 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800271 MemoryTooLarge => write!(f, "requested memory size too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800272 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700273 OpenAcpiTable(p, e) => write!(f, "failed to open ACPI file {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800274 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800275 f,
276 "failed to open android fstab file {}: {}",
277 p.display(),
278 e
279 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700280 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700281 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
282 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800283 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800284 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900285 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800286 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700287 PmemDeviceImageTooBig => {
288 write!(f, "failed to create pmem device: pmem device image too big")
289 }
290 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900291 ReadMemAvailable(e) => write!(
292 f,
293 "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}",
294 e
295 ),
296 ReadStatm(e) => write!(f, "failed to read /proc/self/statm: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800297 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
298 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
299 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
300 RegisterNet(e) => write!(f, "error registering net device: {}", e),
301 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
302 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
303 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
304 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800305 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
306 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700307 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700308 ResetTimer(e) => write!(f, "failed to reset Timer: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800309 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700310 RunnableVcpu(e) => write!(f, "failed to set thread id for vcpu: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900311 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
312 SendDebugStatus(e) => write!(f, "failed to send a debug status to GDB thread: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800313 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900314 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700315 SettingSignalMask(e) => write!(f, "failed to set the signal mask for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800316 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
317 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900318 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
319 SpawnGdbServer(e) => write!(f, "failed to spawn GDB thread: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800320 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700321 Timer(e) => write!(f, "failed to read timer fd: {}", e),
Michael Hoylea596a072020-11-10 19:32:45 -0800322 ValidateRawDescriptor(e) => write!(f, "failed to validate raw descriptor: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800323 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900324 VhostUserBlockDeviceNew(e) => {
325 write!(f, "failed to set up vhost-user block device: {}", e)
326 }
Woody Chow5890b702021-02-12 14:57:02 +0900327 VhostUserFsDeviceNew(e) => {
328 write!(f, "failed to set up vhost-user fs device: {}", e)
329 }
Keiichi Watanabe60686582021-03-12 04:53:51 +0900330 VhostUserNetDeviceNew(e) => {
331 write!(f, "failed to set up vhost-user net device: {}", e)
332 }
333 VhostUserNetWithNetArgs => {
334 write!(
335 f,
336 "vhost-user-net cannot be used with any of --host_ip, --netmask or --mac"
337 )
338 }
David Tolnayc69f9752019-03-01 18:07:56 -0800339 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
340 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700341 WaitContextAdd(e) => write!(f, "failed to add descriptor to wait context: {}", e),
342 WaitContextDelete(e) => {
343 write!(f, "failed to remove descriptor from wait context: {}", e)
344 }
David Tolnayc69f9752019-03-01 18:07:56 -0800345 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800346 }
347 }
348}
349
Allen Webbf3024c82020-06-19 07:19:48 -0700350impl From<minijail::Error> for Error {
351 fn from(err: minijail::Error) -> Self {
David Tolnayfd0971d2019-03-04 17:15:57 -0800352 Error::IoJail(err)
353 }
354}
355
David Tolnayc69f9752019-03-01 18:07:56 -0800356impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700357
Zach Reizner39aa26b2017-12-12 18:03:23 -0800358type Result<T> = std::result::Result<T, Error>;
359
Jakub Starond99cd0a2019-04-11 14:09:39 -0700360enum TaggedControlSocket {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900361 Fs(FsMappingResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700362 Vm(VmControlResponseSocket),
Gurchetan Singh53edb812019-05-22 08:57:16 -0700363 VmMemory(VmMemoryControlResponseSocket),
Xiong Zhang2515b752019-09-19 10:29:02 +0800364 VmIrq(VmIrqResponseSocket),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800365 VmMsync(VmMsyncResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700366}
367
368impl AsRef<UnixSeqpacket> for TaggedControlSocket {
369 fn as_ref(&self) -> &UnixSeqpacket {
370 use self::TaggedControlSocket::*;
371 match &self {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900372 Fs(ref socket) => socket.as_ref(),
Chirantan Ekbote50582532020-01-16 16:49:14 +0900373 Vm(ref socket) => socket.as_ref(),
374 VmMemory(ref socket) => socket.as_ref(),
375 VmIrq(ref socket) => socket.as_ref(),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800376 VmMsync(ref socket) => socket.as_ref(),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700377 }
378 }
379}
380
Michael Hoylee392c462020-10-07 03:29:24 -0700381impl AsRawDescriptor for TaggedControlSocket {
382 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800383 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700384 }
385}
386
Andrew Walbranf50bab62020-07-07 13:22:53 +0100387fn get_max_open_files() -> Result<u64> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900388 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900389
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900390 // Safe because this will only modify `buf` and we check the return value.
391 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
392 if res == 0 {
393 // Safe because the kernel guarantees that the struct is fully initialized.
394 let limit = unsafe { buf.assume_init() };
395 Ok(limit.rlim_max)
396 } else {
397 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
398 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900399}
400
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When true, the jail is configured with an empty capability mask.
    limit_caps: bool,
    /// When true, seccomp filter failures are logged and the `.policy` file is
    /// used even if a pre-compiled `.bpf` file exists.
    log_failures: bool,
    /// Path to the seccomp policy; its extension is replaced with `bpf` or
    /// `policy` before loading.
    seccomp_policy: &'a Path,
    /// Optional uid map handed to the jail's `uidmap`.
    uid_map: Option<&'a str>,
    /// Optional gid map handed to the jail's `gidmap`.
    gid_map: Option<&'a str>,
}
408
Zach Reizner44863792019-06-26 14:22:08 -0700409fn create_base_minijail(
410 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800411 r_limit: Option<u64>,
412 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700413) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800414 // All child jails run in a new user namespace without any users mapped,
415 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800416 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800417
418 if let Some(config) = config {
419 j.namespace_pids();
420 j.namespace_user();
421 j.namespace_user_disable_setgroups();
422 if config.limit_caps {
423 // Don't need any capabilities.
424 j.use_caps(0);
425 }
426 if let Some(uid_map) = config.uid_map {
427 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
428 }
429 if let Some(gid_map) = config.gid_map {
430 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
431 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900432 // Run in a new mount namespace.
433 j.namespace_vfs();
434
Matt Delcoc24ad782020-02-14 13:24:36 -0800435 // Run in an empty network namespace.
436 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900437
438 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800439 j.no_new_privs();
440
441 // By default we'll prioritize using the pre-compiled .bpf over the .policy
442 // file (the .bpf is expected to be compiled using "trap" as the failure
443 // behavior instead of the default "kill" behavior).
444 // Refer to the code comment for the "seccomp-log-failures"
445 // command-line parameter for an explanation about why the |log_failures|
446 // flag forces the use of .policy files (and the build-time alternative to
447 // this run-time flag).
448 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
449 if bpf_policy_file.exists() && !config.log_failures {
450 j.parse_seccomp_program(&bpf_policy_file)
451 .map_err(Error::DeviceJail)?;
452 } else {
453 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
454 // which will correctly kill the entire device process if a worker
455 // thread commits a seccomp violation.
456 j.set_seccomp_filter_tsync();
457 if config.log_failures {
458 j.log_seccomp_filter_failures();
459 }
460 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
461 .map_err(Error::DeviceJail)?;
462 }
463 j.use_seccomp_filter();
464 // Don't do init setup.
465 j.run_as_init();
466 }
467
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900468 // Only pivot_root if we are not re-using the current root directory.
469 if root != Path::new("/") {
470 // It's safe to call `namespace_vfs` multiple times.
471 j.namespace_vfs();
472 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
473 }
Matt Delco45caf912019-11-13 08:11:09 -0800474
Matt Delcoc24ad782020-02-14 13:24:36 -0800475 // Most devices don't need to open many fds.
476 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
477 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
478 .map_err(Error::SettingMaxOpenFiles)?;
479
Zach Reizner39aa26b2017-12-12 18:03:23 -0800480 Ok(j)
481}
482
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800483fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700484 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800485 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
486 // A directory for a jailed device's pivot root.
487 let root_path = Path::new(pivot_root);
488 if !root_path.exists() {
489 return Err(Error::PivotRootDoesntExist(pivot_root));
490 }
491 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800492 let config = SandboxConfig {
493 limit_caps: true,
494 log_failures: cfg.seccomp_log_failures,
495 seccomp_policy: &policy_path,
496 uid_map: None,
497 gid_map: None,
498 };
499 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800500 } else {
501 Ok(None)
502 }
503}
504
David Tolnayfd0971d2019-03-04 17:15:57 -0800505type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800506
507fn create_block_device(
508 cfg: &Config,
509 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700510 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800511) -> DeviceResult {
512 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
513 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
514 // Safe because we will validate |raw_fd|.
Michael Hoylea596a072020-11-10 19:32:45 -0800515 unsafe { File::from_raw_descriptor(raw_descriptor_from_path(&disk.path)?) }
David Tolnay2b089fc2019-03-04 15:33:22 -0800516 } else {
517 OpenOptions::new()
518 .read(true)
519 .write(!disk.read_only)
520 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800521 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
David Tolnay2b089fc2019-03-04 15:33:22 -0800522 };
523 // Lock the disk image to prevent other crosvm instances from using it.
524 let lock_op = if disk.read_only {
525 FlockOperation::LockShared
526 } else {
527 FlockOperation::LockExclusive
528 };
529 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
530
Dylan Reid503c5ab2020-07-17 11:20:07 -0700531 let dev = if disk::async_ok(&raw_image).map_err(Error::CreateDiskError)? {
532 let async_file = disk::create_async_disk_file(raw_image).map_err(Error::CreateDiskError)?;
533 Box::new(
534 virtio::BlockAsync::new(
535 virtio::base_features(cfg.protected_vm),
536 async_file,
537 disk.read_only,
538 disk.sparse,
539 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700540 disk.id,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700541 Some(disk_device_socket),
542 )
543 .map_err(Error::BlockDeviceNew)?,
544 ) as Box<dyn VirtioDevice>
545 } else {
546 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
547 Box::new(
548 virtio::Block::new(
549 virtio::base_features(cfg.protected_vm),
550 disk_file,
551 disk.read_only,
552 disk.sparse,
553 disk.block_size,
554 disk.id,
555 Some(disk_device_socket),
556 )
557 .map_err(Error::BlockDeviceNew)?,
558 ) as Box<dyn VirtioDevice>
559 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800560
561 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700562 dev,
Matt Delco45caf912019-11-13 08:11:09 -0800563 jail: simple_jail(&cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800564 })
565}
566
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900567fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
568 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
569 .map_err(Error::VhostUserBlockDeviceNew)?;
570
571 Ok(VirtioDeviceStub {
572 dev: Box::new(dev),
573 // no sandbox here because virtqueue handling is exported to a different process.
574 jail: None,
575 })
576}
577
Woody Chow5890b702021-02-12 14:57:02 +0900578fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
579 let dev = VhostUserFs::new(
580 virtio::base_features(cfg.protected_vm),
581 &option.socket,
582 &option.tag,
583 )
584 .map_err(Error::VhostUserFsDeviceNew)?;
585
586 Ok(VirtioDeviceStub {
587 dev: Box::new(dev),
588 // no sandbox here because virtqueue handling is exported to a different process.
589 jail: None,
590 })
591}
592
David Tolnay2b089fc2019-03-04 15:33:22 -0800593fn create_rng_device(cfg: &Config) -> DeviceResult {
Keiichi Watanabef70350b2020-11-24 21:57:53 +0900594 let dev =
595 virtio::Rng::new(virtio::base_features(cfg.protected_vm)).map_err(Error::RngDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800596
597 Ok(VirtioDeviceStub {
598 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800599 jail: simple_jail(&cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800600 })
601}
602
/// Creates the virtio TPM device together with its storage directory.
///
/// With a sandbox, storage is a per-process directory "/run/vm/tpm.<pid>" that is created,
/// chowned to the ids returned by `add_crosvm_user_to_jail`, and bind mounted writable into the
/// jail. Without a sandbox, storage is "/tmp/tpm-simulator".
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use base::chown;
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = if let Some(jail) = tpm_jail.as_mut() {
        // Create a tmpfs in the device's root directory for tpm
        // simulator storage. The size is 20*1024, or 20 KB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=20480",
        )?;

        let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

        // The storage directory is keyed by this process's pid.
        let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
        let storage = PathBuf::from(tpm_pid_dir.as_str());
        fs::create_dir_all(&storage)
            .map_err(|e| Error::CreateTpmStorage(storage.to_owned(), e))?;

        // Hand the directory over to the uid/gid returned for the jailed user.
        let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
        chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid).map_err(Error::ChownTpmStorage)?;

        jail.mount_bind(&storage, &storage, true)?;
        storage
    } else {
        // Path used inside cros_sdk which does not have /run/vm.
        PathBuf::from("/tmp/tpm-simulator")
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
651
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800652fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800653 let socket = single_touch_spec
654 .get_path()
655 .into_unix_stream()
656 .map_err(|e| {
657 error!("failed configuring virtio single touch: {:?}", e);
658 e
659 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800660
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800661 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700662 let dev = virtio::new_single_touch(
663 socket,
664 width,
665 height,
666 virtio::base_features(cfg.protected_vm),
667 )
668 .map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800669 Ok(VirtioDeviceStub {
670 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800671 jail: simple_jail(&cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800672 })
673}
674
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000675fn create_multi_touch_device(cfg: &Config, multi_touch_spec: &TouchDeviceOption) -> DeviceResult {
676 let socket = multi_touch_spec
677 .get_path()
678 .into_unix_stream()
679 .map_err(|e| {
680 error!("failed configuring virtio multi touch: {:?}", e);
681 e
682 })?;
683
684 let (width, height) = multi_touch_spec.get_size();
685 let dev = virtio::new_multi_touch(
686 socket,
687 width,
688 height,
689 virtio::base_features(cfg.protected_vm),
690 )
691 .map_err(Error::InputDeviceNew)?;
692
693 Ok(VirtioDeviceStub {
694 dev: Box::new(dev),
695 jail: simple_jail(&cfg, "input_device")?,
696 })
697}
698
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800699fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800700 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800701 error!("failed configuring virtio trackpad: {}", e);
702 e
703 })?;
704
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800705 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700706 let dev = virtio::new_trackpad(
707 socket,
708 width,
709 height,
710 virtio::base_features(cfg.protected_vm),
711 )
712 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800713
714 Ok(VirtioDeviceStub {
715 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800716 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800717 })
718}
719
Zach Reizner65b98f12019-11-22 17:34:58 -0800720fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T) -> DeviceResult {
721 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800722 error!("failed configuring virtio mouse: {}", e);
723 e
724 })?;
725
Noah Goldd4ca29b2020-10-27 12:21:52 -0700726 let dev = virtio::new_mouse(socket, virtio::base_features(cfg.protected_vm))
727 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800728
729 Ok(VirtioDeviceStub {
730 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800731 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800732 })
733}
734
Zach Reizner65b98f12019-11-22 17:34:58 -0800735fn create_keyboard_device<T: IntoUnixStream>(cfg: &Config, keyboard_socket: T) -> DeviceResult {
736 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800737 error!("failed configuring virtio keyboard: {}", e);
738 e
739 })?;
740
Noah Goldd4ca29b2020-10-27 12:21:52 -0700741 let dev = virtio::new_keyboard(socket, virtio::base_features(cfg.protected_vm))
742 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800743
744 Ok(VirtioDeviceStub {
745 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800746 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800747 })
748}
749
Daniel Norman5e23df72021-03-11 10:11:02 -0800750fn create_switches_device<T: IntoUnixStream>(cfg: &Config, switches_socket: T) -> DeviceResult {
751 let socket = switches_socket.into_unix_stream().map_err(|e| {
752 error!("failed configuring virtio switches: {}", e);
753 e
754 })?;
755
756 let dev = virtio::new_switches(socket, virtio::base_features(cfg.protected_vm))
757 .map_err(Error::InputDeviceNew)?;
758
759 Ok(VirtioDeviceStub {
760 dev: Box::new(dev),
761 jail: simple_jail(&cfg, "input_device")?,
762 })
763}
764
David Tolnay2b089fc2019-03-04 15:33:22 -0800765fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
766 let dev_file = OpenOptions::new()
767 .read(true)
768 .write(true)
769 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800770 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800771
Noah Goldd4ca29b2020-10-27 12:21:52 -0700772 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
773 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800774
775 Ok(VirtioDeviceStub {
776 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800777 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800778 })
779}
780
Jakub Staron1f828d72019-04-11 12:49:29 -0700781fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100782 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), socket)
783 .map_err(Error::BalloonDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800784
785 Ok(VirtioDeviceStub {
786 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800787 jail: simple_jail(&cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800788 })
789}
790
Michael Hoylea596a072020-11-10 19:32:45 -0800791fn create_tap_net_device(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800792 // Safe because we ensure that we get a unique handle to the fd.
793 let tap = unsafe {
Michael Hoylea596a072020-11-10 19:32:45 -0800794 Tap::from_raw_descriptor(
795 validate_raw_descriptor(tap_fd).map_err(Error::ValidateRawDescriptor)?,
796 )
797 .map_err(Error::CreateTapDevice)?
David Tolnay2b089fc2019-03-04 15:33:22 -0800798 };
799
Xiong Zhang773c7072020-03-20 10:39:55 +0800800 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
801 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700802 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800803 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
804 vq_pairs = 1;
805 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100806 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100807 let dev = virtio::Net::from(features, tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800808
809 Ok(VirtioDeviceStub {
810 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800811 jail: simple_jail(&cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800812 })
813}
814
815fn create_net_device(
816 cfg: &Config,
817 host_ip: Ipv4Addr,
818 netmask: Ipv4Addr,
819 mac_address: MacAddress,
820 mem: &GuestMemory,
821) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800822 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
823 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700824 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800825 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
826 vq_pairs = 1;
827 }
828
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100829 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800830 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100831 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
Christian Blichmann2f5d4b62021-03-10 18:08:08 +0100832 &cfg.vhost_net_device_path,
Will Deacon81d5adb2020-10-06 18:37:48 +0100833 features,
834 host_ip,
835 netmask,
836 mac_address,
837 mem,
838 )
839 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800840 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800841 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100842 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Xiong Zhang773c7072020-03-20 10:39:55 +0800843 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800844 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800845 };
846
847 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800848 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800849 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800850 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800851 };
852
853 Ok(VirtioDeviceStub {
854 dev,
855 jail: simple_jail(&cfg, policy)?,
856 })
857}
858
Keiichi Watanabe60686582021-03-12 04:53:51 +0900859fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
860 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
861 .map_err(Error::VhostUserNetDeviceNew)?;
862
863 Ok(VirtioDeviceStub {
864 dev: Box::new(dev),
865 // no sandbox here because virtqueue handling is exported to a different process.
866 jail: None,
867 })
868}
869
/// Creates the virtio GPU device and, when sandboxing is enabled, the jail it runs in.
///
/// Display backends are offered in the order Wayland (only if `wayland_socket_path` is given),
/// X, then the stub backend. `cfg.gpu_parameters` must be `Some`; this function unwraps it.
///
/// The jail gets a tmpfs root plus bind mounts for the host paths listed in the body. If a
/// shader cache path is configured, the `MESA_GLSL_CACHE_*` environment variables of the
/// current process are set while the jail is being prepared.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_socket: VmMemoryControlRequestSocket,
    gpu_sockets: Vec<virtio::resource_bridge::ResourceResponseSocket>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
    mem: &GuestMemory,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");

    let mut display_backends = Vec::with_capacity(3);
    if let Some(socket_path) = wayland_socket_path {
        // With a sandbox the socket is reached through its bind mount inside the jail.
        let visible_path = if cfg.sandbox {
            jailed_wayland_path.to_owned()
        } else {
            socket_path.to_owned()
        };
        display_backends.push(virtio::DisplayBackend::Wayland(Some(visible_path)));
    }
    display_backends.push(virtio::DisplayBackend::X(x_display));
    display_backends.push(virtio::DisplayBackend::Stub);

    let gpu_params = cfg.gpu_parameters.as_ref();

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().map_err(Error::CloneEvent)?,
        Some(gpu_device_socket),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        gpu_sockets,
        display_backends,
        gpu_params.unwrap(),
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
        mem.clone(),
    );

    let mut gpu_jail = simple_jail(cfg, "gpu_device")?;
    if let Some(jail) = gpu_jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        for sysfs_dir in &["/sys/dev/char", "/sys/devices"] {
            let sysfs_path = Path::new(sysfs_dir);
            jail.mount_bind(sysfs_path, sysfs_path, false)?;
        }

        let drm_dri_path = Path::new("/dev/dri");
        if drm_dri_path.exists() {
            jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
        }

        // Prepare GPU shader disk cache directory.
        if let Some(cache_dir) = gpu_params.and_then(|params| params.cache_path.as_ref()) {
            if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox {
                warn!("shader caching not yet supported on ARM with sandbox enabled");
                env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
            } else {
                env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                if let Some(cache_size) = gpu_params.and_then(|params| params.cache_size.as_ref())
                {
                    env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                }
                let shadercache_path = Path::new(cache_dir);
                jail.mount_bind(shadercache_path, shadercache_path, true)?;
            }
        }

        // If the ARM specific devices (mali0, pvr_sync) or the udmabuf driver exist on the
        // host, bind mount them in.
        for dev_node in &["/dev/mali0", "/dev/pvr_sync", "/dev/udmabuf"] {
            let node_path = Path::new(dev_node);
            if node_path.exists() {
                jail.mount_bind(node_path, node_path, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        let lib_dirs = &[
            "/usr/lib",
            "/usr/lib64",
            "/lib",
            "/lib64",
            "/usr/share/vulkan",
        ];
        for lib_dir in lib_dirs {
            let lib_path = Path::new(lib_dir);
            if lib_path.exists() {
                jail.mount_bind(lib_path, lib_path, false)?;
            }
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket. Don't bind mount the camera socket
        // since it seems to cause problems on ARCVM (b/180126126) + Mali. It's unclear if
        // camera team will opt for virtio-camera or continue using virtio-wl, so this should
        // be fine for now.
        if let Some(host_socket) = wayland_socket_path {
            jail.mount_bind(host_socket, jailed_wayland_path, true)?;
        }

        add_crosvm_user_to_jail(jail, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        let proc_flags =
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
        jail.mount(proc_path, proc_path, "proc", proc_flags)?;

        // To enable perfetto tracing, we need to give access to the perfetto service IPC
        // endpoints.
        let perfetto_path = Path::new("/run/perfetto");
        if perfetto_path.exists() {
            jail.mount_bind(perfetto_path, perfetto_path, true)?;
        }
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail: gpu_jail,
    })
}
1031
1032fn create_wayland_device(
1033 cfg: &Config,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001034 socket: VmMemoryControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -08001035 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
1036) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001037 let wayland_socket_dirs = cfg
1038 .wayland_socket_paths
1039 .iter()
1040 .map(|(_name, path)| path.parent())
1041 .collect::<Option<Vec<_>>>()
1042 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001043
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001044 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +01001045 let dev = virtio::Wl::new(
1046 features,
1047 cfg.wayland_socket_paths.clone(),
1048 socket,
1049 resource_bridge,
1050 )
1051 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001052
Matt Delco45caf912019-11-13 08:11:09 -08001053 let jail = match simple_jail(&cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -08001054 Some(mut jail) => {
1055 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
1056 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
1057 jail.mount_with_data(
1058 Path::new("none"),
1059 Path::new("/"),
1060 "tmpfs",
1061 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1062 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -08001063 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001064
1065 // Bind mount the wayland socket's directory into jail's root. This is necessary since
1066 // each new wayland context must open() the socket. If the wayland socket is ever
1067 // destroyed and remade in the same host directory, new connections will be possible
1068 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001069 for dir in &wayland_socket_dirs {
1070 jail.mount_bind(dir, dir, true)?;
1071 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001072 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
1073
1074 Some(jail)
1075 }
1076 None => None,
1077 };
1078
1079 Ok(VirtioDeviceStub {
1080 dev: Box::new(dev),
1081 jail,
1082 })
1083}
1084
/// Creates a virtio video device of kind `typ` (decoder or encoder) and, when sandboxing is
/// enabled, its jail.
///
/// The jail user is registered under "video-decoder" or "video-encoder" according to `typ`, and
/// the host paths listed in the body are bind mounted into a tmpfs root. `resource_bridge` is
/// passed on to the device.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: virtio::resource_bridge::ResourceRequestSocket,
) -> DeviceResult {
    let mut video_jail = simple_jail(cfg, "video_device")?;
    if let Some(jail) = video_jail.as_mut() {
        let jail_user = match typ {
            devices::virtio::VideoDeviceType::Decoder => "video-decoder",
            devices::virtio::VideoDeviceType::Encoder => "video-encoder",
        };
        add_crosvm_user_to_jail(jail, jail_user)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        let render_node = Path::new("/dev/dri/renderD128");
        jail.mount_bind(render_node, render_node, false)?;

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Device nodes used by libdrm through minigbm in libvda on AMD devices
            // ("/sys/dev/char", "/sys/devices"), followed by "/usr/lib64", which is required
            // for loading dri libraries loaded by minigbm on AMD devices.
            for host_dir in &["/sys/dev/char", "/sys/devices", "/usr/lib64"] {
                let host_path = Path::new(host_dir);
                jail.mount_bind(host_path, host_path, false)?;
            }
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        jail.mount_bind(urandom, urandom, false)?;
        let system_bus_socket = Path::new("/run/dbus/system_bus_socket");
        jail.mount_bind(system_bus_socket, system_bus_socket, true)?;
    }

    let video = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail: video_jail,
    })
}
1148
/// Creates a video device of kind `typ` and appends it to `devs`.
///
/// A resource bridge socket pair is created first: one end is pushed onto `resource_bridges`,
/// the other end is given to the new video device. The bridge end stays in `resource_bridges`
/// even if creating the device then fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    resource_bridges: &mut Vec<virtio::resource_bridge::ResourceResponseSocket>,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let bridge = virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
    let (video_socket, gpu_socket) = bridge;
    resource_bridges.push(gpu_socket);

    let video_dev = create_video_device(cfg, typ, video_socket)?;
    devs.push(video_dev);
    Ok(())
}
1162
David Tolnay2b089fc2019-03-04 15:33:22 -08001163fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001164 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann2f5d4b62021-03-10 18:08:08 +01001165 let dev = virtio::vhost::Vsock::new(&cfg.vhost_vsock_device_path, features, cid, mem)
1166 .map_err(Error::VhostVsockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001167
1168 Ok(VirtioDeviceStub {
1169 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001170 jail: simple_jail(&cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001171 })
1172}
1173
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001174fn create_fs_device(
1175 cfg: &Config,
1176 uid_map: &str,
1177 gid_map: &str,
1178 src: &Path,
1179 tag: &str,
1180 fs_cfg: virtio::fs::passthrough::Config,
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001181 device_socket: FsMappingRequestSocket,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001182) -> DeviceResult {
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001183 let max_open_files = get_max_open_files()?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001184 let j = if cfg.sandbox {
1185 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1186 let config = SandboxConfig {
1187 limit_caps: false,
1188 uid_map: Some(uid_map),
1189 gid_map: Some(gid_map),
1190 log_failures: cfg.seccomp_log_failures,
1191 seccomp_policy: &seccomp_policy,
1192 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001193 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1194 // We want bind mounts from the parent namespaces to propagate into the fs device's
1195 // namespace.
1196 jail.set_remount_mode(libc::MS_SLAVE);
1197
1198 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001199 } else {
1200 create_base_minijail(src, Some(max_open_files), None)?
1201 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001202
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001203 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001204 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1205 // when num_queues > 1.
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001206 let dev =
1207 virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_socket).map_err(Error::FsDeviceNew)?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001208
1209 Ok(VirtioDeviceStub {
1210 dev: Box::new(dev),
1211 jail: Some(j),
1212 })
1213}
1214
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001215fn create_9p_device(
1216 cfg: &Config,
1217 uid_map: &str,
1218 gid_map: &str,
1219 src: &Path,
1220 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001221 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001222) -> DeviceResult {
1223 let max_open_files = get_max_open_files()?;
1224 let (jail, root) = if cfg.sandbox {
1225 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1226 let config = SandboxConfig {
1227 limit_caps: false,
1228 uid_map: Some(uid_map),
1229 gid_map: Some(gid_map),
1230 log_failures: cfg.seccomp_log_failures,
1231 seccomp_policy: &seccomp_policy,
1232 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001233
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001234 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1235 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1236 // namespace.
1237 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001238
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001239 // The shared directory becomes the root of the device's file system.
1240 let root = Path::new("/");
1241 (Some(jail), root)
1242 } else {
1243 // There's no mount namespace so we tell the server to treat the source directory as the
1244 // root.
1245 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001246 };
1247
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001248 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001249 p9_cfg.root = root.into();
1250 let dev = virtio::P9::new(features, tag, p9_cfg).map_err(Error::P9DeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001251
1252 Ok(VirtioDeviceStub {
1253 dev: Box::new(dev),
1254 jail,
1255 })
1256}
1257
Jakub Starona3411ea2019-04-24 10:55:25 -07001258fn create_pmem_device(
1259 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001260 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001261 resources: &mut SystemAllocator,
1262 disk: &DiskOption,
1263 index: usize,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001264 pmem_device_socket: VmMsyncRequestSocket,
Jakub Starona3411ea2019-04-24 10:55:25 -07001265) -> DeviceResult {
Mike Gerowec618a52021-02-26 20:57:14 +00001266 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
1267 let fd: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
1268 // Safe because we will validate |raw_fd|.
1269 unsafe { File::from_raw_descriptor(raw_descriptor_from_path(&disk.path)?) }
1270 } else {
1271 OpenOptions::new()
1272 .read(true)
1273 .write(!disk.read_only)
1274 .open(&disk.path)
1275 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
1276 };
Jakub Starona3411ea2019-04-24 10:55:25 -07001277
Iliyan Malcheved149862020-04-17 23:57:47 +00001278 let arena_size = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001279 let metadata =
1280 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001281 let disk_len = metadata.len();
1282 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1283 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1284 // we just align the size of the file to 2 MiB then access beyond the last page of the
1285 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1286 // padding up to 2 MiB.
1287 let alignment = 2 * 1024 * 1024;
1288 let align_adjust = if disk_len % alignment != 0 {
1289 alignment - (disk_len % alignment)
1290 } else {
1291 0
1292 };
Iliyan Malcheved149862020-04-17 23:57:47 +00001293 disk_len
1294 .checked_add(align_adjust)
1295 .ok_or(Error::PmemDeviceImageTooBig)?
Jakub Starona3411ea2019-04-24 10:55:25 -07001296 };
1297
1298 let protection = {
1299 if disk.read_only {
1300 Protection::read()
1301 } else {
1302 Protection::read_write()
1303 }
1304 };
1305
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001306 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001307 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001308 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001309
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001310 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1311 arena
Iliyan Malcheved149862020-04-17 23:57:47 +00001312 .add_fd_offset_protection(0, arena_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001313 .map_err(Error::ReservePmemMemory)?;
1314 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001315 };
1316
1317 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001318 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001319 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001320 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001321 Alloc::PmemDevice(index),
1322 format!("pmem_disk_image_{}", index),
1323 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1324 128 * 1024 * 1024, /* 128 MiB */
1325 )
1326 .map_err(Error::AllocatePmemDeviceAddress)?;
1327
Daniel Verkampe1980a92020-02-07 11:00:55 -08001328 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001329 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001330 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001331 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001332 /* read_only = */ disk.read_only,
1333 /* log_dirty_pages = */ false,
1334 )
1335 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001336
Daniel Verkampe1980a92020-02-07 11:00:55 -08001337 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001338 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001339 fd,
1340 GuestAddress(mapping_address),
1341 slot,
1342 arena_size,
1343 Some(pmem_device_socket),
1344 )
1345 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001346
1347 Ok(VirtioDeviceStub {
1348 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Matt Delco45caf912019-11-13 08:11:09 -08001349 jail: simple_jail(&cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001350 })
1351}
1352
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001353fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001354 let mut keep_rds = Vec::new();
Michael Hoyle685316f2020-09-16 15:29:20 -07001355 let evt = Event::new().map_err(Error::CreateEvent)?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001356 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001357 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001358 .map_err(Error::CreateConsole)?;
1359
Nicholas Verne71e73d82020-07-08 17:19:55 +10001360 let jail = match simple_jail(&cfg, "serial")? {
1361 Some(mut jail) => {
1362 // Create a tmpfs in the device's root directory so that we can bind mount the
1363 // log socket directory into it.
1364 // The size=67108864 is size=64*1024*1024 or size=64MB.
1365 jail.mount_with_data(
1366 Path::new("none"),
1367 Path::new("/"),
1368 "tmpfs",
1369 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1370 "size=67108864",
1371 )?;
1372 add_crosvm_user_to_jail(&mut jail, "serial")?;
1373 let res = param.add_bind_mounts(&mut jail);
1374 if res.is_err() {
1375 error!("failed to add bind mounts for console device");
1376 }
1377 Some(jail)
1378 }
1379 None => None,
1380 };
1381
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001382 Ok(VirtioDeviceStub {
1383 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001384 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001385 })
1386}
1387
// Creates every virtio device requested by `cfg` and returns them, each paired with its
// optional jail, in creation order.
//
// Devices are appended to the result in the order the sections below appear: consoles, block
// devices, vhost-user block, pmem, rng, (tpm), input devices, balloon, network, wayland,
// (video), (gpu), vsock, vhost-user fs and finally shared directories. Sections in parentheses
// are only compiled when the corresponding cargo feature is enabled.
//
// Socket vectors (`disk_device_sockets`, `pmem_device_sockets`, `fs_device_sockets`) are
// consumed from the front, one entry per device that needs one. `Vec::remove(0)` panics on an
// empty vector, so the caller must supply at least as many sockets as there are matching
// devices in `cfg`.
//
// Any failure while creating a device aborts the whole function with that error.
//
// gpu_device_socket is not used when GPU support is disabled.
#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
fn create_virtio_devices(
    cfg: &Config,
    mem: &GuestMemory,
    vm: &mut impl Vm,
    resources: &mut SystemAllocator,
    _exit_evt: &Event,
    wayland_device_socket: VmMemoryControlRequestSocket,
    gpu_device_socket: VmMemoryControlRequestSocket,
    balloon_device_socket: BalloonControlResponseSocket,
    disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
    pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
    fs_device_sockets: &mut Vec<FsMappingRequestSocket>,
) -> DeviceResult<Vec<VirtioDeviceStub>> {
    let mut devs = Vec::new();

    // Only serial ports configured with virtio-console hardware become virtio devices here.
    for (_, param) in cfg
        .serial_parameters
        .iter()
        .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
    {
        let dev = create_console_device(cfg, param)?;
        devs.push(dev);
    }

    // Each block device takes the next control socket from the front of the vector.
    for disk in &cfg.disks {
        let disk_device_socket = disk_device_sockets.remove(0);
        devs.push(create_block_device(cfg, disk, disk_device_socket)?);
    }

    for blk in &cfg.vhost_user_blk {
        devs.push(create_vhost_user_block_device(cfg, blk)?);
    }

    // Each pmem device takes the next msync socket from the front of the vector; `index` is
    // the device's position in `cfg.pmem_devices`.
    for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
        let pmem_device_socket = pmem_device_sockets.remove(0);
        devs.push(create_pmem_device(
            cfg,
            vm,
            resources,
            pmem_disk,
            index,
            pmem_device_socket,
        )?);
    }

    // The rng device is created unconditionally.
    devs.push(create_rng_device(cfg)?);

    #[cfg(feature = "tpm")]
    {
        if cfg.software_tpm {
            devs.push(create_tpm_device(cfg)?);
        }
    }

    // Optional input devices, each created only when its configuration entry is present.
    if let Some(single_touch_spec) = &cfg.virtio_single_touch {
        devs.push(create_single_touch_device(cfg, single_touch_spec)?);
    }

    if let Some(multi_touch_spec) = &cfg.virtio_multi_touch {
        devs.push(create_multi_touch_device(cfg, multi_touch_spec)?);
    }

    if let Some(trackpad_spec) = &cfg.virtio_trackpad {
        devs.push(create_trackpad_device(cfg, trackpad_spec)?);
    }

    if let Some(mouse_socket) = &cfg.virtio_mouse {
        devs.push(create_mouse_device(cfg, mouse_socket)?);
    }

    if let Some(keyboard_socket) = &cfg.virtio_keyboard {
        devs.push(create_keyboard_device(cfg, keyboard_socket)?);
    }

    if let Some(switches_socket) = &cfg.virtio_switches {
        devs.push(create_switches_device(cfg, switches_socket)?);
    }

    for dev_path in &cfg.virtio_input_evdevs {
        devs.push(create_vinput_device(cfg, dev_path)?);
    }

    // The balloon device is created unconditionally.
    devs.push(create_balloon_device(cfg, balloon_device_socket)?);

    // We checked above that if the IP is defined, then the netmask is, too.
    for tap_fd in &cfg.tap_fd {
        devs.push(create_tap_net_device(cfg, *tap_fd)?);
    }

    // A net device built from host_ip/netmask/mac_address is mutually exclusive with
    // vhost-user net devices; requesting both is rejected.
    if let (Some(host_ip), Some(netmask), Some(mac_address)) =
        (cfg.host_ip, cfg.netmask, cfg.mac_address)
    {
        if !cfg.vhost_user_net.is_empty() {
            return Err(Error::VhostUserNetWithNetArgs);
        }
        devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
    }

    for net in &cfg.vhost_user_net {
        devs.push(create_vhost_user_net_device(cfg, net)?);
    }

    // Response ends of the resource bridges created below; they are handed to the GPU device
    // at the end. Without the "gpu" feature nothing is ever pushed, hence the `unused_mut`
    // allowance.
    #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
    let mut resource_bridges = Vec::<virtio::resource_bridge::ResourceResponseSocket>::new();

    if !cfg.wayland_socket_paths.is_empty() {
        #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
        let mut wl_resource_bridge = None::<virtio::resource_bridge::ResourceRequestSocket>;

        // The wayland device only gets a resource bridge when a GPU device will exist to
        // answer on the other end.
        #[cfg(feature = "gpu")]
        {
            if cfg.gpu_parameters.is_some() {
                let (wl_socket, gpu_socket) =
                    virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
                resource_bridges.push(gpu_socket);
                wl_resource_bridge = Some(wl_socket);
            }
        }

        devs.push(create_wayland_device(
            cfg,
            wayland_device_socket,
            wl_resource_bridge,
        )?);
    }

    #[cfg(feature = "video-decoder")]
    {
        if cfg.video_dec {
            register_video_device(
                &mut devs,
                &mut resource_bridges,
                cfg,
                devices::virtio::VideoDeviceType::Decoder,
            )?;
        }
    }

    #[cfg(feature = "video-encoder")]
    {
        if cfg.video_enc {
            register_video_device(
                &mut devs,
                &mut resource_bridges,
                cfg,
                devices::virtio::VideoDeviceType::Encoder,
            )?;
        }
    }

    // The GPU device must be created after the wayland and video sections because it takes
    // ownership of every resource bridge collected so far.
    #[cfg(feature = "gpu")]
    {
        if let Some(gpu_parameters) = &cfg.gpu_parameters {
            let mut event_devices = Vec::new();
            if cfg.display_window_mouse {
                // One end of the socket pair backs a virtio multi-touch device for the guest,
                // the other is given to the GPU device as a touchscreen event device.
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().map_err(Error::CreateSocket)?;
                // Use the configured multi-touch size if there is one, otherwise fall back to
                // the display dimensions.
                let (multi_touch_width, multi_touch_height) = cfg
                    .virtio_multi_touch
                    .as_ref()
                    .map(|multi_touch_spec| multi_touch_spec.get_size())
                    .unwrap_or((gpu_parameters.display_width, gpu_parameters.display_height));
                let dev = virtio::new_multi_touch(
                    virtio_dev_socket,
                    multi_touch_width,
                    multi_touch_height,
                    virtio::base_features(cfg.protected_vm),
                )
                .map_err(Error::InputDeviceNew)?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(&cfg, "input_device")?,
                });
                event_devices.push(EventDevice::touchscreen(event_device_socket));
            }
            if cfg.display_window_keyboard {
                // Same arrangement as above, for a keyboard.
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().map_err(Error::CreateSocket)?;
                let dev = virtio::new_keyboard(
                    virtio_dev_socket,
                    virtio::base_features(cfg.protected_vm),
                )
                .map_err(Error::InputDeviceNew)?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(&cfg, "input_device")?,
                });
                event_devices.push(EventDevice::keyboard(event_device_socket));
            }
            devs.push(create_gpu_device(
                cfg,
                _exit_evt,
                gpu_device_socket,
                resource_bridges,
                // Use the unnamed socket for GPU display screens.
                cfg.wayland_socket_paths.get(""),
                cfg.x_display.clone(),
                event_devices,
                map_request,
                mem,
            )?);
        }
    }

    if let Some(cid) = cfg.cid {
        devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
    }

    for vhost_user_fs in &cfg.vhost_user_fs {
        devs.push(create_vhost_user_fs_device(cfg, &vhost_user_fs)?);
    }

    for shared_dir in &cfg.shared_dirs {
        let SharedDir {
            src,
            tag,
            kind,
            uid_map,
            gid_map,
            fs_cfg,
            p9_cfg,
        } = shared_dir;

        let dev = match kind {
            SharedDirKind::FS => {
                // Only virtio-fs shares consume a mapping request socket.
                let device_socket = fs_device_sockets.remove(0);
                create_fs_device(
                    cfg,
                    uid_map,
                    gid_map,
                    src,
                    tag,
                    fs_cfg.clone(),
                    device_socket,
                )?
            }
            SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
        };
        devs.push(dev);
    }

    Ok(devs)
}
1634
// Creates all PCI devices for the VM and returns them paired with their optional jails.
//
// The returned list contains, in order:
//   1. every virtio device from `create_virtio_devices`, wrapped in a `VirtioPciDevice`;
//   2. one AC97 device per entry in `cfg.ac97_parameters` (only with the "audio" feature);
//   3. the xhci USB controller (always);
//   4. one VFIO pass-through device per path in `cfg.vfio`.
//
// The host end of every control socket created here is appended to `control_sockets`; the
// device keeps the other end.
fn create_devices(
    cfg: &Config,
    mem: &GuestMemory,
    vm: &mut impl Vm,
    resources: &mut SystemAllocator,
    exit_evt: &Event,
    control_sockets: &mut Vec<TaggedControlSocket>,
    wayland_device_socket: VmMemoryControlRequestSocket,
    gpu_device_socket: VmMemoryControlRequestSocket,
    balloon_device_socket: BalloonControlResponseSocket,
    disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
    pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
    fs_device_sockets: &mut Vec<FsMappingRequestSocket>,
    usb_provider: HostBackendDeviceProvider,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
    let stubs = create_virtio_devices(
        &cfg,
        mem,
        vm,
        resources,
        exit_evt,
        wayland_device_socket,
        gpu_device_socket,
        balloon_device_socket,
        disk_device_sockets,
        pmem_device_sockets,
        map_request,
        fs_device_sockets,
    )?;

    let mut pci_devices = Vec::new();

    // Wrap each virtio device in a PCI transport. Every device gets its own IRQ request
    // socket pair: the host end is registered as a control socket, the device end goes to
    // the `VirtioPciDevice`.
    for stub in stubs {
        let (msi_host_socket, msi_device_socket) =
            msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
        control_sockets.push(TaggedControlSocket::VmIrq(msi_host_socket));
        let dev = VirtioPciDevice::new(mem.clone(), stub.dev, msi_device_socket)
            .map_err(Error::VirtioPciDev)?;
        let dev = Box::new(dev) as Box<dyn PciDevice>;
        pci_devices.push((dev, stub.jail));
    }

    // The jail policy for each AC97 device is chosen by the device itself.
    #[cfg(feature = "audio")]
    for ac97_param in &cfg.ac97_parameters {
        let dev = Ac97Dev::try_new(mem.clone(), ac97_param.clone()).map_err(Error::CreateAc97)?;
        let jail = simple_jail(&cfg, dev.minijail_policy())?;
        pci_devices.push((Box::new(dev), jail));
    }

    // Create xhci controller.
    let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
    pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?));

    if !cfg.vfio.is_empty() {
        // A single container is shared by all VFIO devices.
        let vfio_container = Arc::new(Mutex::new(
            VfioContainer::new().map_err(Error::CreateVfioDevice)?,
        ));

        for vfio_path in &cfg.vfio {
            // create MSI, MSI-X, and Mem request sockets for each vfio device
            let (vfio_host_socket_msi, vfio_device_socket_msi) =
                msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
            control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msi));

            let (vfio_host_socket_msix, vfio_device_socket_msix) =
                msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
            control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msix));

            let (vfio_host_socket_mem, vfio_device_socket_mem) =
                msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>()
                    .map_err(Error::CreateSocket)?;
            control_sockets.push(TaggedControlSocket::VmMemory(vfio_host_socket_mem));

            let vfiodevice = VfioDevice::new(vfio_path.as_path(), vm, mem, vfio_container.clone())
                .map_err(Error::CreateVfioDevice)?;
            let mut vfiopcidevice = Box::new(VfioPciDevice::new(
                vfiodevice,
                vfio_device_socket_msi,
                vfio_device_socket_msix,
                vfio_device_socket_mem,
            ));
            // early reservation for pass-through PCI devices.
            // A failed reservation is only logged; the device is still added below.
            if vfiopcidevice.allocate_address(resources).is_err() {
                warn!(
                    "address reservation failed for vfio {}",
                    vfiopcidevice.debug_label()
                );
            }
            pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device")?));
        }
    }

    Ok(pci_devices)
}
1730
// User/group id pair returned by `add_crosvm_user_to_jail` after it has applied them to a
// jail. The fields are allowed to be unread when the "tpm" feature is disabled.
#[derive(Copy, Clone)]
#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
struct Ids {
    // User id the jailed process was configured to run as.
    uid: uid_t,
    // Group id the jailed process was configured to run as.
    gid: gid_t,
}
1737
David Tolnay48c48292019-03-01 16:54:25 -08001738// Set the uid/gid for the jailed process and give a basic id map. This is
1739// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001740fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001741 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1742
1743 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1744 Ok(u) => u,
1745 Err(e) => {
1746 warn!("falling back to current user id for {}: {}", feature, e);
1747 geteuid()
1748 }
1749 };
1750
1751 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1752 Ok(u) => u,
1753 Err(e) => {
1754 warn!("falling back to current group id for {}: {}", feature, e);
1755 getegid()
1756 }
1757 };
1758
1759 jail.change_uid(crosvm_uid);
1760 jail.change_gid(crosvm_gid);
1761 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1762 .map_err(Error::SettingUidMap)?;
1763 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1764 .map_err(Error::SettingGidMap)?;
1765
David Tolnay41a6f842019-03-01 16:18:44 -08001766 Ok(Ids {
1767 uid: crosvm_uid,
1768 gid: crosvm_gid,
1769 })
David Tolnay48c48292019-03-01 16:54:25 -08001770}
1771
Michael Hoylea596a072020-11-10 19:32:45 -08001772fn raw_descriptor_from_path(path: &Path) -> Result<RawDescriptor> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001773 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001774 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001775 }
Michael Hoylea596a072020-11-10 19:32:45 -08001776 let raw_descriptor = path
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001777 .file_name()
1778 .and_then(|fd_osstr| fd_osstr.to_str())
1779 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1780 .ok_or(Error::InvalidFdPath)?;
Michael Hoylea596a072020-11-10 19:32:45 -08001781 validate_raw_descriptor(raw_descriptor).map_err(Error::ValidateRawDescriptor)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001782}
1783
// Conversion of a value into a connected `UnixStream`, consuming the value.
trait IntoUnixStream {
    // Consumes `self` and returns the resulting stream, or an error if one could not be
    // obtained.
    fn into_unix_stream(self) -> Result<UnixStream>;
}
1787
1788impl<'a> IntoUnixStream for &'a Path {
1789 fn into_unix_stream(self) -> Result<UnixStream> {
1790 if self.parent() == Some(Path::new("/proc/self/fd")) {
1791 // Safe because we will validate |raw_fd|.
Michael Hoylea596a072020-11-10 19:32:45 -08001792 unsafe { Ok(UnixStream::from_raw_fd(raw_descriptor_from_path(self)?)) }
Zach Reizner65b98f12019-11-22 17:34:58 -08001793 } else {
1794 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1795 }
1796 }
1797}
1798impl<'a> IntoUnixStream for &'a PathBuf {
1799 fn into_unix_stream(self) -> Result<UnixStream> {
1800 self.as_path().into_unix_stream()
1801 }
1802}
1803
// An existing stream is returned unchanged; this conversion never fails.
impl IntoUnixStream for UnixStream {
    fn into_unix_stream(self) -> Result<UnixStream> {
        Ok(self)
    }
}
1809
Steven Richmanf32d0b42020-06-20 21:45:32 -07001810fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1811 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001812 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001813 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07001814 // Our signal handler does nothing and is trivially async signal safe.
1815 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1816 .map_err(Error::RegisterSignalHandler)?;
1817 }
1818 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1819 } else {
1820 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001821 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001822 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001823 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001824 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001825 .map_err(Error::RegisterSignalHandler)?;
1826 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001827 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001828 Ok(())
1829}
1830
Steven Richmanf32d0b42020-06-20 21:45:32 -07001831// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001832fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001833 cpu_id: usize,
1834 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001835 vm: impl VmArch,
1836 irq_chip: &mut impl IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001837 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001838 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001839 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001840 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001841 has_bios: bool,
1842 use_hypervisor_signals: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001843) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001844where
Zach Reizner2c770e62020-09-30 16:49:59 -07001845 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001846{
Zach Reizner304e7312020-09-29 16:00:24 -07001847 let mut vcpu = match vcpu {
1848 Some(v) => v,
1849 None => {
1850 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1851 // the vcpu thread.
1852 match vm
1853 .create_vcpu(cpu_id)
1854 .map_err(Error::CreateVcpu)?
1855 .downcast::<V>()
1856 {
1857 Ok(v) => *v,
1858 Err(_) => panic!("VM created wrong type of VCPU"),
1859 }
1860 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001861 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001862
Steven Richmanf32d0b42020-06-20 21:45:32 -07001863 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001864 .add_vcpu(cpu_id, &vcpu)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001865 .map_err(Error::AddIrqChipVcpu)?;
1866
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001867 if !vcpu_affinity.is_empty() {
1868 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1869 error!("Failed to set CPU affinity: {}", e);
1870 }
1871 }
1872
Steven Richmanf32d0b42020-06-20 21:45:32 -07001873 Arch::configure_vcpu(
1874 vm.get_memory(),
1875 vm.get_hypervisor(),
1876 irq_chip,
1877 &mut vcpu,
1878 cpu_id,
1879 vcpu_count,
1880 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001881 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001882 )
1883 .map_err(Error::ConfigureVcpu)?;
1884
Steven Richmanf32d0b42020-06-20 21:45:32 -07001885 #[cfg(feature = "chromeos")]
1886 if let Err(e) = base::sched::enable_core_scheduling() {
1887 error!("Failed to enable core scheduling: {}", e);
1888 }
1889
Kansho Nishidaab205af2020-08-13 18:17:50 +09001890 if run_rt {
1891 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1892 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1893 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1894 {
1895 warn!("Failed to set vcpu to real time: {}", e);
1896 }
1897 }
1898
Steven Richmanf32d0b42020-06-20 21:45:32 -07001899 if use_hypervisor_signals {
1900 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1901 v.retain(|&x| x != SIGRTMIN() + 0);
1902 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1903 }
1904
Zach Reizner2c770e62020-09-30 16:49:59 -07001905 let vcpu_run_handle = vcpu
1906 .take_run_handle(Some(SIGRTMIN() + 0))
1907 .map_err(Error::RunnableVcpu)?;
1908
1909 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001910}
1911
// Executes one GDB debug request `d` against `vcpu` and reports the outcome on
// `reply_channel`.
//
// Read requests answer with the data that was read; every other request answers with
// `VcpuDebugStatus::CommandComplete`. A failed memory read is answered with an empty memory
// region instead of an error. Any other failing command returns `Error::HandleDebugCommand`
// without sending a reply, and a failed send returns `Error::SendDebugStatus`.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_channel: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // First run the command and work out what to report back; the reply itself is sent from a
    // single place below.
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu).map_err(Error::HandleDebugCommand)?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => VcpuDebugStatus::MemoryRegion(
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
        ),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
    };

    let reply = VcpuDebugStatusMessage {
        cpu: cpu_id,
        msg: status,
    };
    reply_channel
        .send(reply)
        .map_err(|e| Error::SendDebugStatus(Box::new(e)))
}
1987
// Spawns the thread named `crosvm_vcpu{cpu_id}` that drives one vcpu, and returns its join
// handle (or `Error::SpawnVcpu` if the thread could not be spawned).
//
// Inside the thread:
//   * The vcpu is prepared with `runnable_vcpu`, then the thread waits on `start_barrier`.
//   * The main loop alternates between (a) draining `VcpuControl` messages from
//     `from_main_channel` whenever the vcpu was interrupted by a signal or is not in
//     `VmRunMode::Running`, and (b) running the vcpu and servicing its exit reason against
//     `io_bus`, `mmio_bus` and `irq_chip`.
//   * `exit_evt` is wrapped in a `ScopedEvent` so it is triggered on every path out of the
//     thread.
//
// When the gdb feature is compiled in and `to_gdb_channel` is `Some`, the vcpu starts in
// `VmRunMode::Breakpoint` and debug messages are forwarded to `handle_debug_msg`.
fn run_vcpu<V>(
    cpu_id: usize,
    vcpu: Option<V>,
    vm: impl VmArch + 'static,
    mut irq_chip: impl IrqChipArch + 'static,
    vcpu_count: usize,
    run_rt: bool,
    vcpu_affinity: Vec<usize>,
    no_smt: bool,
    start_barrier: Arc<Barrier>,
    has_bios: bool,
    io_bus: devices::Bus,
    mmio_bus: devices::Bus,
    exit_evt: Event,
    requires_pvclock_ctrl: bool,
    from_main_channel: mpsc::Receiver<VcpuControl>,
    use_hypervisor_signals: bool,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_channel: Option<
        mpsc::Sender<VcpuDebugStatusMessage>,
    >,
) -> Result<JoinHandle<()>>
where
    V: VcpuArch + 'static,
{
    thread::Builder::new()
        .name(format!("crosvm_vcpu{}", cpu_id))
        .spawn(move || {
            // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
            // implementation accomplishes that.
            let _scoped_exit_evt = ScopedEvent::from(exit_evt);

            // `vm` is moved into `runnable_vcpu` below, so the guest memory needed for debug
            // requests is cloned out of it first.
            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
            let guest_mem = vm.get_memory().clone();
            let runnable_vcpu = runnable_vcpu(
                cpu_id,
                vcpu,
                vm,
                &mut irq_chip,
                vcpu_count,
                run_rt,
                vcpu_affinity,
                no_smt,
                has_bios,
                use_hypervisor_signals,
            );

            // The barrier is waited on before the setup result is inspected, so this thread
            // reaches the barrier even when vcpu setup failed.
            start_barrier.wait();

            let (vcpu, vcpu_run_handle) = match runnable_vcpu {
                Ok(v) => v,
                Err(e) => {
                    error!("failed to start vcpu {}: {}", cpu_id, e);
                    return;
                }
            };

            let mut run_mode = VmRunMode::Running;
            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
            if to_gdb_channel.is_some() {
                // Wait until a GDB client attaches
                run_mode = VmRunMode::Breakpoint;
            }

            let mut interrupted_by_signal = false;

            'vcpu_loop: loop {
                // Start by checking for messages to process and the run state of the CPU.
                // An extra check here for Running so there isn't a need to call recv unless a
                // message is likely to be ready because a signal was sent.
                if interrupted_by_signal || run_mode != VmRunMode::Running {
                    'state_loop: loop {
                        // Tries to get a pending message without blocking first.
                        let msg = match from_main_channel.try_recv() {
                            Ok(m) => m,
                            Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
                                // If the VM is running and no message is pending, the state won't
                                // change.
                                break 'state_loop;
                            }
                            Err(mpsc::TryRecvError::Empty) => {
                                // If the VM is not running, wait until a message is ready.
                                match from_main_channel.recv() {
                                    Ok(m) => m,
                                    Err(mpsc::RecvError) => {
                                        error!("Failed to read from main channel in vcpu");
                                        break 'vcpu_loop;
                                    }
                                }
                            }
                            Err(mpsc::TryRecvError::Disconnected) => {
                                error!("Failed to read from main channel in vcpu");
                                break 'vcpu_loop;
                            }
                        };

                        // Collect all pending messages.
                        let mut messages = vec![msg];
                        messages.append(&mut from_main_channel.try_iter().collect());

                        for msg in messages {
                            match msg {
                                VcpuControl::RunState(new_mode) => {
                                    run_mode = new_mode;
                                    match run_mode {
                                        // NOTE(review): breaking out here also abandons any
                                        // messages collected after this one in `messages`.
                                        VmRunMode::Running => break 'state_loop,
                                        VmRunMode::Suspending => {
                                            // On KVM implementations that use a paravirtualized
                                            // clock (e.g. x86), a flag must be set to indicate to
                                            // the guest kernel that a vCPU was suspended. The guest
                                            // kernel will use this flag to prevent the soft lockup
                                            // detection from triggering when this vCPU resumes,
                                            // which could happen days later in realtime.
                                            if requires_pvclock_ctrl {
                                                if let Err(e) = vcpu.pvclock_ctrl() {
                                                    error!(
                                                        "failed to tell hypervisor vcpu {} is suspending: {}",
                                                        cpu_id, e
                                                    );
                                                }
                                            }
                                        }
                                        // Nothing to do; the state loop keeps waiting for the
                                        // next message.
                                        VmRunMode::Breakpoint => {}
                                        VmRunMode::Exiting => break 'vcpu_loop,
                                    }
                                }
                                #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                                VcpuControl::Debug(d) => {
                                    match &to_gdb_channel {
                                        Some(ref ch) => {
                                            // A failing debug command is logged; the vcpu
                                            // thread keeps running.
                                            if let Err(e) = handle_debug_msg(
                                                cpu_id, &vcpu, &guest_mem, d, &ch,
                                            ) {
                                                error!("Failed to handle gdb message: {}", e);
                                            }
                                        },
                                        None => {
                                            error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }

                interrupted_by_signal = false;

                // Vcpus may have run a HLT instruction, which puts them into a state other than
                // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
                // until either the irqchip receives an interrupt for this vcpu, or until the main
                // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
                // implementations HLT instructions do not make it to crosvm, and thus this is a
                // no-op that always returns VcpuRunState::Runnable.
                match irq_chip.wait_until_runnable(&vcpu) {
                    Ok(VcpuRunState::Runnable) => {}
                    Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
                    // The error is only logged; the vcpu is still run below.
                    Err(e) => error!(
                        "error waiting for vcpu {} to become runnable: {}",
                        cpu_id, e
                    ),
                }

                if !interrupted_by_signal {
                    // Every unlabeled `break` inside this match leaves 'vcpu_loop and thereby
                    // ends the thread.
                    match vcpu.run(&vcpu_run_handle) {
                        Ok(VcpuExit::IoIn { port, mut size }) => {
                            let mut data = [0; 8];
                            // Oversized requests are logged and clamped to the buffer length.
                            if size > data.len() {
                                error!("unsupported IoIn size of {} bytes", size);
                                size = data.len();
                            }
                            io_bus.read(port as u64, &mut data[..size]);
                            if let Err(e) = vcpu.set_data(&data[..size]) {
                                error!("failed to set return data for IoIn: {}", e);
                            }
                        }
                        Ok(VcpuExit::IoOut {
                            port,
                            mut size,
                            data,
                        }) => {
                            // Oversized requests are logged and clamped to the buffer length.
                            if size > data.len() {
                                error!("unsupported IoOut size of {} bytes", size);
                                size = data.len();
                            }
                            io_bus.write(port as u64, &data[..size]);
                        }
                        Ok(VcpuExit::MmioRead { address, size }) => {
                            // NOTE(review): unlike IoIn, `size` is not clamped here, so a size
                            // above 8 would panic on the slice; presumably the hypervisor
                            // never reports one - confirm.
                            let mut data = [0; 8];
                            mmio_bus.read(address, &mut data[..size]);
                            // Setting data for mmio can not fail.
                            let _ = vcpu.set_data(&data[..size]);
                        }
                        Ok(VcpuExit::MmioWrite {
                            address,
                            size,
                            data,
                        }) => {
                            mmio_bus.write(address, &data[..size]);
                        }
                        Ok(VcpuExit::IoapicEoi { vector }) => {
                            if let Err(e) = irq_chip.broadcast_eoi(vector) {
                                error!(
                                    "failed to broadcast eoi {} on vcpu {}: {}",
                                    vector, cpu_id, e
                                );
                            }
                        }
                        Ok(VcpuExit::IrqWindowOpen) => {}
                        Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
                        Ok(VcpuExit::Shutdown) => break,
                        Ok(VcpuExit::FailEntry {
                            hardware_entry_failure_reason,
                        }) => {
                            error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
                            break;
                        }
                        Ok(VcpuExit::SystemEvent(_, _)) => break,
                        Ok(VcpuExit::Debug { .. }) => {
                            // Without the gdb feature this exit is silently ignored.
                            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                            {
                                let msg = VcpuDebugStatusMessage {
                                    cpu: cpu_id as usize,
                                    msg: VcpuDebugStatus::HitBreakPoint,
                                };
                                if let Some(ref ch) = to_gdb_channel {
                                    if let Err(e) = ch.send(msg) {
                                        error!("failed to notify breakpoint to GDB thread: {}", e);
                                        break;
                                    }
                                }
                                // Makes the next loop iteration enter the state loop and
                                // block until a new message arrives.
                                run_mode = VmRunMode::Breakpoint;
                            }
                        }
                        Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
                        Err(e) => match e.errno() {
                            libc::EINTR => interrupted_by_signal = true,
                            libc::EAGAIN => {}
                            _ => {
                                error!("vcpu hit unknown error: {}", e);
                                break;
                            }
                        },
                    }
                }

                if interrupted_by_signal {
                    if use_hypervisor_signals {
                        // Try to clear the signal that we use to kick VCPU if it is pending before
                        // attempting to handle pause requests.
                        if let Err(e) = clear_signal(SIGRTMIN() + 0) {
                            error!("failed to clear pending signal: {}", e);
                            break;
                        }
                    } else {
                        vcpu.set_immediate_exit(false);
                    }
                }

                // An injection failure is logged and the loop continues.
                if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
                    error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
                }
            }
        })
        .map_err(Error::SpawnVcpu)
}
2253
// Reads the whole contents of a file and converts its whitespace-separated fields into a Vec
// of i64s.
//
// Returns an error if the file cannot be opened or read, if its contents are not valid UTF-8
// (`InvalidData`), or if any of the fields fail to parse (`InvalidData`). A file that contains
// only whitespace yields an empty Vec.
fn file_fields_to_i64<P: AsRef<Path>>(path: P) -> io::Result<Vec<i64>> {
    // Read until EOF rather than issuing a single fixed-size read: a truncated read could cut
    // the last field short and make it parse successfully as a different, wrong number.
    let mut content = String::new();
    File::open(path)?.read_to_string(&mut content)?;

    // `split_whitespace` already skips leading and trailing whitespace.
    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<i64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
2273
2274// Reads the contents of a file and converts them into a u64, and if there
2275// are multiple fields it only returns the first one.
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002276fn file_to_i64<P: AsRef<Path>>(path: P, nth: usize) -> io::Result<i64> {
Charles William Dick0bf8a552019-10-29 15:36:01 +09002277 file_fields_to_i64(path)?
Sonny Raod5f66082019-04-24 12:24:38 -07002278 .into_iter()
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002279 .nth(nth)
Sonny Raod5f66082019-04-24 12:24:38 -07002280 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002281}
2282
Christian Blichmann33d56772021-03-04 19:03:54 +01002283fn create_kvm(device_path: &Path, mem: GuestMemory) -> base::Result<KvmVm> {
2284 let kvm = Kvm::new_with_path(device_path)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002285 let vm = KvmVm::new(&kvm, mem)?;
2286 Ok(vm)
2287}
2288
2289fn create_kvm_kernel_irq_chip(
2290 vm: &KvmVm,
2291 vcpu_count: usize,
2292 _ioapic_device_socket: VmIrqRequestSocket,
Zach Reizner304e7312020-09-29 16:00:24 -07002293) -> base::Result<impl IrqChipArch> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002294 let irq_chip = KvmKernelIrqChip::new(vm.try_clone()?, vcpu_count)?;
2295 Ok(irq_chip)
2296}
2297
2298#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2299fn create_kvm_split_irq_chip(
2300 vm: &KvmVm,
2301 vcpu_count: usize,
2302 ioapic_device_socket: VmIrqRequestSocket,
Zach Reizner304e7312020-09-29 16:00:24 -07002303) -> base::Result<impl IrqChipArch> {
Tomasz Jeznacheb1114c2021-02-26 20:33:11 -08002304 let irq_chip =
2305 KvmSplitIrqChip::new(vm.try_clone()?, vcpu_count, ioapic_device_socket, Some(120))?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002306 Ok(irq_chip)
2307}
2308
Dylan Reid059a1882018-07-23 17:58:09 -07002309pub fn run_config(cfg: Config) -> Result<()> {
Christian Blichmann33d56772021-03-04 19:03:54 +01002310 let kvm_device_path = cfg.kvm_device_path.clone();
2311 let create_kvm_with_path = |mem| create_kvm(&kvm_device_path, mem);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002312 if cfg.split_irqchip {
2313 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
2314 {
2315 unimplemented!("KVM split irqchip mode only supported on x86 processors")
2316 }
2317
2318 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2319 {
Christian Blichmann33d56772021-03-04 19:03:54 +01002320 run_vm::<_, KvmVcpu, _, _, _>(cfg, create_kvm_with_path, create_kvm_split_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07002321 }
2322 } else {
Christian Blichmann33d56772021-03-04 19:03:54 +01002323 run_vm::<_, KvmVcpu, _, _, _>(cfg, create_kvm_with_path, create_kvm_kernel_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07002324 }
2325}
2326
Zach Reizner304e7312020-09-29 16:00:24 -07002327fn run_vm<V, Vcpu, I, FV, FI>(cfg: Config, create_vm: FV, create_irq_chip: FI) -> Result<()>
Steven Richmanf32d0b42020-06-20 21:45:32 -07002328where
2329 V: VmArch + 'static,
Zach Reizner304e7312020-09-29 16:00:24 -07002330 Vcpu: VcpuArch + 'static,
2331 I: IrqChipArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002332 FV: FnOnce(GuestMemory) -> base::Result<V>,
2333 FI: FnOnce(
2334 &V,
2335 usize, // vcpu_count
2336 VmIrqRequestSocket, // ioapic_device_socket
2337 ) -> base::Result<I>,
2338{
Lepton Wu9105e9f2019-03-14 11:38:31 -07002339 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07002340 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2341 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2342 // access to those files will not be possible.
2343 info!("crosvm entering multiprocess mode");
2344 }
2345
Jingkui Wang100e6e42019-03-08 20:41:57 -08002346 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07002347 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002348 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2349 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2350 // quickly.
2351 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
2352
David Tolnay2b089fc2019-03-04 15:33:22 -08002353 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
2354 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002355 } else {
2356 None
2357 };
2358
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002359 let vm_image = match cfg.executable_path {
2360 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
2361 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
2362 ),
2363 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
2364 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
2365 ),
2366 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2367 };
2368
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002369 let mut control_sockets = Vec::new();
2370 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2371 let gdb_socket = if let Some(port) = cfg.gdb {
2372 // GDB needs a control socket to interrupt vcpus.
2373 let (gdb_host_socket, gdb_control_socket) =
2374 msg_socket::pair::<VmResponse, VmRequest>().map_err(Error::CreateSocket)?;
2375 control_sockets.push(TaggedControlSocket::Vm(gdb_host_socket));
2376 Some((port, gdb_control_socket))
2377 } else {
2378 None
2379 };
2380
Dylan Reid059a1882018-07-23 17:58:09 -07002381 let components = VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002382 memory_size: cfg
2383 .memory
2384 .unwrap_or(256)
2385 .checked_mul(1024 * 1024)
2386 .ok_or(Error::MemoryTooLarge)?,
Dylan Reid059a1882018-07-23 17:58:09 -07002387 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002388 vcpu_affinity: cfg.vcpu_affinity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002389 no_smt: cfg.no_smt,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002390 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002391 android_fstab: cfg
2392 .android_fstab
2393 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08002394 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002395 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002396 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002397 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002398 extra_kernel_params: cfg.params.clone(),
2399 wayland_dmabuf: cfg.wayland_dmabuf,
Tomasz Jeznach42644642020-05-20 23:27:59 -07002400 acpi_sdts: cfg
2401 .acpi_tables
2402 .iter()
2403 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
2404 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002405 rt_cpus: cfg.rt_cpus.clone(),
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002406 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002407 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2408 gdb: gdb_socket,
Dylan Reid059a1882018-07-23 17:58:09 -07002409 };
2410
Zach Reiznera60744b2019-02-13 17:33:32 -08002411 let control_server_socket = match &cfg.socket_path {
2412 Some(path) => Some(UnlinkUnixSeqpacketListener(
2413 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
2414 )),
2415 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002416 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002417
Zach Reizner55a9e502018-10-03 10:22:32 -07002418 let (wayland_host_socket, wayland_device_socket) =
Gurchetan Singh53edb812019-05-22 08:57:16 -07002419 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
2420 control_sockets.push(TaggedControlSocket::VmMemory(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07002421 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07002422 let (balloon_host_socket, balloon_device_socket) =
Charles William Dick664cc3c2020-01-10 14:31:52 +09002423 msg_socket::pair::<BalloonControlCommand, BalloonControlResult>()
2424 .map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002425
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002426 // Create one control socket per disk.
2427 let mut disk_device_sockets = Vec::new();
2428 let mut disk_host_sockets = Vec::new();
2429 let disk_count = cfg.disks.len();
2430 for _ in 0..disk_count {
2431 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07002432 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
2433 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002434 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07002435 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002436 }
2437
Daniel Verkampe1980a92020-02-07 11:00:55 -08002438 let mut pmem_device_sockets = Vec::new();
2439 let pmem_count = cfg.pmem_devices.len();
2440 for _ in 0..pmem_count {
2441 let (pmem_host_socket, pmem_device_socket) =
2442 msg_socket::pair::<VmMsyncResponse, VmMsyncRequest>().map_err(Error::CreateSocket)?;
2443 pmem_device_sockets.push(pmem_device_socket);
2444 control_sockets.push(TaggedControlSocket::VmMsync(pmem_host_socket));
2445 }
2446
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002447 let (gpu_host_socket, gpu_device_socket) =
2448 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
2449 control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
2450
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002451 let (ioapic_host_socket, ioapic_device_socket) =
2452 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
2453 control_sockets.push(TaggedControlSocket::VmIrq(ioapic_host_socket));
2454
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002455 let battery = if cfg.battery_type.is_some() {
Alex Lauf408c732020-11-10 18:24:04 +09002456 let jail = match simple_jail(&cfg, "battery")? {
2457 #[cfg_attr(not(feature = "powerd-monitor-powerd"), allow(unused_mut))]
2458 Some(mut jail) => {
2459 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2460 #[cfg(feature = "power-monitor-powerd")]
2461 {
2462 add_crosvm_user_to_jail(&mut jail, "battery")?;
2463
2464 // Create a tmpfs in the device's root directory so that we can bind mount files.
2465 jail.mount_with_data(
2466 Path::new("none"),
2467 Path::new("/"),
2468 "tmpfs",
2469 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2470 "size=67108864",
2471 )?;
2472
2473 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2474 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2475 }
2476 Some(jail)
2477 }
2478 None => None,
2479 };
2480 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002481 } else {
2482 (&cfg.battery_type, None)
2483 };
2484
Gurchetan Singh293913c2020-12-09 10:44:13 -08002485 let gralloc = RutabagaGralloc::new().map_err(Error::CreateGrallocError)?;
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002486 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2487
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002488 let fs_count = cfg
2489 .shared_dirs
2490 .iter()
2491 .filter(|sd| sd.kind == SharedDirKind::FS)
2492 .count();
2493 let mut fs_device_sockets = Vec::with_capacity(fs_count);
2494 for _ in 0..fs_count {
2495 let (fs_host_socket, fs_device_socket) =
2496 msg_socket::pair::<VmResponse, FsMappingRequest>().map_err(Error::CreateSocket)?;
2497 control_sockets.push(TaggedControlSocket::Fs(fs_host_socket));
2498 fs_device_sockets.push(fs_device_socket);
2499 }
2500
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002501 let mut linux: RunnableLinuxVm<_, Vcpu, _> = Arch::build_vm(
Trent Begin17ccaad2019-04-17 13:51:25 -06002502 components,
Trent Begin17ccaad2019-04-17 13:51:25 -06002503 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002504 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002505 battery,
Jakub Starona3411ea2019-04-24 10:55:25 -07002506 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06002507 create_devices(
2508 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07002509 mem,
2510 vm,
2511 sys_allocator,
2512 exit_evt,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07002513 &mut control_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002514 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002515 gpu_device_socket,
Trent Begin17ccaad2019-04-17 13:51:25 -06002516 balloon_device_socket,
2517 &mut disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08002518 &mut pmem_device_sockets,
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002519 &mut fs_device_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002520 usb_provider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002521 Arc::clone(&map_request),
Trent Begin17ccaad2019-04-17 13:51:25 -06002522 )
2523 },
Steven Richmanf32d0b42020-06-20 21:45:32 -07002524 create_vm,
2525 |vm, vcpu_count| create_irq_chip(vm, vcpu_count, ioapic_device_socket),
Trent Begin17ccaad2019-04-17 13:51:25 -06002526 )
David Tolnaybe034262019-03-04 17:48:36 -08002527 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08002528
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002529 #[cfg(feature = "direct")]
2530 if let Some(pmio) = &cfg.direct_pmio {
2531 let direct_io =
2532 Arc::new(devices::DirectIo::new(&pmio.path, false).map_err(Error::DirectIo)?);
2533 for range in pmio.ranges.iter() {
2534 linux
2535 .io_bus
2536 .insert_sync(direct_io.clone(), range.0, range.1)
2537 .unwrap();
2538 }
2539 };
2540
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002541 run_control(
2542 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08002543 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002544 control_sockets,
2545 balloon_host_socket,
2546 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08002547 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002548 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002549 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002550 Arc::clone(&map_request),
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002551 cfg.balloon_bias,
Gurchetan Singh293913c2020-12-09 10:44:13 -08002552 gralloc,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002553 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002554}
2555
Steven Richman11dc6712020-09-02 15:39:14 -07002556/// Signals all running VCPUs to vmexit, sends VmRunMode message to each VCPU channel, and tells
2557/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
2558/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
2559/// loop.
2560fn kick_all_vcpus(
2561 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
2562 irq_chip: &impl IrqChip,
2563 run_mode: &VmRunMode,
2564) {
2565 for (handle, channel) in vcpu_handles {
2566 if let Err(e) = channel.send(VcpuControl::RunState(run_mode.clone())) {
2567 error!("failed to send VmRunMode: {}", e);
2568 }
2569 let _ = handle.kill(SIGRTMIN() + 0);
2570 }
2571 irq_chip.kick_halted_vcpus();
2572}
2573
// BalloonPolicy holds the state used to decide what size the balloon should be set to.
struct BalloonPolicy {
    // Thresholds.
    // ChromeOS critical margin.
    critical_host_available: i64,
    // Estimate for when the guest starts aggressively freeing memory.
    critical_guest_available: i64,

    // Bias that is scaled by how full the balloon is and added to the guest critical margin.
    guest_available_bias: i64,

    // Observations carried over between timesteps.
    // The largest the balloon has ever been observed.
    max_balloon_actual: i64,
    // How full was the balloon at the previous timestep.
    prev_balloon_full_percent: i64,
    // Available memory in the guest at the previous timestep.
    prev_guest_available: i64,
}
2584
// Byte-size units used by the balloon policy.
const ONE_KB: i64 = 1 << 10;
const ONE_MB: i64 = ONE_KB * 1024;

// Files read by the balloon policy: the currently available host memory and the host's
// critical margin.
const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin";
2590
2591// BalloonPolicy implements the virtio balloon sizing logic.
2592// The balloon is sized with the following heuristics:
2593// Balance Available
2594// The balloon is sized to balance the amount of available memory above a
2595// critical margin. The critical margin is the level at which memory is
2596// freed. In the host, this is the ChromeOS available critical margin, which
2597// is the trigger to kill tabs. In the guest, we estimate this level by
2598// tracking the minimum amount of available memory, discounting sharp
2599// 'valleys'. If the guest manages to keep available memory above a given
2600// level even with some pressure, then we determine that this is the
2601// 'critical' level for the guest. We don't update this critical value if
2602// the balloon is fully inflated because in that case, the guest may be out
2603// of memory to free.
2604// guest_available_bias
2605// Even if available memory is perfectly balanced between host and guest,
2606// The size of the balloon will still drift randomly depending on whether
2607// those host or guest reclaims memory first/faster every time memory is
2608// low. To encourage large balloons to shrink and small balloons to grow,
2609// the following bias is added to the guest critical margin:
2610// (guest_available_bias * balloon_full_percent) / 100
2611// This give the guest more memory when the balloon is full.
2612impl BalloonPolicy {
2613 fn new(
2614 memory_size: i64,
2615 critical_host_available: i64,
2616 guest_available_bias: i64,
2617 ) -> BalloonPolicy {
2618 // Estimate some reasonable initial maximum for balloon size.
2619 let max_balloon_actual = (memory_size * 3) / 4;
2620 // 400MB is above the zone min margin even for Crostini VMs on 16GB
2621 // devices (~85MB), and is above when Android Low Memory Killer kills
2622 // apps (~250MB).
2623 let critical_guest_available = 400 * ONE_MB;
2624
2625 BalloonPolicy {
2626 critical_guest_available,
2627 critical_host_available,
2628 guest_available_bias,
2629 max_balloon_actual,
2630 prev_balloon_full_percent: 0,
2631 prev_guest_available: 0,
2632 }
2633 }
2634 fn delta(&mut self, stats: BalloonStats, balloon_actual_u: u64) -> Result<i64> {
2635 let guest_free = stats
2636 .free_memory
2637 .map(i64::try_from)
2638 .ok_or(Error::GuestFreeMissing())?
2639 .map_err(Error::GuestFreeTooLarge)?;
2640 let guest_cached = stats
2641 .disk_caches
2642 .map(i64::try_from)
2643 .ok_or(Error::GuestFreeMissing())?
2644 .map_err(Error::GuestFreeTooLarge)?;
2645 let balloon_actual = match balloon_actual_u {
2646 size if size < i64::max_value() as u64 => size as i64,
2647 _ => return Err(Error::BalloonActualTooLarge),
2648 };
2649 let guest_available = guest_free + guest_cached;
2650 // Available memory is reported in MB, and we need bytes.
2651 let host_available =
2652 file_to_i64(LOWMEM_AVAILABLE, 0).map_err(Error::ReadMemAvailable)? * ONE_MB;
2653 if self.max_balloon_actual < balloon_actual {
2654 self.max_balloon_actual = balloon_actual;
2655 info!(
2656 "balloon updated max_balloon_actual to {} MiB",
2657 self.max_balloon_actual / ONE_MB,
2658 );
2659 }
2660 let balloon_full_percent = balloon_actual * 100 / self.max_balloon_actual;
2661 // Update critical_guest_available if we see a lower available with the
2662 // balloon not fully inflated. If the balloon is completely inflated
2663 // there is a risk that the low available level we see comes at the cost
2664 // of stability. The Linux OOM Killer might have been forced to kill
2665 // something important, or page reclaim was so aggressive that there are
2666 // long UI hangs.
2667 if guest_available < self.critical_guest_available && balloon_full_percent < 95 {
2668 // To ignore temporary low memory states, we require that two guest
2669 // available measurements in a row are low.
2670 if self.prev_guest_available < self.critical_guest_available
2671 && self.prev_balloon_full_percent < 95
2672 {
2673 self.critical_guest_available = self.prev_guest_available;
2674 info!(
2675 "balloon updated critical_guest_available to {} MiB",
2676 self.critical_guest_available / ONE_MB,
2677 );
2678 }
2679 }
2680
2681 // Compute the difference in available memory above the host and guest
2682 // critical thresholds.
2683 let bias = (self.guest_available_bias * balloon_full_percent) / 100;
2684 let guest_above_critical = guest_available - self.critical_guest_available - bias;
2685 let host_above_critical = host_available - self.critical_host_available;
2686 let balloon_delta = guest_above_critical - host_above_critical;
2687 // Only let the balloon take up MAX_CRITICAL_DELTA of available memory
2688 // below the critical level in host or guest.
2689 const MAX_CRITICAL_DELTA: i64 = 10 * ONE_MB;
2690 let balloon_delta_capped = if balloon_delta < 0 {
2691 // The balloon is deflating, taking memory from the host. Don't let
2692 // it take more than the amount of available memory above the
2693 // critical margin, plus MAX_CRITICAL_DELTA.
2694 max(
2695 balloon_delta,
2696 -(host_available - self.critical_host_available + MAX_CRITICAL_DELTA),
2697 )
2698 } else {
2699 // The balloon is inflating, taking memory from the guest. Don't let
2700 // it take more than the amount of available memory above the
2701 // critical margin, plus MAX_CRITICAL_DELTA.
2702 min(
2703 balloon_delta,
2704 guest_available - self.critical_guest_available + MAX_CRITICAL_DELTA,
2705 )
2706 };
2707
2708 self.prev_balloon_full_percent = balloon_full_percent;
2709 self.prev_guest_available = guest_available;
2710
2711 // Only return a value if target would change available above critical
2712 // by more than 1%, or we are within 1 MB of critical in host or guest.
2713 if guest_above_critical < ONE_MB
2714 || host_above_critical < ONE_MB
2715 || (balloon_delta.abs() * 100) / guest_above_critical > 1
2716 || (balloon_delta.abs() * 100) / host_above_critical > 1
2717 {
2718 // Finally, make sure the balloon delta won't cause a negative size.
2719 let result = max(balloon_delta_capped, -balloon_actual);
2720 if result != 0 {
2721 info!(
2722 "balloon delta={:<6} ha={:<6} hc={:<6} ga={:<6} gc={:<6} bias={:<6} full={:>3}%",
2723 result / ONE_MB,
2724 host_available / ONE_MB,
2725 self.critical_host_available / ONE_MB,
2726 guest_available / ONE_MB,
2727 self.critical_guest_available / ONE_MB,
2728 bias / ONE_MB,
2729 balloon_full_percent,
2730 );
2731 }
2732 return Ok(result);
2733 }
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00002734 Ok(0)
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002735 }
2736}
2737
/// Starts the VCPU threads and runs the main event loop of the VM until it exits.
///
/// The loop waits on a `WaitContext` and services VM exit and suspend events, child process
/// signals, IRQ events, the periodic memory balancing timer, balloon statistics results and
/// requests arriving on the control sockets. When the loop ends, all VCPUs are kicked with
/// `VmRunMode::Exiting`, their threads are joined and `linux` is dropped.
///
/// * `linux` - the VM to run; its VCPUs are taken out and moved to their own threads.
/// * `control_server_socket` - optional listener on which new VM control connections are accepted.
/// * `control_sockets` - control sockets serviced by the loop; accepted connections are appended
///   and disconnected sockets are removed.
/// * `balloon_host_socket` - used to request balloon statistics and to send balloon adjustments.
/// * `disk_host_sockets`, `usb_control_socket` - handed to VM control requests when they execute.
/// * `sigchld_fd` - signal fd; becoming readable ends the loop after the signal infos are logged.
/// * `sandbox` - if true, capabilities are dropped before the VCPUs are started.
/// * `map_request`, `gralloc` - handed to memory control requests when they execute.
/// * `balloon_bias` - bias passed to the `BalloonPolicy`.
///
/// Returns `Ok(())` once the loop has ended and cleanup is done, or the first error hit by one of
/// the `?` operations.
///
/// NOTE(review): stdin is switched to raw mode at the start and only restored to canonical mode
/// at the very end, so every early return through `?` leaves the terminal in raw mode.
fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static, I: IrqChipArch + 'static>(
    mut linux: RunnableLinuxVm<V, Vcpu, I>,
    control_server_socket: Option<UnlinkUnixSeqpacketListener>,
    mut control_sockets: Vec<TaggedControlSocket>,
    balloon_host_socket: BalloonControlRequestSocket,
    disk_host_sockets: &[DiskControlRequestSocket],
    usb_control_socket: UsbControlSocket,
    sigchld_fd: SignalFd,
    sandbox: bool,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
    balloon_bias: i64,
    mut gralloc: RutabagaGralloc,
) -> Result<()> {
    // Identifies which event source woke up the wait context.
    #[derive(PollToken)]
    enum Token {
        Exit,
        Suspend,
        ChildSignal,
        IrqFd { index: IrqEventIndex },
        BalanceMemory,
        BalloonResult,
        VmControlServer,
        // `index` is the position of the socket in `control_sockets`.
        VmControl { index: usize },
    }

    stdin()
        .set_raw_mode()
        .expect("failed to set terminal raw mode");

    let wait_ctx = WaitContext::build_with(&[
        (&linux.exit_evt, Token::Exit),
        (&linux.suspend_evt, Token::Suspend),
        (&sigchld_fd, Token::ChildSignal),
    ])
    .map_err(Error::WaitContextAdd)?;

    if let Some(socket_server) = &control_server_socket {
        wait_ctx
            .add(socket_server, Token::VmControlServer)
            .map_err(Error::WaitContextAdd)?;
    }
    for (index, socket) in control_sockets.iter().enumerate() {
        wait_ctx
            .add(socket.as_ref(), Token::VmControl { index })
            .map_err(Error::WaitContextAdd)?;
    }

    let events = linux
        .irq_chip
        .irq_event_tokens()
        .map_err(Error::WaitContextAdd)?;

    for (index, _gsi, evt) in events {
        wait_ctx
            .add(&evt, Token::IrqFd { index })
            .map_err(Error::WaitContextAdd)?;
    }

    // Balance available memory between guest and host every second. Balancing is only enabled
    // when the low memory margin file can be read.
    let mut balancemem_timer = Timer::new().map_err(Error::CreateTimer)?;
    let mut balloon_policy = if let Ok(critical_margin) = file_to_i64(LOWMEM_MARGIN, 0) {
        // Create timer request balloon stats every 1s.
        wait_ctx
            .add(&balancemem_timer, Token::BalanceMemory)
            .map_err(Error::WaitContextAdd)?;
        let balancemem_dur = Duration::from_secs(1);
        let balancemem_int = Duration::from_secs(1);
        balancemem_timer
            .reset(balancemem_dur, Some(balancemem_int))
            .map_err(Error::ResetTimer)?;

        // Listen for balloon statistics from the guest so we can balance.
        wait_ctx
            .add(&balloon_host_socket, Token::BalloonResult)
            .map_err(Error::WaitContextAdd)?;
        Some(BalloonPolicy::new(
            linux.vm.get_memory().memory_size() as i64,
            critical_margin * ONE_MB,
            balloon_bias,
        ))
    } else {
        warn!("Unable to open low mem margin, maybe not a chrome os kernel");
        None
    };

    if sandbox {
        // Before starting VCPUs, in case we started with some capabilities, drop them all.
        drop_capabilities().map_err(Error::DropCapabilities)?;
    }

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    // Create a channel for GDB thread.
    let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
        let (s, r) = mpsc::channel();
        (Some(s), Some(r))
    } else {
        (None, None)
    };

    let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
    // One slot per VCPU thread plus one for this thread, which waits on the barrier below once
    // all VCPU threads have been spawned.
    let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
    let use_hypervisor_signals = !linux
        .vm
        .get_hypervisor()
        .check_capability(&HypervisorCap::ImmediateExit);
    setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;

    // If the VM carries no VCPUs, pass `None` for each of the `vcpu_count` VCPUs to `run_vcpu`.
    let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
        Some(vec) => vec.into_iter().map(Some).collect(),
        None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
    };
    for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
        let (to_vcpu_channel, from_main_channel) = mpsc::channel();
        let vcpu_affinity = match linux.vcpu_affinity.clone() {
            Some(VcpuAffinity::Global(v)) => v,
            Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
            None => Default::default(),
        };
        let handle = run_vcpu(
            cpu_id,
            vcpu,
            linux.vm.try_clone().map_err(Error::CloneEvent)?,
            linux.irq_chip.try_clone().map_err(Error::CloneEvent)?,
            linux.vcpu_count,
            linux.rt_cpus.contains(&cpu_id),
            vcpu_affinity,
            linux.no_smt,
            vcpu_thread_barrier.clone(),
            linux.has_bios,
            linux.io_bus.clone(),
            linux.mmio_bus.clone(),
            linux.exit_evt.try_clone().map_err(Error::CloneEvent)?,
            linux.vm.check_capability(VmCap::PvClockSuspend),
            from_main_channel,
            use_hypervisor_signals,
            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
            to_gdb_channel.clone(),
        )?;
        vcpu_handles.push((handle, to_vcpu_channel));
    }

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    // Spawn GDB thread.
    if let Some((gdb_port_num, gdb_control_socket)) = linux.gdb.take() {
        let to_vcpu_channels = vcpu_handles
            .iter()
            .map(|(_handle, channel)| channel.clone())
            .collect();
        let target = GdbStub::new(
            gdb_control_socket,
            to_vcpu_channels,
            from_vcpu_channel.unwrap(), // Must succeed to unwrap()
        );
        thread::Builder::new()
            .name("gdb".to_owned())
            .spawn(move || gdb_thread(target, gdb_port_num))
            .map_err(Error::SpawnGdbServer)?;
    };

    vcpu_thread_barrier.wait();

    'wait: loop {
        let events = {
            match wait_ctx.wait() {
                Ok(v) => v,
                Err(e) => {
                    // A failed wait ends the control loop; cleanup below still runs.
                    error!("failed to poll: {}", e);
                    break;
                }
            }
        };

        if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
            warn!("can't deliver delayed irqs: {}", e);
        }

        // Indices into `control_sockets` of sockets to remove once all events of this
        // iteration have been handled.
        let mut vm_control_indices_to_remove = Vec::new();
        for event in events.iter().filter(|e| e.is_readable) {
            match event.token {
                Token::Exit => {
                    info!("vcpu requested shutdown");
                    break 'wait;
                }
                Token::Suspend => {
                    info!("VM requested suspend");
                    linux.suspend_evt.read().unwrap();
                    kick_all_vcpus(&vcpu_handles, &linux.irq_chip, &VmRunMode::Suspending);
                }
                Token::ChildSignal => {
                    // Print all available siginfo structs, then exit the loop.
                    while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
                        let pid = siginfo.ssi_pid;
                        let pid_label = match linux.pid_debug_label_map.get(&pid) {
                            Some(label) => format!("{} (pid {})", label, pid),
                            None => format!("pid {}", pid),
                        };
                        error!(
                            "child {} died: signo {}, status {}, code {}",
                            pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
                        );
                    }
                    break 'wait;
                }
                Token::IrqFd { index } => {
                    if let Err(e) = linux.irq_chip.service_irq_event(index) {
                        error!("failed to signal irq {}: {}", index, e);
                    }
                }
                Token::BalanceMemory => {
                    // Timer tick: ask the balloon device for fresh statistics. The reply is
                    // handled under `Token::BalloonResult`.
                    balancemem_timer.wait().map_err(Error::Timer)?;
                    let command = BalloonControlCommand::Stats {};
                    if let Err(e) = balloon_host_socket.send(&command) {
                        warn!("failed to send stats request to balloon device: {}", e);
                    }
                }
                Token::BalloonResult => {
                    match balloon_host_socket.recv() {
                        Ok(BalloonControlResult::Stats {
                            stats,
                            balloon_actual: balloon_actual_u,
                        }) => {
                            // Let the policy decide by how much the balloon should change and
                            // only contact the balloon device for a non-zero change.
                            match balloon_policy
                                .as_mut()
                                .map(|p| p.delta(stats, balloon_actual_u))
                            {
                                None => {
                                    error!(
                                        "got result from balloon stats, but no policy is running"
                                    );
                                }
                                Some(Err(e)) => {
                                    warn!("failed to run balloon policy {}", e);
                                }
                                Some(Ok(delta)) if delta != 0 => {
                                    // The new target size is clamped so it cannot go negative.
                                    let target = max((balloon_actual_u as i64) + delta, 0) as u64;
                                    let command =
                                        BalloonControlCommand::Adjust { num_bytes: target };
                                    if let Err(e) = balloon_host_socket.send(&command) {
                                        warn!(
                                            "failed to send memory value to balloon device: {}",
                                            e
                                        );
                                    }
                                }
                                Some(Ok(_)) => {}
                            }
                        }
                        Err(e) => {
                            error!("failed to recv BalloonControlResult: {}", e);
                        }
                    };
                }
                Token::VmControlServer => {
                    if let Some(socket_server) = &control_server_socket {
                        match socket_server.accept() {
                            Ok(socket) => {
                                // The new socket is pushed right after being registered, so its
                                // index is the current length of `control_sockets`.
                                wait_ctx
                                    .add(
                                        &socket,
                                        Token::VmControl {
                                            index: control_sockets.len(),
                                        },
                                    )
                                    .map_err(Error::WaitContextAdd)?;
                                control_sockets
                                    .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
                            }
                            Err(e) => error!("failed to accept socket: {}", e),
                        }
                    }
                }
                Token::VmControl { index } => {
                    if let Some(socket) = control_sockets.get(index) {
                        match socket {
                            TaggedControlSocket::Vm(socket) => match socket.recv() {
                                Ok(request) => {
                                    // `execute` fills in `run_mode_opt` when the request asks
                                    // for a change of run mode.
                                    let mut run_mode_opt = None;
                                    let response = request.execute(
                                        &mut run_mode_opt,
                                        &balloon_host_socket,
                                        disk_host_sockets,
                                        &usb_control_socket,
                                        &mut linux.bat_control,
                                    );
                                    if let Err(e) = socket.send(&response) {
                                        error!("failed to send VmResponse: {}", e);
                                    }
                                    if let Some(run_mode) = run_mode_opt {
                                        info!("control socket changed run mode to {}", run_mode);
                                        match run_mode {
                                            VmRunMode::Exiting => {
                                                break 'wait;
                                            }
                                            other => {
                                                // The io bus is notified of the resume before
                                                // the VCPUs are kicked.
                                                if other == VmRunMode::Running {
                                                    linux.io_bus.notify_resume();
                                                }
                                                kick_all_vcpus(
                                                    &vcpu_handles,
                                                    &linux.irq_chip,
                                                    &other,
                                                );
                                            }
                                        }
                                    }
                                }
                                Err(e) => {
                                    // A zero-length receive schedules the socket for removal.
                                    if let MsgError::RecvZero = e {
                                        vm_control_indices_to_remove.push(index);
                                    } else {
                                        error!("failed to recv VmRequest: {}", e);
                                    }
                                }
                            },
                            TaggedControlSocket::VmMemory(socket) => match socket.recv() {
                                Ok(request) => {
                                    let response = request.execute(
                                        &mut linux.vm,
                                        &mut linux.resources,
                                        Arc::clone(&map_request),
                                        &mut gralloc,
                                    );
                                    if let Err(e) = socket.send(&response) {
                                        error!("failed to send VmMemoryControlResponse: {}", e);
                                    }
                                }
                                Err(e) => {
                                    if let MsgError::RecvZero = e {
                                        vm_control_indices_to_remove.push(index);
                                    } else {
                                        error!("failed to recv VmMemoryControlRequest: {}", e);
                                    }
                                }
                            },
                            TaggedControlSocket::VmIrq(socket) => match socket.recv() {
                                Ok(request) => {
                                    let response = {
                                        let irq_chip = &mut linux.irq_chip;
                                        request.execute(
                                            // Callback that applies the setup asked for by the
                                            // request: either register an irq event (and start
                                            // waiting on it) or install a route.
                                            |setup| match setup {
                                                IrqSetup::Event(irq, ev) => {
                                                    if let Some(event_index) = irq_chip
                                                        .register_irq_event(irq, ev, None)?
                                                    {
                                                        match wait_ctx.add(
                                                            ev,
                                                            Token::IrqFd {
                                                                index: event_index
                                                            },
                                                        ) {
                                                            Err(e) => {
                                                                warn!("failed to add IrqFd to poll context: {}", e);
                                                                Err(e)
                                                            },
                                                            Ok(_) => {
                                                                Ok(())
                                                            }
                                                        }
                                                    } else {
                                                        Ok(())
                                                    }
                                                }
                                                IrqSetup::Route(route) => irq_chip.route_irq(route),
                                            },
                                            &mut linux.resources,
                                        )
                                    };
                                    if let Err(e) = socket.send(&response) {
                                        error!("failed to send VmIrqResponse: {}", e);
                                    }
                                }
                                Err(e) => {
                                    if let MsgError::RecvZero = e {
                                        vm_control_indices_to_remove.push(index);
                                    } else {
                                        error!("failed to recv VmIrqRequest: {}", e);
                                    }
                                }
                            },
                            TaggedControlSocket::VmMsync(socket) => match socket.recv() {
                                Ok(request) => {
                                    let response = request.execute(&mut linux.vm);
                                    if let Err(e) = socket.send(&response) {
                                        error!("failed to send VmMsyncResponse: {}", e);
                                    }
                                }
                                Err(e) => {
                                    // NOTE(review): this arm and the `Fs` arm below match
                                    // `BadRecvSize { actual: 0, .. }` while the arms above match
                                    // `RecvZero` - confirm which one a closed socket produces.
                                    if let MsgError::BadRecvSize { actual: 0, .. } = e {
                                        vm_control_indices_to_remove.push(index);
                                    } else {
                                        error!("failed to recv VmMsyncRequest: {}", e);
                                    }
                                }
                            },
                            TaggedControlSocket::Fs(socket) => match socket.recv() {
                                Ok(request) => {
                                    let response =
                                        request.execute(&mut linux.vm, &mut linux.resources);
                                    if let Err(e) = socket.send(&response) {
                                        error!("failed to send VmResponse: {}", e);
                                    }
                                }
                                Err(e) => {
                                    if let MsgError::BadRecvSize { actual: 0, .. } = e {
                                        vm_control_indices_to_remove.push(index);
                                    } else {
                                        error!("failed to recv VmResponse: {}", e);
                                    }
                                }
                            },
                        }
                    }
                }
            }
        }

        // Second pass over the same events: only hangups of control sockets need handling.
        for event in events.iter().filter(|e| e.is_hungup) {
            match event.token {
                Token::Exit => {}
                Token::Suspend => {}
                Token::ChildSignal => {}
                Token::IrqFd { index: _ } => {}
                Token::BalanceMemory => {}
                Token::BalloonResult => {}
                Token::VmControlServer => {}
                Token::VmControl { index } => {
                    // It's possible more data is readable and buffered while the socket is hungup,
                    // so don't delete the socket from the poll context until we're sure all the
                    // data is read.
                    match control_sockets
                        .get(index)
                        .map(|s| s.as_ref().get_readable_bytes())
                    {
                        Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
                        Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
                        _ => {}
                    }
                }
            }
        }

        // Sort in reverse so the highest indexes are removed first. This removal algorithm
        // preserves correct indexes as each element is removed.
        vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
        vm_control_indices_to_remove.dedup();
        for index in vm_control_indices_to_remove {
            // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
            // this automatically when the FD inserted into the `wait_ctx` is closed after this
            // if-block, but this removal can be deferred unpredictably. In some instances where the
            // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
            // that has already been closed. Because the token associated with that spurious event
            // now belongs to a different socket, the control loop will start to interact with
            // sockets that might not be ready to use. This can cause incorrect hangup detection or
            // blocking on a socket that will never be ready. See also: crbug.com/1019986
            if let Some(socket) = control_sockets.get(index) {
                wait_ctx.delete(socket).map_err(Error::WaitContextDelete)?;
            }

            // This line implicitly drops the socket at `index` when it gets returned by
            // `swap_remove`. After this line, the socket at `index` is not the one from
            // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
            // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
            control_sockets.swap_remove(index);
            if let Some(socket) = control_sockets.get(index) {
                wait_ctx
                    .modify(socket, EventType::Read, Token::VmControl { index })
                    .map_err(Error::WaitContextAdd)?;
            }
        }
    }

    // The control loop has ended: tell every VCPU to exit and wait for its thread to finish.
    kick_all_vcpus(&vcpu_handles, &linux.irq_chip, &VmRunMode::Exiting);
    for (handle, _) in vcpu_handles {
        if let Err(e) = handle.join() {
            error!("failed to join vcpu thread: {:?}", e);
        }
    }

    // Explicitly drop the VM structure here to allow the devices to clean up before the
    // control sockets are closed when this function exits.
    mem::drop(linux);

    stdin()
        .set_canon_mode()
        .expect("failed to restore canonical mode for terminal");

    Ok(())
}