blob: 2906ec78530fc02242bd0b021821b6b501dacdc0 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +08005use std::cmp::{max, Reverse};
Chia-I Wu7f0f7c12022-01-12 10:42:18 -08006use std::collections::{BTreeMap, HashSet};
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00007use std::convert::{TryFrom, TryInto};
John Batesb220eac2020-09-14 17:03:02 -07008#[cfg(feature = "gpu")]
9use std::env;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Vineeth Pillai2b6855e2022-01-12 16:57:22 +000011use std::io::prelude::*;
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090012use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070013use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070014use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080015use std::net::Ipv4Addr;
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -070016use std::os::unix::net::UnixListener;
Christian Blichmann50f95912021-11-05 16:59:39 +010017use std::os::unix::{io::FromRawFd, net::UnixStream, prelude::OpenOptionsExt};
Zach Reizner39aa26b2017-12-12 18:03:23 -080018use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070019use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070020use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090021use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070022
Vineeth Pillai2b6855e2022-01-12 16:57:22 +000023use std::process;
Zach Reizner39aa26b2017-12-12 18:03:23 -080024use std::thread;
25use std::thread::JoinHandle;
26
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -080027use libc::{self, c_int, c_ulong, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080028
Tomasz Jeznach42644642020-05-20 23:27:59 -070029use acpi_tables::sdt::SDT;
30
Daniel Verkamp6b298582021-08-16 15:37:11 -070031use anyhow::{anyhow, bail, Context, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090032use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080033use base::*;
Keiichi Watanabe553d2192021-08-16 16:42:27 +090034use devices::serial_device::{SerialHardware, SerialParameters};
Zide Chenafdb9382021-06-17 12:04:43 -070035use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow0b2b6062021-09-03 15:40:02 +090036#[cfg(feature = "audio_cras")]
37use devices::virtio::snd::cras_backend::Parameters as CrasSndParameters;
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -070038use devices::virtio::vhost::user::proxy::VirtioVhostUser;
Woody Chow1b16db12021-04-02 16:59:59 +090039#[cfg(feature = "audio")]
40use devices::virtio::vhost::user::vmm::Snd as VhostUserSnd;
Keiichi Watanabefb36e0c2021-08-13 18:48:31 +090041use devices::virtio::vhost::user::vmm::{
Richard5afeafa2021-07-26 19:02:09 -070042 Block as VhostUserBlock, Console as VhostUserConsole, Fs as VhostUserFs,
Chirantan Ekbote84091e52021-09-10 18:43:17 +090043 Mac80211Hwsim as VhostUserMac80211Hwsim, Net as VhostUserNet, Vsock as VhostUserVsock,
44 Wl as VhostUserWl,
Keiichi Watanabe60686582021-03-12 04:53:51 +090045};
Alexandre Courbotb42b3e52021-07-09 23:38:57 +090046#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
47use devices::virtio::VideoBackendType;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070048use devices::virtio::{self, Console, VirtioDevice};
Chirantan Ekbote44292f52021-06-25 18:31:41 +090049#[cfg(feature = "gpu")]
50use devices::virtio::{
Chia-I Wu16fb6592021-11-10 11:45:32 -080051 gpu::{GpuRenderServerParameters, DEFAULT_DISPLAY_HEIGHT, DEFAULT_DISPLAY_WIDTH},
Chirantan Ekbote44292f52021-06-25 18:31:41 +090052 vhost::user::vmm::Gpu as VhostUserGpu,
53 EventDevice,
54};
paulhsiace17e6e2020-08-28 18:37:45 +080055#[cfg(feature = "audio")]
56use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080057use devices::{
Xiong Zhangf82f2dc2021-05-21 16:54:12 +080058 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqChip, IrqEventIndex, KvmKernelIrqChip,
59 PciAddress, PciBridge, PciDevice, PcieRootPort, StubPciDevice, VcpuRunState, VfioContainer,
60 VfioDevice, VfioPciDevice, VfioPlatformDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080061};
Chuanxiao Donga8d427b2022-01-07 10:26:24 +080062use devices::{CoIommuDev, IommuDevType};
Daniel Verkampf1439d42021-05-21 13:55:10 -070063#[cfg(feature = "usb")]
64use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070065use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Andrew Walbran00f1c9f2021-12-10 17:13:08 +000066use hypervisor::{HypervisorCap, ProtectionType, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070067use minijail::{self, Minijail};
Richard5afeafa2021-07-26 19:02:09 -070068use net_util::{MacAddress, Tap};
Xiong Zhang87a3b442019-10-29 17:32:44 +080069use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080070use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070071use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080072use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090073use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080074
Keiichi Watanabec5262e92020-10-21 15:57:33 +090075#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
76use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090077use crate::{
Tomasz Nowicki71aca792021-06-09 18:53:49 +000078 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VfioType,
Christian Blichmann50f95912021-11-05 16:59:39 +010079 VhostUserFsOption, VhostUserOption, VhostUserWlOption, VhostVsockDeviceParameter,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090080};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070081use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090082 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070083};
Sonny Raoed517d12018-02-13 22:09:43 -080084
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080085#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070086use {
87 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070088 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070089 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
90};
Zach Reizner55a9e502018-10-03 10:22:32 -070091#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070092use {
Steven Richman11dc6712020-09-02 15:39:14 -070093 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
94 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070095 x86_64::X8664arch as Arch,
96};
Zach Reizner39aa26b2017-12-12 18:03:23 -080097
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080098enum TaggedControlTube {
99 Fs(Tube),
100 Vm(Tube),
101 VmMemory(Tube),
102 VmIrq(Tube),
103 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700104}
105
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800106impl AsRef<Tube> for TaggedControlTube {
107 fn as_ref(&self) -> &Tube {
108 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -0700109 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800110 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700111 }
112 }
113}
114
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800115impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700116 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800117 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700118 }
119}
120
/// Sandbox settings consumed by `create_base_minijail` when it is given a configuration.
struct SandboxConfig<'a> {
    /// When set, the jail is created with an empty capability mask.
    limit_caps: bool,
    /// When set, seccomp failures are logged and the textual `.policy` file is used
    /// even if a precompiled `.bpf` file exists.
    log_failures: bool,
    /// Seccomp policy path; its extension is replaced with `bpf` or `policy` on use.
    seccomp_policy: &'a Path,
    /// UID map string applied to the jail, if any.
    uid_map: Option<&'a str>,
    /// GID map string applied to the jail, if any.
    gid_map: Option<&'a str>,
    /// Remount mode to use instead of the default, if any.
    remount_mode: Option<c_ulong>,
}
129
Zach Reizner44863792019-06-26 14:22:08 -0700130fn create_base_minijail(
131 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800132 r_limit: Option<u64>,
133 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700134) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800135 // All child jails run in a new user namespace without any users mapped,
136 // they run as nobody unless otherwise configured.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700137 let mut j = Minijail::new().context("failed to jail device")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800138
139 if let Some(config) = config {
140 j.namespace_pids();
141 j.namespace_user();
142 j.namespace_user_disable_setgroups();
143 if config.limit_caps {
144 // Don't need any capabilities.
145 j.use_caps(0);
146 }
147 if let Some(uid_map) = config.uid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700148 j.uidmap(uid_map).context("error setting UID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800149 }
150 if let Some(gid_map) = config.gid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700151 j.gidmap(gid_map).context("error setting GID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800152 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900153 // Run in a new mount namespace.
154 j.namespace_vfs();
155
Matt Delcoc24ad782020-02-14 13:24:36 -0800156 // Run in an empty network namespace.
157 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900158
159 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800160 j.no_new_privs();
161
162 // By default we'll prioritize using the pre-compiled .bpf over the .policy
163 // file (the .bpf is expected to be compiled using "trap" as the failure
164 // behavior instead of the default "kill" behavior).
165 // Refer to the code comment for the "seccomp-log-failures"
166 // command-line parameter for an explanation about why the |log_failures|
167 // flag forces the use of .policy files (and the build-time alternative to
168 // this run-time flag).
169 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
170 if bpf_policy_file.exists() && !config.log_failures {
171 j.parse_seccomp_program(&bpf_policy_file)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700172 .context("failed to parse precompiled seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800173 } else {
174 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
175 // which will correctly kill the entire device process if a worker
176 // thread commits a seccomp violation.
177 j.set_seccomp_filter_tsync();
178 if config.log_failures {
179 j.log_seccomp_filter_failures();
180 }
181 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700182 .context("failed to parse seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800183 }
184 j.use_seccomp_filter();
185 // Don't do init setup.
186 j.run_as_init();
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -0800187 // Set up requested remount mode instead of default MS_PRIVATE.
188 if let Some(mode) = config.remount_mode {
189 j.set_remount_mode(mode);
190 }
Matt Delcoc24ad782020-02-14 13:24:36 -0800191 }
192
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900193 // Only pivot_root if we are not re-using the current root directory.
194 if root != Path::new("/") {
195 // It's safe to call `namespace_vfs` multiple times.
196 j.namespace_vfs();
Daniel Verkamp6b298582021-08-16 15:37:11 -0700197 j.enter_pivot_root(root)
198 .context("failed to pivot root device")?;
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900199 }
Matt Delco45caf912019-11-13 08:11:09 -0800200
Matt Delcoc24ad782020-02-14 13:24:36 -0800201 // Most devices don't need to open many fds.
202 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
203 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700204 .context("error setting max open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800205
Zach Reizner39aa26b2017-12-12 18:03:23 -0800206 Ok(j)
207}
208
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800209fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700210 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800211 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
212 // A directory for a jailed device's pivot root.
213 let root_path = Path::new(pivot_root);
214 if !root_path.exists() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700215 bail!("{} doesn't exist, can't jail devices", pivot_root);
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800216 }
217 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800218 let config = SandboxConfig {
219 limit_caps: true,
220 log_failures: cfg.seccomp_log_failures,
221 seccomp_policy: &policy_path,
222 uid_map: None,
223 gid_map: None,
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -0800224 remount_mode: None,
Matt Delcoc24ad782020-02-14 13:24:36 -0800225 };
226 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800227 } else {
228 Ok(None)
229 }
230}
231
Daniel Verkamp6b298582021-08-16 15:37:11 -0700232type DeviceResult<T = VirtioDeviceStub> = Result<T>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800233
Andrew Walbran4cad30a2021-06-28 15:58:08 +0000234fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900235 let raw_image: File = open_file(&disk.path, disk.read_only, disk.o_direct)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700236 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800237 // Lock the disk image to prevent other crosvm instances from using it.
238 let lock_op = if disk.read_only {
239 FlockOperation::LockShared
240 } else {
241 FlockOperation::LockExclusive
242 };
Daniel Verkamp6b298582021-08-16 15:37:11 -0700243 flock(&raw_image, lock_op, true).context("failed to lock disk image")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800244
Junichi Uekawa52437db2021-09-29 17:33:07 +0900245 info!("Trying to attach block device: {}", disk.path.display());
Daniel Verkamp6b298582021-08-16 15:37:11 -0700246 let dev = if disk::async_ok(&raw_image).context("failed to check disk async_ok")? {
247 let async_file = disk::create_async_disk_file(raw_image)
248 .context("failed to create async virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700249 Box::new(
250 virtio::BlockAsync::new(
251 virtio::base_features(cfg.protected_vm),
252 async_file,
253 disk.read_only,
254 disk.sparse,
255 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700256 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800257 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700258 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700259 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700260 ) as Box<dyn VirtioDevice>
261 } else {
Daniel Verkampeb1640e2021-09-07 14:09:31 -0700262 let disk_file = disk::create_disk_file(raw_image, disk::MAX_NESTING_DEPTH)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700263 .context("failed to create virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700264 Box::new(
265 virtio::Block::new(
266 virtio::base_features(cfg.protected_vm),
267 disk_file,
268 disk.read_only,
269 disk.sparse,
270 disk.block_size,
271 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800272 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700273 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700274 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700275 ) as Box<dyn VirtioDevice>
276 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800277
278 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700279 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700280 jail: simple_jail(cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800281 })
282}
283
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900284fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
285 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700286 .context("failed to set up vhost-user block device")?;
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900287
288 Ok(VirtioDeviceStub {
289 dev: Box::new(dev),
290 // no sandbox here because virtqueue handling is exported to a different process.
291 jail: None,
292 })
293}
294
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900295fn create_vhost_user_console_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
296 let dev = VhostUserConsole::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700297 .context("failed to set up vhost-user console device")?;
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900298
299 Ok(VirtioDeviceStub {
300 dev: Box::new(dev),
301 // no sandbox here because virtqueue handling is exported to a different process.
302 jail: None,
303 })
304}
305
Woody Chow5890b702021-02-12 14:57:02 +0900306fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
307 let dev = VhostUserFs::new(
308 virtio::base_features(cfg.protected_vm),
309 &option.socket,
310 &option.tag,
311 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700312 .context("failed to set up vhost-user fs device")?;
Woody Chow5890b702021-02-12 14:57:02 +0900313
314 Ok(VirtioDeviceStub {
315 dev: Box::new(dev),
316 // no sandbox here because virtqueue handling is exported to a different process.
317 jail: None,
318 })
319}
320
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900321fn create_vhost_user_mac80211_hwsim_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
322 let dev = VhostUserMac80211Hwsim::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700323 .context("failed to set up vhost-user mac80211_hwsim device")?;
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900324
325 Ok(VirtioDeviceStub {
326 dev: Box::new(dev),
327 // no sandbox here because virtqueue handling is exported to a different process.
328 jail: None,
329 })
330}
331
/// Creates a vhost-user sound device that talks to the backend at `option.socket`.
#[cfg(feature = "audio")]
fn create_vhost_user_snd_device(cfg: &Config, option: &VhostUserOption) -> DeviceResult {
    VhostUserSnd::new(virtio::base_features(cfg.protected_vm), &option.socket)
        .context("failed to set up vhost-user snd device")
        .map(|snd| VirtioDeviceStub {
            dev: Box::new(snd),
            // Not sandboxed: virtqueue handling is exported to a different process.
            jail: None,
        })
}
343
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700344fn create_vvu_proxy_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
345 let listener = UnixListener::bind(&opt.socket).map_err(|e| {
346 error!("failed to bind listener for vvu proxy device: {}", e);
347 e
348 })?;
349
350 let dev = VirtioVhostUser::new(virtio::base_features(cfg.protected_vm), listener)
351 .context("failed to create VVU proxy device")?;
352
353 Ok(VirtioDeviceStub {
354 dev: Box::new(dev),
355 jail: simple_jail(cfg, "vvu_proxy_device")?,
356 })
357}
358
David Tolnay2b089fc2019-03-04 15:33:22 -0800359fn create_rng_device(cfg: &Config) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700360 let dev = virtio::Rng::new(virtio::base_features(cfg.protected_vm))
361 .context("failed to set up rng")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800362
363 Ok(VirtioDeviceStub {
364 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700365 jail: simple_jail(cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800366 })
367}
368
/// Creates the CRAS-backed virtio sound device from `cras_snd`.
///
/// When sandboxing is enabled, the "cras_snd_device" jail is given a small tmpfs
/// root, a writable bind mount of `/run/cras`, and the current user is added to the
/// jail. Returns an error if the device or any part of the jail cannot be set up.
#[cfg(feature = "audio_cras")]
fn create_cras_snd_device(cfg: &Config, cras_snd: CrasSndParameters) -> DeviceResult {
    let dev = virtio::snd::cras_backend::VirtioSndCras::new(
        virtio::base_features(cfg.protected_vm),
        cras_snd,
    )
    .context("failed to create cras sound device")?;

    // `cfg` is already a `&Config`, so it is passed as-is like every other caller of
    // `simple_jail` does.
    let mut jail = simple_jail(cfg, "cras_snd_device")?;
    if let Some(jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory for cras_snd_device.
        // The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )?;

        let run_cras_path = Path::new("/run/cras");
        jail.mount_bind(run_cras_path, run_cras_path, true)?;

        add_current_user_to_jail(jail)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
404
/// Creates the virtio TPM device and the storage directory it uses.
///
/// With sandboxing enabled, storage lives in `/run/vm/tpm.<pid>`: the directory is
/// created, chowned to the ids returned by `add_current_user_to_jail`, and bind
/// mounted into a jail whose root is a small tmpfs. Without sandboxing the storage
/// path is `/tmp/tpm-simulator`.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = if let Some(jail) = tpm_jail.as_mut() {
        // Give the device a tmpfs root for tpm simulator storage.
        // The size is 20*1024, or 20 KB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=20480",
        )?;

        let crosvm_ids = add_current_user_to_jail(jail)?;

        let storage_dir = format!("/run/vm/tpm.{}", process::id());
        let storage = PathBuf::from(&storage_dir);
        fs::create_dir_all(&storage).with_context(|| {
            format!("failed to create tpm storage dir {}", storage.display())
        })?;
        let storage_dir_c = CString::new(storage_dir).expect("no nul bytes");
        chown(&storage_dir_c, crosvm_ids.uid, crosvm_ids.gid)
            .context("failed to chown tpm storage")?;

        jail.mount_bind(&storage, &storage, true)?;
        storage
    } else {
        // Path used inside cros_sdk which does not have /run/vm.
        PathBuf::from("/tmp/tpm-simulator")
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
452
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700453fn create_single_touch_device(
454 cfg: &Config,
455 single_touch_spec: &TouchDeviceOption,
456 idx: u32,
457) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800458 let socket = single_touch_spec
459 .get_path()
460 .into_unix_stream()
461 .map_err(|e| {
462 error!("failed configuring virtio single touch: {:?}", e);
463 e
464 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800465
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800466 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700467 let dev = virtio::new_single_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700468 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700469 socket,
470 width,
471 height,
472 virtio::base_features(cfg.protected_vm),
473 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700474 .context("failed to set up input device")?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800475 Ok(VirtioDeviceStub {
476 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700477 jail: simple_jail(cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800478 })
479}
480
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700481fn create_multi_touch_device(
482 cfg: &Config,
483 multi_touch_spec: &TouchDeviceOption,
484 idx: u32,
485) -> DeviceResult {
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000486 let socket = multi_touch_spec
487 .get_path()
488 .into_unix_stream()
489 .map_err(|e| {
490 error!("failed configuring virtio multi touch: {:?}", e);
491 e
492 })?;
493
494 let (width, height) = multi_touch_spec.get_size();
495 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700496 idx,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000497 socket,
498 width,
499 height,
500 virtio::base_features(cfg.protected_vm),
501 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700502 .context("failed to set up input device")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000503
504 Ok(VirtioDeviceStub {
505 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700506 jail: simple_jail(cfg, "input_device")?,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000507 })
508}
509
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700510fn create_trackpad_device(
511 cfg: &Config,
512 trackpad_spec: &TouchDeviceOption,
513 idx: u32,
514) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800515 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000516 error!("failed configuring virtio trackpad: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800517 e
518 })?;
519
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800520 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700521 let dev = virtio::new_trackpad(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700522 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700523 socket,
524 width,
525 height,
526 virtio::base_features(cfg.protected_vm),
527 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700528 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800529
530 Ok(VirtioDeviceStub {
531 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700532 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800533 })
534}
535
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700536fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T, idx: u32) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800537 let socket = mouse_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000538 error!("failed configuring virtio mouse: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800539 e
540 })?;
541
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700542 let dev = virtio::new_mouse(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700543 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800544
545 Ok(VirtioDeviceStub {
546 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700547 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800548 })
549}
550
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700551fn create_keyboard_device<T: IntoUnixStream>(
552 cfg: &Config,
553 keyboard_socket: T,
554 idx: u32,
555) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800556 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000557 error!("failed configuring virtio keyboard: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800558 e
559 })?;
560
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700561 let dev = virtio::new_keyboard(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700562 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800563
564 Ok(VirtioDeviceStub {
565 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700566 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800567 })
568}
569
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700570fn create_switches_device<T: IntoUnixStream>(
571 cfg: &Config,
572 switches_socket: T,
573 idx: u32,
574) -> DeviceResult {
Daniel Norman5e23df72021-03-11 10:11:02 -0800575 let socket = switches_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000576 error!("failed configuring virtio switches: {:#}", e);
Daniel Norman5e23df72021-03-11 10:11:02 -0800577 e
578 })?;
579
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700580 let dev = virtio::new_switches(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700581 .context("failed to set up input device")?;
Daniel Norman5e23df72021-03-11 10:11:02 -0800582
583 Ok(VirtioDeviceStub {
584 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700585 jail: simple_jail(cfg, "input_device")?,
Daniel Norman5e23df72021-03-11 10:11:02 -0800586 })
587}
588
David Tolnay2b089fc2019-03-04 15:33:22 -0800589fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
590 let dev_file = OpenOptions::new()
591 .read(true)
592 .write(true)
593 .open(dev_path)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700594 .with_context(|| format!("failed to open vinput device {}", dev_path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800595
Noah Goldd4ca29b2020-10-27 12:21:52 -0700596 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700597 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800598
599 Ok(VirtioDeviceStub {
600 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700601 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800602 })
603}
604
David Stevens06d157a2022-01-13 23:44:48 +0900605fn create_balloon_device(
606 cfg: &Config,
607 tube: Tube,
608 inflate_tube: Option<Tube>,
609 init_balloon_size: u64,
610) -> DeviceResult {
611 let dev = virtio::Balloon::new(
612 virtio::base_features(cfg.protected_vm),
613 tube,
614 inflate_tube,
615 init_balloon_size,
616 )
617 .context("failed to create balloon")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800618
619 Ok(VirtioDeviceStub {
620 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700621 jail: simple_jail(cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800622 })
623}
624
Alexandre Courbot911773a2021-12-10 14:31:10 +0900625/// Generic method for creating a network device. `create_device` is a closure that takes the virtio
626/// features and number of queue pairs as parameters, and is responsible for creating the device
627/// itself.
628fn create_net_device<F, T>(cfg: &Config, policy: &str, create_device: F) -> DeviceResult
629where
630 F: Fn(u64, u16) -> Result<T>,
631 T: VirtioDevice + 'static,
632{
Xiong Zhang773c7072020-03-20 10:39:55 +0800633 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
634 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700635 if vcpu_count < vq_pairs as usize {
Alexandre Courbot911773a2021-12-10 14:31:10 +0900636 warn!("the number of net vq pairs must not exceed the vcpu count, falling back to single queue mode");
Xiong Zhang773c7072020-03-20 10:39:55 +0800637 vq_pairs = 1;
638 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100639 let features = virtio::base_features(cfg.protected_vm);
Alexandre Courbot911773a2021-12-10 14:31:10 +0900640
641 let dev = create_device(features, vq_pairs)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800642
643 Ok(VirtioDeviceStub {
Alexandre Courbot911773a2021-12-10 14:31:10 +0900644 dev: Box::new(dev) as Box<dyn VirtioDevice>,
645 jail: simple_jail(cfg, policy)?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800646 })
647}
648
Alexandre Courbot911773a2021-12-10 14:31:10 +0900649/// Returns a network device created from a new TAP interface configured with `host_ip`, `netmask`,
650/// and `mac_address`.
651fn create_net_device_from_config(
David Tolnay2b089fc2019-03-04 15:33:22 -0800652 cfg: &Config,
653 host_ip: Ipv4Addr,
654 netmask: Ipv4Addr,
655 mac_address: MacAddress,
David Tolnay2b089fc2019-03-04 15:33:22 -0800656) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800657 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800658 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800659 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800660 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800661 };
662
Alexandre Courbot911773a2021-12-10 14:31:10 +0900663 if cfg.vhost_net {
664 create_net_device(cfg, policy, |features, _vq_pairs| {
665 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
666 &cfg.vhost_net_device_path,
667 features,
668 host_ip,
669 netmask,
670 mac_address,
671 )
672 .context("failed to set up vhost networking")
673 })
674 } else {
675 create_net_device(cfg, policy, |features, vq_pairs| {
676 virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
677 .context("failed to create virtio network device")
678 })
679 }
680}
681
682/// Returns a network device from a file descriptor to a configured TAP interface.
683fn create_tap_net_device_from_fd(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
684 create_net_device(cfg, "net_device", |features, vq_pairs| {
685 // Safe because we ensure that we get a unique handle to the fd.
686 let tap = unsafe {
687 Tap::from_raw_descriptor(
688 validate_raw_descriptor(tap_fd).context("failed to validate tap descriptor")?,
689 )
690 .context("failed to create tap device")?
691 };
692
693 virtio::Net::from(features, tap, vq_pairs).context("failed to create tap net device")
David Tolnay2b089fc2019-03-04 15:33:22 -0800694 })
695}
696
Alexandre Courbot993aa7f2021-12-09 14:51:29 +0900697/// Returns a network device created by opening the persistent, configured TAP interface `tap_name`.
698fn create_tap_net_device_from_name(cfg: &Config, tap_name: &[u8]) -> DeviceResult {
699 create_net_device(cfg, "net_device", |features, vq_pairs| {
700 virtio::Net::<Tap>::new_from_name(features, tap_name, vq_pairs)
701 .context("failed to create configured virtio network device")
702 })
703}
704
Keiichi Watanabe60686582021-03-12 04:53:51 +0900705fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
706 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700707 .context("failed to set up vhost-user net device")?;
Keiichi Watanabe60686582021-03-12 04:53:51 +0900708
709 Ok(VirtioDeviceStub {
710 dev: Box::new(dev),
711 // no sandbox here because virtqueue handling is exported to a different process.
712 jail: None,
713 })
714}
715
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900716fn create_vhost_user_vsock_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
717 let dev = VhostUserVsock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700718 .context("failed to set up vhost-user vsock device")?;
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900719
720 Ok(VirtioDeviceStub {
721 dev: Box::new(dev),
722 // no sandbox here because virtqueue handling is exported to a different process.
723 jail: None,
724 })
725}
726
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900727fn create_vhost_user_wl_device(cfg: &Config, opt: &VhostUserWlOption) -> DeviceResult {
728 // The crosvm wl device expects us to connect the tube before it will accept a vhost-user
729 // connection.
730 let dev = VhostUserWl::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700731 .context("failed to set up vhost-user wl device")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900732
733 Ok(VirtioDeviceStub {
734 dev: Box::new(dev),
735 // no sandbox here because virtqueue handling is exported to a different process.
736 jail: None,
737 })
738}
739
/// Creates a vhost-user gpu device stub connected to the socket named in `opt`.
///
/// `host_tube` and `device_tube` are handed to the device. The returned stub has no jail.
#[cfg(feature = "gpu")]
fn create_vhost_user_gpu_device(
    cfg: &Config,
    opt: &VhostUserOption,
    host_tube: Tube,
    device_tube: Tube,
) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);

    // The crosvm gpu device expects us to connect the tube before it will accept a vhost-user
    // connection.
    let gpu = VhostUserGpu::new(features, &opt.socket, host_tube, device_tube)
        .context("failed to set up vhost-user gpu device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        // no sandbox here because virtqueue handling is exported to a different process.
        jail: None,
    })
}
763
/// Mirror-mount all the directories in `dirs` into `jail` on a best-effort basis.
///
/// Each directory that exists on the host is bind-mounted at the same path inside the jail.
/// Missing directories are silently skipped; only a failing `mount_bind` call produces an
/// error.
#[cfg(any(feature = "gpu", feature = "video-decoder", feature = "video-encoder"))]
fn jail_mount_bind_if_exists<P: AsRef<std::ffi::OsStr>>(
    jail: &mut Minijail,
    dirs: &[P],
) -> Result<()> {
    for existing_dir in dirs.iter().map(Path::new).filter(|path| path.exists()) {
        jail.mount_bind(existing_dir, existing_dir, false)?;
    }

    Ok(())
}
781
/// Builds the jail shared by GPU-related processes, using seccomp policy `policy`.
///
/// Returns `Ok(None)` when `simple_jail` yields no jail. Otherwise the jail gets a tmpfs root
/// plus the sysfs, device, library and procfs mounts set up below.
#[cfg(feature = "gpu")]
fn gpu_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
    let mut jail = match simple_jail(cfg, policy)? {
        Some(jail) => jail,
        None => return Ok(None),
    };

    // Create a tmpfs in the device's root directory so that we can bind mount the
    // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
    let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
    jail.mount_with_data(
        Path::new("none"),
        Path::new("/"),
        "tmpfs",
        tmpfs_flags,
        "size=67108864",
    )?;

    // Device nodes required for DRM.
    for sysfs_dir in ["/sys/dev/char", "/sys/devices"].iter().map(Path::new) {
        jail.mount_bind(sysfs_dir, sysfs_dir, false)?;
    }

    let drm_dri_path = Path::new("/dev/dri");
    if drm_dri_path.exists() {
        jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
    }

    // Bind mount the ARM specific devices (mali0, pvr_sync) and the udmabuf driver node if
    // they exist on the host.
    let optional_nodes = ["/dev/mali0", "/dev/pvr_sync", "/dev/udmabuf"];
    for node in optional_nodes.iter().map(Path::new) {
        if node.exists() {
            jail.mount_bind(node, node, true)?;
        }
    }

    // Libraries that are required when mesa drivers are dynamically loaded.
    jail_mount_bind_if_exists(
        &mut jail,
        &[
            "/usr/lib",
            "/usr/lib64",
            "/lib",
            "/lib64",
            "/usr/share/drirc.d",
            "/usr/share/glvnd",
            "/usr/share/vulkan",
        ],
    )?;

    // pvr driver requires read access to /proc/self/task/*/comm.
    let proc_path = Path::new("/proc");
    let proc_flags =
        (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
    jail.mount(proc_path, proc_path, "proc", proc_flags)?;

    // To enable perfetto tracing, we need to give access to the perfetto service IPC
    // endpoints.
    let perfetto_path = Path::new("/run/perfetto");
    if perfetto_path.exists() {
        jail.mount_bind(perfetto_path, perfetto_path, true)?;
    }

    Ok(Some(jail))
}
859
/// Shader cache settings computed by `get_gpu_cache_info`.
#[cfg(feature = "gpu")]
struct GpuCacheInfo<'a> {
    /// Cache directory to use, or `None` when caching is disabled or not configured.
    directory: Option<&'a str>,
    /// `(name, value)` pairs of environment variables describing the cache configuration.
    environment: Vec<(&'a str, &'a str)>,
}
865
/// Works out the shader cache directory and the `MESA_GLSL_CACHE_*` environment variables.
///
/// * No `cache_dir`: no directory and an empty environment.
/// * `cache_dir` does not exist, or the target is ARM/AArch64 with `sandbox` enabled: a warning
///   is logged and only `MESA_GLSL_CACHE_DISABLE=true` is emitted.
/// * Otherwise: the directory is returned together with `MESA_GLSL_CACHE_DISABLE=false`,
///   `MESA_GLSL_CACHE_DIR` and, if `cache_size` is given, `MESA_GLSL_CACHE_MAX_SIZE`.
#[cfg(feature = "gpu")]
fn get_gpu_cache_info<'a>(
    cache_dir: Option<&'a String>,
    cache_size: Option<&'a String>,
    sandbox: bool,
) -> GpuCacheInfo<'a> {
    let mut directory = None;
    let mut environment = Vec::new();

    match cache_dir {
        None => {}
        Some(path) if !Path::new(path).exists() => {
            warn!("shader caching dir {} does not exist", path);
            environment.push(("MESA_GLSL_CACHE_DISABLE", "true"));
        }
        Some(_) if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && sandbox => {
            warn!("shader caching not yet supported on ARM with sandbox enabled");
            environment.push(("MESA_GLSL_CACHE_DISABLE", "true"));
        }
        Some(path) => {
            directory = Some(path.as_str());

            environment.push(("MESA_GLSL_CACHE_DISABLE", "false"));
            environment.push(("MESA_GLSL_CACHE_DIR", path.as_str()));
            environment.extend(
                cache_size.map(|size| ("MESA_GLSL_CACHE_MAX_SIZE", size.as_str())),
            );
        }
    }

    GpuCacheInfo {
        directory,
        environment,
    }
}
898
/// Creates the virtio GPU device stub and, when `gpu_jail` provides one, its jail.
///
/// The display backend list handed to the device is X (with `x_display`) followed by the stub
/// backend, with a Wayland backend inserted at the front when `wayland_socket_path` is given.
///
/// When a jail is created, the shader cache directory (if any) and the parent directory of
/// every configured wayland socket are bind-mounted into it, and the shader cache environment
/// variables are set on the current process via `env::set_var`.
///
/// # Errors
///
/// Fails if a wayland socket path has no parent, if `cfg.gpu_parameters` is unset, if the exit
/// event cannot be cloned, or if setting up the jail fails.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_tube: Tube,
    resource_bridges: Vec<Tube>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    render_server_fd: Option<SafeDescriptor>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    let mut display_backends = vec![
        virtio::DisplayBackend::X(x_display),
        virtio::DisplayBackend::Stub,
    ];

    let wayland_socket_dirs = cfg
        .wayland_socket_paths
        .iter()
        .map(|(_name, path)| path.parent())
        .collect::<Option<Vec<_>>>()
        .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;

    if let Some(socket_path) = wayland_socket_path {
        display_backends.insert(
            0,
            virtio::DisplayBackend::Wayland(Some(socket_path.to_owned())),
        );
    }

    // Report a missing GPU configuration as an error rather than panicking.
    let gpu_parameters = cfg
        .gpu_parameters
        .as_ref()
        .context("gpu parameters are not configured")?;

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().context("failed to clone event")?,
        Some(gpu_device_tube),
        resource_bridges,
        display_backends,
        gpu_parameters,
        render_server_fd,
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
    );

    let jail = match gpu_jail(cfg, "gpu_device")? {
        Some(mut jail) => {
            // Prepare GPU shader disk cache directory.
            let cache_info = get_gpu_cache_info(
                gpu_parameters.cache_path.as_ref(),
                gpu_parameters.cache_size.as_ref(),
                cfg.sandbox,
            );

            if let Some(dir) = cache_info.directory {
                jail.mount_bind(dir, dir, true)?;
            }
            for (key, val) in cache_info.environment {
                env::set_var(key, val);
            }

            // Bind mount the wayland socket's directory into jail's root. This is necessary since
            // each new wayland context must open() the socket. If the wayland socket is ever
            // destroyed and remade in the same host directory, new connections will be possible
            // without restarting the wayland device.
            for dir in &wayland_socket_dirs {
                jail.mount_bind(dir, dir, true)?;
            }

            add_current_user_to_jail(&mut jail)?;

            Some(jail)
        }
        None => None,
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
981
/// Builds the `KEY=VALUE` environment list for the GPU render server.
///
/// The shader cache variables from `cache_info` come first. They are followed by every
/// variable of the current process environment whose name is not already provided by
/// `cache_info`.
///
/// # Errors
///
/// Fails if any key or value in the current process environment is not valid Unicode.
#[cfg(feature = "gpu")]
fn get_gpu_render_server_environment(cache_info: &GpuCacheInfo) -> Result<Vec<String>> {
    let cache_env_keys: HashSet<&str> = cache_info
        .environment
        .iter()
        .map(|(key, _val)| *key)
        .collect();

    let mut env: Vec<String> = cache_info
        .environment
        .iter()
        .map(|(key, val)| format!("{}={}", key, val))
        .collect();

    for (key_os, val_os) in env::vars_os() {
        // minijail should accept OsStr rather than str...
        let key = key_os
            .into_string()
            .map_err(|_| anyhow!("invalid environment key/val"))?;
        let val = val_os
            .into_string()
            .map_err(|_| anyhow!("invalid environment key/val"))?;

        if cache_env_keys.contains(key.as_str()) {
            continue;
        }
        env.push(format!("{}={}", key, val));
    }

    Ok(env)
}
1005
/// Wrapper around a `Minijail` that calls `kill()` on it when dropped.
#[cfg(feature = "gpu")]
struct ScopedMinijail(Minijail);

#[cfg(feature = "gpu")]
impl Drop for ScopedMinijail {
    /// Kills the wrapped jail; the result of `kill()` is deliberately ignored.
    fn drop(&mut self) {
        let _ = self.0.kill();
    }
}
1015
/// Launches the GPU render server described by `render_server_parameters`.
///
/// A `UnixSeqpacket` pair is created. The server end is inherited by the spawned command,
/// together with stdout and stderr, and its descriptor number is passed via `--socket-fd`.
/// The client end is returned to the caller along with the jail the command was started in.
///
/// When `gpu_jail` provides a jail, the shader cache directory and `/dev/log` (if present) are
/// bind-mounted into it and, if any cache variables apply, an explicit environment is built
/// for the server. Otherwise a fresh `Minijail` is used and no explicit environment is passed.
#[cfg(feature = "gpu")]
fn start_gpu_render_server(
    cfg: &Config,
    render_server_parameters: &GpuRenderServerParameters,
) -> Result<(Minijail, SafeDescriptor)> {
    let (server_socket, client_socket) =
        UnixSeqpacket::pair().context("failed to create render server socket")?;

    let (jail, env) = match gpu_jail(cfg, "gpu_render_server")? {
        Some(mut jail) => {
            let cache_info = get_gpu_cache_info(
                render_server_parameters.cache_path.as_ref(),
                render_server_parameters.cache_size.as_ref(),
                cfg.sandbox,
            );

            if let Some(dir) = cache_info.directory {
                jail.mount_bind(dir, dir, true)?;
            }

            let env = if cache_info.environment.is_empty() {
                None
            } else {
                Some(get_gpu_render_server_environment(&cache_info)?)
            };

            // bind mount /dev/log for syslog
            let log_path = Path::new("/dev/log");
            if log_path.exists() {
                jail.mount_bind(log_path, log_path, true)?;
            }

            // Run as root in the jail to keep capabilities after execve, which is needed for
            // mounting to work. All capabilities will be dropped afterwards.
            add_current_user_as_root_to_jail(&mut jail)?;

            (jail, env)
        }
        None => (Minijail::new().context("failed to create jail")?, None),
    };

    let socket_fd = server_socket.as_raw_descriptor();
    let inheritable_fds = [socket_fd, libc::STDOUT_FILENO, libc::STDERR_FILENO];

    let cmd = &render_server_parameters.path;
    let cmd_str = cmd
        .to_str()
        .ok_or_else(|| anyhow!("invalid render server path"))?;
    let fd_str = socket_fd.to_string();
    let args = [cmd_str, "--socket-fd", &fd_str];

    let envp: Option<Vec<&str>> = env
        .as_ref()
        .map(|vars| vars.iter().map(String::as_str).collect());

    let command =
        minijail::Command::new_for_path(cmd, &inheritable_fds, &args, envp.as_deref())?;
    jail.run_command(command)
        .context("failed to start gpu render server")?;

    Ok((jail, SafeDescriptor::from(client_socket)))
}
1084
David Tolnay2b089fc2019-03-04 15:33:22 -08001085fn create_wayland_device(
1086 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001087 control_tube: Tube,
1088 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001089) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001090 let wayland_socket_dirs = cfg
1091 .wayland_socket_paths
1092 .iter()
1093 .map(|(_name, path)| path.parent())
1094 .collect::<Option<Vec<_>>>()
Daniel Verkamp6b298582021-08-16 15:37:11 -07001095 .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001096
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001097 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +01001098 let dev = virtio::Wl::new(
1099 features,
1100 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001101 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +01001102 resource_bridge,
1103 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001104 .context("failed to create wayland device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001105
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001106 let jail = match simple_jail(cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -08001107 Some(mut jail) => {
1108 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
1109 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
1110 jail.mount_with_data(
1111 Path::new("none"),
1112 Path::new("/"),
1113 "tmpfs",
1114 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1115 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -08001116 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001117
1118 // Bind mount the wayland socket's directory into jail's root. This is necessary since
1119 // each new wayland context must open() the socket. If the wayland socket is ever
1120 // destroyed and remade in the same host directory, new connections will be possible
1121 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001122 for dir in &wayland_socket_dirs {
1123 jail.mount_bind(dir, dir, true)?;
1124 }
Fergus Dall51200512021-08-19 12:54:26 +10001125 add_current_user_to_jail(&mut jail)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001126
1127 Some(jail)
1128 }
1129 None => None,
1130 };
1131
1132 Ok(VirtioDeviceStub {
1133 dev: Box::new(dev),
1134 jail,
1135 })
1136}
1137
/// Creates a virtio video device stub of kind `typ` using `backend`, plus its jail.
///
/// When `simple_jail` provides a jail, it gets a tmpfs root and the device nodes and sockets
/// mounted below. The extra mounts depend on the `libvda` feature, the selected backend and
/// the target architecture.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    backend: VideoBackendType,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let jail = match simple_jail(cfg, "video_device")? {
        None => None,
        Some(mut jail) => {
            match typ {
                #[cfg(feature = "video-decoder")]
                devices::virtio::VideoDeviceType::Decoder => add_current_user_to_jail(&mut jail)?,
                #[cfg(feature = "video-encoder")]
                devices::virtio::VideoDeviceType::Encoder => add_current_user_to_jail(&mut jail)?,
            };

            // Create a tmpfs in the device's root directory so that we can bind mount files.
            let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                tmpfs_flags,
                "size=67108864",
            )?;

            // Render node for libvda.
            #[cfg(feature = "libvda")]
            if backend == VideoBackendType::Libvda || backend == VideoBackendType::LibvdaVd {
                // follow the implementation at:
                // https://source.corp.google.com/chromeos_public/src/platform/minigbm/cros_gralloc/cros_gralloc_driver.cc;l=90;bpv=0;cl=c06cc9cccb3cf3c7f9d2aec706c27c34cd6162a0
                const DRM_NUM_NODES: u32 = 63;
                const DRM_RENDER_NODE_START: u32 = 128;

                // Mount consecutive render nodes, stopping at the first one that is missing.
                let render_nodes = (0..DRM_NUM_NODES)
                    .map(|offset| {
                        PathBuf::from(format!(
                            "/dev/dri/renderD{}",
                            DRM_RENDER_NODE_START + offset
                        ))
                    })
                    .take_while(|node| node.exists());
                for node in render_nodes {
                    jail.mount_bind(node.as_path(), node.as_path(), false)?;
                }
            }

            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            {
                // Device nodes used by libdrm through minigbm in libvda on AMD devices.
                for sysfs_dir in ["/sys/dev/char", "/sys/devices"].iter().map(Path::new) {
                    jail.mount_bind(sysfs_dir, sysfs_dir, false)?;
                }

                // Required for loading dri libraries loaded by minigbm on AMD devices.
                jail_mount_bind_if_exists(&mut jail, &["/usr/lib64"])?;
            }

            // Device nodes required by libchrome which establishes Mojo connection in libvda.
            let dev_urandom_path = Path::new("/dev/urandom");
            jail.mount_bind(dev_urandom_path, dev_urandom_path, false)?;
            let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
            jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;

            Some(jail)
        }
    };

    let video_device = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        backend,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(video_device),
        jail,
    })
}
1213
/// Creates a video device of kind `typ` with `backend` and appends it to `devs`.
///
/// `video_tube` is passed to `create_video_device` as the device's resource bridge. Nothing
/// is appended if creation fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    backend: VideoBackendType,
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> Result<()> {
    let stub = create_video_device(backend, cfg, typ, video_tube)?;
    devs.push(stub);
    Ok(())
}
1225
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001226fn create_vhost_vsock_device(cfg: &Config, cid: u64) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001227 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann50f95912021-11-05 16:59:39 +01001228
1229 let device_file = match cfg
1230 .vhost_vsock_device
1231 .as_ref()
1232 .unwrap_or(&VhostVsockDeviceParameter::default())
1233 {
1234 VhostVsockDeviceParameter::Fd(fd) => {
1235 let fd = validate_raw_descriptor(*fd)
1236 .context("failed to validate fd for virtual socker device")?;
1237 // Safe because the `fd` is actually owned by this process and
1238 // we have a unique handle to it.
1239 unsafe { File::from_raw_fd(fd) }
1240 }
1241 VhostVsockDeviceParameter::Path(path) => OpenOptions::new()
1242 .read(true)
1243 .write(true)
1244 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
1245 .open(path)
1246 .context("failed to open virtual socket device")?,
1247 };
1248
1249 let dev = virtio::vhost::Vsock::new(device_file, features, cid)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001250 .context("failed to set up virtual socket device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001251
1252 Ok(VirtioDeviceStub {
1253 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001254 jail: simple_jail(cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001255 })
1256}
1257
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001258fn create_fs_device(
1259 cfg: &Config,
1260 uid_map: &str,
1261 gid_map: &str,
1262 src: &Path,
1263 tag: &str,
1264 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001265 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001266) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001267 let max_open_files =
1268 base::get_max_open_files().context("failed to get max number of open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001269 let j = if cfg.sandbox {
1270 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1271 let config = SandboxConfig {
1272 limit_caps: false,
1273 uid_map: Some(uid_map),
1274 gid_map: Some(gid_map),
1275 log_failures: cfg.seccomp_log_failures,
1276 seccomp_policy: &seccomp_policy,
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -08001277 // We want bind mounts from the parent namespaces to propagate into the fs device's
1278 // namespace.
1279 remount_mode: Some(libc::MS_SLAVE),
Matt Delcoc24ad782020-02-14 13:24:36 -08001280 };
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -08001281 create_base_minijail(src, Some(max_open_files), Some(&config))?
Matt Delcoc24ad782020-02-14 13:24:36 -08001282 } else {
1283 create_base_minijail(src, Some(max_open_files), None)?
1284 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001285
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001286 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001287 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1288 // when num_queues > 1.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001289 let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube)
1290 .context("failed to create fs device")?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001291
1292 Ok(VirtioDeviceStub {
1293 dev: Box::new(dev),
1294 jail: Some(j),
1295 })
1296}
1297
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001298fn create_9p_device(
1299 cfg: &Config,
1300 uid_map: &str,
1301 gid_map: &str,
1302 src: &Path,
1303 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001304 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001305) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001306 let max_open_files =
1307 base::get_max_open_files().context("failed to get max number of open files")?;
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001308 let (jail, root) = if cfg.sandbox {
1309 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1310 let config = SandboxConfig {
1311 limit_caps: false,
1312 uid_map: Some(uid_map),
1313 gid_map: Some(gid_map),
1314 log_failures: cfg.seccomp_log_failures,
1315 seccomp_policy: &seccomp_policy,
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -08001316 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1317 // namespace.
1318 remount_mode: Some(libc::MS_SLAVE),
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001319 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001320
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -08001321 let jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
Chirantan Ekbote055de382020-01-24 12:16:58 +09001322
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001323 // The shared directory becomes the root of the device's file system.
1324 let root = Path::new("/");
1325 (Some(jail), root)
1326 } else {
1327 // There's no mount namespace so we tell the server to treat the source directory as the
1328 // root.
1329 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001330 };
1331
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001332 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001333 p9_cfg.root = root.into();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001334 let dev = virtio::P9::new(features, tag, p9_cfg).context("failed to create 9p device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001335
1336 Ok(VirtioDeviceStub {
1337 dev: Box::new(dev),
1338 jail,
1339 })
1340}
1341
Jakub Starona3411ea2019-04-24 10:55:25 -07001342fn create_pmem_device(
1343 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001344 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001345 resources: &mut SystemAllocator,
1346 disk: &DiskOption,
1347 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001348 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001349) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09001350 let fd = open_file(&disk.path, disk.read_only, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001351 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001352
1353 let (disk_size, arena_size) = {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001354 let metadata = std::fs::metadata(&disk.path).with_context(|| {
1355 format!("failed to get disk image {} metadata", disk.path.display())
1356 })?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001357 let disk_len = metadata.len();
1358 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1359 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1360 // we just align the size of the file to 2 MiB then access beyond the last page of the
1361 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1362 // padding up to 2 MiB.
1363 let alignment = 2 * 1024 * 1024;
1364 let align_adjust = if disk_len % alignment != 0 {
1365 alignment - (disk_len % alignment)
1366 } else {
1367 0
1368 };
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001369 (
1370 disk_len,
1371 disk_len
1372 .checked_add(align_adjust)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001373 .ok_or_else(|| anyhow!("pmem device image too big"))?,
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001374 )
Jakub Starona3411ea2019-04-24 10:55:25 -07001375 };
1376
1377 let protection = {
1378 if disk.read_only {
1379 Protection::read()
1380 } else {
1381 Protection::read_write()
1382 }
1383 };
1384
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001385 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001386 // Conversion from u64 to usize may fail on 32bit system.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001387 let arena_size = usize::try_from(arena_size).context("pmem device image too big")?;
1388 let disk_size = usize::try_from(disk_size).context("pmem device image too big")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001389
Daniel Verkamp6b298582021-08-16 15:37:11 -07001390 let mut arena =
1391 MemoryMappingArena::new(arena_size).context("failed to reserve pmem memory")?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001392 arena
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001393 .add_fd_offset_protection(0, disk_size, &fd, 0, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001394 .context("failed to reserve pmem memory")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001395
1396 // If the disk is not a multiple of the page size, the OS will fill the remaining part
1397 // of the page with zeroes. However, the anonymous mapping added below must start on a
1398 // page boundary, so round up the size before calculating the offset of the anon region.
1399 let disk_size = round_up_to_page_size(disk_size);
1400
1401 if arena_size > disk_size {
1402 // Add an anonymous region with the same protection as the disk mapping if the arena
1403 // size was aligned.
1404 arena
1405 .add_anon_protection(disk_size, arena_size - disk_size, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001406 .context("failed to reserve pmem padding")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001407 }
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001408 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001409 };
1410
1411 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001412 .mmio_allocator(MmioType::High)
Daniel Verkamp57e4f542021-10-28 09:56:40 -07001413 .reverse_allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001414 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001415 Alloc::PmemDevice(index),
1416 format!("pmem_disk_image_{}", index),
1417 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1418 128 * 1024 * 1024, /* 128 MiB */
1419 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001420 .context("failed to allocate memory for pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001421
Daniel Verkampe1980a92020-02-07 11:00:55 -08001422 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001423 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001424 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001425 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001426 /* read_only = */ disk.read_only,
1427 /* log_dirty_pages = */ false,
1428 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001429 .context("failed to add pmem device memory")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001430
Daniel Verkampe1980a92020-02-07 11:00:55 -08001431 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001432 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001433 fd,
1434 GuestAddress(mapping_address),
1435 slot,
1436 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001437 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001438 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001439 .context("failed to create pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001440
1441 Ok(VirtioDeviceStub {
1442 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001443 jail: simple_jail(cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001444 })
1445}
1446
Zide Chendfc4b882021-03-10 16:35:37 -08001447fn create_iommu_device(
1448 cfg: &Config,
Zide Chen71435c12021-03-03 15:02:02 -08001449 phys_max_addr: u64,
Zide Chendfc4b882021-03-10 16:35:37 -08001450 endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1451) -> DeviceResult {
Zide Chen71435c12021-03-03 15:02:02 -08001452 let dev = virtio::Iommu::new(
1453 virtio::base_features(cfg.protected_vm),
1454 endpoints,
1455 phys_max_addr,
1456 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001457 .context("failed to create IOMMU device")?;
Zide Chendfc4b882021-03-10 16:35:37 -08001458
1459 Ok(VirtioDeviceStub {
1460 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001461 jail: simple_jail(cfg, "iommu_device")?,
Zide Chendfc4b882021-03-10 16:35:37 -08001462 })
1463}
1464
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001465fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001466 let mut keep_rds = Vec::new();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001467 let evt = Event::new().context("failed to create event")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001468 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001469 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001470 .context("failed to create console device")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001471
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001472 let jail = match simple_jail(cfg, "serial")? {
Nicholas Verne71e73d82020-07-08 17:19:55 +10001473 Some(mut jail) => {
1474 // Create a tmpfs in the device's root directory so that we can bind mount the
1475 // log socket directory into it.
1476 // The size=67108864 is size=64*1024*1024 or size=64MB.
1477 jail.mount_with_data(
1478 Path::new("none"),
1479 Path::new("/"),
1480 "tmpfs",
1481 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1482 "size=67108864",
1483 )?;
Fergus Dall51200512021-08-19 12:54:26 +10001484 add_current_user_to_jail(&mut jail)?;
Nicholas Verne71e73d82020-07-08 17:19:55 +10001485 let res = param.add_bind_mounts(&mut jail);
1486 if res.is_err() {
1487 error!("failed to add bind mounts for console device");
1488 }
1489 Some(jail)
1490 }
1491 None => None,
1492 };
1493
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001494 Ok(VirtioDeviceStub {
1495 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001496 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001497 })
1498}
1499
/// Creates a virtio sound device from `path` via `virtio::new_sound`, returned together with
/// the "vios_audio_device" jail. Only built with the "audio" feature.
#[cfg(feature = "audio")]
fn create_sound_device(path: &Path, cfg: &Config) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let sound = virtio::new_sound(path, features).context("failed to create sound device")?;
    let jail = simple_jail(cfg, "vios_audio_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(sound),
        jail,
    })
}
1510
/// Creates every virtio device requested by `cfg` and returns them, each paired with its jail,
/// in the order they were created.
///
/// Devices are appended to the result in a fixed sequence: virtio consoles, block devices,
/// vhost-user block/console devices, pmem devices, the RNG device, the optional software TPM,
/// input devices, the balloon, network devices, remaining vhost-user devices, wayland, GPU (with
/// its display-window input devices), audio/video devices, vsock, and finally shared
/// directories, mac80211_hwsim and sound. Several sections are compiled only when the matching
/// cargo feature ("tpm", "gpu", "audio", "audio_cras", "video-decoder", "video-encoder") is
/// enabled.
///
/// Tube arguments:
/// * `disk_device_tubes`, `pmem_device_tubes` and `fs_device_tubes` are consumed from the front,
///   one tube per configured disk, pmem device and `SharedDirKind::FS` shared directory
///   respectively. `Vec::remove(0)` panics on an empty vector, so each must hold at least as
///   many tubes as there are matching entries in `cfg`.
/// * `wayland_device_tube` is used only when `cfg.wayland_socket_paths` is non-empty.
/// * `gpu_device_tube`, `vhost_user_gpu_tubes`, `map_request` and `render_server_fd` are used
///   only by code built with the "gpu" feature.
/// * `balloon_device_tube` being `None` means no balloon device is created.
///
/// Returns an error as soon as any device fails to be created, or when vhost-user-net is
/// combined with a host_ip/netmask/mac configuration.
// gpu_device_tube is not used when GPU support is disabled.
#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
fn create_virtio_devices(
    cfg: &Config,
    vm: &mut impl Vm,
    resources: &mut SystemAllocator,
    _exit_evt: &Event,
    wayland_device_tube: Tube,
    gpu_device_tube: Tube,
    vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
    balloon_device_tube: Option<Tube>,
    balloon_inflate_tube: Option<Tube>,
    init_balloon_size: u64,
    disk_device_tubes: &mut Vec<Tube>,
    pmem_device_tubes: &mut Vec<Tube>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
    fs_device_tubes: &mut Vec<Tube>,
    #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
) -> DeviceResult<Vec<VirtioDeviceStub>> {
    let mut devs = Vec::new();

    // Only serial ports configured with virtio-console hardware become virtio devices here.
    for (_, param) in cfg
        .serial_parameters
        .iter()
        .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
    {
        let dev = create_console_device(cfg, param)?;
        devs.push(dev);
    }

    // Each disk takes the next control tube from the front of `disk_device_tubes`.
    for disk in &cfg.disks {
        let disk_device_tube = disk_device_tubes.remove(0);
        devs.push(create_block_device(cfg, disk, disk_device_tube)?);
    }

    for blk in &cfg.vhost_user_blk {
        devs.push(create_vhost_user_block_device(cfg, blk)?);
    }

    for console in &cfg.vhost_user_console {
        devs.push(create_vhost_user_console_device(cfg, console)?);
    }

    // Each pmem device takes the next tube from the front of `pmem_device_tubes`; its position
    // in `cfg.pmem_devices` is passed along as the device index.
    for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
        let pmem_device_tube = pmem_device_tubes.remove(0);
        devs.push(create_pmem_device(
            cfg,
            vm,
            resources,
            pmem_disk,
            index,
            pmem_device_tube,
        )?);
    }

    // The RNG device is created unconditionally.
    devs.push(create_rng_device(cfg)?);

    #[cfg(feature = "tpm")]
    {
        if cfg.software_tpm {
            devs.push(create_tpm_device(cfg)?);
        }
    }

    // Input devices: each kind is numbered independently, starting at 0, by its position in the
    // corresponding config list.
    for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
        devs.push(create_single_touch_device(
            cfg,
            single_touch_spec,
            idx as u32,
        )?);
    }

    for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
        devs.push(create_multi_touch_device(
            cfg,
            multi_touch_spec,
            idx as u32,
        )?);
    }

    for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
        devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
    }

    for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
        devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
    }

    for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
        devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
    }

    for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
        devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
    }

    for dev_path in &cfg.virtio_input_evdevs {
        devs.push(create_vinput_device(cfg, dev_path)?);
    }

    // The balloon device is optional; the inflate tube and initial size are only forwarded when
    // a balloon control tube was supplied.
    if let Some(balloon_device_tube) = balloon_device_tube {
        devs.push(create_balloon_device(
            cfg,
            balloon_device_tube,
            balloon_inflate_tube,
            init_balloon_size,
        )?);
    }

    // NOTE(review): the next comment appears to describe the host_ip/netmask block further
    // down rather than the tap_fd loop directly below it — confirm.
    // We checked above that if the IP is defined, then the netmask is, too.
    for tap_fd in &cfg.tap_fd {
        devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
    }

    // A net device is created from the config only when host IP, netmask and MAC address are
    // all present.
    if let (Some(host_ip), Some(netmask), Some(mac_address)) =
        (cfg.host_ip, cfg.netmask, cfg.mac_address)
    {
        if !cfg.vhost_user_net.is_empty() {
            bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
        }
        devs.push(create_net_device_from_config(
            cfg,
            host_ip,
            netmask,
            mac_address,
        )?);
    }

    for tap_name in &cfg.tap_name {
        devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
    }

    for net in &cfg.vhost_user_net {
        devs.push(create_vhost_user_net_device(cfg, net)?);
    }

    for vsock in &cfg.vhost_user_vsock {
        devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
    }

    for opt in &cfg.vhost_user_wl {
        devs.push(create_vhost_user_wl_device(cfg, opt)?);
    }

    // Pair each vhost-user GPU option with its (host, device) tube pair; `zip` stops at the
    // shorter of the two sequences.
    #[cfg(feature = "gpu")]
    for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
        devs.push(create_vhost_user_gpu_device(
            cfg,
            opt,
            host_tube,
            device_tube,
        )?);
    }

    for opt in &cfg.vvu_proxy {
        devs.push(create_vvu_proxy_device(cfg, opt)?);
    }

    // GPU-side ends of the tubes created below for wayland and video devices; the whole list is
    // handed to the GPU device when one is created.
    #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
    let mut resource_bridges = Vec::<Tube>::new();

    if !cfg.wayland_socket_paths.is_empty() {
        #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
        let mut wl_resource_bridge = None::<Tube>;

        #[cfg(feature = "gpu")]
        {
            // Only set up a wayland<->GPU bridge when a GPU is configured.
            if cfg.gpu_parameters.is_some() {
                let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
                resource_bridges.push(gpu_socket);
                wl_resource_bridge = Some(wl_socket);
            }
        }

        devs.push(create_wayland_device(
            cfg,
            wayland_device_tube,
            wl_resource_bridge,
        )?);
    }

    // The video tubes are created here, before the GPU device consumes `resource_bridges`; the
    // video devices themselves are registered after the GPU section.
    #[cfg(feature = "video-decoder")]
    let video_dec_cfg = if let Some(backend) = cfg.video_dec {
        let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
        resource_bridges.push(gpu_tube);
        Some((video_tube, backend))
    } else {
        None
    };

    #[cfg(feature = "video-encoder")]
    let video_enc_cfg = if let Some(backend) = cfg.video_enc {
        let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
        resource_bridges.push(gpu_tube);
        Some((video_tube, backend))
    } else {
        None
    };

    #[cfg(feature = "gpu")]
    {
        if let Some(gpu_parameters) = &cfg.gpu_parameters {
            // Use the first configured display's size, falling back to the defaults when no
            // display is configured.
            let mut gpu_display_w = DEFAULT_DISPLAY_WIDTH;
            let mut gpu_display_h = DEFAULT_DISPLAY_HEIGHT;
            if !gpu_parameters.displays.is_empty() {
                gpu_display_w = gpu_parameters.displays[0].width;
                gpu_display_h = gpu_parameters.displays[0].height;
            }

            let mut event_devices = Vec::new();
            if cfg.display_window_mouse {
                // The display window's pointer is exposed to the guest as a multi-touch device:
                // one socket end goes to the virtio device, the other to the GPU display as a
                // touchscreen event device.
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().context("failed to create socket")?;
                // Size the device after the first configured multi-touch spec, or the display
                // size when there is none.
                let (multi_touch_width, multi_touch_height) = cfg
                    .virtio_multi_touch
                    .first()
                    .as_ref()
                    .map(|multi_touch_spec| multi_touch_spec.get_size())
                    .unwrap_or((gpu_display_w, gpu_display_h));
                let dev = virtio::new_multi_touch(
                    // u32::MAX is the least likely to collide with the indices generated above for
                    // the multi_touch options, which begin at 0.
                    u32::MAX,
                    virtio_dev_socket,
                    multi_touch_width,
                    multi_touch_height,
                    virtio::base_features(cfg.protected_vm),
                )
                .context("failed to set up mouse device")?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(cfg, "input_device")?,
                });
                event_devices.push(EventDevice::touchscreen(event_device_socket));
            }
            if cfg.display_window_keyboard {
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().context("failed to create socket")?;
                let dev = virtio::new_keyboard(
                    // u32::MAX is the least likely to collide with the indices generated above for
                    // the keyboard options, which begin at 0.
                    u32::MAX,
                    virtio_dev_socket,
                    virtio::base_features(cfg.protected_vm),
                )
                .context("failed to set up keyboard device")?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(cfg, "input_device")?,
                });
                event_devices.push(EventDevice::keyboard(event_device_socket));
            }

            devs.push(create_gpu_device(
                cfg,
                _exit_evt,
                gpu_device_tube,
                resource_bridges,
                // Use the unnamed socket for GPU display screens.
                cfg.wayland_socket_paths.get(""),
                cfg.x_display.clone(),
                render_server_fd,
                event_devices,
                map_request,
            )?);
        }
    }

    #[cfg(feature = "audio_cras")]
    {
        for cras_snd in &cfg.cras_snds {
            devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
        }
    }

    #[cfg(feature = "video-decoder")]
    {
        if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
            register_video_device(
                video_dec_backend,
                &mut devs,
                video_dec_tube,
                cfg,
                devices::virtio::VideoDeviceType::Decoder,
            )?;
        }
    }

    #[cfg(feature = "video-encoder")]
    {
        if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
            register_video_device(
                video_enc_backend,
                &mut devs,
                video_enc_tube,
                cfg,
                devices::virtio::VideoDeviceType::Encoder,
            )?;
        }
    }

    if let Some(cid) = cfg.cid {
        devs.push(create_vhost_vsock_device(cfg, cid)?);
    }

    for vhost_user_fs in &cfg.vhost_user_fs {
        devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
    }

    #[cfg(feature = "audio")]
    for vhost_user_snd in &cfg.vhost_user_snd {
        devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
    }

    // Shared directories become either a virtio-fs or a 9p device depending on their kind.
    for shared_dir in &cfg.shared_dirs {
        let SharedDir {
            src,
            tag,
            kind,
            uid_map,
            gid_map,
            fs_cfg,
            p9_cfg,
        } = shared_dir;

        let dev = match kind {
            SharedDirKind::FS => {
                // Only FS devices take a tube, from the front of `fs_device_tubes`.
                let device_tube = fs_device_tubes.remove(0);
                create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
            }
            SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
        };
        devs.push(dev);
    }

    if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
        devs.push(create_vhost_user_mac80211_hwsim_device(
            cfg,
            vhost_user_mac80211_hwsim,
        )?);
    }

    #[cfg(feature = "audio")]
    if let Some(path) = &cfg.sound {
        devs.push(create_sound_device(path, cfg)?);
    }

    Ok(devs)
}
1860
Xiong Zhang10f15052021-04-08 17:23:33 +08001861fn create_vfio_device(
1862 cfg: &Config,
1863 vm: &impl Vm,
1864 resources: &mut SystemAllocator,
1865 control_tubes: &mut Vec<TaggedControlTube>,
1866 vfio_path: &Path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001867 bus_num: Option<u8>,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001868 iommu_endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1869 coiommu_endpoints: Option<&mut Vec<u16>>,
1870 iommu_dev: IommuDevType,
Xiong Zhang10f15052021-04-08 17:23:33 +08001871) -> DeviceResult<(Box<VfioPciDevice>, Option<Minijail>)> {
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001872 let vfio_container = VfioCommonSetup::vfio_get_container(iommu_dev, Some(vfio_path))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001873 .context("failed to get vfio container")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001874
1875 // create MSI, MSI-X, and Mem request sockets for each vfio device
Daniel Verkamp6b298582021-08-16 15:37:11 -07001876 let (vfio_host_tube_msi, vfio_device_tube_msi) =
1877 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001878 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
1879
Daniel Verkamp6b298582021-08-16 15:37:11 -07001880 let (vfio_host_tube_msix, vfio_device_tube_msix) =
1881 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001882 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
1883
Daniel Verkamp6b298582021-08-16 15:37:11 -07001884 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1885 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001886 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1887
Xiong Zhange2ff2c42021-06-02 16:49:50 +08001888 let hotplug = bus_num.is_some();
Xiong Zhang81ae6f32021-06-26 00:16:00 +08001889 let vfio_device_tube_vm = if hotplug {
1890 let (vfio_host_tube_vm, device_tube_vm) = Tube::pair().context("failed to create tube")?;
1891 control_tubes.push(TaggedControlTube::Vm(vfio_host_tube_vm));
1892 Some(device_tube_vm)
1893 } else {
1894 None
1895 };
Xiong Zhange2ff2c42021-06-02 16:49:50 +08001896
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001897 let vfio_device = VfioDevice::new_passthrough(
1898 &vfio_path,
1899 vm,
1900 vfio_container.clone(),
1901 iommu_dev != IommuDevType::NoIommu,
1902 )
1903 .context("failed to create vfio device")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001904 let mut vfio_pci_device = Box::new(VfioPciDevice::new(
1905 vfio_device,
Xiong Zhange19ab752021-05-20 18:18:46 +08001906 bus_num,
Xiong Zhang10f15052021-04-08 17:23:33 +08001907 vfio_device_tube_msi,
1908 vfio_device_tube_msix,
1909 vfio_device_tube_mem,
Xiong Zhang81ae6f32021-06-26 00:16:00 +08001910 vfio_device_tube_vm,
Xiong Zhang10f15052021-04-08 17:23:33 +08001911 ));
1912 // early reservation for pass-through PCI devices.
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001913 let endpoint_addr = vfio_pci_device
1914 .allocate_address(resources)
1915 .context("failed to allocate resources early for vfio pci dev")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001916
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001917 match iommu_dev {
1918 IommuDevType::NoIommu => {}
1919 IommuDevType::VirtioIommu => {
1920 iommu_endpoints.insert(endpoint_addr.to_u32(), vfio_container);
1921 }
1922 IommuDevType::CoIommu => {
1923 if let Some(endpoints) = coiommu_endpoints {
1924 endpoints.push(endpoint_addr.to_u32() as u16);
1925 } else {
1926 bail!("Missed coiommu_endpoints vector to store the endpoint addr");
1927 }
1928 }
Zide Chendfc4b882021-03-10 16:35:37 -08001929 }
1930
Xiong Zhange2ff2c42021-06-02 16:49:50 +08001931 if hotplug {
1932 Ok((vfio_pci_device, None))
1933 } else {
1934 Ok((vfio_pci_device, simple_jail(cfg, "vfio_device")?))
1935 }
Xiong Zhang10f15052021-04-08 17:23:33 +08001936}
1937
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001938fn create_vfio_platform_device(
1939 cfg: &Config,
1940 vm: &impl Vm,
1941 _resources: &mut SystemAllocator,
1942 control_tubes: &mut Vec<TaggedControlTube>,
1943 vfio_path: &Path,
1944 _endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001945 iommu_dev: IommuDevType,
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001946) -> DeviceResult<(VfioPlatformDevice, Option<Minijail>)> {
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001947 let vfio_container = VfioCommonSetup::vfio_get_container(iommu_dev, Some(vfio_path))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001948 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001949
Daniel Verkamp6b298582021-08-16 15:37:11 -07001950 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1951 Tube::pair().context("failed to create tube")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001952 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1953
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001954 let vfio_device = VfioDevice::new_passthrough(
1955 &vfio_path,
1956 vm,
1957 vfio_container,
1958 iommu_dev != IommuDevType::NoIommu,
1959 )
1960 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001961 let vfio_plat_dev = VfioPlatformDevice::new(vfio_device, vfio_device_tube_mem);
1962
1963 Ok((vfio_plat_dev, simple_jail(cfg, "vfio_platform_device")?))
1964}
1965
David Tolnay2b089fc2019-03-04 15:33:22 -08001966fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001967 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001968 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001969 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001970 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -08001971 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001972 control_tubes: &mut Vec<TaggedControlTube>,
1973 wayland_device_tube: Tube,
1974 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001975 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001976 balloon_device_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +09001977 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001978 disk_device_tubes: &mut Vec<Tube>,
1979 pmem_device_tubes: &mut Vec<Tube>,
1980 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001981 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001982 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001983 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001984) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08001985 let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
1986 let mut balloon_inflate_tube: Option<Tube> = None;
Zide Chen5deee482021-04-19 11:06:01 -07001987 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -08001988 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
1989 BTreeMap::new();
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001990 let mut coiommu_attached_endpoints = Vec::new();
Zide Chendfc4b882021-03-10 16:35:37 -08001991
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001992 for vfio_dev in cfg
1993 .vfio
1994 .iter()
1995 .filter(|dev| dev.get_type() == VfioType::Pci)
1996 {
1997 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -07001998 let (vfio_pci_device, jail) = create_vfio_device(
1999 cfg,
2000 vm,
2001 resources,
2002 control_tubes,
2003 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002004 None,
Zide Chendfc4b882021-03-10 16:35:37 -08002005 &mut iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002006 Some(&mut coiommu_attached_endpoints),
2007 vfio_dev.iommu_dev_type(),
Zide Chen5deee482021-04-19 11:06:01 -07002008 )?;
Zide Chendfc4b882021-03-10 16:35:37 -08002009
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002010 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -07002011 }
Zide Chendfc4b882021-03-10 16:35:37 -08002012
Tomasz Nowicki344eb142021-09-22 05:51:58 +00002013 for vfio_dev in cfg
2014 .vfio
2015 .iter()
2016 .filter(|dev| dev.get_type() == VfioType::Platform)
2017 {
2018 let vfio_path = &vfio_dev.vfio_path;
2019 let (vfio_plat_dev, jail) = create_vfio_platform_device(
2020 cfg,
2021 vm,
2022 resources,
2023 control_tubes,
2024 vfio_path.as_path(),
2025 &mut iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002026 IommuDevType::NoIommu, // Virtio IOMMU is not supported yet
Tomasz Nowicki344eb142021-09-22 05:51:58 +00002027 )?;
2028
2029 devices.push((Box::new(vfio_plat_dev), jail));
2030 }
2031
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +08002032 if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
2033 let mut buf = mem::MaybeUninit::<libc::rlimit>::zeroed();
2034 let res = unsafe { libc::getrlimit(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
2035 if res == 0 {
2036 let limit = unsafe { buf.assume_init() };
2037 let rlim_new = limit
2038 .rlim_cur
2039 .saturating_add(vm.get_memory().memory_size() as libc::rlim_t);
2040 let rlim_max = max(limit.rlim_max, rlim_new);
2041 if limit.rlim_cur < rlim_new {
2042 let limit_arg = libc::rlimit {
2043 rlim_cur: rlim_new as libc::rlim_t,
2044 rlim_max: rlim_max as libc::rlim_t,
2045 };
2046 let res = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &limit_arg) };
2047 if res != 0 {
2048 bail!("Set rlimit failed");
2049 }
2050 }
2051 } else {
2052 bail!("Get rlimit failed");
2053 }
2054 }
2055
Zide Chendfc4b882021-03-10 16:35:37 -08002056 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -08002057 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -08002058
Daniel Verkamp6b298582021-08-16 15:37:11 -07002059 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zide Chendfc4b882021-03-10 16:35:37 -08002060 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -07002061 let mut dev =
2062 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002063 .context("failed to create virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07002064 // early reservation for viommu.
2065 dev.allocate_address(resources)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002066 .context("failed to allocate resources early for virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07002067 let dev = Box::new(dev);
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002068 devices.push((dev, iommu_dev.jail));
Zide Chendfc4b882021-03-10 16:35:37 -08002069 }
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002070
2071 if !coiommu_attached_endpoints.is_empty() {
2072 let vfio_container =
2073 VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
2074 .context("failed to get vfio container")?;
2075 let (coiommu_host_tube, coiommu_device_tube) =
2076 Tube::pair().context("failed to create coiommu tube")?;
2077 control_tubes.push(TaggedControlTube::VmMemory(coiommu_host_tube));
2078 let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08002079 let (coiommu_tube, balloon_tube) =
2080 Tube::pair().context("failed to create coiommu tube")?;
2081 balloon_inflate_tube = Some(balloon_tube);
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002082 let dev = CoIommuDev::new(
2083 vm.get_memory().clone(),
2084 vfio_container,
2085 coiommu_device_tube,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08002086 coiommu_tube,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002087 coiommu_attached_endpoints,
2088 vcpu_count,
Chuanxiao Dongd4468612022-01-14 14:21:17 +08002089 cfg.coiommu_param.unwrap_or_default(),
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002090 )
2091 .context("failed to create coiommu device")?;
2092
2093 devices.push((Box::new(dev), simple_jail(cfg, "coiommu")?));
2094 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08002095 }
2096
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08002097 let stubs = create_virtio_devices(
2098 cfg,
2099 vm,
2100 resources,
2101 exit_evt,
2102 wayland_device_tube,
2103 gpu_device_tube,
2104 vhost_user_gpu_tubes,
2105 balloon_device_tube,
2106 balloon_inflate_tube,
David Stevens06d157a2022-01-13 23:44:48 +09002107 init_balloon_size,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08002108 disk_device_tubes,
2109 pmem_device_tubes,
2110 map_request,
2111 fs_device_tubes,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08002112 #[cfg(feature = "gpu")]
2113 render_server_fd,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08002114 )?;
2115
2116 for stub in stubs {
2117 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
2118 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
2119 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
2120 .context("failed to create virtio pci dev")?;
2121 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
2122 devices.push((dev, stub.jail));
2123 }
2124
2125 #[cfg(feature = "audio")]
2126 for ac97_param in &cfg.ac97_parameters {
2127 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
2128 .context("failed to create ac97 device")?;
2129 let jail = simple_jail(cfg, dev.minijail_policy())?;
2130 devices.push((Box::new(dev), jail));
2131 }
2132
2133 #[cfg(feature = "usb")]
2134 {
2135 // Create xhci controller.
2136 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
2137 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
2138 }
2139
Mattias Nisslerde2c6402021-10-21 12:05:29 +00002140 for params in &cfg.stub_pci_devices {
2141 // Stub devices don't need jailing since they don't do anything.
2142 devices.push((Box::new(StubPciDevice::new(params)), None));
2143 }
2144
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002145 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -08002146}
2147
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00002148fn create_file_backed_mappings(
2149 cfg: &Config,
2150 vm: &mut impl Vm,
2151 resources: &mut SystemAllocator,
2152) -> Result<()> {
2153 for mapping in &cfg.file_backed_mappings {
2154 let file = OpenOptions::new()
2155 .read(true)
2156 .write(mapping.writable)
2157 .custom_flags(if mapping.sync { libc::O_SYNC } else { 0 })
2158 .open(&mapping.path)
2159 .context("failed to open file for file-backed mapping")?;
2160 let prot = if mapping.writable {
2161 Protection::read_write()
2162 } else {
2163 Protection::read()
2164 };
2165 let size = mapping
2166 .size
2167 .try_into()
2168 .context("Invalid size for file-backed mapping")?;
2169 let memory_mapping = MemoryMappingBuilder::new(size)
2170 .from_file(&file)
2171 .offset(mapping.offset)
2172 .protection(prot)
2173 .build()
2174 .context("failed to map backing file for file-backed mapping")?;
2175
2176 resources
2177 .mmio_allocator_any()
2178 .allocate_at(
2179 mapping.address,
2180 mapping.size,
2181 Alloc::FileBacked(mapping.address),
2182 "file-backed mapping".to_owned(),
2183 )
2184 .context("failed to allocate guest address for file-backed mapping")?;
2185
2186 vm.add_memory_region(
2187 GuestAddress(mapping.address),
2188 Box::new(memory_mapping),
2189 !mapping.writable,
2190 /* log_dirty_pages = */ false,
2191 )
2192 .context("failed to configure file-backed mapping")?;
2193 }
2194
2195 Ok(())
2196}
2197
David Tolnay2b089fc2019-03-04 15:33:22 -08002198#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09002199#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08002200struct Ids {
2201 uid: uid_t,
2202 gid: gid_t,
2203}
2204
David Tolnay48c48292019-03-01 16:54:25 -08002205// Set the uid/gid for the jailed process and give a basic id map. This is
2206// required for bind mounts to work.
Fergus Dall51200512021-08-19 12:54:26 +10002207fn add_current_user_to_jail(jail: &mut Minijail) -> Result<Ids> {
2208 let crosvm_uid = geteuid();
2209 let crosvm_gid = getegid();
David Tolnay48c48292019-03-01 16:54:25 -08002210
David Tolnay48c48292019-03-01 16:54:25 -08002211 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002212 .context("error setting UID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08002213 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002214 .context("error setting GID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08002215
Chirantan Ekbotee1663ee2021-09-03 18:31:25 +09002216 if crosvm_uid != 0 {
2217 jail.change_uid(crosvm_uid);
2218 }
2219 if crosvm_gid != 0 {
2220 jail.change_gid(crosvm_gid);
2221 }
Fergus Dall51200512021-08-19 12:54:26 +10002222
David Tolnay41a6f842019-03-01 16:18:44 -08002223 Ok(Ids {
2224 uid: crosvm_uid,
2225 gid: crosvm_gid,
2226 })
David Tolnay48c48292019-03-01 16:54:25 -08002227}
2228
Chia-I Wu16fb6592021-11-10 11:45:32 -08002229fn add_current_user_as_root_to_jail(jail: &mut Minijail) -> Result<Ids> {
2230 let crosvm_uid = geteuid();
2231 let crosvm_gid = getegid();
2232 jail.uidmap(&format!("0 {0} 1", crosvm_uid))
2233 .context("error setting UID map")?;
2234 jail.gidmap(&format!("0 {0} 1", crosvm_gid))
2235 .context("error setting GID map")?;
2236
2237 Ok(Ids {
2238 uid: crosvm_uid,
2239 gid: crosvm_gid,
2240 })
2241}
2242
Zach Reizner65b98f12019-11-22 17:34:58 -08002243trait IntoUnixStream {
2244 fn into_unix_stream(self) -> Result<UnixStream>;
2245}
2246
2247impl<'a> IntoUnixStream for &'a Path {
2248 fn into_unix_stream(self) -> Result<UnixStream> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002249 if let Some(fd) = safe_descriptor_from_path(self).context("failed to open event device")? {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002250 Ok(fd.into())
Zach Reizner65b98f12019-11-22 17:34:58 -08002251 } else {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002252 UnixStream::connect(self).context("failed to open event device")
Zach Reizner65b98f12019-11-22 17:34:58 -08002253 }
2254 }
2255}
2256impl<'a> IntoUnixStream for &'a PathBuf {
2257 fn into_unix_stream(self) -> Result<UnixStream> {
2258 self.as_path().into_unix_stream()
2259 }
2260}
2261
2262impl IntoUnixStream for UnixStream {
2263 fn into_unix_stream(self) -> Result<UnixStream> {
2264 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08002265 }
2266}
2267
Steven Richmanf32d0b42020-06-20 21:45:32 -07002268fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
2269 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07002270 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05002271 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07002272 // Our signal handler does nothing and is trivially async signal safe.
2273 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002274 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002275 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002276 block_signal(SIGRTMIN() + 0).context("failed to block signal")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002277 } else {
2278 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05002279 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002280 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07002281 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002282 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002283 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002284 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002285 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002286 Ok(())
2287}
2288
Steven Richmanf32d0b42020-06-20 21:45:32 -07002289// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07002290fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002291 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002292 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002293 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002294 vm: impl VmArch,
Zach Reiznerdc748482021-04-14 13:59:30 -07002295 irq_chip: &mut dyn IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002296 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002297 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002298 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002299 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002300 has_bios: bool,
2301 use_hypervisor_signals: bool,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002302 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002303 host_cpu_topology: bool,
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00002304 vcpu_cgroup_tasks_file: Option<File>,
Zach Reizner2c770e62020-09-30 16:49:59 -07002305) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07002306where
Zach Reizner2c770e62020-09-30 16:49:59 -07002307 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002308{
Zach Reizner304e7312020-09-29 16:00:24 -07002309 let mut vcpu = match vcpu {
2310 Some(v) => v,
2311 None => {
2312 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
2313 // the vcpu thread.
2314 match vm
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002315 .create_vcpu(kvm_vcpu_id)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002316 .context("failed to create vcpu")?
Zach Reizner304e7312020-09-29 16:00:24 -07002317 .downcast::<V>()
2318 {
2319 Ok(v) => *v,
2320 Err(_) => panic!("VM created wrong type of VCPU"),
2321 }
2322 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002323 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03002324
Steven Richmanf32d0b42020-06-20 21:45:32 -07002325 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07002326 .add_vcpu(cpu_id, &vcpu)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002327 .context("failed to add vcpu to irq chip")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002328
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07002329 if !vcpu_affinity.is_empty() {
2330 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
2331 error!("Failed to set CPU affinity: {}", e);
2332 }
2333 }
2334
Steven Richmanf32d0b42020-06-20 21:45:32 -07002335 Arch::configure_vcpu(
Daniel Verkamp6f4f8222022-01-05 14:09:09 -08002336 &vm,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002337 vm.get_hypervisor(),
2338 irq_chip,
2339 &mut vcpu,
2340 cpu_id,
2341 vcpu_count,
2342 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002343 no_smt,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002344 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002345 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002346 .context("failed to configure vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002347
Yusuke Sato31e136a2021-08-18 11:51:38 -07002348 if !enable_per_vm_core_scheduling {
2349 // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
2350 if let Err(e) = enable_core_scheduling() {
2351 error!("Failed to enable core scheduling: {}", e);
2352 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002353 }
2354
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00002355 // Move vcpu thread to cgroup
2356 if let Some(mut f) = vcpu_cgroup_tasks_file {
2357 f.write_all(base::gettid().to_string().as_bytes())
2358 .context("failed to write vcpu tid to cgroup tasks")?;
2359 }
2360
Kansho Nishidaab205af2020-08-13 18:17:50 +09002361 if run_rt {
2362 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2363 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
2364 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
2365 {
2366 warn!("Failed to set vcpu to real time: {}", e);
2367 }
2368 }
2369
Steven Richmanf32d0b42020-06-20 21:45:32 -07002370 if use_hypervisor_signals {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002371 let mut v = get_blocked_signals().context("failed to retrieve signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002372 v.retain(|&x| x != SIGRTMIN() + 0);
Daniel Verkamp6b298582021-08-16 15:37:11 -07002373 vcpu.set_signal_mask(&v)
2374 .context("failed to set the signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002375 }
2376
Zach Reizner2c770e62020-09-30 16:49:59 -07002377 let vcpu_run_handle = vcpu
2378 .take_run_handle(Some(SIGRTMIN() + 0))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002379 .context("failed to set thread id for vcpu")?;
Zach Reizner2c770e62020-09-30 16:49:59 -07002380
2381 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03002382}
2383
/// Executes one GDB debug request against `vcpu` and reports the outcome on `reply_tube`.
///
/// Every request produces exactly one `VcpuDebugStatusMessage` tagged with `cpu_id`: register
/// and memory reads carry the data read, all other requests answer `CommandComplete`. A
/// failed memory read is reported as an empty region rather than as an error.
///
/// # Errors
///
/// Returns an error if the arch-specific operation fails (except for memory reads, see above)
/// or if the reply cannot be sent.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu).context("failed to handle a gdb ReadRegs command")?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs)
                .context("failed to handle a gdb WriteRegs command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => {
            // An unreadable region is answered with an empty buffer.
            let bytes = Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default();
            VcpuDebugStatus::MemoryRegion(bytes)
        }
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .context("failed to handle a gdb WriteMem command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu)
                .context("failed to handle a gdb EnableSingleStep command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs)
                .context("failed to handle a gdb SetHwBreakPoint command")?;
            VcpuDebugStatus::CommandComplete
        }
    };

    let reply = VcpuDebugStatusMessage {
        cpu: cpu_id,
        msg: status,
    };
    reply_tube
        .send(reply)
        .context("failed to send a debug status to GDB thread")
}
2462
Zach Reizner2c770e62020-09-30 16:49:59 -07002463fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002464 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002465 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002466 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002467 vm: impl VmArch + 'static,
Zach Reiznerdc748482021-04-14 13:59:30 -07002468 mut irq_chip: Box<dyn IrqChipArch + 'static>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002469 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002470 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07002471 vcpu_affinity: Vec<usize>,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002472 delay_rt: bool,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002473 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07002474 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002475 has_bios: bool,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002476 mut io_bus: devices::Bus,
2477 mut mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07002478 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002479 requires_pvclock_ctrl: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002480 from_main_tube: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002481 use_hypervisor_signals: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002482 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002483 mpsc::Sender<VcpuDebugStatusMessage>,
2484 >,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002485 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002486 host_cpu_topology: bool,
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00002487 vcpu_cgroup_tasks_file: Option<File>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002488) -> Result<JoinHandle<()>>
2489where
Zach Reizner2c770e62020-09-30 16:49:59 -07002490 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002491{
Zach Reizner8fb52112017-12-13 16:04:39 -08002492 thread::Builder::new()
2493 .name(format!("crosvm_vcpu{}", cpu_id))
2494 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08002495 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
2496 // implementation accomplishes that.
2497 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
2498
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002499 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2500 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07002501 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002502 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002503 kvm_vcpu_id,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002504 vcpu,
2505 vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002506 irq_chip.as_mut(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002507 vcpu_count,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002508 run_rt && !delay_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002509 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002510 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002511 has_bios,
2512 use_hypervisor_signals,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002513 enable_per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002514 host_cpu_topology,
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00002515 vcpu_cgroup_tasks_file,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002516 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002517
Zach Reizner8fb52112017-12-13 16:04:39 -08002518 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002519
Zach Reizner2c770e62020-09-30 16:49:59 -07002520 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002521 Ok(v) => v,
2522 Err(e) => {
Maciek Swiechc3011222021-11-24 21:01:04 +00002523 error!("failed to start vcpu {}: {:#}", cpu_id, e);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002524 return;
2525 }
2526 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002527
Dylan Reidb0492662019-05-17 14:50:13 -07002528 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002529 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002530 if to_gdb_tube.is_some() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002531 // Wait until a GDB client attaches
2532 run_mode = VmRunMode::Breakpoint;
2533 }
2534
Dylan Reidb0492662019-05-17 14:50:13 -07002535 let mut interrupted_by_signal = false;
2536
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002537 mmio_bus.set_access_id(cpu_id);
2538 io_bus.set_access_id(cpu_id);
2539
Dylan Reidb0492662019-05-17 14:50:13 -07002540 'vcpu_loop: loop {
2541 // Start by checking for messages to process and the run state of the CPU.
2542 // An extra check here for Running so there isn't a need to call recv unless a
2543 // message is likely to be ready because a signal was sent.
2544 if interrupted_by_signal || run_mode != VmRunMode::Running {
2545 'state_loop: loop {
2546 // Tries to get a pending message without blocking first.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002547 let msg = match from_main_tube.try_recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002548 Ok(m) => m,
2549 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
2550 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002551 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07002552 break 'state_loop;
2553 }
2554 Err(mpsc::TryRecvError::Empty) => {
2555 // If the VM is not running, wait until a message is ready.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002556 match from_main_tube.recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002557 Ok(m) => m,
2558 Err(mpsc::RecvError) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002559 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002560 break 'vcpu_loop;
2561 }
2562 }
2563 }
2564 Err(mpsc::TryRecvError::Disconnected) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002565 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002566 break 'vcpu_loop;
2567 }
2568 };
2569
2570 // Collect all pending messages.
2571 let mut messages = vec![msg];
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002572 messages.append(&mut from_main_tube.try_iter().collect());
Dylan Reidb0492662019-05-17 14:50:13 -07002573
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002574 for msg in messages {
2575 match msg {
2576 VcpuControl::RunState(new_mode) => {
2577 run_mode = new_mode;
2578 match run_mode {
2579 VmRunMode::Running => break 'state_loop,
2580 VmRunMode::Suspending => {
2581 // On KVM implementations that use a paravirtualized
2582 // clock (e.g. x86), a flag must be set to indicate to
2583 // the guest kernel that a vCPU was suspended. The guest
2584 // kernel will use this flag to prevent the soft lockup
2585 // detection from triggering when this vCPU resumes,
2586 // which could happen days later in realtime.
2587 if requires_pvclock_ctrl {
2588 if let Err(e) = vcpu.pvclock_ctrl() {
2589 error!(
2590 "failed to tell hypervisor vcpu {} is suspending: {}",
2591 cpu_id, e
2592 );
2593 }
2594 }
2595 }
2596 VmRunMode::Breakpoint => {}
2597 VmRunMode::Exiting => break 'vcpu_loop,
2598 }
2599 }
2600 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2601 VcpuControl::Debug(d) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002602 match &to_gdb_tube {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002603 Some(ref ch) => {
2604 if let Err(e) = handle_debug_msg(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07002605 cpu_id, &vcpu, &guest_mem, d, ch,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002606 ) {
2607 error!("Failed to handle gdb message: {}", e);
2608 }
2609 },
2610 None => {
2611 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07002612 }
2613 }
2614 }
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09002615 VcpuControl::MakeRT => {
2616 if run_rt && delay_rt {
2617 info!("Making vcpu {} RT\n", cpu_id);
2618 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2619 if let Err(e) = set_rt_prio_limit(
2620 u64::from(DEFAULT_VCPU_RT_LEVEL))
2621 .and_then(|_|
2622 set_rt_round_robin(
2623 i32::from(DEFAULT_VCPU_RT_LEVEL)
2624 ))
2625 {
2626 warn!("Failed to set vcpu to real time: {}", e);
2627 }
2628 }
2629 }
Dylan Reidb0492662019-05-17 14:50:13 -07002630 }
2631 }
2632 }
2633 }
2634
2635 interrupted_by_signal = false;
2636
Steven Richman11dc6712020-09-02 15:39:14 -07002637 // Vcpus may have run a HLT instruction, which puts them into a state other than
2638 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
2639 // until either the irqchip receives an interrupt for this vcpu, or until the main
2640 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
2641 // implementations HLT instructions do not make it to crosvm, and thus this is a
2642 // no-op that always returns VcpuRunState::Runnable.
2643 match irq_chip.wait_until_runnable(&vcpu) {
2644 Ok(VcpuRunState::Runnable) => {}
2645 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
2646 Err(e) => error!(
2647 "error waiting for vcpu {} to become runnable: {}",
2648 cpu_id, e
2649 ),
2650 }
2651
2652 if !interrupted_by_signal {
2653 match vcpu.run(&vcpu_run_handle) {
2654 Ok(VcpuExit::IoIn { port, mut size }) => {
2655 let mut data = [0; 8];
2656 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002657 error!("unsupported IoIn size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002658 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002659 }
Steven Richman11dc6712020-09-02 15:39:14 -07002660 io_bus.read(port as u64, &mut data[..size]);
2661 if let Err(e) = vcpu.set_data(&data[..size]) {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002662 error!("failed to set return data for IoIn at port {:#x}: {}", port, e);
Steven Richman11dc6712020-09-02 15:39:14 -07002663 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002664 }
Steven Richman11dc6712020-09-02 15:39:14 -07002665 Ok(VcpuExit::IoOut {
2666 port,
2667 mut size,
2668 data,
2669 }) => {
2670 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002671 error!("unsupported IoOut size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002672 size = data.len();
2673 }
2674 io_bus.write(port as u64, &data[..size]);
2675 }
2676 Ok(VcpuExit::MmioRead { address, size }) => {
2677 let mut data = [0; 8];
2678 mmio_bus.read(address, &mut data[..size]);
2679 // Setting data for mmio can not fail.
2680 let _ = vcpu.set_data(&data[..size]);
2681 }
2682 Ok(VcpuExit::MmioWrite {
2683 address,
2684 size,
2685 data,
2686 }) => {
2687 mmio_bus.write(address, &data[..size]);
2688 }
2689 Ok(VcpuExit::IoapicEoi { vector }) => {
2690 if let Err(e) = irq_chip.broadcast_eoi(vector) {
2691 error!(
2692 "failed to broadcast eoi {} on vcpu {}: {}",
2693 vector, cpu_id, e
2694 );
2695 }
2696 }
2697 Ok(VcpuExit::IrqWindowOpen) => {}
Leo Lai558460f2021-07-23 05:32:27 +00002698 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
Steven Richman11dc6712020-09-02 15:39:14 -07002699 Ok(VcpuExit::Shutdown) => break,
2700 Ok(VcpuExit::FailEntry {
2701 hardware_entry_failure_reason,
2702 }) => {
2703 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002704 break;
2705 }
Steven Richman11dc6712020-09-02 15:39:14 -07002706 Ok(VcpuExit::SystemEvent(_, _)) => break,
2707 Ok(VcpuExit::Debug { .. }) => {
2708 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2709 {
2710 let msg = VcpuDebugStatusMessage {
2711 cpu: cpu_id as usize,
2712 msg: VcpuDebugStatus::HitBreakPoint,
2713 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002714 if let Some(ref ch) = to_gdb_tube {
Steven Richman11dc6712020-09-02 15:39:14 -07002715 if let Err(e) = ch.send(msg) {
2716 error!("failed to notify breakpoint to GDB thread: {}", e);
2717 break;
2718 }
2719 }
2720 run_mode = VmRunMode::Breakpoint;
2721 }
2722 }
2723 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
2724 Err(e) => match e.errno() {
2725 libc::EINTR => interrupted_by_signal = true,
2726 libc::EAGAIN => {}
2727 _ => {
2728 error!("vcpu hit unknown error: {}", e);
2729 break;
2730 }
2731 },
2732 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002733 }
2734
2735 if interrupted_by_signal {
2736 if use_hypervisor_signals {
2737 // Try to clear the signal that we use to kick VCPU if it is pending before
2738 // attempting to handle pause requests.
2739 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2740 error!("failed to clear pending signal: {}", e);
2741 break;
2742 }
2743 } else {
2744 vcpu.set_immediate_exit(false);
2745 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002746 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002747
Steven Richman11dc6712020-09-02 15:39:14 -07002748 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2749 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2750 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002751 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002752 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002753 .context("failed to spawn VCPU thread")
Zach Reizner39aa26b2017-12-12 18:03:23 -08002754}
2755
Zach Reiznera90649a2021-03-31 12:56:08 -07002756fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002757 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002758 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002759 open_file(
2760 initrd_path,
2761 true, /*read_only*/
2762 false, /*O_DIRECT*/
2763 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002764 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002765 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002766 } else {
2767 None
2768 };
2769
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002770 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002771 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002772 open_file(
2773 kernel_path,
2774 true, /*read_only*/
2775 false, /*O_DIRECT*/
2776 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002777 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002778 ),
2779 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002780 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002781 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002782 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002783 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2784 };
2785
Will Deaconc48e7832021-07-30 19:03:06 +01002786 let swiotlb = if let Some(size) = cfg.swiotlb {
2787 Some(
2788 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002789 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002790 )
2791 } else {
2792 match cfg.protected_vm {
Andrew Walbran0bbbb682021-12-13 13:42:07 +00002793 ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
2794 Some(64 * 1024 * 1024)
2795 }
Will Deaconc48e7832021-07-30 19:03:06 +01002796 ProtectionType::Unprotected => None,
2797 }
2798 };
2799
Zach Reiznera90649a2021-03-31 12:56:08 -07002800 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002801 memory_size: cfg
2802 .memory
2803 .unwrap_or(256)
2804 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002805 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002806 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -07002807 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002808 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -07002809 cpu_clusters: cfg.cpu_clusters.clone(),
2810 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002811 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002812 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002813 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002814 android_fstab: cfg
2815 .android_fstab
2816 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002817 .map(|x| {
2818 File::open(x)
2819 .with_context(|| format!("failed to open android fstab file {}", x.display()))
2820 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002821 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002822 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002823 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002824 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -07002825 acpi_sdts: cfg
2826 .acpi_tables
2827 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002828 .map(|path| {
2829 SDT::from_file(path)
2830 .with_context(|| format!("failed to open ACPI file {}", path.display()))
2831 })
Tomasz Jeznach42644642020-05-20 23:27:59 -07002832 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002833 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002834 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002835 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002836 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002837 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002838 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -07002839 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002840 host_cpu_topology: cfg.host_cpu_topology,
Zach Reiznera90649a2021-03-31 12:56:08 -07002841 })
2842}
2843
/// Outcome reported by [`run_config`] once the VM has finished running.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExitState {
    /// NOTE(review): presumably reported when the reset event fired, i.e. the caller is
    /// expected to restart the VM; confirm against `run_control`.
    Reset,
    /// NOTE(review): presumably reported when the exit event fired, i.e. the VM is done;
    /// confirm against `run_control`.
    Stop,
}
2848
2849pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -07002850 let components = setup_vm_components(&cfg)?;
2851
2852 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002853 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
2854 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002855 let mut mem_policy = MemoryPolicy::empty();
2856 if components.hugepages {
2857 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
2858 }
Quentin Perret26203802021-12-02 09:48:43 +00002859 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -07002860 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
Andrew Walbran00f1c9f2021-12-10 17:13:08 +00002861 let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
Daniel Verkamp6b298582021-08-16 15:37:11 -07002862 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002863
2864 enum KvmIrqChip {
2865 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2866 Split(KvmSplitIrqChip),
2867 Kernel(KvmKernelIrqChip),
2868 }
2869
2870 impl KvmIrqChip {
2871 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
2872 match self {
2873 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2874 KvmIrqChip::Split(i) => i,
2875 KvmIrqChip::Kernel(i) => i,
2876 }
2877 }
2878 }
2879
2880 let ioapic_host_tube;
2881 let mut irq_chip = if cfg.split_irqchip {
2882 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2883 unimplemented!("KVM split irqchip mode only supported on x86 processors");
2884 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2885 {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002886 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002887 ioapic_host_tube = Some(host_tube);
2888 KvmIrqChip::Split(
2889 KvmSplitIrqChip::new(
2890 vm_clone,
2891 components.vcpu_count,
2892 ioapic_device_tube,
2893 Some(120),
2894 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002895 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002896 )
2897 }
2898 } else {
2899 ioapic_host_tube = None;
2900 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002901 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
2902 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002903 )
2904 };
2905
2906 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
2907}
2908
2909fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07002910 cfg: Config,
2911 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07002912 mut vm: V,
2913 irq_chip: &mut dyn IrqChipArch,
2914 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002915) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -07002916where
2917 Vcpu: VcpuArch + 'static,
2918 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07002919{
2920 if cfg.sandbox {
2921 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2922 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2923 // access to those files will not be possible.
2924 info!("crosvm entering multiprocess mode");
2925 }
2926
Daniel Verkampf1439d42021-05-21 13:55:10 -07002927 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07002928 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002929 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07002930
Zach Reiznera90649a2021-03-31 12:56:08 -07002931 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2932 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2933 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002934 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002935
Zach Reiznera60744b2019-02-13 17:33:32 -08002936 let control_server_socket = match &cfg.socket_path {
2937 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002938 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002939 )),
2940 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002941 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002942
Zach Reiznera90649a2021-03-31 12:56:08 -07002943 let mut control_tubes = Vec::new();
2944
2945 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2946 if let Some(port) = cfg.gdb {
2947 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002948 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -07002949 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2950 components.gdb = Some((port, gdb_control_tube));
2951 }
2952
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002953 for wl_cfg in &cfg.vhost_user_wl {
2954 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
2955 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002956 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002957 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
2958 }
2959
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002960 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
2961 for _ in 0..cfg.vhost_user_gpu.len() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002962 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002963 vhost_user_gpu_tubes.push((
Daniel Verkamp6b298582021-08-16 15:37:11 -07002964 host_tube.try_clone().context("failed to clone tube")?,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002965 device_tube,
2966 ));
2967 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
2968 }
2969
Daniel Verkamp6b298582021-08-16 15:37:11 -07002970 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002971 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Andrew Walbran3cd93602022-01-25 13:59:23 +00002972
2973 let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
2974 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
2975 let (balloon_host_tube, balloon_device_tube) =
2976 Tube::pair().context("failed to create tube")?;
2977 // Set recv timeout to avoid deadlock on sending BalloonControlCommand before guest is
2978 // ready.
2979 balloon_host_tube
2980 .set_recv_timeout(Some(Duration::from_millis(100)))
2981 .context("failed to create tube")?;
2982 (Some(balloon_host_tube), Some(balloon_device_tube))
2983 } else {
2984 (None, None)
2985 };
Dylan Reid059a1882018-07-23 17:58:09 -07002986
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002987 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002988 let mut disk_device_tubes = Vec::new();
2989 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002990 let disk_count = cfg.disks.len();
2991 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002992 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002993 disk_host_tubes.push(disk_host_tub);
2994 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002995 }
2996
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002997 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002998 let pmem_count = cfg.pmem_devices.len();
2999 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003000 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003001 pmem_device_tubes.push(pmem_device_tube);
3002 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08003003 }
3004
Daniel Verkamp6b298582021-08-16 15:37:11 -07003005 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003006 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07003007
Zach Reiznerdc748482021-04-14 13:59:30 -07003008 if let Some(ioapic_host_tube) = ioapic_host_tube {
3009 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
3010 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08003011
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08003012 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07003013 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09003014 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07003015 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09003016 Some(mut jail) => {
3017 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
3018 #[cfg(feature = "power-monitor-powerd")]
3019 {
Fergus Dall51200512021-08-19 12:54:26 +10003020 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09003021
3022 // Create a tmpfs in the device's root directory so that we can bind mount files.
3023 jail.mount_with_data(
3024 Path::new("none"),
3025 Path::new("/"),
3026 "tmpfs",
3027 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
3028 "size=67108864",
3029 )?;
3030
3031 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
3032 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
3033 }
3034 Some(jail)
3035 }
3036 None => None,
3037 };
3038 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08003039 } else {
3040 (&cfg.battery_type, None)
3041 };
3042
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08003043 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
3044
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003045 let fs_count = cfg
3046 .shared_dirs
3047 .iter()
3048 .filter(|sd| sd.kind == SharedDirKind::FS)
3049 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003050 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003051 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003052 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003053 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
3054 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003055 }
3056
Daniel Verkamp6b298582021-08-16 15:37:11 -07003057 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003058 let reset_evt = Event::new().context("failed to create event")?;
Daniel Verkamp6f4f8222022-01-05 14:09:09 -08003059 let mut sys_allocator = Arch::create_system_allocator(&vm);
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09003060
3061 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
3062 let ramoops_region = match &components.pstore {
3063 Some(pstore) => Some(
Dennis Kempin65740a62021-10-18 16:46:57 -07003064 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, pstore)
Daniel Verkamp6b298582021-08-16 15:37:11 -07003065 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09003066 ),
3067 None => None,
3068 };
3069
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00003070 create_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
3071
Daniel Verkamp891ea3e2022-01-04 12:35:55 -08003072 let phys_max_addr = (1u64 << vm.get_guest_phys_addr_bits()) - 1;
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08003073
3074 #[cfg(feature = "gpu")]
3075 // Hold on to the render server jail so it keeps running until we exit run_vm()
3076 let mut _render_server_jail = None;
3077 #[cfg(feature = "gpu")]
3078 let mut render_server_fd = None;
3079 #[cfg(feature = "gpu")]
3080 if let Some(gpu_parameters) = &cfg.gpu_parameters {
3081 if let Some(ref render_server_parameters) = gpu_parameters.render_server {
3082 let (jail, fd) = start_gpu_render_server(&cfg, render_server_parameters)?;
3083 _render_server_jail = Some(ScopedMinijail(jail));
3084 render_server_fd = Some(fd);
3085 }
3086 }
3087
David Stevens06d157a2022-01-13 23:44:48 +09003088 let init_balloon_size = components
3089 .memory_size
3090 .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
3091 m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
3092 }))
3093 .context("failed to calculate init balloon size")?;
3094
Tomasz Nowickiab86d522021-09-22 05:50:46 +00003095 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07003096 &cfg,
3097 &mut vm,
3098 &mut sys_allocator,
3099 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08003100 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07003101 &mut control_tubes,
3102 wayland_device_tube,
3103 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09003104 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07003105 balloon_device_tube,
David Stevens06d157a2022-01-13 23:44:48 +09003106 init_balloon_size,
Zach Reiznerdc748482021-04-14 13:59:30 -07003107 &mut disk_device_tubes,
3108 &mut pmem_device_tubes,
3109 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07003110 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07003111 usb_provider,
3112 Arc::clone(&map_request),
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08003113 #[cfg(feature = "gpu")]
3114 render_server_fd,
Zach Reiznerdc748482021-04-14 13:59:30 -07003115 )?;
3116
Peter Fangc2bba082021-04-19 18:40:24 -07003117 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00003118 for device in devices
3119 .iter_mut()
3120 .filter_map(|(dev, _)| dev.as_pci_device_mut())
3121 {
Peter Fangc2bba082021-04-19 18:40:24 -07003122 let sdts = device
3123 .generate_acpi(components.acpi_sdts)
3124 .or_else(|| {
3125 error!("ACPI table generation error");
3126 None
3127 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003128 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07003129 components.acpi_sdts = sdts;
3130 }
3131
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003132 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
3133 let mut kvm_vcpu_ids = Vec::new();
3134
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08003135 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07003136 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06003137 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07003138 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003139 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07003140 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06003141 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08003142 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08003143 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07003144 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09003145 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00003146 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07003147 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003148 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06003149 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07003150 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08003151
Daniel Verkamp1286b482021-11-30 15:14:16 -08003152 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3153 {
3154 // Create Pcie Root Port
3155 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new()));
3156 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
3157 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
3158 let sec_bus = (1..255)
3159 .find(|&bus_num| sys_allocator.pci_bus_empty(bus_num))
3160 .context("failed to find empty bus for Pci hotplug")?;
3161 let pci_bridge = Box::new(PciBridge::new(
3162 pcie_root_port.clone(),
3163 msi_device_tube,
3164 0,
3165 sec_bus,
3166 ));
3167 Arch::register_pci_device(&mut linux, pci_bridge, None, &mut sys_allocator)
3168 .context("Failed to configure pci bridge device")?;
3169 linux.hotplug_bus.push(pcie_root_port);
3170 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003171
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08003172 #[cfg(feature = "direct")]
3173 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003174 let direct_io = Arc::new(
3175 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
3176 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08003177 for range in pmio.ranges.iter() {
3178 linux
3179 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09003180 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08003181 .unwrap();
3182 }
3183 };
3184
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003185 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07003186 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08003187 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09003188 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08003189 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07003190 );
Xiong Zhang46471a02021-11-12 00:34:42 +08003191
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07003192 for range in mmio.ranges.iter() {
3193 linux
3194 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09003195 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07003196 .unwrap();
3197 }
3198 };
3199
3200 #[cfg(feature = "direct")]
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003201 let mut irqs = Vec::new();
3202
3203 #[cfg(feature = "direct")]
3204 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07003205 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003206 warn!("irq {} already reserved.", irq);
3207 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07003208 let trigger = Event::new().context("failed to create event")?;
3209 let resample = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003210 linux
3211 .irq_chip
3212 .register_irq_event(*irq, &trigger, Some(&resample))
3213 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07003214 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
3215 .context("failed to enable interrupt forwarding")?;
3216 direct_irq
3217 .irq_enable(*irq)
3218 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003219 irqs.push(direct_irq);
3220 }
3221
3222 #[cfg(feature = "direct")]
3223 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07003224 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003225 warn!("irq {} already reserved.", irq);
3226 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07003227 let trigger = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003228 linux
3229 .irq_chip
3230 .register_irq_event(*irq, &trigger, None)
3231 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07003232 let direct_irq = devices::DirectIrq::new(trigger, None)
3233 .context("failed to enable interrupt forwarding")?;
3234 direct_irq
3235 .irq_enable(*irq)
3236 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003237 irqs.push(direct_irq);
3238 }
3239
Daniel Verkamp6b298582021-08-16 15:37:11 -07003240 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08003241 run_control(
3242 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07003243 sys_allocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003244 cfg,
Zach Reiznera60744b2019-02-13 17:33:32 -08003245 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003246 control_tubes,
3247 balloon_host_tube,
3248 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07003249 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003250 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07003251 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003252 reset_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08003253 sigchld_fd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08003254 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08003255 gralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003256 kvm_vcpu_ids,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08003257 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07003258}
3259
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003260fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
3261 linux: &RunnableLinuxVm<V, Vcpu>,
3262 host_addr: PciAddress,
3263) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
3264 for hp_bus in linux.hotplug_bus.iter() {
3265 if let Some(number) = hp_bus.lock().is_match(host_addr) {
3266 return Ok((hp_bus.clone(), number));
3267 }
3268 }
3269 Err(anyhow!("Failed to find a suitable hotplug bus"))
3270}
3271
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003272fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
3273 linux: &mut RunnableLinuxVm<V, Vcpu>,
3274 sys_allocator: &mut SystemAllocator,
3275 cfg: &Config,
3276 control_tubes: &mut Vec<TaggedControlTube>,
3277 vfio_path: &Path,
3278) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003279 let host_os_str = vfio_path
3280 .file_name()
3281 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3282 let host_str = host_os_str
3283 .to_str()
3284 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3285 let host_addr = PciAddress::from_string(host_str);
3286
3287 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
3288
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003289 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
3290 let (vfio_pci_device, jail) = create_vfio_device(
3291 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003292 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003293 sys_allocator,
3294 control_tubes,
3295 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003296 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003297 &mut endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08003298 None,
3299 IommuDevType::NoIommu,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003300 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003301
3302 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07003303 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003304
Daniel Verkamp6b298582021-08-16 15:37:11 -07003305 let host_os_str = vfio_path
3306 .file_name()
3307 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3308 let host_str = host_os_str
3309 .to_str()
3310 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003311 let host_addr = PciAddress::from_string(host_str);
3312 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003313 let mut hp_bus = hp_bus.lock();
3314 hp_bus.add_hotplug_device(host_key, pci_address);
3315 hp_bus.hot_plug(pci_address);
3316 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003317}
3318
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003319fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
3320 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08003321 sys_allocator: &mut SystemAllocator,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003322 vfio_path: &Path,
3323) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003324 let host_os_str = vfio_path
3325 .file_name()
3326 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3327 let host_str = host_os_str
3328 .to_str()
3329 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003330 let host_addr = PciAddress::from_string(host_str);
3331 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003332 for hp_bus in linux.hotplug_bus.iter() {
3333 let mut hp_bus_lock = hp_bus.lock();
3334 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
3335 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08003336 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003337 return Ok(());
3338 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003339 }
3340
Daniel Verkamp6b298582021-08-16 15:37:11 -07003341 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003342}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003343
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003344fn handle_vfio_command<V: VmArch, Vcpu: VcpuArch>(
3345 linux: &mut RunnableLinuxVm<V, Vcpu>,
3346 sys_allocator: &mut SystemAllocator,
3347 cfg: &Config,
3348 add_tubes: &mut Vec<TaggedControlTube>,
3349 vfio_path: &Path,
3350 add: bool,
3351) -> VmResponse {
3352 let ret = if add {
3353 add_vfio_device(linux, sys_allocator, cfg, add_tubes, vfio_path)
3354 } else {
3355 remove_vfio_device(linux, sys_allocator, vfio_path)
3356 };
3357
3358 match ret {
3359 Ok(()) => VmResponse::Ok,
3360 Err(e) => {
3361 error!("hanlde_vfio_command failure: {}", e);
3362 add_tubes.clear();
3363 VmResponse::Err(base::Error::new(libc::EINVAL))
3364 }
3365 }
3366}
3367
Daniel Verkamp29409802021-02-24 14:46:19 -08003368/// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
3369/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07003370/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
3371/// loop.
3372fn kick_all_vcpus(
3373 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
Zach Reiznerdc748482021-04-14 13:59:30 -07003374 irq_chip: &dyn IrqChip,
Daniel Verkamp29409802021-02-24 14:46:19 -08003375 message: VcpuControl,
Steven Richman11dc6712020-09-02 15:39:14 -07003376) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003377 for (handle, tube) in vcpu_handles {
Daniel Verkamp29409802021-02-24 14:46:19 -08003378 if let Err(e) = tube.send(message.clone()) {
3379 error!("failed to send VcpuControl: {}", e);
Steven Richman11dc6712020-09-02 15:39:14 -07003380 }
3381 let _ = handle.kill(SIGRTMIN() + 0);
3382 }
3383 irq_chip.kick_halted_vcpus();
3384}
3385
Zach Reiznerdc748482021-04-14 13:59:30 -07003386fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
3387 mut linux: RunnableLinuxVm<V, Vcpu>,
3388 mut sys_allocator: SystemAllocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003389 cfg: Config,
Zach Reiznera60744b2019-02-13 17:33:32 -08003390 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003391 mut control_tubes: Vec<TaggedControlTube>,
Andrew Walbran3cd93602022-01-25 13:59:23 +00003392 balloon_host_tube: Option<Tube>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003393 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07003394 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07003395 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003396 reset_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07003397 sigchld_fd: SignalFd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08003398 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08003399 mut gralloc: RutabagaGralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003400 kvm_vcpu_ids: Vec<usize>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003401) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003402 #[derive(PollToken)]
3403 enum Token {
3404 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003405 Reset,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003406 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07003407 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003408 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08003409 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07003410 VmControl { index: usize },
3411 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003412
Zach Reizner19ad1f32019-12-12 18:58:50 -08003413 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003414 .set_raw_mode()
3415 .expect("failed to set terminal raw mode");
3416
Michael Hoylee392c462020-10-07 03:29:24 -07003417 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07003418 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003419 (&reset_evt, Token::Reset),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003420 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07003421 (&sigchld_fd, Token::ChildSignal),
3422 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07003423 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07003424
Zach Reiznera60744b2019-02-13 17:33:32 -08003425 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07003426 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003427 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07003428 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003429 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003430 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07003431 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07003432 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003433 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003434 }
3435
Steven Richmanf32d0b42020-06-20 21:45:32 -07003436 let events = linux
3437 .irq_chip
3438 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07003439 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07003440
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003441 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07003442 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003443 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003444 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003445 }
3446
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003447 if cfg.sandbox {
Lepton Wu20333e42019-03-14 10:48:03 -07003448 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003449 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07003450 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08003451
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003452 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3453 // Create a channel for GDB thread.
3454 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
3455 let (s, r) = mpsc::channel();
3456 (Some(s), Some(r))
3457 } else {
3458 (None, None)
3459 };
3460
Steven Richmanf32d0b42020-06-20 21:45:32 -07003461 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
3462 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07003463 let use_hypervisor_signals = !linux
3464 .vm
3465 .get_hypervisor()
3466 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07003467 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07003468
Zach Reizner304e7312020-09-29 16:00:24 -07003469 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00003470 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003471 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
3472 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07003473 // Enable core scheduling before creating vCPUs so that the cookie will be
3474 // shared by all vCPU threads.
3475 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
3476 // itself for even better performance. Only vCPUs need the feature.
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003477 if cfg.per_vm_core_scheduling {
Yusuke Sato31e136a2021-08-18 11:51:38 -07003478 if let Err(e) = enable_core_scheduling() {
3479 error!("Failed to enable core scheduling: {}", e);
3480 }
3481 }
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00003482 let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
3483 None => None,
3484 Some(cgroup_path) => {
3485 // Move main process to cgroup_path
3486 let mut f = File::create(&cgroup_path.join("tasks"))?;
3487 f.write_all(process::id().to_string().as_bytes())?;
3488 Some(f)
3489 }
3490 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07003491 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07003492 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07003493 let vcpu_affinity = match linux.vcpu_affinity.clone() {
3494 Some(VcpuAffinity::Global(v)) => v,
3495 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
3496 None => Default::default(),
3497 };
Zach Reizner55a9e502018-10-03 10:22:32 -07003498 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07003499 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003500 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07003501 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07003502 linux.vm.try_clone().context("failed to clone vm")?,
3503 linux
3504 .irq_chip
3505 .try_box_clone()
3506 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003507 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09003508 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07003509 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09003510 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09003511 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07003512 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003513 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07003514 (*linux.io_bus).clone(),
3515 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07003516 exit_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003517 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07003518 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003519 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003520 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3521 to_gdb_channel.clone(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003522 cfg.per_vm_core_scheduling,
3523 cfg.host_cpu_topology,
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00003524 match vcpu_cgroup_tasks_file {
3525 None => None,
3526 Some(ref f) => Some(
3527 f.try_clone()
3528 .context("failed to clone vcpu cgroup tasks file")?,
3529 ),
3530 },
Zach Reizner55a9e502018-10-03 10:22:32 -07003531 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07003532 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07003533 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003534
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003535 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3536 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003537 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003538 let to_vcpu_channels = vcpu_handles
3539 .iter()
3540 .map(|(_handle, channel)| channel.clone())
3541 .collect();
3542 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003543 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003544 to_vcpu_channels,
3545 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
3546 );
3547 thread::Builder::new()
3548 .name("gdb".to_owned())
3549 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07003550 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003551 };
3552
Dylan Reid059a1882018-07-23 17:58:09 -07003553 vcpu_thread_barrier.wait();
3554
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003555 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09003556 let mut balloon_stats_id: u64 = 0;
3557
Michael Hoylee392c462020-10-07 03:29:24 -07003558 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003559 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07003560 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003561 Ok(v) => v,
3562 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08003563 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003564 break;
3565 }
3566 }
3567 };
Zach Reiznera60744b2019-02-13 17:33:32 -08003568
Steven Richmanf32d0b42020-06-20 21:45:32 -07003569 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
3570 warn!("can't deliver delayed irqs: {}", e);
3571 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003572
Zach Reiznera60744b2019-02-13 17:33:32 -08003573 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07003574 for event in events.iter().filter(|e| e.is_readable) {
3575 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003576 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003577 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07003578 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003579 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003580 Token::Reset => {
3581 info!("vcpu requested reset");
3582 exit_state = ExitState::Reset;
3583 break 'wait;
3584 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003585 Token::Suspend => {
3586 info!("VM requested suspend");
3587 linux.suspend_evt.read().unwrap();
Zach Reiznerdc748482021-04-14 13:59:30 -07003588 kick_all_vcpus(
3589 &vcpu_handles,
3590 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003591 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07003592 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003593 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003594 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003595 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003596 while let Some(siginfo) =
3597 sigchld_fd.read().context("failed to create signalfd")?
3598 {
Zach Reizner3ba00982019-01-23 19:04:43 -08003599 let pid = siginfo.ssi_pid;
3600 let pid_label = match linux.pid_debug_label_map.get(&pid) {
3601 Some(label) => format!("{} (pid {})", label, pid),
3602 None => format!("pid {}", pid),
3603 };
David Tolnayf5032762018-12-03 10:46:45 -08003604 error!(
3605 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08003606 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08003607 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08003608 }
Michael Hoylee392c462020-10-07 03:29:24 -07003609 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003610 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003611 Token::IrqFd { index } => {
3612 if let Err(e) = linux.irq_chip.service_irq_event(index) {
3613 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003614 }
3615 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003616 Token::VmControlServer => {
3617 if let Some(socket_server) = &control_server_socket {
3618 match socket_server.accept() {
3619 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07003620 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003621 .add(
3622 &socket,
3623 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003624 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08003625 },
3626 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07003627 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003628 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08003629 }
3630 Err(e) => error!("failed to accept socket: {}", e),
3631 }
3632 }
3633 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003634 Token::VmControl { index } => {
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003635 let mut add_tubes = Vec::new();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003636 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003637 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003638 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003639 Ok(request) => {
3640 let mut run_mode_opt = None;
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003641 let response = match request {
3642 VmRequest::VfioCommand { vfio_path, add } => {
3643 handle_vfio_command(
3644 &mut linux,
3645 &mut sys_allocator,
3646 &cfg,
3647 &mut add_tubes,
3648 &vfio_path,
3649 add,
3650 )
3651 }
3652 _ => request.execute(
3653 &mut run_mode_opt,
Andrew Walbran3cd93602022-01-25 13:59:23 +00003654 balloon_host_tube.as_ref(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003655 &mut balloon_stats_id,
3656 disk_host_tubes,
3657 #[cfg(feature = "usb")]
3658 Some(&usb_control_tube),
3659 #[cfg(not(feature = "usb"))]
3660 None,
3661 &mut linux.bat_control,
3662 &vcpu_handles,
3663 ),
3664 };
3665
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003666 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003667 error!("failed to send VmResponse: {}", e);
3668 }
3669 if let Some(run_mode) = run_mode_opt {
3670 info!("control socket changed run mode to {}", run_mode);
3671 match run_mode {
3672 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07003673 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07003674 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003675 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08003676 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07003677 for dev in &linux.resume_notify_devices {
3678 dev.lock().resume_imminent();
3679 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003680 }
Steven Richman11dc6712020-09-02 15:39:14 -07003681 kick_all_vcpus(
3682 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07003683 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003684 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07003685 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08003686 }
3687 }
3688 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003689 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003690 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003691 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003692 vm_control_indices_to_remove.push(index);
3693 } else {
3694 error!("failed to recv VmRequest: {}", e);
3695 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003696 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003697 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003698 TaggedControlTube::VmMemory(tube) => {
3699 match tube.recv::<VmMemoryRequest>() {
3700 Ok(request) => {
3701 let response = request.execute(
3702 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07003703 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003704 Arc::clone(&map_request),
3705 &mut gralloc,
3706 );
3707 if let Err(e) = tube.send(&response) {
3708 error!("failed to send VmMemoryControlResponse: {}", e);
3709 }
3710 }
3711 Err(e) => {
3712 if let TubeError::Disconnected = e {
3713 vm_control_indices_to_remove.push(index);
3714 } else {
3715 error!("failed to recv VmMemoryControlRequest: {}", e);
3716 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003717 }
3718 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003719 }
3720 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08003721 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07003722 let response = {
3723 let irq_chip = &mut linux.irq_chip;
3724 request.execute(
3725 |setup| match setup {
3726 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003727 if let Some(event_index) = irq_chip
3728 .register_irq_event(irq, ev, None)?
3729 {
3730 match wait_ctx.add(
3731 ev,
3732 Token::IrqFd {
3733 index: event_index
3734 },
3735 ) {
3736 Err(e) => {
3737 warn!("failed to add IrqFd to poll context: {}", e);
3738 Err(e)
3739 },
3740 Ok(_) => {
3741 Ok(())
3742 }
3743 }
3744 } else {
3745 Ok(())
3746 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003747 }
3748 IrqSetup::Route(route) => irq_chip.route_irq(route),
Xiong Zhang4fbc5542021-06-01 11:29:14 +08003749 IrqSetup::UnRegister(irq, ev) => irq_chip.unregister_irq_event(irq, ev),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003750 },
Zach Reiznerdc748482021-04-14 13:59:30 -07003751 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003752 )
3753 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003754 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08003755 error!("failed to send VmIrqResponse: {}", e);
3756 }
3757 }
3758 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003759 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08003760 vm_control_indices_to_remove.push(index);
3761 } else {
3762 error!("failed to recv VmIrqRequest: {}", e);
3763 }
3764 }
3765 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003766 TaggedControlTube::VmMsync(tube) => {
3767 match tube.recv::<VmMsyncRequest>() {
3768 Ok(request) => {
3769 let response = request.execute(&mut linux.vm);
3770 if let Err(e) = tube.send(&response) {
3771 error!("failed to send VmMsyncResponse: {}", e);
3772 }
3773 }
3774 Err(e) => {
3775 if let TubeError::Disconnected = e {
3776 vm_control_indices_to_remove.push(index);
3777 } else {
3778 error!("failed to recv VmMsyncRequest: {}", e);
3779 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08003780 }
3781 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003782 }
3783 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003784 Ok(request) => {
3785 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07003786 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003787 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003788 error!("failed to send VmResponse: {}", e);
3789 }
3790 }
3791 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003792 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003793 vm_control_indices_to_remove.push(index);
3794 } else {
3795 error!("failed to recv VmResponse: {}", e);
3796 }
3797 }
3798 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08003799 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003800 }
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003801 if !add_tubes.is_empty() {
3802 for (idx, socket) in add_tubes.iter().enumerate() {
3803 wait_ctx
3804 .add(
3805 socket.as_ref(),
3806 Token::VmControl {
3807 index: idx + control_tubes.len(),
3808 },
3809 )
3810 .context(
3811 "failed to add hotplug vfio-pci descriptor ot wait context",
3812 )?;
3813 }
3814 control_tubes.append(&mut add_tubes);
3815 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003816 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003817 }
3818 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003819
Vikram Auradkarede68c72021-07-01 14:33:54 -07003820 // It's possible more data is readable and buffered while the socket is hungup,
3821 // so don't delete the tube from the poll context until we're sure all the
3822 // data is read.
3823 // Below case covers a condition where we have received a hungup event and the tube is not
3824 // readable.
3825 // In case of readable tube, once all data is read, any attempt to read more data on hungup
3826 // tube should fail. On such failure, we get Disconnected error and index gets added to
3827 // vm_control_indices_to_remove by the time we reach here.
3828 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
3829 if let Token::VmControl { index } = event.token {
3830 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003831 }
3832 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003833
3834 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08003835 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07003836 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08003837 vm_control_indices_to_remove.dedup();
3838 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07003839 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3840 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08003841 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003842 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003843 // that has already been closed. Because the token associated with that spurious event
3844 // now belongs to a different socket, the control loop will start to interact with
3845 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3846 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003847 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003848 wait_ctx
3849 .delete(socket)
3850 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08003851 }
3852
3853 // This line implicitly drops the socket at `index` when it gets returned by
3854 // `swap_remove`. After this line, the socket at `index` is not the one from
3855 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003856 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003857 control_tubes.swap_remove(index);
3858 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003859 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003860 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003861 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003862 }
3863 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003864 }
3865
Zach Reiznerdc748482021-04-14 13:59:30 -07003866 kick_all_vcpus(
3867 &vcpu_handles,
3868 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003869 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07003870 );
Steven Richman11dc6712020-09-02 15:39:14 -07003871 for (handle, _) in vcpu_handles {
3872 if let Err(e) = handle.join() {
3873 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003874 }
3875 }
3876
Daniel Verkamp94c35272019-09-12 13:31:30 -07003877 // Explicitly drop the VM structure here to allow the devices to clean up before the
3878 // control sockets are closed when this function exits.
3879 mem::drop(linux);
3880
Zach Reizner19ad1f32019-12-12 18:58:50 -08003881 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003882 .set_canon_mode()
3883 .expect("failed to restore canonical mode for terminal");
3884
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003885 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08003886}